From 5cb2cb3cb20cb2618c877a50db3c013449cc4e75 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:34 +0800 Subject: net: introduce rstreason to detect why the RST is sent Add a new standalone file for the easy future extension to support both active reset and passive reset in the TCP/DCCP/MPTCP protocols. This patch only does the preparations for reset reason mechanism, nothing else changes. The reset reasons are divided into three parts: 1) reuse drop reasons for passive reset in TCP 2) our own independent reasons which aren't relying on other reasons at all 3) reuse MP_TCPRST option for MPTCP The benefits of a standalone reset reason are listed here: 1) it can cover more than one case, such as reset reasons in MPTCP, active reset reasons. 2) people can easily/fastly understand and maintain this mechanism. 3) we get unified format of output with prefix stripped. 4) more new reset reasons are on the way ... I will implement the basic codes of active/passive reset reason in those three protocols, which are not complete for this moment. For passive reset part in TCP, I only introduce the NO_SOCKET common case which could be set as an example. After this series applied, it will have the ability to open a new gate to let other people contribute more reasons into it :) Signed-off-by: Jason Xing Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/rstreason.h | 106 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 include/net/rstreason.h diff --git a/include/net/rstreason.h b/include/net/rstreason.h new file mode 100644 index 000000000000..bc53b5a24505 --- /dev/null +++ b/include/net/rstreason.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_RSTREASON_H +#define _LINUX_RSTREASON_H +#include +#include + +#define DEFINE_RST_REASON(FN, FNe) \ + FN(NOT_SPECIFIED) \ + FN(NO_SOCKET) \ + FN(MPTCP_RST_EUNSPEC) \ + FN(MPTCP_RST_EMPTCP) \ + FN(MPTCP_RST_ERESOURCE) \ + FN(MPTCP_RST_EPROHIBIT) \ + FN(MPTCP_RST_EWQ2BIG) \ + FN(MPTCP_RST_EBADPERF) \ + FN(MPTCP_RST_EMIDDLEBOX) \ + FN(ERROR) \ + FNe(MAX) + +/** + * enum sk_rst_reason - the reasons of socket reset + * + * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols. + * + * There are three parts in order: + * 1) skb drop reasons: relying on drop reasons for such as passive reset + * 2) independent reset reasons: such as active reset reasons + * 3) reset reasons in MPTCP: only for MPTCP use + */ +enum sk_rst_reason { + /* Refer to include/net/dropreason-core.h + * Rely on skb drop reasons because it indicates exactly why RST + * could happen. + */ + /** @SK_RST_REASON_NOT_SPECIFIED: reset reason is not specified */ + SK_RST_REASON_NOT_SPECIFIED, + /** @SK_RST_REASON_NO_SOCKET: no valid socket that can be used */ + SK_RST_REASON_NO_SOCKET, + + /* Copy from include/uapi/linux/mptcp.h. + * These reset fields will not be changed since they adhere to + * RFC 8684. So do not touch them. I'm going to list each definition + * of them respectively. + */ + /** + * @SK_RST_REASON_MPTCP_RST_EUNSPEC: Unspecified error. + * This is the default error; it implies that the subflow is no + * longer available. The presence of this option shows that the + * RST was generated by an MPTCP-aware device. + */ + SK_RST_REASON_MPTCP_RST_EUNSPEC, + /** + * @SK_RST_REASON_MPTCP_RST_EMPTCP: MPTCP-specific error. + * An error has been detected in the processing of MPTCP options. + * This is the usual reason code to return in the cases where a RST + * is being sent to close a subflow because of an invalid response. + */ + SK_RST_REASON_MPTCP_RST_EMPTCP, + /** + * @SK_RST_REASON_MPTCP_RST_ERESOURCE: Lack of resources. + * This code indicates that the sending host does not have enough + * resources to support the terminated subflow. + */ + SK_RST_REASON_MPTCP_RST_ERESOURCE, + /** + * @SK_RST_REASON_MPTCP_RST_EPROHIBIT: Administratively prohibited. + * This code indicates that the requested subflow is prohibited by + * the policies of the sending host. + */ + SK_RST_REASON_MPTCP_RST_EPROHIBIT, + /** + * @SK_RST_REASON_MPTCP_RST_EWQ2BIG: Too much outstanding data. + * This code indicates that there is an excessive amount of data + * that needs to be transmitted over the terminated subflow while + * having already been acknowledged over one or more other subflows. + * This may occur if a path has been unavailable for a short period + * and it is more efficient to reset and start again than it is to + * retransmit the queued data. + */ + SK_RST_REASON_MPTCP_RST_EWQ2BIG, + /** + * @SK_RST_REASON_MPTCP_RST_EBADPERF: Unacceptable performance. + * This code indicates that the performance of this subflow was + * too low compared to the other subflows of this Multipath TCP + * connection. + */ + SK_RST_REASON_MPTCP_RST_EBADPERF, + /** + * @SK_RST_REASON_MPTCP_RST_EMIDDLEBOX: Middlebox interference. + * Middlebox interference has been detected over this subflow, + * making MPTCP signaling invalid. For example, this may be sent + * if the checksum does not validate. + */ + SK_RST_REASON_MPTCP_RST_EMIDDLEBOX, + + /** @SK_RST_REASON_ERROR: unexpected error happens */ + SK_RST_REASON_ERROR, + + /** + * @SK_RST_REASON_MAX: Maximum of socket reset reasons. + * It shouldn't be used as a real 'reason'. + */ + SK_RST_REASON_MAX, +}; +#endif -- cgit v1.2.3 From 6be49deaa09576c141002a2e6f816a1709bc2c86 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:35 +0800 Subject: rstreason: prepare for passive reset Adjust the parameter and support passing reason of reset which is for now NOT_SPECIFIED. No functional changes. Signed-off-by: Jason Xing Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/request_sock.h | 4 +++- net/dccp/ipv4.c | 10 ++++++---- net/dccp/ipv6.c | 10 ++++++---- net/dccp/minisocks.c | 3 ++- net/ipv4/tcp_ipv4.c | 12 +++++++----- net/ipv4/tcp_minisocks.c | 3 ++- net/ipv6/tcp_ipv6.c | 15 +++++++++------ net/mptcp/subflow.c | 8 +++++--- 8 files changed, 40 insertions(+), 25 deletions(-) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 004e651e6067..bdc737832da6 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -18,6 +18,7 @@ #include #include +#include struct request_sock; struct sk_buff; @@ -34,7 +35,8 @@ struct request_sock_ops { void (*send_ack)(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(const struct sock *sk, - struct sk_buff *skb); + struct sk_buff *skb, + enum sk_rst_reason reason); void (*destructor)(struct request_sock *req); void (*syn_ack_timeout)(const struct request_sock *req); }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9fc9cea4c251..ff41bd6f99c3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "ackvec.h" #include "ccid.h" @@ -521,7 +522,8 @@ out: return err; } -static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) +static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb, + enum sk_rst_reason reason) { int err; const struct iphdr *rxiph; @@ -706,7 +708,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v4_ctl_send_reset(sk, skb); + dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); kfree_skb(skb); return 0; } @@ -869,7 +871,7 @@ lookup: if (nsk == sk) { reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { - dccp_v4_ctl_send_reset(sk, skb); + dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } else { sock_put(sk); @@ -909,7 +911,7 @@ no_dccp_socket: if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v4_ctl_send_reset(sk, skb); + dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c8ca703dc331..85f4b8fdbe5e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "dccp.h" #include "ipv6.h" @@ -256,7 +257,8 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet_rsk(req)->pktopts); } -static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) +static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb, + enum sk_rst_reason reason) { const struct ipv6hdr *rxip6h; struct sk_buff *skb; @@ -656,7 +658,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v6_ctl_send_reset(sk, skb); + dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); discard: if (opt_skb != NULL) __kfree_skb(opt_skb); @@ -762,7 +764,7 @@ lookup: if (nsk == sk) { reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { - dccp_v6_ctl_send_reset(sk, skb); + dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } else { sock_put(sk); @@ -801,7 +803,7 @@ no_dccp_socket: if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v6_ctl_send_reset(sk, skb); + dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 64d805b27add..251a57cf5822 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "ackvec.h" #include "ccid.h" @@ -202,7 +203,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) - req->rsk_ops->send_reset(sk, skb); + req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); inet_csk_reqsk_queue_drop(sk, req); out: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e06f0cd04f7e..202f6dfa958b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -723,7 +724,8 @@ out: * Exception: precedence violation. We do not implement it in any case. */ -static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) +static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, + enum sk_rst_reason reason) { const struct tcphdr *th = tcp_hdr(skb); struct { @@ -1934,7 +1936,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - tcp_v4_send_reset(rsk, skb); + tcp_v4_send_reset(rsk, skb, SK_RST_REASON_NOT_SPECIFIED); discard: kfree_skb_reason(skb, reason); /* Be careful here. If this function gets more complicated and @@ -2285,7 +2287,7 @@ lookup: } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { - tcp_v4_send_reset(nsk, skb); + tcp_v4_send_reset(nsk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } sock_put(sk); @@ -2364,7 +2366,7 @@ csum_error: bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { - tcp_v4_send_reset(NULL, skb); + tcp_v4_send_reset(NULL, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: @@ -2416,7 +2418,7 @@ do_time_wait: tcp_v4_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v4_send_reset(sk, skb); + tcp_v4_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS:; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 146c061145b4..7d543569a180 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -22,6 +22,7 @@ #include #include #include +#include static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { @@ -878,7 +879,7 @@ embryonic_reset: * avoid becoming vulnerable to outside attack aiming at * resetting legit local connections. */ - req->rsk_ops->send_reset(sk, skb); + req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); } else if (fastopen) { /* received a valid RST pkt */ reqsk_fastopen_remove(sk, req, true); tcp_reset(sk, skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bb7c3caf4f85..017f6293b5f4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -69,7 +70,8 @@ #include -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, + enum sk_rst_reason reason); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -1008,7 +1010,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 kfree_skb(buff); } -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, + enum sk_rst_reason reason) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -1677,7 +1680,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - tcp_v6_send_reset(sk, skb); + tcp_v6_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); discard: if (opt_skb) __kfree_skb(opt_skb); @@ -1862,7 +1865,7 @@ lookup: } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { - tcp_v6_send_reset(nsk, skb); + tcp_v6_send_reset(nsk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } sock_put(sk); @@ -1939,7 +1942,7 @@ csum_error: bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { - tcp_v6_send_reset(NULL, skb); + tcp_v6_send_reset(NULL, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: @@ -1995,7 +1998,7 @@ do_time_wait: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v6_send_reset(sk, skb); + tcp_v6_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index b94d1dca1094..32fe2ef36d56 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -20,6 +20,8 @@ #include #endif #include +#include + #include "protocol.h" #include "mib.h" @@ -308,7 +310,7 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, dst_release(dst); if (!req->syncookie) - tcp_request_sock_ops.send_reset(sk, skb); + tcp_request_sock_ops.send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); return NULL; } @@ -376,7 +378,7 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk, dst_release(dst); if (!req->syncookie) - tcp6_request_sock_ops.send_reset(sk, skb); + tcp6_request_sock_ops.send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); return NULL; } #endif @@ -911,7 +913,7 @@ dispose_child: tcp_rsk(req)->drop_req = true; inet_csk_prepare_for_destroy_sock(child); tcp_done(child); - req->rsk_ops->send_reset(sk, skb); + req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); /* The last child reference will be released by the caller */ return child; -- cgit v1.2.3 From 5691276b39daf90294c6a81fb6d62d667f634c92 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:36 +0800 Subject: rstreason: prepare for active reset Like what we did to passive reset: only passing possible reset reason in each active reset path. No functional changes. Signed-off-by: Jason Xing Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/tcp.h | 3 ++- net/ipv4/tcp.c | 15 ++++++++++----- net/ipv4/tcp_output.c | 3 ++- net/ipv4/tcp_timer.c | 9 ++++++--- net/mptcp/protocol.c | 4 +++- net/mptcp/subflow.c | 5 +++-- 6 files changed, 26 insertions(+), 13 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index ffc9371fe9de..a9eb21251195 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -670,7 +670,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, void tcp_send_probe0(struct sock *); int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); -void tcp_send_active_reset(struct sock *sk, gfp_t priority); +void tcp_send_active_reset(struct sock *sk, gfp_t priority, + enum sk_rst_reason reason); int tcp_send_synack(struct sock *); void tcp_push_one(struct sock *, unsigned int mss_now); void __tcp_send_ack(struct sock *sk, u32 rcv_nxt); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f23b97777ea5..4ec0f4feee00 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -275,6 +275,7 @@ #include #include #include +#include #include #include @@ -2811,7 +2812,8 @@ void __tcp_close(struct sock *sk, long timeout) /* Unread data was tossed, zap the connection. */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, sk->sk_allocation); + tcp_send_active_reset(sk, sk->sk_allocation, + SK_RST_REASON_NOT_SPECIFIED); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); @@ -2885,7 +2887,8 @@ adjudge_to_death: struct tcp_sock *tp = tcp_sk(sk); if (READ_ONCE(tp->linger2) < 0) { tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONLINGER); } else { @@ -2903,7 +2906,8 @@ adjudge_to_death: if (sk->sk_state != TCP_CLOSE) { if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } else if (!check_net(sock_net(sk))) { @@ -3007,7 +3011,7 @@ int tcp_disconnect(struct sock *sk, int flags) /* The last check adjusts for discrepancy of Linux wrt. RFC * states */ - tcp_send_active_reset(sk, gfp_any()); + tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED); WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) WRITE_ONCE(sk->sk_err, ECONNRESET); @@ -4564,7 +4568,8 @@ int tcp_abort(struct sock *sk, int err) smp_wmb(); sk_error_report(sk); if (tcp_need_reset(sk->sk_state)) - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_done(sk); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ce59e4499b66..41c352bf3394 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3615,7 +3615,8 @@ void tcp_send_fin(struct sock *sk) * was unread data in the receive queue. This behavior is recommended * by RFC 2525, section 2.17. -DaveM */ -void tcp_send_active_reset(struct sock *sk, gfp_t priority) +void tcp_send_active_reset(struct sock *sk, gfp_t priority, + enum sk_rst_reason reason) { struct sk_buff *skb; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 976db57b95d4..83fe7f62f7f1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,6 +22,7 @@ #include #include #include +#include static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { @@ -127,7 +128,8 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) (!tp->snd_wnd && !tp->packets_out)) do_reset = true; if (do_reset) - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; @@ -768,7 +770,7 @@ static void tcp_keepalive_timer (struct timer_list *t) goto out; } } - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); goto death; } @@ -795,7 +797,8 @@ static void tcp_keepalive_timer (struct timer_list *t) icsk->icsk_probes_out > 0) || (user_timeout == 0 && icsk->icsk_probes_out >= keepalive_probes(tp))) { - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_write_err(sk); goto out; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f8bc34f0d973..065967086492 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -21,6 +21,7 @@ #endif #include #include +#include #include #include "protocol.h" #include "mib.h" @@ -2569,7 +2570,8 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) slow = lock_sock_fast(tcp_sk); if (tcp_sk->sk_state != TCP_CLOSE) { - tcp_send_active_reset(tcp_sk, GFP_ATOMIC); + tcp_send_active_reset(tcp_sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_set_state(tcp_sk, TCP_CLOSE); } unlock_sock_fast(tcp_sk, slow); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 32fe2ef36d56..ac867d277860 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -414,7 +414,7 @@ void mptcp_subflow_reset(struct sock *ssk) /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); - tcp_send_active_reset(ssk, GFP_ATOMIC); + tcp_send_active_reset(ssk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); tcp_done(ssk); if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags)) mptcp_schedule_work(sk); @@ -1350,7 +1350,8 @@ reset: tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); - tcp_send_active_reset(ssk, GFP_ATOMIC); + tcp_send_active_reset(ssk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); WRITE_ONCE(subflow->data_avail, false); return false; } -- cgit v1.2.3 From 120391ef9ca8fe8f82ea3f2961ad802043468226 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:37 +0800 Subject: tcp: support rstreason for passive reset Reuse the dropreason logic to show the exact reason of tcp reset, so we can finally display the corresponding item in enum sk_reset_reason instead of reinventing new reset reasons. This patch replaces all the prior NOT_SPECIFIED reasons. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/rstreason.h | 15 +++++++++++++++ net/ipv4/tcp_ipv4.c | 11 +++++++---- net/ipv6/tcp_ipv6.c | 11 +++++++---- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/include/net/rstreason.h b/include/net/rstreason.h index bc53b5a24505..df3b6ac0c9b3 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -103,4 +103,19 @@ enum sk_rst_reason { */ SK_RST_REASON_MAX, }; + +/* Convert skb drop reasons to enum sk_rst_reason type */ +static inline enum sk_rst_reason +sk_rst_convert_drop_reason(enum skb_drop_reason reason) +{ + switch (reason) { + case SKB_DROP_REASON_NOT_SPECIFIED: + return SK_RST_REASON_NOT_SPECIFIED; + case SKB_DROP_REASON_NO_SOCKET: + return SK_RST_REASON_NO_SOCKET; + default: + /* If we don't have our own corresponding reason */ + return SK_RST_REASON_NOT_SPECIFIED; + } +} #endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 202f6dfa958b..621879ba916d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1936,7 +1936,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - tcp_v4_send_reset(rsk, skb, SK_RST_REASON_NOT_SPECIFIED); + tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason)); discard: kfree_skb_reason(skb, reason); /* Be careful here. If this function gets more complicated and @@ -2287,7 +2287,10 @@ lookup: } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { - tcp_v4_send_reset(nsk, skb, SK_RST_REASON_NOT_SPECIFIED); + enum sk_rst_reason rst_reason; + + rst_reason = sk_rst_convert_drop_reason(drop_reason); + tcp_v4_send_reset(nsk, skb, rst_reason); goto discard_and_relse; } sock_put(sk); @@ -2366,7 +2369,7 @@ csum_error: bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { - tcp_v4_send_reset(NULL, skb, SK_RST_REASON_NOT_SPECIFIED); + tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); } discard_it: @@ -2418,7 +2421,7 @@ do_time_wait: tcp_v4_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v4_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); + tcp_v4_send_reset(sk, skb, sk_rst_convert_drop_reason(drop_reason)); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS:; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 017f6293b5f4..317d7a6e6b01 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1680,7 +1680,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - tcp_v6_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); + tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); discard: if (opt_skb) __kfree_skb(opt_skb); @@ -1865,7 +1865,10 @@ lookup: } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { - tcp_v6_send_reset(nsk, skb, SK_RST_REASON_NOT_SPECIFIED); + enum sk_rst_reason rst_reason; + + rst_reason = sk_rst_convert_drop_reason(drop_reason); + tcp_v6_send_reset(nsk, skb, rst_reason); goto discard_and_relse; } sock_put(sk); @@ -1942,7 +1945,7 @@ csum_error: bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { - tcp_v6_send_reset(NULL, skb, SK_RST_REASON_NOT_SPECIFIED); + tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); } discard_it: @@ -1998,7 +2001,7 @@ do_time_wait: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v6_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); + tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(drop_reason)); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS: -- cgit v1.2.3 From 3e140491dd80d8643261a21efde3ce2ff6fb9fdf Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:38 +0800 Subject: mptcp: support rstreason for passive reset It relies on what reset options in the skb are as rfc8684 says. Reusing this logic can save us much energy. This patch replaces most of the prior NOT_SPECIFIED reasons. Signed-off-by: Jason Xing Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/mptcp/protocol.h | 27 +++++++++++++++++++++++++++ net/mptcp/subflow.c | 22 +++++++++++++++++----- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index fdfa843e2d88..252618859ee8 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -581,6 +581,33 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) WRITE_ONCE(subflow->local_id, -1); } +/* Convert reset reasons in MPTCP to enum sk_rst_reason type */ +static inline enum sk_rst_reason +sk_rst_convert_mptcp_reason(u32 reason) +{ + switch (reason) { + case MPTCP_RST_EUNSPEC: + return SK_RST_REASON_MPTCP_RST_EUNSPEC; + case MPTCP_RST_EMPTCP: + return SK_RST_REASON_MPTCP_RST_EMPTCP; + case MPTCP_RST_ERESOURCE: + return SK_RST_REASON_MPTCP_RST_ERESOURCE; + case MPTCP_RST_EPROHIBIT: + return SK_RST_REASON_MPTCP_RST_EPROHIBIT; + case MPTCP_RST_EWQ2BIG: + return SK_RST_REASON_MPTCP_RST_EWQ2BIG; + case MPTCP_RST_EBADPERF: + return SK_RST_REASON_MPTCP_RST_EBADPERF; + case MPTCP_RST_EMIDDLEBOX: + return SK_RST_REASON_MPTCP_RST_EMIDDLEBOX; + default: + /* It should not happen, or else errors may occur + * in MPTCP layer + */ + return SK_RST_REASON_ERROR; + } +} + static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) { diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ac867d277860..fb7abf2d01ca 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -309,8 +309,13 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, return dst; dst_release(dst); - if (!req->syncookie) - tcp_request_sock_ops.send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); + if (!req->syncookie) { + struct mptcp_ext *mpext = mptcp_get_ext(skb); + enum sk_rst_reason reason; + + reason = sk_rst_convert_mptcp_reason(mpext->reset_reason); + tcp_request_sock_ops.send_reset(sk, skb, reason); + } return NULL; } @@ -377,8 +382,13 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk, return dst; dst_release(dst); - if (!req->syncookie) - tcp6_request_sock_ops.send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); + if (!req->syncookie) { + struct mptcp_ext *mpext = mptcp_get_ext(skb); + enum sk_rst_reason reason; + + reason = sk_rst_convert_mptcp_reason(mpext->reset_reason); + tcp6_request_sock_ops.send_reset(sk, skb, reason); + } return NULL; } #endif @@ -783,6 +793,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_subflow_request_sock *subflow_req; struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; + enum sk_rst_reason reason; struct mptcp_sock *owner; struct sock *child; @@ -913,7 +924,8 @@ dispose_child: tcp_rsk(req)->drop_req = true; inet_csk_prepare_for_destroy_sock(child); tcp_done(child); - req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); + reason = sk_rst_convert_mptcp_reason(mptcp_get_ext(skb)->reset_reason); + req->rsk_ops->send_reset(sk, skb, reason); /* The last child reference will be released by the caller */ return child; -- cgit v1.2.3 From 215d40248bde5562a21e4c6cdeaeca0495c9365a Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:39 +0800 Subject: mptcp: introducing a helper into active reset logic Since we have mapped every mptcp reset reason definition in enum sk_rst_reason, introducing a new helper can cover some missing places where we have already set the subflow->reset_reason. Note: using SK_RST_REASON_NOT_SPECIFIED is the same as SK_RST_REASON_MPTCP_RST_EUNSPEC. They are both unknown. So we can convert it directly. Suggested-by: Paolo Abeni Signed-off-by: Jason Xing Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 4 +--- net/mptcp/protocol.h | 11 +++++++++++ net/mptcp/subflow.c | 6 ++---- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 065967086492..4b13ca362efa 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -21,7 +21,6 @@ #endif #include #include -#include #include #include "protocol.h" #include "mib.h" @@ -2570,8 +2569,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) slow = lock_sock_fast(tcp_sk); if (tcp_sk->sk_state != TCP_CLOSE) { - tcp_send_active_reset(tcp_sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + mptcp_send_active_reset_reason(tcp_sk); tcp_set_state(tcp_sk, TCP_CLOSE); } unlock_sock_fast(tcp_sk, slow); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 252618859ee8..cfc5f9c3f113 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -12,6 +12,7 @@ #include #include #include +#include #include "mptcp_pm_gen.h" @@ -608,6 +609,16 @@ sk_rst_convert_mptcp_reason(u32 reason) } } +static inline void +mptcp_send_active_reset_reason(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + enum sk_rst_reason reason; + + reason = sk_rst_convert_mptcp_reason(subflow->reset_reason); + tcp_send_active_reset(sk, GFP_ATOMIC, reason); +} + static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) { diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index fb7abf2d01ca..97ec44d1df30 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -20,7 +20,6 @@ #include #endif #include -#include #include "protocol.h" #include "mib.h" @@ -424,7 +423,7 @@ void mptcp_subflow_reset(struct sock *ssk) /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); - tcp_send_active_reset(ssk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); + mptcp_send_active_reset_reason(ssk); tcp_done(ssk); if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags)) mptcp_schedule_work(sk); @@ -1362,8 +1361,7 @@ reset: tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); - tcp_send_active_reset(ssk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + mptcp_send_active_reset_reason(ssk); WRITE_ONCE(subflow->data_avail, false); return false; } -- cgit v1.2.3 From b533fb9cf4f7c6ca2aa255a5a1fdcde49fff2b24 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:40 +0800 Subject: rstreason: make it work in trace world At last, we should let it work by introducing this reset reason in trace world. One of the possible expected outputs is: ... tcp_send_reset: skbaddr=xxx skaddr=xxx src=xxx dest=xxx state=TCP_ESTABLISHED reason=NOT_SPECIFIED Signed-off-by: Jason Xing Reviewed-by: Steven Rostedt (Google) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/trace/events/tcp.h | 26 ++++++++++++++++++++++---- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 5c04a61a11c2..49b5ee091cf6 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * tcp event with arguments sk and skb @@ -74,20 +75,32 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, TP_ARGS(sk, skb) ); +#undef FN +#define FN(reason) TRACE_DEFINE_ENUM(SK_RST_REASON_##reason); +DEFINE_RST_REASON(FN, FN) + +#undef FN +#undef FNe +#define FN(reason) { SK_RST_REASON_##reason, #reason }, +#define FNe(reason) { SK_RST_REASON_##reason, #reason } + /* * skb of trace_tcp_send_reset is the skb that caused RST. In case of * active reset, skb should be NULL */ TRACE_EVENT(tcp_send_reset, - TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_PROTO(const struct sock *sk, + const struct sk_buff *skb, + const enum sk_rst_reason reason), - TP_ARGS(sk, skb), + TP_ARGS(sk, skb, reason), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) + __field(enum sk_rst_reason, reason) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), @@ -113,14 +126,19 @@ TRACE_EVENT(tcp_send_reset, */ TP_STORE_ADDR_PORTS_SKB(skb, th, entry->daddr, entry->saddr); } + __entry->reason = reason; ), - TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s", + TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s reason=%s", __entry->skbaddr, __entry->skaddr, __entry->saddr, __entry->daddr, - __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN") + __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN", + __print_symbolic(__entry->reason, DEFINE_RST_REASON(FN, FNe))) ); +#undef FN +#undef FNe + /* * tcp event with arguments sk * diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 621879ba916d..0427deca3e0e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -871,7 +871,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, if (sk) arg.bound_dev_if = sk->sk_bound_dev_if; - trace_tcp_send_reset(sk, skb); + trace_tcp_send_reset(sk, skb, reason); BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != offsetof(struct inet_timewait_sock, tw_bound_dev_if)); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 41c352bf3394..ece4726c8751 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3641,7 +3641,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority, /* skb of trace_tcp_send_reset() keeps the skb that caused RST, * skb here is different to the troublesome skb, so use NULL */ - trace_tcp_send_reset(sk, NULL); + trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED); } /* Send a crossed SYN-ACK during socket establishment. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 317d7a6e6b01..77958adf2e16 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1133,7 +1133,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, label = ip6_flowlabel(ipv6h); } - trace_tcp_send_reset(sk, skb); + trace_tcp_send_reset(sk, skb, reason); tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, ipv6_get_dsfield(ipv6h), label, priority, txhash, -- cgit v1.2.3