summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2021-08-25 13:02:35 +0300
committer David S. Miller <davem@davemloft.net> 2021-08-25 13:02:35 +0300
commit cb0f8b034c761346bac72abe2685f52e9f8c8bd0 (patch)
tree 6cdc228b9c8f136dae2c952e1382969770778ed2
parent d484dc2b21a71642665159d2f7c33828e637ab91 (diff)
parent 6bb3ab4913e97b083561d22f5afe1124b7ec4954 (diff)
download linux-cb0f8b034c761346bac72abe2685f52e9f8c8bd0.tar.xz
Merge branch 'mptcp-next'
Mat Martineau says: ==================== mptcp: Optimize output options and add MP_FAIL This patch set contains two groups of changes that we've been testing in the MPTCP tree. The first optimizes the code path and data structure for populating MPTCP option headers when transmitting. Patch 1 reorganizes code to reduce the number of conditionals that need to be evaluated in common cases. Patch 2 rearranges struct mptcp_out_options to save 80 bytes (on x86_64). The next five patches add partial support for the MP_FAIL option as defined in RFC 8684. MP_FAIL is an option header used to cleanly handle MPTCP checksum failures. When the MPTCP checksum detects an error in the MPTCP DSS header or the data mapped by that header, the receiver uses a TCP RST with MP_FAIL to close the subflow that experienced the error and provide associated MPTCP sequence number information to the peer. RFC 8684 also describes how a single-subflow connection can discard corrupt data and remain connected under certain conditions using MP_FAIL, but that feature is not implemented here. Patches 3-5 implement MP_FAIL transmit and receive, and integrate them with checksum validation. Patches 6 & 7 add MP_FAIL selftests and the MIBs required for those tests. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/mptcp.h 29
-rw-r--r-- net/mptcp/mib.c 2
-rw-r--r-- net/mptcp/mib.h 2
-rw-r--r-- net/mptcp/options.c 305
-rw-r--r-- net/mptcp/pm.c 5
-rw-r--r-- net/mptcp/protocol.h 20
-rw-r--r-- net/mptcp/subflow.c 16
-rwxr-xr-x tools/testing/selftests/net/mptcp/mptcp_join.sh 38
8 files changed, 295 insertions, 122 deletions
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 8b5af683a818..6026bbefbffd 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -58,10 +58,6 @@ struct mptcp_addr_info {
struct mptcp_out_options {
#if IS_ENABLED(CONFIG_MPTCP)
u16 suboptions;
- u64 sndr_key;
- u64 rcvr_key;
- u64 ahmac;
- struct mptcp_addr_info addr;
struct mptcp_rm_list rm_list;
u8 join_id;
u8 backup;
@@ -69,11 +65,26 @@ struct mptcp_out_options {
reset_transient:1,
csum_reqd:1,
allow_join_id0:1;
- u32 nonce;
- u64 thmac;
- u32 token;
- u8 hmac[20];
- struct mptcp_ext ext_copy;
+ union {
+ struct {
+ u64 sndr_key;
+ u64 rcvr_key;
+ };
+ struct {
+ struct mptcp_addr_info addr;
+ u64 ahmac;
+ };
+ struct {
+ struct mptcp_ext ext_copy;
+ u64 fail_seq;
+ };
+ struct {
+ u32 nonce;
+ u32 token;
+ u64 thmac;
+ u8 hmac[20];
+ };
+ };
#endif
};
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 3a7c4e7b2d79..b21ff9be04c6 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -44,6 +44,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
+ SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
+ SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 8ec16c991aac..ecd3d8b117e0 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -37,6 +37,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
+ MPTCP_MIB_MPFAILTX, /* Transmit a MP_FAIL */
+ MPTCP_MIB_MPFAILRX, /* Received a MP_FAIL */
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4c37f4b215ee..bec3ed82e253 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -336,6 +336,16 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->reset_reason = *ptr;
break;
+ case MPTCPOPT_MP_FAIL:
+ if (opsize != TCPOLEN_MPTCP_FAIL)
+ break;
+
+ ptr += 2;
+ mp_opt->mp_fail = 1;
+ mp_opt->fail_seq = get_unaligned_be64(ptr);
+ pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
+ break;
+
default:
break;
}
@@ -364,6 +374,7 @@ void mptcp_get_options(const struct sock *sk,
mp_opt->reset = 0;
mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
mp_opt->deny_join_id0 = 0;
+ mp_opt->mp_fail = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -592,6 +603,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
+ opts->suboptions = OPTION_MPTCP_DSS;
ret = true;
}
@@ -615,6 +627,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
+ opts->suboptions = OPTION_MPTCP_DSS;
WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
/* Add kind/length/subtype/flag overhead if mapping is not populated */
@@ -686,8 +699,13 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
if (drop_other_suboptions) {
pr_debug("drop other suboptions");
opts->suboptions = 0;
- opts->ext_copy.use_ack = 0;
- opts->ext_copy.use_map = 0;
+
+ /* note that e.g. DSS could have written into the memory
+ * aliased by ahmac, we must reset the field here
+ * to avoid appending the hmac even for ADD_ADDR echo
+ * options
+ */
+ opts->ahmac = 0;
*size -= opt_size;
}
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
@@ -739,7 +757,12 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- if (!subflow->send_mp_prio)
+ /* can't send MP_PRIO with MPC, as they share the same option space:
+ * 'backup'. Also it makes no sense at all
+ */
+ if (!subflow->send_mp_prio ||
+ ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+ OPTION_MPTCP_MPC_ACK) & opts->suboptions))
return false;
/* account for the trailing 'nop' option */
@@ -755,7 +778,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
return true;
}
-static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
+static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
@@ -763,12 +786,36 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
if (remaining < TCPOLEN_MPTCP_RST)
- return;
+ return false;
*size = TCPOLEN_MPTCP_RST;
opts->suboptions |= OPTION_MPTCP_RST;
opts->reset_transient = subflow->reset_transient;
opts->reset_reason = subflow->reset_reason;
+
+ return true;
+}
+
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+ unsigned int *size,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+ if (likely(!subflow->send_mp_fail))
+ return false;
+
+ if (remaining < TCPOLEN_MPTCP_FAIL)
+ return false;
+
+ *size = TCPOLEN_MPTCP_FAIL;
+ opts->suboptions |= OPTION_MPTCP_FAIL;
+ opts->fail_seq = subflow->map_seq;
+
+ pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+ return true;
}
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
@@ -787,15 +834,28 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
- mptcp_established_options_rst(sk, skb, size, remaining, opts);
+ if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ }
+ if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ }
return true;
}
snd_data_fin = mptcp_data_fin_enabled(msk);
if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true;
- else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
ret = true;
+ if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ return true;
+ }
+ }
/* we reserved enough space for the above options, and exceeding the
* TCP option space would be fatal
@@ -1096,6 +1156,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mp_opt.mp_prio = 0;
}
+ if (mp_opt.mp_fail) {
+ mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
+ mp_opt.mp_fail = 0;
+ }
+
if (mp_opt.reset) {
subflow->reset_seen = 1;
subflow->reset_reason = mp_opt.reset_reason;
@@ -1198,8 +1264,88 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
- if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
- OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
+ if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_fail = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+ TCPOLEN_MPTCP_FAIL,
+ 0, 0);
+ put_unaligned_be64(opts->fail_seq, ptr);
+ ptr += 2;
+ }
+
+ /* RST is mutually exclusive with everything else */
+ if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
+ *ptr++ = mptcp_option(MPTCPOPT_RST,
+ TCPOLEN_MPTCP_RST,
+ opts->reset_transient,
+ opts->reset_reason);
+ return;
+ }
+
+ /* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see
+ * mptcp_established_options*()
+ */
+ if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
+ struct mptcp_ext *mpext = &opts->ext_copy;
+ u8 len = TCPOLEN_MPTCP_DSS_BASE;
+ u8 flags = 0;
+
+ if (mpext->use_ack) {
+ flags = MPTCP_DSS_HAS_ACK;
+ if (mpext->ack64) {
+ len += TCPOLEN_MPTCP_DSS_ACK64;
+ flags |= MPTCP_DSS_ACK64;
+ } else {
+ len += TCPOLEN_MPTCP_DSS_ACK32;
+ }
+ }
+
+ if (mpext->use_map) {
+ len += TCPOLEN_MPTCP_DSS_MAP64;
+
+ /* Use only 64-bit mapping flags for now, add
+ * support for optional 32-bit mappings later.
+ */
+ flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
+ if (mpext->data_fin)
+ flags |= MPTCP_DSS_DATA_FIN;
+
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ }
+
+ *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
+
+ if (mpext->use_ack) {
+ if (mpext->ack64) {
+ put_unaligned_be64(mpext->data_ack, ptr);
+ ptr += 2;
+ } else {
+ put_unaligned_be32(mpext->data_ack32, ptr);
+ ptr += 1;
+ }
+ }
+
+ if (mpext->use_map) {
+ put_unaligned_be64(mpext->data_seq, ptr);
+ ptr += 2;
+ put_unaligned_be32(mpext->subflow_seq, ptr);
+ ptr += 1;
+ if (opts->csum_reqd) {
+ put_unaligned_be32(mpext->data_len << 16 |
+ mptcp_make_csum(mpext), ptr);
+ } else {
+ put_unaligned_be32(mpext->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
+ }
+ } else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+ OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
u8 len, flag = MPTCP_CAP_HMAC_SHA256;
if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
@@ -1246,10 +1392,31 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
ptr += 1;
- }
-mp_capable_done:
- if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
+ /* MPC is additionally mutually exclusive with MP_PRIO */
+ goto mp_capable_done;
+ } else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYN,
+ opts->backup, opts->join_id);
+ put_unaligned_be32(opts->token, ptr);
+ ptr += 1;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYNACK,
+ opts->backup, opts->join_id);
+ put_unaligned_be64(opts->thmac, ptr);
+ ptr += 2;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
+ memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
+ ptr += 5;
+ } else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
u8 echo = MPTCP_ADDR_ECHO;
@@ -1307,6 +1474,19 @@ mp_capable_done:
}
}
+ if (OPTION_MPTCP_PRIO & opts->suboptions) {
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_prio = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
+ TCPOLEN_MPTCP_PRIO,
+ opts->backup, TCPOPT_NOP);
+ }
+
+mp_capable_done:
if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
u8 i = 1;
@@ -1327,107 +1507,6 @@ mp_capable_done:
}
}
- if (OPTION_MPTCP_PRIO & opts->suboptions) {
- const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
-
- subflow = mptcp_subflow_ctx(ssk);
- subflow->send_mp_prio = 0;
-
- *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
- TCPOLEN_MPTCP_PRIO,
- opts->backup, TCPOPT_NOP);
- }
-
- if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYN,
- opts->backup, opts->join_id);
- put_unaligned_be32(opts->token, ptr);
- ptr += 1;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- }
-
- if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYNACK,
- opts->backup, opts->join_id);
- put_unaligned_be64(opts->thmac, ptr);
- ptr += 2;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- }
-
- if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
- memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
- ptr += 5;
- }
-
- if (OPTION_MPTCP_RST & opts->suboptions)
- *ptr++ = mptcp_option(MPTCPOPT_RST,
- TCPOLEN_MPTCP_RST,
- opts->reset_transient,
- opts->reset_reason);
-
- if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
- struct mptcp_ext *mpext = &opts->ext_copy;
- u8 len = TCPOLEN_MPTCP_DSS_BASE;
- u8 flags = 0;
-
- if (mpext->use_ack) {
- flags = MPTCP_DSS_HAS_ACK;
- if (mpext->ack64) {
- len += TCPOLEN_MPTCP_DSS_ACK64;
- flags |= MPTCP_DSS_ACK64;
- } else {
- len += TCPOLEN_MPTCP_DSS_ACK32;
- }
- }
-
- if (mpext->use_map) {
- len += TCPOLEN_MPTCP_DSS_MAP64;
-
- /* Use only 64-bit mapping flags for now, add
- * support for optional 32-bit mappings later.
- */
- flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
- if (mpext->data_fin)
- flags |= MPTCP_DSS_DATA_FIN;
-
- if (opts->csum_reqd)
- len += TCPOLEN_MPTCP_DSS_CHECKSUM;
- }
-
- *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
-
- if (mpext->use_ack) {
- if (mpext->ack64) {
- put_unaligned_be64(mpext->data_ack, ptr);
- ptr += 2;
- } else {
- put_unaligned_be32(mpext->data_ack32, ptr);
- ptr += 1;
- }
- }
-
- if (mpext->use_map) {
- put_unaligned_be64(mpext->data_seq, ptr);
- ptr += 2;
- put_unaligned_be32(mpext->subflow_seq, ptr);
- ptr += 1;
- if (opts->csum_reqd) {
- put_unaligned_be32(mpext->data_len << 16 |
- mptcp_make_csum(mpext), ptr);
- } else {
- put_unaligned_be32(mpext->data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
- }
- }
- }
-
if (tp)
mptcp_set_rwin(tp);
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index da0c4c925350..6ab386ff3294 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -249,6 +249,11 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
}
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
+{
+ pr_debug("fail_seq=%llu", fail_seq);
+}
+
/* path manager helpers */
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7cd3d5979bcd..57a50b1194a9 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -26,6 +26,8 @@
#define OPTION_MPTCP_FASTCLOSE BIT(8)
#define OPTION_MPTCP_PRIO BIT(9)
#define OPTION_MPTCP_RST BIT(10)
+#define OPTION_MPTCP_DSS BIT(11)
+#define OPTION_MPTCP_FAIL BIT(12)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
@@ -67,6 +69,7 @@
#define TCPOLEN_MPTCP_PRIO_ALIGN 4
#define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4
+#define TCPOLEN_MPTCP_FAIL 12
#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
@@ -137,6 +140,7 @@ struct mptcp_options_received {
add_addr : 1,
rm_addr : 1,
mp_prio : 1,
+ mp_fail : 1,
echo : 1,
csum_reqd : 1,
backup : 1,
@@ -158,6 +162,7 @@ struct mptcp_options_received {
u64 ahmac;
u8 reset_reason:4;
u8 reset_transient:1;
+ u64 fail_seq;
};
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -428,6 +433,7 @@ struct mptcp_subflow_context {
mpc_map : 1,
backup : 1,
send_mp_prio : 1,
+ send_mp_fail : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
@@ -608,6 +614,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
+static inline bool mptcp_has_another_subflow(struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ mptcp_for_each_subflow(msk, tmp) {
+ if (tmp != subflow)
+ return true;
+ }
+
+ return false;
+}
+
void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
@@ -722,6 +741,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
u8 bkup);
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8c43aa14897a..54b7ffc21861 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -910,6 +910,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
if (unlikely(csum_fold(csum))) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+ subflow->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
}
@@ -1157,6 +1159,20 @@ no_data:
fallback:
/* RFC 8684 section 3.7. */
+ if (subflow->send_mp_fail) {
+ if (mptcp_has_another_subflow(ssk)) {
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+ }
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, 0);
+ return true;
+ }
+
if (subflow->mp_join || subflow->fully_established) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 7b3e6cc56935..255793c5ac4f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -578,6 +578,43 @@ chk_csum_nr()
fi
}
+chk_fail_nr()
+{
+ local mp_fail_nr_tx=$1
+ local mp_fail_nr_rx=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "ftx"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$mp_fail_nr_tx" ]; then
+ echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - frx "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$mp_fail_nr_rx" ]; then
+ echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
chk_join_nr()
{
local msg="$1"
@@ -627,6 +664,7 @@ chk_join_nr()
fi
if [ $checksum -eq 1 ]; then
chk_csum_nr
+ chk_fail_nr 0 0
fi
}