summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-11-30 07:24:31 +0300
committerJakub Kicinski <kuba@kernel.org>2022-11-30 07:24:33 +0300
commit7f0c940be5c5f52b0a7acaf2b55df73337f5c7a8 (patch)
tree917eebb20ba632172411d69cd576d4d57efc5f89
parentf2bb566f5c977ff010baaa9e5e14d9a75b06e5f2 (diff)
parentca7ae89160434cd045a4795a235eb16587bd8f73 (diff)
downloadlinux-7f0c940be5c5f52b0a7acaf2b55df73337f5c7a8.tar.xz
Merge branch 'mptcp-msg_fastopen-and-tfo-listener-side-support'
Matthieu Baerts says:

====================
mptcp: MSG_FASTOPEN and TFO listener side support

Before this series, only the initiator of a connection was able to combine
both TCP FastOpen and MPTCP when using TCP_FASTOPEN_CONNECT socket option.

These new patches here add (in theory) the full support of TFO with MPTCP,
which means:

 - MSG_FASTOPEN sendmsg flag support (patch 1/8)
 - TFO support for the listener side (patches 2-5/8)
 - TCP_FASTOPEN socket option (patch 6/8)
 - TCP_FASTOPEN_KEY socket option (patch 7/8)

To support TFO for the server side, a few preparation patches are needed
(patches 2 to 5/8). Some of them were inspired by a previous work from
Benjamin Hesmans.

Note that TFO support with MPTCP has been validated with selftests (patch
8/8) but also with Packetdrill tests running with a modified but still very
WIP version supporting MPTCP. Both the modified tool and the tests are
available online:

  https://github.com/multipath-tcp/packetdrill/
====================

Link: https://lore.kernel.org/r/20221125222958.958636-1-matthieu.baerts@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/mptcp/Makefile2
-rw-r--r--net/mptcp/fastopen.c73
-rw-r--r--net/mptcp/options.c25
-rw-r--r--net/mptcp/protocol.c39
-rw-r--r--net/mptcp/protocol.h28
-rw-r--r--net/mptcp/sockopt.c9
-rw-r--r--net/mptcp/subflow.c105
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c171
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh21
9 files changed, 358 insertions, 115 deletions
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 6e7df47c9584..a3829ce548f9 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o pm_userspace.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
new file mode 100644
index 000000000000..d237d142171c
--- /dev/null
+++ b/net/mptcp/fastopen.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* MPTCP Fast Open Mechanism
+ *
+ * Copyright (c) 2021-2022, Dmytro SHYTYI
+ */
+
+#include "protocol.h"
+
+void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+ struct request_sock *req)
+{
+ struct sock *ssk = subflow->tcp_sock;
+ struct sock *sk = subflow->conn;
+ struct sk_buff *skb;
+ struct tcp_sock *tp;
+
+ tp = tcp_sk(ssk);
+
+ subflow->is_mptfo = 1;
+
+ skb = skb_peek(&ssk->sk_receive_queue);
+ if (WARN_ON_ONCE(!skb))
+ return;
+
+ /* dequeue the skb from sk receive queue */
+ __skb_unlink(skb, &ssk->sk_receive_queue);
+ skb_ext_reset(skb);
+ skb_orphan(skb);
+
+ /* We copy the fastopen data, but it doesn't belong to the mptcp sequence
+ * space, so we need to offset it in the subflow sequence, see mptcp_subflow_get_map_offset()
+ */
+ tp->copied_seq += skb->len;
+ subflow->ssn_offset += skb->len;
+
+ /* initialize a dummy sequence number, we will update it at MPC
+ * completion, if needed
+ */
+ MPTCP_SKB_CB(skb)->map_seq = -skb->len;
+ MPTCP_SKB_CB(skb)->end_seq = 0;
+ MPTCP_SKB_CB(skb)->offset = 0;
+ MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+
+ mptcp_data_lock(sk);
+
+ mptcp_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+
+ sk->sk_data_ready(sk);
+
+ mptcp_data_unlock(sk);
+}
+
+void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct sk_buff *skb;
+
+ mptcp_data_lock(sk);
+ skb = skb_peek_tail(&sk->sk_receive_queue);
+ if (skb) {
+ WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
+ pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx", sk,
+ MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
+ MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
+ MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
+ MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
+ }
+
+ pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
+ mptcp_data_unlock(sk);
+}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 30d289044e71..5ded85e2c374 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
{
u8 subtype = *ptr >> 4;
int expected_opsize;
+ u16 subopt;
u8 version;
u8 flags;
u8 i;
@@ -38,11 +39,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
else
expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
+ subopt = OPTION_MPTCP_MPC_ACK;
} else {
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) {
expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
- else
+ subopt = OPTION_MPTCP_MPC_SYNACK;
+ } else {
expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
+ subopt = OPTION_MPTCP_MPC_SYN;
+ }
}
/* Cfr RFC 8684 Section 3.3.0:
@@ -85,7 +90,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);
- mp_opt->suboptions |= OPTIONS_MPTCP_MPC;
+ mp_opt->suboptions |= subopt;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr);
ptr += 8;
@@ -934,7 +939,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
!subflow->request_join)
tcp_send_ack(ssk);
- goto fully_established;
+ goto check_notify;
}
/* we must process OoO packets before the first subflow is fully
@@ -945,17 +950,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
if (subflow->mp_join)
goto reset;
+ if (subflow->is_mptfo && mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+ goto set_fully_established;
return subflow->mp_capable;
}
- if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
- ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) {
+ if (subflow->remote_key_valid &&
+ (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
+ ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) {
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
- goto fully_established;
+ goto check_notify;
}
/* If the first established packet does not contain MP_CAPABLE + data
@@ -974,11 +982,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);
-fully_established:
+check_notify:
/* if the subflow is not already linked into the conn_list, we can't
* notify the PM: this subflow is still on the listener queue
* and the PM possibly acquiring the subflow lock could race with
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3722a8580b61..b0d387be500a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -36,15 +36,6 @@ struct mptcp6_sock {
};
#endif
-struct mptcp_skb_cb {
- u64 map_seq;
- u64 end_seq;
- u32 offset;
- u8 has_rxtstamp:1;
-};
-
-#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
-
enum {
MPTCP_CMSG_TS = BIT(0),
MPTCP_CMSG_INQ = BIT(1),
@@ -200,7 +191,7 @@ static void mptcp_rfree(struct sk_buff *skb)
mptcp_rmem_uncharge(sk, len);
}
-static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
+void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
skb->sk = sk;
@@ -1711,17 +1702,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int ret = 0;
long timeo;
- /* we don't support FASTOPEN yet */
- if (msg->msg_flags & MSG_FASTOPEN)
- return -EOPNOTSUPP;
-
/* silently ignore everything else */
- msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
+ msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN;
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
+ if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
+ msg->msg_flags & MSG_FASTOPEN))) {
int copied_syn = 0;
ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
@@ -3048,7 +3036,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
struct mptcp_sock *msk;
- u64 ack_seq;
if (!nsk)
return NULL;
@@ -3074,15 +3061,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
- if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
- msk->can_ack = true;
- msk->remote_key = mp_opt->sndr_key;
- mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
- ack_seq++;
- WRITE_ONCE(msk->ack_seq, ack_seq);
- atomic64_set(&msk->rcv_wnd_sent, ack_seq);
- }
-
sock_reset_flag(nsk, SOCK_RCU_FREE);
/* will be fully established after successful MPC subflow creation */
inet_sk_state_store(nsk, TCP_SYN_RECV);
@@ -3355,7 +3333,6 @@ void mptcp_finish_connect(struct sock *ssk)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
struct sock *sk;
- u64 ack_seq;
subflow = mptcp_subflow_ctx(ssk);
sk = subflow->conn;
@@ -3363,22 +3340,16 @@ void mptcp_finish_connect(struct sock *ssk)
pr_debug("msk=%p, token=%u", sk, subflow->token);
- mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
- ack_seq++;
- subflow->map_seq = ack_seq;
+ subflow->map_seq = subflow->iasn;
subflow->map_subflow_seq = 1;
/* the socket is not connected yet, no msk/subflow ops can access/race
* accessing the field below
*/
- WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->local_key, subflow->local_key);
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
- atomic64_set(&msk->rcv_wnd_sent, ack_seq);
mptcp_pm_new_connection(msk, ssk, 0);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 6a09ab99a12d..8b4379a2cd85 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -126,6 +126,15 @@
#define MPTCP_CONNECTED 6
#define MPTCP_RESET_SCHEDULER 7
+struct mptcp_skb_cb {
+ u64 map_seq;
+ u64 end_seq;
+ u32 offset;
+ u8 has_rxtstamp:1;
+};
+
+#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
+
static inline bool before64(__u64 seq1, __u64 seq2)
{
return (__s64)(seq1 - seq2) < 0;
@@ -467,17 +476,22 @@ struct mptcp_subflow_context {
send_fastclose : 1,
send_infinite_map : 1,
rx_eof : 1,
- can_ack : 1, /* only after processing the remote a key */
+ remote_key_valid : 1, /* received the peer key */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
local_id_valid : 1, /* local_id is correctly initialized */
- valid_csum_seen : 1; /* at least one csum validated */
+ valid_csum_seen : 1, /* at least one csum validated */
+ is_mptfo : 1, /* subflow is doing TFO */
+ __unused : 8;
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
u32 remote_token;
- u8 hmac[MPTCPOPT_HMAC_LEN];
+ union {
+ u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
+ u64 iasn; /* initial ack sequence number, MPC subflows only */
+ };
u8 local_id;
u8 remote_id;
u8 reset_seen:1;
@@ -603,7 +617,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- struct mptcp_options_received *mp_opt);
+ const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -619,6 +633,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
+void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
@@ -826,6 +841,11 @@ void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
+void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
+void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+ struct request_sock *req);
+
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.addr_signal) &
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index f62f6483ef77..a47423ebb33a 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -559,7 +559,9 @@ static bool mptcp_supported_sockopt(int level, int optname)
case TCP_NOTSENT_LOWAT:
case TCP_TX_DELAY:
case TCP_INQ:
+ case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
+ case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE:
return true;
}
@@ -569,9 +571,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
* TCP_REPAIR_WINDOW are not supported, better avoid this mess
*/
- /* TCP_FASTOPEN_KEY, TCP_FASTOPEN are not supported because
- * fastopen for the listener side is currently unsupported
- */
}
return false;
}
@@ -801,7 +800,9 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
/* See tcp.c: TCP_DEFER_ACCEPT does not fail */
mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
return 0;
+ case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
+ case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE:
return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
@@ -1166,7 +1167,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_INFO:
case TCP_CC_INFO:
case TCP_DEFER_ACCEPT:
+ case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
+ case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE:
return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index f3c336872475..29904303f5c2 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -307,7 +307,48 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
return NULL;
}
+static void subflow_prep_synack(const struct sock *sk, struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ /* clear tstamp_ok, as needed depending on cookie */
+ if (foc && foc->len > -1)
+ ireq->tstamp_ok = 0;
+
+ if (synack_type == TCP_SYNACK_FASTOPEN)
+ mptcp_fastopen_subflow_synack_set_params(subflow, req);
+}
+
+static int subflow_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
+ struct flowi *fl,
+ struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
+{
+ subflow_prep_synack(sk, req, foc, synack_type);
+
+ return tcp_request_sock_ipv4_ops.send_synack(sk, dst, fl, req, foc,
+ synack_type, syn_skb);
+}
+
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+ struct flowi *fl,
+ struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
+{
+ subflow_prep_synack(sk, req, foc, synack_type);
+
+ return tcp_request_sock_ipv6_ops.send_synack(sk, dst, fl, req, foc,
+ synack_type, syn_skb);
+}
+
static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
@@ -392,11 +433,33 @@ static void mptcp_set_connected(struct sock *sk)
mptcp_data_unlock(sk);
}
+static void subflow_set_remote_key(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ /* active MPC subflow will reach here multiple times:
+ * at subflow_finish_connect() time and at 4th ack time
+ */
+ if (subflow->remote_key_valid)
+ return;
+
+ subflow->remote_key_valid = 1;
+ subflow->remote_key = mp_opt->sndr_key;
+ mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn);
+ subflow->iasn++;
+
+ WRITE_ONCE(msk->remote_key, subflow->remote_key);
+ WRITE_ONCE(msk->ack_seq, subflow->iasn);
+ WRITE_ONCE(msk->can_ack, true);
+ atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -404,6 +467,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (subflow->conn_finished)
return;
+ msk = mptcp_sk(parent);
mptcp_propagate_sndbuf(parent, sk);
subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
@@ -416,19 +480,16 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
- pr_fallback(mptcp_sk(subflow->conn));
+ pr_fallback(msk);
goto fallback;
}
if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD)
- WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
+ WRITE_ONCE(msk->csum_enabled, true);
if (mp_opt.deny_join_id0)
- WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow->can_ack = 1;
- subflow->remote_key = mp_opt.sndr_key;
- pr_debug("subflow=%p, remote_key=%llu", subflow,
- subflow->remote_key);
+ subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
mptcp_set_connected(parent);
@@ -466,7 +527,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
- if (subflow_use_different_dport(mptcp_sk(parent), sk)) {
+ if (subflow_use_different_dport(msk, sk)) {
pr_debug("synack inet_dport=%d %d",
ntohs(inet_sk(sk)->inet_dport),
ntohs(inet_sk(parent)->inet_dport));
@@ -474,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(mptcp_sk(parent), sk);
+ mptcp_rcv_space_init(msk, sk);
mptcp_set_connected(parent);
}
return;
@@ -637,14 +698,16 @@ static void subflow_drop_ctx(struct sock *ssk)
}
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- struct mptcp_options_received *mp_opt)
+ const struct mptcp_options_received *mp_opt)
{
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- subflow->remote_key = mp_opt->sndr_key;
+ subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
- subflow->can_ack = 1;
WRITE_ONCE(msk->fully_established, true);
+
+ if (subflow->is_mptfo)
+ mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -760,7 +823,7 @@ create_child:
/* with OoO packets we can reach here without ingress
* mpc option
*/
- if (mp_opt.suboptions & OPTIONS_MPTCP_MPC)
+ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK)
mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -1198,16 +1261,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
if (WARN_ON_ONCE(!skb))
goto no_data;
- /* if msk lacks the remote key, this subflow must provide an
- * MP_CAPABLE-based mapping
- */
- if (unlikely(!READ_ONCE(msk->can_ack))) {
- if (!subflow->mpc_map)
- goto fallback;
- WRITE_ONCE(msk->remote_key, subflow->remote_key);
- WRITE_ONCE(msk->ack_seq, subflow->map_seq);
- WRITE_ONCE(msk->can_ack, true);
- }
+ if (unlikely(!READ_ONCE(msk->can_ack)))
+ goto fallback;
old_ack = READ_ONCE(msk->ack_seq);
ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
@@ -1480,6 +1535,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
+ subflow->remote_key_valid = 1;
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;
@@ -1873,6 +1929,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
+ new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->remote_id = subflow_req->remote_id;
new_ctx->token = subflow_req->token;
@@ -1929,6 +1986,7 @@ void __init mptcp_subflow_init(void)
subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req;
+ subflow_request_sock_ipv4_ops.send_synack = subflow_v4_send_synack;
subflow_specific = ipv4_specific;
subflow_specific.conn_request = subflow_v4_conn_request;
@@ -1942,6 +2000,7 @@ void __init mptcp_subflow_init(void)
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
+ subflow_request_sock_ipv6_ops.send_synack = subflow_v6_send_synack;
subflow_v6_specific = ipv6_specific;
subflow_v6_specific.conn_request = subflow_v6_conn_request;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index e54653ea2ed4..8a8266957bc5 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -83,6 +83,7 @@ struct cfg_cmsg_types {
struct cfg_sockopt_types {
unsigned int transparent:1;
+ unsigned int mptfo:1;
};
struct tcp_inq_state {
@@ -90,6 +91,13 @@ struct tcp_inq_state {
bool expect_eof;
};
+struct wstate {
+ char buf[8192];
+ unsigned int len;
+ unsigned int off;
+ unsigned int total_len;
+};
+
static struct tcp_inq_state tcp_inq;
static struct cfg_cmsg_types cfg_cmsg_types;
@@ -232,6 +240,14 @@ static void set_transparent(int fd, int pf)
}
}
+static void set_mptfo(int fd, int pf)
+{
+ int qlen = 25;
+
+ if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1)
+ perror("TCP_FASTOPEN");
+}
+
static int do_ulp_so(int sock, const char *name)
{
return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name));
@@ -300,6 +316,9 @@ static int sock_listen_mptcp(const char * const listenaddr,
if (cfg_sockopt_types.transparent)
set_transparent(sock, pf);
+ if (cfg_sockopt_types.mptfo)
+ set_mptfo(sock, pf);
+
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */
@@ -330,13 +349,15 @@ static int sock_listen_mptcp(const char * const listenaddr,
static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto,
- struct addrinfo **peer)
+ struct addrinfo **peer,
+ int infd, struct wstate *winfo)
{
struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
+ int syn_copied = 0;
int sock = -1;
hints.ai_family = pf;
@@ -354,14 +375,34 @@ static int sock_connect_mptcp(const char * const remoteaddr,
if (cfg_mark)
set_mark(sock, cfg_mark);
- if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
- *peer = a;
- break; /* success */
+ if (cfg_sockopt_types.mptfo) {
+ if (!winfo->total_len)
+ winfo->total_len = winfo->len = read(infd, winfo->buf,
+ sizeof(winfo->buf));
+
+ syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN,
+ a->ai_addr, a->ai_addrlen);
+ if (syn_copied >= 0) {
+ winfo->off = syn_copied;
+ winfo->len -= syn_copied;
+ *peer = a;
+ break; /* success */
+ }
+ } else {
+ if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
+ *peer = a;
+ break; /* success */
+ }
+ }
+ if (cfg_sockopt_types.mptfo) {
+ perror("sendto()");
+ close(sock);
+ sock = -1;
+ } else {
+ perror("connect()");
+ close(sock);
+ sock = -1;
}
-
- perror("connect()");
- close(sock);
- sock = -1;
}
freeaddrinfo(addr);
@@ -571,14 +612,14 @@ static void shut_wr(int fd)
shutdown(fd, SHUT_WR);
}
-static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out)
+static int copyfd_io_poll(int infd, int peerfd, int outfd,
+ bool *in_closed_after_out, struct wstate *winfo)
{
struct pollfd fds = {
.fd = peerfd,
.events = POLLIN | POLLOUT,
};
- unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0;
- char wbuf[8192];
+ unsigned int total_wlen = 0, total_rlen = 0;
set_nonblock(peerfd, true);
@@ -638,19 +679,19 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
}
if (fds.revents & POLLOUT) {
- if (wlen == 0) {
- woff = 0;
- wlen = read(infd, wbuf, sizeof(wbuf));
+ if (winfo->len == 0) {
+ winfo->off = 0;
+ winfo->len = read(infd, winfo->buf, sizeof(winfo->buf));
}
- if (wlen > 0) {
+ if (winfo->len > 0) {
ssize_t bw;
/* limit the total amount of written data to the trunc value */
- if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate)
- wlen = cfg_truncate - total_wlen;
+ if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate)
+ winfo->len = cfg_truncate - total_wlen;
- bw = do_rnd_write(peerfd, wbuf + woff, wlen);
+ bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
if (bw < 0) {
if (cfg_rcv_trunc)
return 0;
@@ -658,10 +699,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
return 111;
}
- woff += bw;
- wlen -= bw;
+ winfo->off += bw;
+ winfo->len -= bw;
total_wlen += bw;
- } else if (wlen == 0) {
+ } else if (winfo->len == 0) {
/* We have no more data to send. */
fds.events &= ~POLLOUT;
@@ -717,10 +758,26 @@ static int do_recvfile(int infd, int outfd)
return (int)r;
}
-static int do_mmap(int infd, int outfd, unsigned int size)
+static int spool_buf(int fd, struct wstate *winfo)
+{
+ while (winfo->len) {
+ int ret = write(fd, winfo->buf + winfo->off, winfo->len);
+
+ if (ret < 0) {
+ perror("write");
+ return 4;
+ }
+ winfo->off += ret;
+ winfo->len -= ret;
+ }
+ return 0;
+}
+
+static int do_mmap(int infd, int outfd, unsigned int size,
+ struct wstate *winfo)
{
char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0);
- ssize_t ret = 0, off = 0;
+ ssize_t ret = 0, off = winfo->total_len;
size_t rem;
if (inbuf == MAP_FAILED) {
@@ -728,7 +785,11 @@ static int do_mmap(int infd, int outfd, unsigned int size)
return 1;
}
- rem = size;
+ ret = spool_buf(outfd, winfo);
+ if (ret < 0)
+ return ret;
+
+ rem = size - winfo->total_len;
while (rem > 0) {
ret = write(outfd, inbuf + off, rem);
@@ -772,8 +833,16 @@ static int get_infd_size(int fd)
return (int)count;
}
-static int do_sendfile(int infd, int outfd, unsigned int count)
+static int do_sendfile(int infd, int outfd, unsigned int count,
+ struct wstate *winfo)
{
+ int ret = spool_buf(outfd, winfo);
+
+ if (ret < 0)
+ return ret;
+
+ count -= winfo->total_len;
+
while (count > 0) {
ssize_t r;
@@ -790,7 +859,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count)
}
static int copyfd_io_mmap(int infd, int peerfd, int outfd,
- unsigned int size, bool *in_closed_after_out)
+ unsigned int size, bool *in_closed_after_out,
+ struct wstate *winfo)
{
int err;
@@ -799,9 +869,9 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
if (err)
return err;
- err = do_mmap(infd, peerfd, size);
+ err = do_mmap(infd, peerfd, size, winfo);
} else {
- err = do_mmap(infd, peerfd, size);
+ err = do_mmap(infd, peerfd, size, winfo);
if (err)
return err;
@@ -815,7 +885,7 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
}
static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
- unsigned int size, bool *in_closed_after_out)
+ unsigned int size, bool *in_closed_after_out, struct wstate *winfo)
{
int err;
@@ -824,9 +894,9 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
if (err)
return err;
- err = do_sendfile(infd, peerfd, size);
+ err = do_sendfile(infd, peerfd, size, winfo);
} else {
- err = do_sendfile(infd, peerfd, size);
+ err = do_sendfile(infd, peerfd, size, winfo);
if (err)
return err;
@@ -839,7 +909,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
return err;
}
-static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd)
+static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
{
bool in_closed_after_out = false;
struct timespec start, end;
@@ -851,21 +921,24 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd)
switch (cfg_mode) {
case CFG_MODE_POLL:
- ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out);
+ ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out,
+ winfo);
break;
case CFG_MODE_MMAP:
file_size = get_infd_size(infd);
if (file_size < 0)
return file_size;
- ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out);
+ ret = copyfd_io_mmap(infd, peerfd, outfd, file_size,
+ &in_closed_after_out, winfo);
break;
case CFG_MODE_SENDFILE:
file_size = get_infd_size(infd);
if (file_size < 0)
return file_size;
- ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out);
+ ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size,
+ &in_closed_after_out, winfo);
break;
default:
@@ -999,6 +1072,7 @@ static void maybe_close(int fd)
int main_loop_s(int listensock)
{
struct sockaddr_storage ss;
+ struct wstate winfo;
struct pollfd polls;
socklen_t salen;
int remotesock;
@@ -1033,7 +1107,8 @@ again:
SOCK_TEST_TCPULP(remotesock, 0);
- copyfd_io(fd, remotesock, 1, true);
+ memset(&winfo, 0, sizeof(winfo));
+ copyfd_io(fd, remotesock, 1, true, &winfo);
} else {
perror("accept");
return 1;
@@ -1130,6 +1205,11 @@ static void parse_setsock_options(const char *name)
return;
}
+ if (strncmp(name, "MPTFO", len) == 0) {
+ cfg_sockopt_types.mptfo = 1;
+ return;
+ }
+
fprintf(stderr, "Unrecognized setsockopt option %s\n", name);
exit(1);
}
@@ -1166,11 +1246,18 @@ void xdisconnect(int fd, int addrlen)
int main_loop(void)
{
- int fd, ret, fd_in = 0;
+ int fd = 0, ret, fd_in = 0;
struct addrinfo *peer;
+ struct wstate winfo;
+
+ if (cfg_input && cfg_sockopt_types.mptfo) {
+ fd_in = open(cfg_input, O_RDONLY);
+ if (fd < 0)
+ xerror("can't open %s:%d", cfg_input, errno);
+ }
- /* listener is ready. */
- fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer);
+ memset(&winfo, 0, sizeof(winfo));
+ fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo);
if (fd < 0)
return 2;
@@ -1186,14 +1273,13 @@ again:
if (cfg_cmsg_types.cmsg_enabled)
apply_cmsg_types(fd, &cfg_cmsg_types);
- if (cfg_input) {
+ if (cfg_input && !cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY);
if (fd < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
- /* close the client socket open only if we are not going to reconnect */
- ret = copyfd_io(fd_in, fd, 1, 0);
+ ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
if (ret)
return ret;
@@ -1210,6 +1296,7 @@ again:
xerror("can't reconnect: %d", errno);
if (cfg_input)
close(fd_in);
+ memset(&winfo, 0, sizeof(winfo));
goto again;
} else {
close(fd);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 621af6895f4d..60198b91a530 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -762,6 +762,23 @@ run_tests_peekmode()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
+run_tests_mptfo()
+{
+ echo "INFO: with MPTFO start"
+ ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1
+
+ run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO"
+ run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO"
+
+ run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO"
+ run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO"
+
+ ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0
+ echo "INFO: with MPTFO end"
+}
+
run_tests_disconnect()
{
local peekmode="$1"
@@ -901,6 +918,10 @@ run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek"
stop_if_error "Tests with peek mode have failed"
+# MPTFO (MultiPath TCP Fastopen) tests
+run_tests_mptfo
+stop_if_error "Tests with MPTFO have failed"
+
# connect to ns4 ip address, ns2 should intercept/proxy
run_test_transparent 10.0.3.1 "tproxy ipv4"
run_test_transparent dead:beef:3::1 "tproxy ipv6"