summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2020-07-23 21:47:25 +0300
committer David S. Miller <davem@davemloft.net> 2020-07-23 21:47:25 +0300
commit a3c8c7f467313f6af15c73d118896291fc7712a4 (patch)
tree 1b1a3e1a11bdb28bcf1943cdd5428abc836af391
parent 205a55f4e65353dd4846547d376a6f85cdda3d04 (diff)
parent 4cf8b7e48a09745145881b311fe6a9154ba69ebc (diff)
download linux-a3c8c7f467313f6af15c73d118896291fc7712a4.tar.xz
Merge branch 'mptcp-non-backup-subflows-pre-reqs'
Paolo Abeni says:

====================
mptcp: non backup subflows pre-reqs

This series contains a bunch of MPTCP improvements loosely related to concurrent subflows xmit usage, currently under development.

The first 3 patches are actually bugfixes for issues that will become apparent as soon as we will enable the above feature.

The later patches improve the handling of incoming additional subflows, improving significantly the performances in stress tests based on a high new connection rate.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- net/mptcp/options.c 5
-rw-r--r-- net/mptcp/protocol.c 23
-rw-r--r-- net/mptcp/protocol.h 8
-rw-r--r-- net/mptcp/subflow.c 91
4 files changed, 81 insertions, 46 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 19707c07efc1..3bc56eb608d8 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -709,6 +709,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
* additional ack.
*/
subflow->fully_established = 1;
+ WRITE_ONCE(msk->fully_established, true);
goto fully_established;
}
@@ -724,9 +725,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
- subflow->fully_established = 1;
- subflow->remote_key = mp_opt->sndr_key;
- subflow->can_ack = 1;
+ mptcp_subflow_fully_established(subflow, mp_opt);
fully_established:
if (likely(subflow->pm_notified))
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f0b0b503c262..979dfcd2aa14 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -460,15 +460,20 @@ static void mptcp_clean_una(struct sock *sk)
dfrag = mptcp_rtx_head(sk);
if (dfrag && after64(snd_una, dfrag->data_seq)) {
- u64 delta = dfrag->data_seq + dfrag->data_len - snd_una;
+ u64 delta = snd_una - dfrag->data_seq;
+
+ if (WARN_ON_ONCE(delta > dfrag->data_len))
+ goto out;
dfrag->data_seq += delta;
+ dfrag->offset += delta;
dfrag->data_len -= delta;
dfrag_uncharge(sk, delta);
cleaned = true;
}
+out:
if (cleaned) {
sk_mem_reclaim_partial(sk);
@@ -1517,6 +1522,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
msk->subflow = NULL;
+ WRITE_ONCE(msk->fully_established, false);
msk->write_seq = subflow_req->idsn + 1;
atomic64_set(&msk->snd_una, msk->write_seq);
@@ -1600,7 +1606,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
newsk = new_mptcp_sock;
mptcp_copy_inaddrs(newsk, ssk);
list_add(&subflow->node, &msk->conn_list);
- inet_sk_state_store(newsk, TCP_ESTABLISHED);
mptcp_rcv_space_init(msk, ssk);
bh_unlock_sock(new_mptcp_sock);
@@ -1814,7 +1819,6 @@ void mptcp_finish_connect(struct sock *ssk)
ack_seq++;
subflow->map_seq = ack_seq;
subflow->map_subflow_seq = 1;
- subflow->rel_write_seq = 1;
/* the socket is not connected yet, no msk/subflow ops can access/race
* accessing the field below
@@ -1851,7 +1855,7 @@ bool mptcp_finish_join(struct sock *sk)
pr_debug("msk=%p, subflow=%p", msk, subflow);
/* mptcp socket already closing? */
- if (inet_sk_state_load(parent) != TCP_ESTABLISHED)
+ if (!mptcp_is_fully_established(parent))
return false;
if (!msk->pm.server_side)
@@ -1940,6 +1944,13 @@ unlock:
return err;
}
+static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
+
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
@@ -1971,10 +1982,10 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
* TCP option space.
*/
if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
- subflow->request_mptcp = 0;
+ mptcp_subflow_early_fallback(msk, subflow);
#endif
if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk))
- subflow->request_mptcp = 0;
+ mptcp_subflow_early_fallback(msk, subflow);
do_connect:
err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 6e114c09e5b4..67634b595466 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -198,6 +198,7 @@ struct mptcp_sock {
u32 token;
unsigned long flags;
bool can_ack;
+ bool fully_established;
spinlock_t join_list_lock;
struct work_struct work;
struct list_head conn_list;
@@ -342,6 +343,8 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
}
int mptcp_is_enabled(struct net *net);
+void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+ struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
@@ -373,6 +376,11 @@ void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
+static inline bool mptcp_is_fully_established(struct sock *sk)
+{
+ return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
+ READ_ONCE(mptcp_sk(sk)->fully_established);
+}
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 519122e66f17..e645483d1200 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -53,6 +53,12 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}
+static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
+{
+ return mptcp_is_fully_established((void *)msk) &&
+ READ_ONCE(msk->pm.accept_subflow);
+}
+
/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
const struct sk_buff *skb)
@@ -200,49 +206,40 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (subflow->conn_finished)
return;
+ subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
mptcp_get_options(skb, &mp_opt);
- if (subflow->request_mptcp && mp_opt.mp_capable) {
+ if (subflow->request_mptcp) {
+ if (!mp_opt.mp_capable) {
+ MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
+ mptcp_do_fallback(sk);
+ pr_fallback(mptcp_sk(subflow->conn));
+ goto fallback;
+ }
+
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
pr_debug("subflow=%p, remote_key=%llu", subflow,
subflow->remote_key);
- } else if (subflow->request_join && mp_opt.mp_join) {
- subflow->mp_join = 1;
+ mptcp_finish_connect(sk);
+ } else if (subflow->request_join) {
+ u8 hmac[SHA256_DIGEST_SIZE];
+
+ if (!mp_opt.mp_join)
+ goto do_reset;
+
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
subflow->thmac, subflow->remote_nonce);
- } else {
- if (subflow->request_mptcp)
- MPTCP_INC_STATS(sock_net(sk),
- MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- mptcp_do_fallback(sk);
- pr_fallback(mptcp_sk(subflow->conn));
- }
-
- if (mptcp_check_fallback(sk)) {
- mptcp_rcv_space_init(mptcp_sk(parent), sk);
- return;
- }
-
- if (subflow->mp_capable) {
- pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
- subflow->remote_key);
- mptcp_finish_connect(sk);
- } else if (subflow->mp_join) {
- u8 hmac[SHA256_DIGEST_SIZE];
- pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
- subflow, subflow->thmac,
- subflow->remote_nonce);
if (!subflow_thmac_valid(subflow)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
- subflow->mp_join = 0;
goto do_reset;
}
@@ -250,18 +247,22 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->local_nonce,
subflow->remote_nonce,
hmac);
-
memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
if (!mptcp_finish_join(sk))
goto do_reset;
+ subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
- } else {
-do_reset:
- tcp_send_active_reset(sk, GFP_ATOMIC);
- tcp_done(sk);
+ } else if (mptcp_check_fallback(sk)) {
+fallback:
+ mptcp_rcv_space_init(mptcp_sk(parent), sk);
}
+ return;
+
+do_reset:
+ tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_done(sk);
}
static struct request_sock_ops subflow_request_sock_ops;
@@ -386,6 +387,17 @@ static void subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}
+void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+ struct mptcp_options_received *mp_opt)
+{
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ subflow->remote_key = mp_opt->sndr_key;
+ subflow->fully_established = 1;
+ subflow->can_ack = 1;
+ WRITE_ONCE(msk->fully_established, true);
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -409,7 +421,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* hopefully temporary handling for MP_JOIN+syncookie */
subflow_req = mptcp_subflow_rsk(req);
- fallback_is_fatal = subflow_req->mp_join;
+ fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
fallback = !tcp_rsk(req)->is_mptcp;
if (fallback)
goto create_child;
@@ -437,6 +449,7 @@ create_msk:
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_join ||
+ !mptcp_can_accept_new_subflow(subflow_req->msk) ||
!subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
fallback = true;
@@ -465,6 +478,11 @@ create_child:
}
if (ctx->mp_capable) {
+ /* this can't race with mptcp_close(), as the msk is
+ * not yet exposed to user-space
+ */
+ inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
+
/* new mpc subflow takes ownership of the newly
* created mptcp socket
*/
@@ -477,9 +495,8 @@ create_child:
/* with OoO packets we can reach here without ingress
* mpc option
*/
- ctx->remote_key = mp_opt.sndr_key;
- ctx->fully_established = mp_opt.mp_capable;
- ctx->can_ack = mp_opt.mp_capable;
+ if (mp_opt.mp_capable)
+ mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -514,9 +531,9 @@ out:
dispose_child:
subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
- tcp_send_active_reset(child, GFP_ATOMIC);
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
+ req->rsk_ops->send_reset(sk, skb);
/* The last child reference will be released by the caller */
return child;
@@ -966,7 +983,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
int addrlen;
int err;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (!mptcp_is_fully_established(sk))
return -ENOTCONN;
err = mptcp_subflow_create_socket(sk, &sf);