summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2023-07-20 21:47:50 +0300
committerJakub Kicinski <kuba@kernel.org>2023-07-25 02:36:05 +0300
commitb8dc6d6ce93142ccd4c976003bb6c25d63aac2ce (patch)
tree647b7513486333643f7ce38ba7b4561a9ac952ef /net/mptcp
parent004a04b97bbcbb630421e44aadfe675fe6b4c460 (diff)
downloadlinux-b8dc6d6ce93142ccd4c976003bb6c25d63aac2ce.tar.xz
mptcp: fix rcv buffer auto-tuning
The MPTCP code uses the assumption that the tcp_win_from_space() helper does not use any TCP-specific field, and thus works correctly operating on an MPTCP socket. The commit dfa2f0483360 ("tcp: get rid of sysctl_tcp_adv_win_scale") broke such assumption, and as a consequence most MPTCP connections stall on zero-window event due to auto-tuning changing the rcv buffer size quite randomly. Address the issue syncing again the MPTCP auto-tuning code with the TCP one. To achieve that, factor out the windows size logic in socket independent helpers, and reuse them in mptcp_rcv_space_adjust(). The MPTCP level scaling_ratio is selected as the minimum one from the all the subflows, as a worst-case estimate. Fixes: dfa2f0483360 ("tcp: get rid of sysctl_tcp_adv_win_scale") Signed-off-by: Paolo Abeni <pabeni@redhat.com> Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net> Reviewed-by: Eric Dumazet <edumazet@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Link: https://lore.kernel.org/r/20230720-upstream-net-next-20230720-mptcp-fix-rcv-buffer-auto-tuning-v1-1-175ef12b8380@tessares.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/protocol.c15
-rw-r--r--net/mptcp/protocol.h8
-rw-r--r--net/mptcp/subflow.c2
3 files changed, 15 insertions, 10 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 22323b1fa184..68da26539970 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -90,6 +90,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
if (err)
return err;
+ msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio;
WRITE_ONCE(msk->first, ssock->sk);
WRITE_ONCE(msk->subflow, ssock);
subflow = mptcp_subflow_ctx(ssock->sk);
@@ -1881,6 +1882,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
+ u8 scaling_ratio = U8_MAX;
u32 time, advmss = 1;
u64 rtt_us, mstamp;
@@ -1911,9 +1913,11 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
rtt_us = max(sf_rtt_us, rtt_us);
advmss = max(sf_advmss, advmss);
+ scaling_ratio = min(tp->scaling_ratio, scaling_ratio);
}
msk->rcvq_space.rtt_us = rtt_us;
+ msk->scaling_ratio = scaling_ratio;
if (time < (rtt_us >> 3) || rtt_us == 0)
return;
@@ -1922,8 +1926,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss;
@@ -1932,18 +1936,13 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
do_div(grow, msk->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < advmss)
- rcvmem += 128;
-
- do_div(rcvwin, advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, __tcp_space_from_win(scaling_ratio, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
- window_clamp = tcp_win_from_space(sk, rcvbuf);
+ window_clamp = __tcp_win_from_space(scaling_ratio, rcvbuf);
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make subflows follow along. If we do not do this, we
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 37fbe22e2433..795f422e8597 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -321,6 +321,7 @@ struct mptcp_sock {
u64 time; /* start time of measurement window */
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
+ u8 scaling_ratio;
u32 subflow_id;
u32 setsockopt_seq;
@@ -351,9 +352,14 @@ static inline int __mptcp_rmem(const struct sock *sk)
return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
}
+static inline int mptcp_win_from_space(const struct sock *sk, int space)
+{
+ return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
+}
+
static inline int __mptcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
+ return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9ee3b7abbaf6..ad7080fabb2f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1359,7 +1359,7 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
const struct sock *sk = subflow->conn;
*space = __mptcp_space(sk);
- *full_space = tcp_full_space(sk);
+ *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
void __mptcp_error_report(struct sock *sk)