author	Paolo Abeni <pabeni@redhat.com>	2023-10-23 23:44:38 +0300
committer	Jakub Kicinski <kuba@kernel.org>	2023-10-25 22:23:34 +0300
commit	5684ab1a0effbfeb706f47d85785f653005b97b1 (patch)
tree	545dd36c49a503cee46fc07f4e686801b3829d05 /net/mptcp/protocol.h
parent	849ee75a38b297187c760bb1d23d8f2a7b1fc73e (diff)
mptcp: give rcvlowat some love
The MPTCP protocol allows setting sk_rcvlowat, but the value there is currently ignored.

Additionally, the default subflow sk_rcvlowat basically disables per-subflow delayed acks: the MPTCP protocol moves the incoming data from the subflows into the msk socket as soon as the TCP stack invokes the subflow data_ready callback. Later, when __tcp_ack_snd_check() takes action, the subflow-level copied_seq matches rcv_nxt, and that mandates an immediate ack.

Let the MPTCP receive path be aware of such a threshold, explicitly tracking the amount of data available to be read and checking it against sk_rcvlowat in mptcp_poll() and before waking up readers.

Additionally, implement the set_rcvlowat() callback, to properly handle the rcvbuf auto-tuning on sk_rcvlowat changes.

Finally, to properly handle delayed acks, force the subflow-level threshold to 0 and instead explicitly ask for an immediate ack when the msk-level threshold is not reached.

Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <martineau@kernel.org>
Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-5-9dc60939d371@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
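As an application-side illustration of what this change enables (not part of the patch): the sketch below creates an MPTCP socket, raises SO_RCVLOWAT, and polls for readability; after this series POLLIN is only expected once at least lowat bytes are queued at the MPTCP level. The address, port and buffer sizes are placeholders.

/* Illustrative userspace sketch (not from this patch): set a receive
 * low-water mark on an MPTCP socket and wait until poll() reports it
 * readable, i.e. until at least lowat bytes are available to read.
 */
#include <netinet/in.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262	/* value from include/uapi/linux/in.h */
#endif

int main(void)
{
	int lowat = 16 * 1024;		/* only wake readers after 16 KiB */
	struct sockaddr_in addr;
	struct pollfd pfd;
	char buf[64 * 1024];
	ssize_t n;
	int fd;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(8080);			/* placeholder port */
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);	/* placeholder peer */

	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("socket/connect");
		return 1;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat)) < 0)
		perror("SO_RCVLOWAT");

	pfd.fd = fd;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		n = read(fd, buf, sizeof(buf));
		printf("read %zd bytes\n", n);
	}

	close(fd);
	return 0;
}

On kernels built without MPTCP support the socket() call fails, and the same snippet can fall back to IPPROTO_TCP, where SO_RCVLOWAT has long behaved this way.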
Diffstat (limited to 'net/mptcp/protocol.h')
-rw-r--r--	net/mptcp/protocol.h	20
1 file changed, 20 insertions(+), 0 deletions(-)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 6df62a8a73bc..5971d34a3dee 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -269,6 +269,7 @@ struct mptcp_sock {
atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
u64 bytes_retrans;
+ u64 bytes_consumed;
int rmem_fwd_alloc;
int snd_burst;
int old_wspace;
@@ -659,6 +660,24 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
int mptcp_sched_get_send(struct mptcp_sock *msk);
int mptcp_sched_get_retrans(struct mptcp_sock *msk);

+static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->bytes_received) - READ_ONCE(msk->bytes_consumed);
+}
+
+static inline bool mptcp_epollin_ready(const struct sock *sk)
+{
+ /* mptcp doesn't have to deal with small skbs in the receive queue,
+ * as it can always coalesce them
+ */
+ return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) ||
+ (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+ mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
+ READ_ONCE(tcp_memory_pressure);
+}
+
+int mptcp_set_rcvlowat(struct sock *sk, int val);
+
static inline bool __tcp_can_send(const struct sock *ssk)
{
/* only send if our side has not closed yet */
@@ -733,6 +752,7 @@ static inline bool mptcp_is_fully_established(struct sock *sk)
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
READ_ONCE(mptcp_sk(sk)->fully_established);
}
+
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
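
For context, the two helpers added above are meant to gate both poll() readiness and reader wake-ups on the msk-level threshold; the real call sites are in the other files touched by this series (the diffstat above is limited to protocol.h). A minimal, hypothetical sketch of such a caller, with a made-up function name, might look like this:

/* Hypothetical caller sketch, not part of this patch: wake a blocked
 * reader only once mptcp_epollin_ready() reports that the msk-level
 * data has crossed sk_rcvlowat (or memory pressure forces a wake-up).
 */
#include <net/sock.h>		/* struct sock, sk->sk_data_ready() */
#include "protocol.h"		/* mptcp_epollin_ready() */

static void example_mptcp_wake_reader(struct sock *sk)
{
	if (mptcp_epollin_ready(sk))
		sk->sk_data_ready(sk);
}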