diff options
author | David S. Miller <davem@davemloft.net> | 2021-06-05 00:08:09 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-06-05 00:08:09 +0300 |
commit | d15fd7359ab18a46c5fefc36be45b36205f1acfc (patch) | |
tree | 3b036b03ebf2f591b3bf8ec5926ef498c780d157 /net | |
parent | ebbf5fcb94a7f3499747b282420a1c5f7e8d1c6f (diff) | |
parent | 5e6af0a729b669b1da6f9600867e2e4910505a6d (diff) | |
download | linux-d15fd7359ab18a46c5fefc36be45b36205f1acfc.tar.xz |
Merge branch 'mptcp-timestamps'
Mat Martineau says:
====================
mptcp: Add timestamp support
Enable the SO_TIMESTAMP and SO_TIMESTAMPING socket options for MPTCP
sockets and add receive path cmsg support for timestamps.
Patches 1, 2, and 5 expose existing sock and tcp helpers for timestamps
(no new EXPORT_SYMBOL()s).
Patch 3 propagates timestamp options to subflows.
Patch 4 cleans up MPTCP handling of SOL_SOCKET options.
Patch 6 adds timestamp cmsg data when receiving on sockets that have
been configured for timestamps.
Patch 7 adds self test coverage for timestamps.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/sock.c | 97 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 10 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 28 | ||||
-rw-r--r-- | net/mptcp/sockopt.c | 149 |
4 files changed, 163 insertions, 121 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 958614ea16ed..bd887cb075ce 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -776,6 +776,58 @@ void sock_enable_timestamps(struct sock *sk) } EXPORT_SYMBOL(sock_enable_timestamps); +void sock_set_timestamp(struct sock *sk, int optname, bool valbool) +{ + switch (optname) { + case SO_TIMESTAMP_OLD: + __sock_set_timestamps(sk, valbool, false, false); + break; + case SO_TIMESTAMP_NEW: + __sock_set_timestamps(sk, valbool, true, false); + break; + case SO_TIMESTAMPNS_OLD: + __sock_set_timestamps(sk, valbool, false, true); + break; + case SO_TIMESTAMPNS_NEW: + __sock_set_timestamps(sk, valbool, true, true); + break; + } +} + +int sock_set_timestamping(struct sock *sk, int optname, int val) +{ + if (val & ~SOF_TIMESTAMPING_MASK) + return -EINVAL; + + if (val & SOF_TIMESTAMPING_OPT_ID && + !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) { + if ((1 << sk->sk_state) & + (TCPF_CLOSE | TCPF_LISTEN)) + return -EINVAL; + sk->sk_tskey = tcp_sk(sk)->snd_una; + } else { + sk->sk_tskey = 0; + } + } + + if (val & SOF_TIMESTAMPING_OPT_STATS && + !(val & SOF_TIMESTAMPING_OPT_TSONLY)) + return -EINVAL; + + sk->sk_tsflags = val; + sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); + + if (val & SOF_TIMESTAMPING_RX_SOFTWARE) + sock_enable_timestamp(sk, + SOCK_TIMESTAMPING_RX_SOFTWARE); + else + sock_disable_timestamp(sk, + (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); + return 0; +} + void sock_set_keepalive(struct sock *sk) { lock_sock(sk); @@ -989,54 +1041,15 @@ set_sndbuf: break; case SO_TIMESTAMP_OLD: - __sock_set_timestamps(sk, valbool, false, false); - break; case SO_TIMESTAMP_NEW: - __sock_set_timestamps(sk, valbool, true, false); - break; case SO_TIMESTAMPNS_OLD: - __sock_set_timestamps(sk, valbool, false, true); - break; case SO_TIMESTAMPNS_NEW: - __sock_set_timestamps(sk, valbool, true, true); + sock_set_timestamp(sk, valbool, optname); 
break; + case SO_TIMESTAMPING_NEW: case SO_TIMESTAMPING_OLD: - if (val & ~SOF_TIMESTAMPING_MASK) { - ret = -EINVAL; - break; - } - - if (val & SOF_TIMESTAMPING_OPT_ID && - !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { - if (sk->sk_protocol == IPPROTO_TCP && - sk->sk_type == SOCK_STREAM) { - if ((1 << sk->sk_state) & - (TCPF_CLOSE | TCPF_LISTEN)) { - ret = -EINVAL; - break; - } - sk->sk_tskey = tcp_sk(sk)->snd_una; - } else { - sk->sk_tskey = 0; - } - } - - if (val & SOF_TIMESTAMPING_OPT_STATS && - !(val & SOF_TIMESTAMPING_OPT_TSONLY)) { - ret = -EINVAL; - break; - } - - sk->sk_tsflags = val; - sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); - - if (val & SOF_TIMESTAMPING_RX_SOFTWARE) - sock_enable_timestamp(sk, - SOCK_TIMESTAMPING_RX_SOFTWARE); - else - sock_disable_timestamp(sk, - (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); + ret = sock_set_timestamping(sk, optname, val); break; case SO_RCVLOWAT: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1c1f9e3de72..0e3f0e0e5b51 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1738,8 +1738,8 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_set_rcvlowat); -static void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping_internal *tss) +void tcp_update_recv_tstamps(struct sk_buff *skb, + struct scm_timestamping_internal *tss) { if (skb->tstamp) tss->ts[0] = ktime_to_timespec64(skb->tstamp); @@ -2024,8 +2024,6 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, } #define TCP_VALID_ZC_MSG_FLAGS (TCP_CMSG_TS) -static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, - struct scm_timestamping_internal *tss); static void tcp_zc_finalize_rx_tstamp(struct sock *sk, struct tcp_zerocopy_receive *zc, struct scm_timestamping_internal *tss) @@ -2197,8 +2195,8 @@ out: #endif /* Similar to __sock_recv_timestamp, but does not require an skb */ -static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, - struct 
scm_timestamping_internal *tss) +void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, + struct scm_timestamping_internal *tss) { int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); bool has_timestamping = false; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2bc199549a88..3897d35fd9df 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -39,10 +39,15 @@ struct mptcp_skb_cb { u64 map_seq; u64 end_seq; u32 offset; + u8 has_rxtstamp:1; }; #define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) +enum { + MPTCP_CMSG_TS = BIT(0), +}; + static struct percpu_counter mptcp_sockets_allocated; static void __mptcp_destroy_sock(struct sock *sk); @@ -272,6 +277,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = (struct sock *)msk; struct sk_buff *tail; + bool has_rxtstamp; __skb_unlink(skb, &ssk->sk_receive_queue); @@ -287,6 +293,8 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, goto drop; } + has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + /* the skb map_seq accounts for the skb offset: * mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq * value @@ -294,6 +302,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow); MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len; MPTCP_SKB_CB(skb)->offset = offset; + MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ @@ -1757,7 +1766,9 @@ static void mptcp_wait_data(struct sock *sk, long *timeo) static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, struct msghdr *msg, - size_t len, int flags) + size_t len, int flags, + struct scm_timestamping_internal *tss, + int *cmsg_flags) { struct sk_buff *skb, *tmp; int copied = 0; @@ -1777,6 +1788,11 @@ static int 
__mptcp_recvmsg_mskq(struct mptcp_sock *msk, } } + if (MPTCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, tss); + *cmsg_flags |= MPTCP_CMSG_TS; + } + copied += count; if (count < data_len) { @@ -1964,7 +1980,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { struct mptcp_sock *msk = mptcp_sk(sk); - int copied = 0; + struct scm_timestamping_internal tss; + int copied = 0, cmsg_flags = 0; int target; long timeo; @@ -1986,7 +2003,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, while (copied < len) { int bytes_read; - bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags); + bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@ -2067,6 +2084,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, set_bit(MPTCP_DATA_READY, &msk->flags); } out_err: + if (cmsg_flags && copied >= 0) { + if (cmsg_flags & MPTCP_CMSG_TS) + tcp_recv_timestamp(msg, sk, &tss); + } + pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d", msk, test_bit(MPTCP_DATA_READY, &msk->flags), skb_queue_empty_lockless(&sk->sk_receive_queue), copied); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index a79798189599..092d1f635d27 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -140,6 +140,43 @@ static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); } +static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) +{ + sockptr_t optval = KERNEL_SOCKPTR(&val); + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + int ret; + + ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, + optval, sizeof(val)); + if (ret) + return ret; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = 
mptcp_subflow_tcp_sock(subflow); + bool slow = lock_sock_fast(ssk); + + switch (optname) { + case SO_TIMESTAMP_OLD: + case SO_TIMESTAMP_NEW: + case SO_TIMESTAMPNS_OLD: + case SO_TIMESTAMPNS_NEW: + sock_set_timestamp(sk, optname, !!val); + break; + case SO_TIMESTAMPING_NEW: + case SO_TIMESTAMPING_OLD: + sock_set_timestamping(sk, optname, val); + break; + } + + unlock_sock_fast(ssk, slow); + } + + release_sock(sk); + return 0; +} + static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { @@ -164,6 +201,13 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, case SO_INCOMING_CPU: mptcp_so_incoming_cpu(msk, val); return 0; + case SO_TIMESTAMP_OLD: + case SO_TIMESTAMP_NEW: + case SO_TIMESTAMPNS_OLD: + case SO_TIMESTAMPNS_NEW: + case SO_TIMESTAMPING_OLD: + case SO_TIMESTAMPING_NEW: + return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); } return -ENOPROTOOPT; @@ -251,9 +295,23 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_MARK: case SO_INCOMING_CPU: case SO_DEBUG: + case SO_TIMESTAMP_OLD: + case SO_TIMESTAMP_NEW: + case SO_TIMESTAMPNS_OLD: + case SO_TIMESTAMPNS_NEW: + case SO_TIMESTAMPING_OLD: + case SO_TIMESTAMPING_NEW: return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen); case SO_LINGER: return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); + case SO_RCVLOWAT: + case SO_RCVTIMEO_OLD: + case SO_RCVTIMEO_NEW: + case SO_BUSY_POLL: + case SO_PREFER_BUSY_POLL: + case SO_BUSY_POLL_BUDGET: + /* No need to copy: only relevant for msk */ + return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); case SO_NO_CHECK: case SO_DONTROUTE: case SO_BROADCAST: @@ -267,7 +325,24 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, return 0; } - return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); + /* SO_OOBINLINE is not supported, let's avoid the related 
mess + * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, + * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, + * we must be careful with subflows + * + * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks + * explicitly the sk_protocol field + * + * SO_PEEK_OFF is unsupported, as it is for plain TCP + * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows + * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, + * but likely needs careful design + * + * SO_ZEROCOPY is currently unsupported, TODO in sndmsg + * SO_TXTIME is currently unsupported + */ + + return -EOPNOTSUPP; } static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, @@ -299,72 +374,6 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, static bool mptcp_supported_sockopt(int level, int optname) { - if (level == SOL_SOCKET) { - switch (optname) { - case SO_DEBUG: - case SO_REUSEPORT: - case SO_REUSEADDR: - - /* the following ones need a better implementation, - * but are quite common we want to preserve them - */ - case SO_BINDTODEVICE: - case SO_SNDBUF: - case SO_SNDBUFFORCE: - case SO_RCVBUF: - case SO_RCVBUFFORCE: - case SO_KEEPALIVE: - case SO_PRIORITY: - case SO_LINGER: - case SO_TIMESTAMP_OLD: - case SO_TIMESTAMP_NEW: - case SO_TIMESTAMPNS_OLD: - case SO_TIMESTAMPNS_NEW: - case SO_TIMESTAMPING_OLD: - case SO_TIMESTAMPING_NEW: - case SO_RCVLOWAT: - case SO_RCVTIMEO_OLD: - case SO_RCVTIMEO_NEW: - case SO_SNDTIMEO_OLD: - case SO_SNDTIMEO_NEW: - case SO_MARK: - case SO_INCOMING_CPU: - case SO_BINDTOIFINDEX: - case SO_BUSY_POLL: - case SO_PREFER_BUSY_POLL: - case SO_BUSY_POLL_BUDGET: - - /* next ones are no-op for plain TCP */ - case SO_NO_CHECK: - case SO_DONTROUTE: - case SO_BROADCAST: - case SO_BSDCOMPAT: - case SO_PASSCRED: - case SO_PASSSEC: - case SO_RXQ_OVFL: - case SO_WIFI_STATUS: - case SO_NOFCS: - case SO_SELECT_ERR_QUEUE: - return true; - } - - /* SO_OOBINLINE is not supported, let's avoid the related 
mess */ - /* SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, - * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, - * we must be careful with subflows - */ - /* SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks - * explicitly the sk_protocol field - */ - /* SO_PEEK_OFF is unsupported, as it is for plain TCP */ - /* SO_MAX_PACING_RATE is unsupported, we must be careful with subflows */ - /* SO_CNX_ADVICE is currently unsupported, could possibly be relevant, - * but likely needs careful design - */ - /* SO_ZEROCOPY is currently unsupported, TODO in sndmsg */ - /* SO_TXTIME is currently unsupported */ - return false; - } if (level == SOL_IP) { switch (optname) { /* should work fine */ @@ -574,12 +583,12 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname, pr_debug("msk=%p", msk); - if (!mptcp_supported_sockopt(level, optname)) - return -ENOPROTOOPT; - if (level == SOL_SOCKET) return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); + if (!mptcp_supported_sockopt(level, optname)) + return -ENOPROTOOPT; + /* @@ the meaning of setsockopt() when the socket is connected and * there are multiple subflows is not yet defined. It is up to the * MPTCP-level socket to configure the subflows until the subflow |