Diffstat (limited to 'net/mptcp/protocol.h')
-rw-r--r-- | net/mptcp/protocol.h | 133
1 file changed, 96 insertions, 37 deletions
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3517f2d24a22..a10ebf3ee10a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -113,10 +113,9 @@
 #define MPTCP_RST_TRANSIENT	BIT(0)
 
 /* MPTCP socket atomic flags */
-#define MPTCP_NOSPACE		1
-#define MPTCP_WORK_RTX		2
-#define MPTCP_FALLBACK_DONE	4
-#define MPTCP_WORK_CLOSE_SUBFLOW 5
+#define MPTCP_WORK_RTX		1
+#define MPTCP_FALLBACK_DONE	2
+#define MPTCP_WORK_CLOSE_SUBFLOW 3
 
 /* MPTCP socket release cb flags */
 #define MPTCP_PUSH_PENDING	1
@@ -260,8 +259,10 @@ struct mptcp_data_frag {
 struct mptcp_sock {
 	/* inet_connection_sock must be the first member */
 	struct inet_connection_sock sk;
-	u64		local_key;
-	u64		remote_key;
+	u64		local_key;	/* protected by the first subflow socket lock
+					 * lockless access read
+					 */
+	u64		remote_key;	/* same as above */
 	u64		write_seq;
 	u64		bytes_sent;
 	u64		snd_nxt;
@@ -286,7 +287,6 @@ struct mptcp_sock {
 	int		rmem_released;
 	unsigned long	flags;
 	unsigned long	cb_flags;
-	unsigned long	push_pending;
 	bool		recovery;	/* closing subflow write queue reinjected */
 	bool		can_ack;
 	bool		fully_established;
@@ -305,7 +305,9 @@ struct mptcp_sock {
 			nodelay:1,
 			fastopening:1,
 			in_accept_queue:1,
-			free_first:1;
+			free_first:1,
+			rcvspace_init:1;
+	u32		notsent_lowat;
 	struct work_struct work;
 	struct sk_buff  *ooo_last_skb;
 	struct rb_root  out_of_order_queue;
@@ -341,12 +343,30 @@ struct mptcp_sock {
 #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp)			\
 	list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
 
+extern struct genl_family mptcp_genl_family;
+
 static inline void msk_owned_by_me(const struct mptcp_sock *msk)
 {
 	sock_owned_by_me((const struct sock *)msk);
 }
 
+#ifdef CONFIG_DEBUG_NET
+/* MPTCP-specific: we might (indirectly) call this helper with the wrong sk */
+#undef tcp_sk
+#define tcp_sk(ptr) ({								\
+	typeof(ptr) _ptr = (ptr);						\
+	WARN_ON(_ptr->sk_protocol != IPPROTO_TCP);				\
+	container_of_const(_ptr, struct tcp_sock, inet_conn.icsk_inet.sk);	\
+})
+#define mptcp_sk(ptr) ({						\
+	typeof(ptr) _ptr = (ptr);					\
+	WARN_ON(_ptr->sk_protocol != IPPROTO_MPTCP);			\
+	container_of_const(_ptr, struct mptcp_sock, sk.icsk_inet.sk);	\
+})
+
+#else /* !CONFIG_DEBUG_NET */
 #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk)
+#endif
 
 /* the msk socket don't use the backlog, also account for the bulk
  * free memory
@@ -400,7 +420,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	if (msk->snd_una == READ_ONCE(msk->snd_nxt))
+	if (msk->snd_una == msk->snd_nxt)
 		return NULL;
 
 	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
@@ -491,10 +511,9 @@ struct mptcp_subflow_context {
 		remote_key_valid : 1,	/* received the peer key from */
 		disposable : 1,		/* ctx can be free at ulp release time */
 		stale : 1,		/* unable to snd/rcv data, do not use for xmit */
-		local_id_valid : 1,	/* local_id is correctly initialized */
 		valid_csum_seen : 1,	/* at least one csum validated */
 		is_mptfo : 1,		/* subflow is doing TFO */
-		__unused : 9;
+		__unused : 10;
 	bool	data_avail;
 	bool	scheduled;
 	u32	remote_nonce;
@@ -505,7 +524,7 @@ struct mptcp_subflow_context {
 		u8	hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
 		u64	iasn;	    /* initial ack sequence number, MPC subflows only */
 	};
-	u8	local_id;
+	s16	local_id;	    /* if negative not initialized yet */
 	u8	remote_id;
 	u8	reset_seen:1;
 	u8	reset_transient:1;
@@ -556,6 +575,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
 {
 	memset(&subflow->reset, 0, sizeof(subflow->reset));
 	subflow->request_mptcp = 1;
+	WRITE_ONCE(subflow->local_id, -1);
 }
 
 static inline u64
@@ -622,8 +642,9 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
 unsigned int mptcp_close_timeout(const struct sock *sk);
 int mptcp_get_pm_type(const struct net *net);
 const char *mptcp_get_scheduler(const struct net *net);
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
-				     const struct mptcp_options_received *mp_opt);
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+				       struct mptcp_subflow_context *subflow,
+				       const struct mptcp_options_received *mp_opt);
 bool __mptcp_retransmit_pending_data(struct sock *sk);
 void mptcp_check_and_set_pending(struct sock *sk);
 void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -789,6 +810,38 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
 	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
 }
 
+static inline u32 mptcp_notsent_lowat(const struct sock *sk)
+{
+	struct net *net = sock_net(sk);
+	u32 val;
+
+	val = READ_ONCE(mptcp_sk(sk)->notsent_lowat);
+	return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+}
+
+static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake)
+{
+	const struct mptcp_sock *msk = mptcp_sk(sk);
+	u32 notsent_bytes;
+
+	notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt);
+	return (notsent_bytes << wake) < mptcp_notsent_lowat(sk);
+}
+
+static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake)
+{
+	return mptcp_stream_memory_free(sk, wake) &&
+	       __sk_stream_is_writeable(sk, wake);
+}
+
+static inline void mptcp_write_space(struct sock *sk)
+{
+	/* pairs with memory barrier in mptcp_poll */
+	smp_mb();
+	if (mptcp_stream_memory_free(sk, 1))
+		sk_stream_write_space(sk);
+}
+
 static inline void __mptcp_sync_sndbuf(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -797,7 +850,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
 	if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
 		return;
 
-	new_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[0];
+	new_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]);
 	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
 		ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf);
 
@@ -807,6 +860,7 @@
 
 	/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
 	WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+	mptcp_write_space(sk);
 }
 
 /* The called held both the msk socket and the subflow socket locks,
@@ -837,16 +891,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
 	local_bh_enable();
 }
 
-static inline void mptcp_write_space(struct sock *sk)
-{
-	if (sk_stream_is_writeable(sk)) {
-		/* pairs with memory barrier in mptcp_poll */
-		smp_mb();
-		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
-			sk_stream_write_space(sk);
-	}
-}
-
 void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
 
 #define MPTCP_TOKEN_MAX_RETRIES	4
@@ -926,21 +970,15 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int
 					     u8 *flags, int *ifindex);
 int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
 						    u8 *flags, int *ifindex);
-int mptcp_pm_set_flags(struct net *net, struct nlattr *token,
-		       struct mptcp_pm_addr_entry *loc,
-		       struct mptcp_pm_addr_entry *rem, u8 bkup);
-int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup);
-int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
-				 struct mptcp_pm_addr_entry *loc,
-				 struct mptcp_pm_addr_entry *rem, u8 bkup);
+int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info);
+int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 			   const struct mptcp_addr_info *addr,
 			   bool echo);
 int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
 int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
 void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list);
-void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
-					struct list_head *rm_list);
 
 void mptcp_free_local_addr_list(struct mptcp_sock *msk);
 
@@ -952,10 +990,12 @@ void mptcp_event_pm_listener(const struct sock *ssk,
 			     enum mptcp_event_type event);
 bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
 
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
-				   const struct mptcp_options_received *mp_opt);
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+				     const struct mptcp_options_received *mp_opt);
 void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
 					      struct request_sock *req);
+int mptcp_nl_fill_addr(struct sk_buff *skb,
+		       struct mptcp_pm_addr_entry *entry);
 
 static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
 {
@@ -1020,6 +1060,24 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
 int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
 int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb);
+int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
+			  struct netlink_callback *cb);
+int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
+				 struct netlink_callback *cb);
+int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info);
+int mptcp_userspace_pm_get_addr(struct sk_buff *skb,
+				struct genl_info *info);
+
+static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
+{
+	int local_id = READ_ONCE(subflow->local_id);
+
+	if (local_id < 0)
+		return 0;
+	return local_id;
+}
 
 void __init mptcp_pm_nl_init(void);
 void mptcp_pm_nl_work(struct mptcp_sock *msk);
@@ -1128,7 +1186,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 
-	return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
+	return (1 << sk->sk_state) &
+	       (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
 	       is_active_ssk(subflow) &&
 	       !subflow->conn_finished;
 }
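
The notsent-lowat helpers added above (mptcp_notsent_lowat(), mptcp_stream_memory_free(), mptcp_write_space()) give an MPTCP socket the same TCP_NOTSENT_LOWAT semantics plain TCP has: the socket is only reported writable once the amount of queued-but-unsent data drops below the threshold stored in msk->notsent_lowat, falling back to the tcp_notsent_lowat sysctl. A minimal userspace sketch of how an application would opt in (this program is an illustration only, not part of the patch, and assumes a kernel that carries this change plus the companion setsockopt support):

/* Illustration: set TCP_NOTSENT_LOWAT on an MPTCP socket. */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262	/* older libc headers may not define it */
#endif

int main(void)
{
	int lowat = 128 * 1024;	/* report writable once <128 KiB is still unsent */
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

	if (fd < 0) {
		perror("socket(IPPROTO_MPTCP)");
		return 1;
	}

	/* value lands in msk->notsent_lowat; 0 means "use the tcp_notsent_lowat sysctl" */
	if (setsockopt(fd, IPPROTO_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)) < 0)
		perror("setsockopt(TCP_NOTSENT_LOWAT)");

	close(fd);
	return 0;
}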