From 04c494e68a1340cb5c70d4704ac32d863dc64293 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 May 2022 14:14:56 -0700 Subject: Revert "tcp/dccp: get rid of inet_twsk_purge()" This reverts commits: 0dad4087a86a2cbe177404dc73f18ada26a2c390 ("tcp/dccp: get rid of inet_twsk_purge()") d507204d3c5cc57d9a8bdf0a477615bb59ea1611 ("tcp/dccp: add tw->tw_bslot") As Leonard pointed out, a newly allocated netns can happen to reuse a freed 'struct net'. While TCP TW timers were covered by my patches, other things were not: 1) Lookups in rx path (INET_MATCH() and INET6_MATCH()), as they look at 4-tuple plus the 'struct net' pointer. 2) /proc/net/tcp[6] and inet_diag, same reason. 3) hashinfo->bhash[], same reason. Fixing all this seems risky, lets instead revert. In the future, we might have a per netns tcp hash table, or a per netns list of timewait sockets... Fixes: 0dad4087a86a ("tcp/dccp: get rid of inet_twsk_purge()") Signed-off-by: Eric Dumazet Reported-by: Leonard Crestez Tested-by: Leonard Crestez Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 6 +++++ net/dccp/ipv6.c | 6 +++++ net/ipv4/inet_timewait_sock.c | 58 +++++++++++++++++++++++++++++++++++++------ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/tcp_ipv6.c | 6 +++++ 5 files changed, 71 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ae662567a6cb..0ea29270d7e5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1030,9 +1030,15 @@ static void __net_exit dccp_v4_exit_net(struct net *net) inet_ctl_sock_destroy(pn->v4_ctl_sk); } +static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&dccp_hashinfo, AF_INET); +} + static struct pernet_operations dccp_v4_ops = { .init = dccp_v4_init_net, .exit = dccp_v4_exit_net, + .exit_batch = dccp_v4_exit_batch, .id = &dccp_v4_pernet_id, .size = sizeof(struct dccp_v4_pernet), }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index eab3bd1ee9a0..fa663518fa0e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1115,9 +1115,15 @@ static void __net_exit dccp_v6_exit_net(struct net *net) inet_ctl_sock_destroy(pn->v6_ctl_sk); } +static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&dccp_hashinfo, AF_INET6); +} + static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, + .exit_batch = dccp_v6_exit_batch, .id = &dccp_v6_pernet_id, .size = sizeof(struct dccp_v6_pernet), }; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 9e0bbd026560..0ec501845cb3 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) spin_unlock(lock); /* Disassociate with bind bucket. */ - bhead = &hashinfo->bhash[tw->tw_bslot]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); inet_twsk_bind_unhash(tw, hashinfo); @@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - /* Cache inet_bhashfn(), because 'struct net' might be no longer - * available later in inet_twsk_kill(). - */ - tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num, - hashinfo->bhash_size); - bhead = &hashinfo->bhash[tw->tw_bslot]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); @@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); + +void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) +{ + struct inet_timewait_sock *tw; + struct sock *sk; + struct hlist_nulls_node *node; + unsigned int slot; + + for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; +restart_rcu: + cond_resched(); + rcu_read_lock(); +restart: + sk_nulls_for_each_rcu(sk, node, &head->chain) { + if (sk->sk_state != TCP_TIME_WAIT) + continue; + tw = inet_twsk(sk); + if ((tw->tw_family != family) || + refcount_read(&twsk_net(tw)->ns.count)) + continue; + + if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) + continue; + + if (unlikely((tw->tw_family != family) || + refcount_read(&twsk_net(tw)->ns.count))) { + inet_twsk_put(tw); + goto restart; + } + + rcu_read_unlock(); + local_bh_disable(); + inet_twsk_deschedule_put(tw); + local_bh_enable(); + goto restart_rcu; + } + /* If the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto restart; + rcu_read_unlock(); + } +} +EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f9cec624068d..457f5b5d5d4a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3173,6 +3173,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; + inet_twsk_purge(&tcp_hashinfo, AF_INET); + list_for_each_entry(net, net_exit_list, exit_list) tcp_fastopen_ctx_destroy(net); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 13678d3908fa..faaddaf43c90 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2207,9 +2207,15 @@ static void __net_exit tcpv6_net_exit(struct net *net) inet_ctl_sock_destroy(net->ipv6.tcp_sk); } +static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&tcp_hashinfo, AF_INET6); +} + static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, + .exit_batch = tcpv6_net_exit_batch, }; int __init tcpv6_init(void) -- cgit v1.2.3