From 61b7c691c7317529375f90f0a81a331990b1ec1b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 1 Dec 2017 12:52:31 -0800 Subject: inet: Add a 2nd listener hashtable (port+addr) The current listener hashtable is hashed by port only. When a process is listening at many IP addresses with the same port (e.g. [IP1]:443, [IP2]:443... [IPN]:443), the inet[6]_lookup_listener() performance is degraded to a link list. It is prone to syn attack. UDP had a similar issue and a second hashtable was added to resolve it. This patch adds a second hashtable for the listener's sockets. The second hashtable is hashed by port and address. It cannot reuse the existing skc_portaddr_node which is shared with skc_bind_node. TCP listener needs to use skc_bind_node. Instead, this patch adds a hlist_node 'icsk_listen_portaddr_node' to the inet_connection_sock which the listener (like TCP) also belongs to. The new portaddr hashtable may need two lookup (First by IP:PORT. Second by INADDR_ANY:PORT if the IP:PORT is a not found). Hence, it implements a similar cut off as UDP such that it will only consult the new portaddr hashtable if the current port-only hashtable has >10 sk in the link-list. lhash2 and lhash2_mask are added to 'struct inet_hashinfo'. I take this chance to plug a 4 bytes hole. It is done by first moving the existing bind_bucket_cachep up and then add the new (int lhash2_mask, *lhash2) after the existing bhash_size. Signed-off-by: Martin KaFai Lau Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/inet6_hashtables.c | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'net/ipv6/inet6_hashtables.c') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 0d1451381f5c..2febe26de6a1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -125,6 +125,40 @@ static inline int compute_score(struct sock *sk, struct net *net, } /* called with rcu_read_lock() */ +static struct sock *inet6_lhash2_lookup(struct net *net, + struct inet_listen_hashbucket *ilb2, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + const __be16 sport, const struct in6_addr *daddr, + const unsigned short hnum, const int dif, const int sdif) +{ + bool exact_dif = inet6_exact_dif_match(net, skb); + struct inet_connection_sock *icsk; + struct sock *sk, *result = NULL; + int score, hiscore = 0; + u32 phash = 0; + + inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { + sk = (struct sock *)icsk; + score = compute_score(sk, net, hnum, daddr, dif, sdif, + exact_dif); + if (score > hiscore) { + if (sk->sk_reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; + } + result = sk; + hiscore = score; + } + } + + return result; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -135,10 +169,42 @@ struct sock *inet6_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; bool exact_dif = inet6_exact_dif_match(net, skb); + struct inet_listen_hashbucket *ilb2; struct sock *sk, *result = NULL; int score, hiscore = 0; + unsigned int hash2; u32 phash = 0; + if (ilb->count <= 10 || !hashinfo->lhash2) + goto port_lookup; + + /* Too many sk in the ilb bucket (which is hashed by port alone). + * Try lhash2 (which is hashed by port and addr) instead. + */ + + hash2 = ipv6_portaddr_hash(net, daddr, hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + result = inet6_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + if (result) + return result; + + /* Lookup lhash2 with in6addr_any */ + + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + return inet6_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + +port_lookup: sk_for_each(sk, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { -- cgit v1.2.3