summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/net/inet_connection_sock.h3
-rw-r--r--include/net/inet_hashtables.h80
-rw-r--r--include/net/sock.h14
3 files changed, 95 insertions, 2 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index ee88f0f1350f..c2b15f7e5516 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -25,6 +25,7 @@
#undef INET_CSK_CLEAR_TIMERS
struct inet_bind_bucket;
+struct inet_bind2_bucket;
struct tcp_congestion_ops;
/*
@@ -57,6 +58,7 @@ struct inet_connection_sock_af_ops {
*
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
+ * @icsk_bind2_hash: Bind node in the bhash2 table
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
@@ -83,6 +85,7 @@ struct inet_connection_sock {
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
+ struct inet_bind2_bucket *icsk_bind2_hash;
unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index e9cf2157ed8a..44a419b9e3d5 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -23,6 +23,7 @@
#include <net/inet_connection_sock.h>
#include <net/inet_sock.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/route.h>
#include <net/tcp_states.h>
@@ -90,7 +91,28 @@ struct inet_bind_bucket {
struct hlist_head owners;
};
-static inline struct net *ib_net(struct inet_bind_bucket *ib)
+struct inet_bind2_bucket {
+ possible_net_t ib_net;
+ int l3mdev;
+ unsigned short port;
+ union {
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr v6_rcv_saddr;
+#endif
+ __be32 rcv_saddr;
+ };
+ /* Node in the bhash2 inet_bind_hashbucket chain */
+ struct hlist_node node;
+ /* List of sockets hashed to this bucket */
+ struct hlist_head owners;
+};
+
+static inline struct net *ib_net(const struct inet_bind_bucket *ib)
+{
+ return read_pnet(&ib->ib_net);
+}
+
+static inline struct net *ib2_net(const struct inet_bind2_bucket *ib)
{
return read_pnet(&ib->ib_net);
}
@@ -133,7 +155,14 @@ struct inet_hashinfo {
* TCP hash as well as the others for fast bind/connect.
*/
struct kmem_cache *bind_bucket_cachep;
+ /* This bind table is hashed by local port */
struct inet_bind_hashbucket *bhash;
+ struct kmem_cache *bind2_bucket_cachep;
+ /* This bind table is hashed by local port and sk->sk_rcv_saddr (ipv4)
+ * or sk->sk_v6_rcv_saddr (ipv6). This 2nd bind table is used
+ * primarily for expediting bind conflict resolution.
+ */
+ struct inet_bind_hashbucket *bhash2;
unsigned int bhash_size;
/* The 2nd listener table hashed by local port and address */
@@ -182,14 +211,61 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
+bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
+ const struct net *net, unsigned short port,
+ int l3mdev);
+
+struct inet_bind2_bucket *
+inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
+ struct inet_bind_hashbucket *head,
+ unsigned short port, int l3mdev,
+ const struct sock *sk);
+
+void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
+ struct inet_bind2_bucket *tb);
+
+struct inet_bind2_bucket *
+inet_bind2_bucket_find(const struct inet_bind_hashbucket *head,
+ const struct net *net,
+ unsigned short port, int l3mdev,
+ const struct sock *sk);
+
+bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb,
+ const struct net *net, unsigned short port,
+ int l3mdev, const struct sock *sk);
+
static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
const u32 bhash_size)
{
return (lport + net_hash_mix(net)) & (bhash_size - 1);
}
+static inline struct inet_bind_hashbucket *
+inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk,
+ const struct net *net, unsigned short port)
+{
+ u32 hash;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
+ else
+#endif
+ hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
+ return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
+}
+
+struct inet_bind_hashbucket *
+inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port);
+
+/* This should be called whenever a socket's sk_rcv_saddr (ipv4) or
+ * sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's
+ * rcv_saddr field should already have been updated when this is called.
+ */
+int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk);
+
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- const unsigned short snum);
+ struct inet_bind2_bucket *tb2, unsigned short port);
/* Caller must disable local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);
diff --git a/include/net/sock.h b/include/net/sock.h
index d08cfe190a78..ca469980e006 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -348,6 +348,7 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
+ * @sk_bind2_node: bind node in the bhash2 table
*/
struct sock {
/*
@@ -537,6 +538,7 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
+ struct hlist_node sk_bind2_node;
};
enum sk_pacing {
@@ -870,6 +872,16 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_add_head(&sk->sk_bind_node, list);
}
+static inline void __sk_del_bind2_node(struct sock *sk)
+{
+ __hlist_del(&sk->sk_bind2_node);
+}
+
+static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
+{
+ hlist_add_head(&sk->sk_bind2_node, list);
+}
+
#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
@@ -887,6 +899,8 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
+#define sk_for_each_bound_bhash2(__sk, list) \
+ hlist_for_each_entry(__sk, list, sk_bind2_node)
/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset