summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Daniel Borkmann <daniel@iogearbox.net> 2021-10-11 15:12:38 +0300
committer David S. Miller <davem@davemloft.net> 2021-10-12 13:27:47 +0300
commit 7482e3841d520a368426ac196720601687e2dc47 (patch)
tree e64e1deb051e4b2992480af003a7779ce5b2b7f7 /include
parent 2c611ad97a82b51221bb0920cc6cac0b1d4c0e52 (diff)
download linux-7482e3841d520a368426ac196720601687e2dc47.tar.xz
net, neigh: Add NTF_MANAGED flag for managed neighbor entries
Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED flag. The flag then indicates to the kernel that the neighbor entry should be periodically probed in order to keep the entry in NUD_REACHABLE state if possible.

The use case for this targets XDP or tc BPF load-balancers which use the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution for their backends. Given that neighbors cannot be resolved in the fast path, a control plane inserts the L3 (without L2) entries manually into the neighbor table and lets the kernel do the neighbor resolution either on the gateway or on the backend directly in case the latter resides in the same L2. This avoids having to deal with L2 in the control plane and having to rebuild what the kernel already does best anyway.

NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor-table list which gets triggered by the system work queue to periodically call neigh_event_send() to perform the resolution. The implementation allows migration from/to NTF_MANAGED neighbor entries, so that already existing entries can be converted by the control plane if needed. Potentially, we could make the interval for periodically calling neigh_event_send() configurable; right now it's set to DELAY_PROBE_TIME, which is also in line with mlxsw, which has similar driver-internal infrastructure (see commit c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table")). In the future, the latter could possibly reuse the NTF_MANAGED neighbors as well.

Example:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE
  [...]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
Link: https://linuxplumbersconf.org/event/11/contributions/953/
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- include/net/neighbour.h 21
-rw-r--r-- include/uapi/linux/neighbour.h 34
2 files changed, 37 insertions, 18 deletions
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 26d4ada0aea9..e8e48be66755 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -155,6 +155,7 @@ struct neighbour {
int (*output)(struct neighbour *, struct sk_buff *);
const struct neigh_ops *ops;
struct list_head gc_list;
+ struct list_head managed_list;
struct rcu_head rcu;
struct net_device *dev;
u8 primary_key[0];
@@ -216,11 +217,13 @@ struct neigh_table {
int gc_thresh3;
unsigned long last_flush;
struct delayed_work gc_work;
+ struct delayed_work managed_work;
struct timer_list proxy_timer;
struct sk_buff_head proxy_queue;
atomic_t entries;
atomic_t gc_entries;
struct list_head gc_list;
+ struct list_head managed_list;
rwlock_t lock;
unsigned long last_rand;
struct neigh_statistics __percpu *stats;
@@ -250,17 +253,21 @@ static inline void *neighbour_priv(const struct neighbour *n)
}
/* flags for neigh_update() */
-#define NEIGH_UPDATE_F_OVERRIDE 0x00000001
-#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002
-#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004
-#define NEIGH_UPDATE_F_USE 0x10000000
-#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000
-#define NEIGH_UPDATE_F_ISROUTER 0x40000000
-#define NEIGH_UPDATE_F_ADMIN 0x80000000
+#define NEIGH_UPDATE_F_OVERRIDE BIT(0)
+#define NEIGH_UPDATE_F_WEAK_OVERRIDE BIT(1)
+#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER BIT(2)
+#define NEIGH_UPDATE_F_USE BIT(3)
+#define NEIGH_UPDATE_F_MANAGED BIT(4)
+#define NEIGH_UPDATE_F_EXT_LEARNED BIT(5)
+#define NEIGH_UPDATE_F_ISROUTER BIT(6)
+#define NEIGH_UPDATE_F_ADMIN BIT(7)
/* In-kernel representation for NDA_FLAGS_EXT flags: */
#define NTF_OLD_MASK 0xff
#define NTF_EXT_SHIFT 8
+#define NTF_EXT_MASK (NTF_EXT_MANAGED)
+
+#define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT)
extern const struct nla_policy nda_policy[];
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index a80cca141855..db05fb55055e 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -41,14 +41,16 @@ enum {
* Neighbor Cache Entry Flags
*/
-#define NTF_USE 0x01
-#define NTF_SELF 0x02
-#define NTF_MASTER 0x04
-#define NTF_PROXY 0x08 /* == ATF_PUBL */
-#define NTF_EXT_LEARNED 0x10
-#define NTF_OFFLOADED 0x20
-#define NTF_STICKY 0x40
-#define NTF_ROUTER 0x80
+#define NTF_USE (1 << 0)
+#define NTF_SELF (1 << 1)
+#define NTF_MASTER (1 << 2)
+#define NTF_PROXY (1 << 3) /* == ATF_PUBL */
+#define NTF_EXT_LEARNED (1 << 4)
+#define NTF_OFFLOADED (1 << 5)
+#define NTF_STICKY (1 << 6)
+#define NTF_ROUTER (1 << 7)
+/* Extended flags under NDA_FLAGS_EXT: */
+#define NTF_EXT_MANAGED (1 << 0)
/*
* Neighbor Cache Entry States.
@@ -66,12 +68,22 @@ enum {
#define NUD_PERMANENT 0x80
#define NUD_NONE 0x00
-/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
- * and make no address resolution or NUD.
- * NUD_PERMANENT also cannot be deleted by garbage collectors.
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
+ * address resolution or NUD.
+ *
+ * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
+ * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
+ * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
+ * flagged entries explicitly are (which is also consistent with the routing
+ * subsystem).
+ *
* When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
* states don't make sense and thus are ignored. Such entries don't age and
* can roam.
+ *
+ * NTF_EXT_MANAGED flagged neighbor entries are managed by the kernel on behalf
+ * of a user space control plane, and automatically refreshed so that (if
+ * possible) they remain in NUD_REACHABLE state.
*/
struct nda_cacheinfo {