summaryrefslogtreecommitdiff
path: root/include/linux/filter.h
diff options
context:
space:
mode:
authorHangbin Liu <liuhangbin@gmail.com>2021-05-19 12:07:45 +0300
committerDaniel Borkmann <daniel@iogearbox.net>2021-05-26 10:46:16 +0300
commite624d4ed4aa8cc3c69d1359b0aaea539203ed266 (patch)
tree985b608e6bbaf30b4f5e2b290165dbff4f534e1d /include/linux/filter.h
parentcb261b594b4108668e00f565184c7c221efe0359 (diff)
downloadlinux-e624d4ed4aa8cc3c69d1359b0aaea539203ed266.tar.xz
xdp: Extend xdp_redirect_map with broadcast support
This patch adds two flags BPF_F_BROADCAST and BPF_F_EXCLUDE_INGRESS to extend xdp_redirect_map for broadcast support. With BPF_F_BROADCAST the packet will be broadcasted to all the interfaces in the map. with BPF_F_EXCLUDE_INGRESS the ingress interface will be excluded when do broadcasting. When getting the devices in dev hash map via dev_map_hash_get_next_key(), there is a possibility that we fall back to the first key when a device was removed. This will duplicate packets on some interfaces. So just walk the whole buckets to avoid this issue. For dev array map, we also walk the whole map to find valid interfaces. Function bpf_clear_redirect_map() was removed in commit ee75aef23afe ("bpf, xdp: Restructure redirect actions"). Add it back as we need to use ri->map again. With test topology: +-------------------+ +-------------------+ | Host A (i40e 10G) | ---------- | eno1(i40e 10G) | +-------------------+ | | | Host B | +-------------------+ | | | Host C (i40e 10G) | ---------- | eno2(i40e 10G) | +-------------------+ | | | +------+ | | veth0 -- | Peer | | | veth1 -- | | | | veth2 -- | NS | | | +------+ | +-------------------+ On Host A: # pktgen/pktgen_sample03_burst_single_flow.sh -i eno1 -d $dst_ip -m $dst_mac -s 64 On Host B(Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz, 128G Memory): Use xdp_redirect_map and xdp_redirect_map_multi in samples/bpf for testing. All the veth peers in the NS have a XDP_DROP program loaded. The forward_map max_entries in xdp_redirect_map_multi is modify to 4. Testing the performance impact on the regular xdp_redirect path with and without patch (to check impact of additional check for broadcast mode): 5.12 rc4 | redirect_map i40e->i40e | 2.0M | 9.7M 5.12 rc4 | redirect_map i40e->veth | 1.7M | 11.8M 5.12 rc4 + patch | redirect_map i40e->i40e | 2.0M | 9.6M 5.12 rc4 + patch | redirect_map i40e->veth | 1.7M | 11.7M Testing the performance when cloning packets with the redirect_map_multi test, using a redirect map size of 4, filled with 1-3 devices: 5.12 rc4 + patch | redirect_map multi i40e->veth (x1) | 1.7M | 11.4M 5.12 rc4 + patch | redirect_map multi i40e->veth (x2) | 1.1M | 4.3M 5.12 rc4 + patch | redirect_map multi i40e->veth (x3) | 0.8M | 2.6M Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Acked-by: Martin KaFai Lau <kafai@fb.com> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> Link: https://lore.kernel.org/bpf/20210519090747.1655268-3-liuhangbin@gmail.com
Diffstat (limited to 'include/linux/filter.h')
-rw-r--r--include/linux/filter.h19
1 files changed, 15 insertions, 4 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9a09547bc7ba..c5ad7df029ed 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -646,6 +646,7 @@ struct bpf_redirect_info {
u32 flags;
u32 tgt_index;
void *tgt_value;
+ struct bpf_map *map;
u32 map_id;
enum bpf_map_type map_type;
u32 kern_flags;
@@ -1464,17 +1465,19 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
-static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
+ u64 flags, const u64 flag_mask,
void *lookup_elem(struct bpf_map *map, u32 key))
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX;
/* Lower bits of the flags are used as return code on lookup failure */
- if (unlikely(flags > XDP_TX))
+ if (unlikely(flags & ~(action_mask | flag_mask)))
return XDP_ABORTED;
ri->tgt_value = lookup_elem(map, ifindex);
- if (unlikely(!ri->tgt_value)) {
+ if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
/* If the lookup fails we want to clear out the state in the
* redirect_info struct completely, so that if an eBPF program
* performs multiple lookups, the last one always takes
@@ -1482,13 +1485,21 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
*/
ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
ri->map_type = BPF_MAP_TYPE_UNSPEC;
- return flags;
+ return flags & action_mask;
}
ri->tgt_index = ifindex;
ri->map_id = map->id;
ri->map_type = map->map_type;
+ if (flags & BPF_F_BROADCAST) {
+ WRITE_ONCE(ri->map, map);
+ ri->flags = flags;
+ } else {
+ WRITE_ONCE(ri->map, NULL);
+ ri->flags = 0;
+ }
+
return XDP_REDIRECT;
}