summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/uapi/linux/bpf.h28
-rw-r--r--net/core/filter.c86
-rw-r--r--samples/bpf/xdp_fwd_kern.c8
3 files changed, 81 insertions, 41 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 59b19b6a40d7..b7db3261c62d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1857,7 +1857,8 @@ union bpf_attr {
* is resolved), the nexthop address is returned in ipv4_dst
* or ipv6_dst based on family, smac is set to mac address of
* egress device, dmac is set to nexthop mac address, rt_metric
- * is set to metric from route (IPv4/IPv6 only).
+ * is set to metric from route (IPv4/IPv6 only), and ifindex
+ * is set to the device index of the nexthop from the FIB lookup.
*
* *plen* argument is the size of the passed in struct.
* *flags* argument can be a combination of one or more of the
@@ -1873,9 +1874,10 @@ union bpf_attr {
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
* Return
- * Egress device index on success, 0 if packet needs to continue
- * up the stack for further processing or a negative error in case
- * of failure.
+ * * < 0 if any input argument is invalid
+ * * 0 on success (packet is forwarded, nexthop neighbor exists)
+ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ * * packet is not forwarded or needs assist from full stack
*
* int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
* Description
@@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args {
#define BPF_FIB_LOOKUP_DIRECT BIT(0)
#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+enum {
+ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
+ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
+ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
+ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
+ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
+ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
+ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
+ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+};
+
struct bpf_fib_lookup {
/* input: network family for lookup (AF_INET, AF_INET6)
* output: network family of egress nexthop
@@ -2625,7 +2639,11 @@ struct bpf_fib_lookup {
/* total length of packet from network header - used for MTU check */
__u16 tot_len;
- __u32 ifindex; /* L3 device index for lookup */
+
+ /* input: L3 device index for lookup
+ * output: device index from FIB lookup
+ */
+ __u32 ifindex;
union {
/* inputs to lookup */
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..0ca6907d7efe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4073,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
+ params->ifindex = dev->ifindex;
- return dev->ifindex;
+ return 0;
}
#endif
@@ -4098,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* verify forwarding is enabled on this interface */
in_dev = __in_dev_get_rcu(dev);
if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
- return 0;
+ return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl4.flowi4_iif = 1;
@@ -4123,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = fib_get_table(net, tbid);
if (unlikely(!tb))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else {
@@ -4135,8 +4136,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
}
- if (err || res.type != RTN_UNICAST)
- return 0;
+ if (err) {
+ /* map fib lookup errors to RTN_ type */
+ if (err == -EINVAL)
+ return BPF_FIB_LKUP_RET_BLACKHOLE;
+ if (err == -EHOSTUNREACH)
+ return BPF_FIB_LKUP_RET_UNREACHABLE;
+ if (err == -EACCES)
+ return BPF_FIB_LKUP_RET_PROHIBIT;
+
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+ }
+
+ if (res.type != RTN_UNICAST)
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
if (res.fi->fib_nhs > 1)
fib_select_path(net, &res, &fl4, NULL);
@@ -4144,19 +4157,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) {
mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
if (params->tot_len > mtu)
- return 0;
+ return BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
nh = &res.fi->fib_nh[res.nh_sel];
/* do not handle lwt encaps right now */
if (nh->nh_lwtstate)
- return 0;
+ return BPF_FIB_LKUP_RET_UNSUPP_LWT;
dev = nh->nh_dev;
- if (unlikely(!dev))
- return 0;
-
if (nh->nh_gw)
params->ipv4_dst = nh->nh_gw;
@@ -4166,10 +4176,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
* rcu_read_lock_bh is not needed here
*/
neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
- if (neigh)
- return bpf_fib_set_fwd_params(params, neigh, dev);
+ if (!neigh)
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
- return 0;
+ return bpf_fib_set_fwd_params(params, neigh, dev);
}
#endif
@@ -4190,7 +4200,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* link local addresses are never forwarded */
if (rt6_need_strict(dst) || rt6_need_strict(src))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev))
@@ -4198,7 +4208,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
idev = __in6_dev_get_safely(dev);
if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
- return 0;
+ return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl6.flowi6_iif = 1;
@@ -4225,7 +4235,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = ipv6_stub->fib6_get_table(net, tbid);
if (unlikely(!tb))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
} else {
@@ -4238,11 +4248,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
}
if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+
+ if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
+ switch (f6i->fib6_type) {
+ case RTN_BLACKHOLE:
+ return BPF_FIB_LKUP_RET_BLACKHOLE;
+ case RTN_UNREACHABLE:
+ return BPF_FIB_LKUP_RET_UNREACHABLE;
+ case RTN_PROHIBIT:
+ return BPF_FIB_LKUP_RET_PROHIBIT;
+ default:
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+ }
+ }
- if (unlikely(f6i->fib6_flags & RTF_REJECT ||
- f6i->fib6_type != RTN_UNICAST))
- return 0;
+ if (f6i->fib6_type != RTN_UNICAST)
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
@@ -4252,11 +4274,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) {
mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
if (params->tot_len > mtu)
- return 0;
+ return BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
if (f6i->fib6_nh.nh_lwtstate)
- return 0;
+ return BPF_FIB_LKUP_RET_UNSUPP_LWT;
if (f6i->fib6_flags & RTF_GATEWAY)
*dst = f6i->fib6_nh.nh_gw;
@@ -4270,10 +4292,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
*/
neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
ndisc_hashfn, dst, dev);
- if (neigh)
- return bpf_fib_set_fwd_params(params, neigh, dev);
+ if (!neigh)
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
- return 0;
+ return bpf_fib_set_fwd_params(params, neigh, dev);
}
#endif
@@ -4315,7 +4337,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
struct net *net = dev_net(skb->dev);
- int index = -EAFNOSUPPORT;
+ int rc = -EAFNOSUPPORT;
if (plen < sizeof(*params))
return -EINVAL;
@@ -4326,25 +4348,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
- index = bpf_ipv4_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv4_fib_lookup(net, params, flags, false);
break;
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- index = bpf_ipv6_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv6_fib_lookup(net, params, flags, false);
break;
#endif
}
- if (index > 0) {
+ if (!rc) {
struct net_device *dev;
- dev = dev_get_by_index_rcu(net, index);
+ dev = dev_get_by_index_rcu(net, params->ifindex);
if (!is_skb_forwardable(dev, skb))
- index = 0;
+ rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
- return index;
+ return rc;
}
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
index 6673cdb9f55c..a7e94e7ff87d 100644
--- a/samples/bpf/xdp_fwd_kern.c
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -48,9 +48,9 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
struct ethhdr *eth = data;
struct ipv6hdr *ip6h;
struct iphdr *iph;
- int out_index;
u16 h_proto;
u64 nh_off;
+ int rc;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
@@ -101,7 +101,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
fib_params.ifindex = ctx->ingress_ifindex;
- out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
+ rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
/* verify egress index has xdp support
* TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with
@@ -109,7 +109,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
* NOTE: without verification that egress index supports XDP
* forwarding packets are dropped.
*/
- if (out_index > 0) {
+ if (rc == 0) {
if (h_proto == htons(ETH_P_IP))
ip_decrease_ttl(iph);
else if (h_proto == htons(ETH_P_IPV6))
@@ -117,7 +117,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
- return bpf_redirect_map(&tx_port, out_index, 0);
+ return bpf_redirect_map(&tx_port, fib_params.ifindex, 0);
}
return XDP_PASS;