summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-06-29 01:28:03 +0300
committerDavid S. Miller <davem@davemloft.net>2021-06-29 01:28:03 +0300
commite1289cfb634c19b5755452ba03c82aa76c0cfd7c (patch)
treefbe559fefa4b0141b31dc00bc3fbe962741a19f8 /net/core
parent1fd07f33c3ea2b4aa77426f13e8cb91d4f55af8f (diff)
parenta78cae2476812cecaa4a33d0086bbb53986906bc (diff)
downloadlinux-e1289cfb634c19b5755452ba03c82aa76c0cfd7c.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2021-06-28 The following pull-request contains BPF updates for your *net-next* tree. We've added 37 non-merge commits during the last 12 day(s) which contain a total of 56 files changed, 394 insertions(+), 380 deletions(-). The main changes are: 1) XDP driver RCU cleanups, from Toke Høiland-Jørgensen and Paul E. McKenney. 2) Fix bpf_skb_change_proto() IPv4/v6 GSO handling, from Maciej Żenczykowski. 3) Fix false positive kmemleak report for BPF ringbuf alloc, from Rustam Kovhaev. 4) Fix x86 JIT's extable offset calculation for PROBE_LDX NULL, from Ravi Bangoria. 5) Enable libbpf fallback probing with tracing under RHEL7, from Jonathan Edwards. 6) Clean up x86 JIT to remove unused cnt tracking from EMIT macro, from Jiri Olsa. 7) Netlink cleanups for libbpf to please Coverity, from Kumar Kartikeya Dwivedi. 8) Allow to retrieve ancestor cgroup id in tracing programs, from Namhyung Kim. 9) Fix lirc BPF program query to use user-provided prog_cnt, from Sean Young. 10) Add initial libbpf doc including generated kdoc for its API, from Grant Seltzer. 11) Make xdp_rxq_info_unreg_mem_model() more robust, from Jakub Kicinski. 12) Fix up bpfilter startup log-level to info level, from Gary Lin. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/filter.c72
-rw-r--r--net/core/xdp.c11
2 files changed, 43 insertions, 40 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 0b13d8157a8f..d22895caa164 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3235,15 +3235,12 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
return ret;
}
-static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
+static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
{
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
u32 off = skb_mac_header_len(skb);
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
-
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
@@ -3255,21 +3252,11 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* SKB_GSO_TCPV4 needs to be changed into
- * SKB_GSO_TCPV6.
- */
+ /* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */
if (shinfo->gso_type & SKB_GSO_TCPV4) {
shinfo->gso_type &= ~SKB_GSO_TCPV4;
shinfo->gso_type |= SKB_GSO_TCPV6;
}
-
- /* Due to IPv6 header, MSS needs to be downgraded. */
- if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
- skb_decrease_gso_size(shinfo, len_diff);
-
- /* Header must be checked, and gso_segs recomputed. */
- shinfo->gso_type |= SKB_GSO_DODGY;
- shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IPV6);
@@ -3278,15 +3265,12 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
return 0;
}
-static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
+static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
{
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
u32 off = skb_mac_header_len(skb);
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
-
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
@@ -3298,21 +3282,11 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* SKB_GSO_TCPV6 needs to be changed into
- * SKB_GSO_TCPV4.
- */
+ /* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */
if (shinfo->gso_type & SKB_GSO_TCPV6) {
shinfo->gso_type &= ~SKB_GSO_TCPV6;
shinfo->gso_type |= SKB_GSO_TCPV4;
}
-
- /* Due to IPv4 header, MSS can be upgraded. */
- if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
- skb_increase_gso_size(shinfo, len_diff);
-
- /* Header must be checked, and gso_segs recomputed. */
- shinfo->gso_type |= SKB_GSO_DODGY;
- shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IP);
@@ -3321,17 +3295,17 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
return 0;
}
-static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto, u64 flags)
+static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
{
__be16 from_proto = skb->protocol;
if (from_proto == htons(ETH_P_IP) &&
to_proto == htons(ETH_P_IPV6))
- return bpf_skb_proto_4_to_6(skb, flags);
+ return bpf_skb_proto_4_to_6(skb);
if (from_proto == htons(ETH_P_IPV6) &&
to_proto == htons(ETH_P_IP))
- return bpf_skb_proto_6_to_4(skb, flags);
+ return bpf_skb_proto_6_to_4(skb);
return -ENOTSUPP;
}
@@ -3341,7 +3315,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
{
int ret;
- if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO)))
+ if (unlikely(flags))
return -EINVAL;
/* General idea is that this helper does the basic groundwork
@@ -3361,7 +3335,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
* that. For offloads, we mark packet as dodgy, so that headers
* need to be verified first.
*/
- ret = bpf_skb_proto_xlat(skb, proto, flags);
+ ret = bpf_skb_proto_xlat(skb, proto);
bpf_compute_data_pointers(skb);
return ret;
}
@@ -3923,6 +3897,34 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
.arg2_type = ARG_ANYTHING,
};
+/* XDP_REDIRECT works by a three-step process, implemented in the functions
+ * below:
+ *
+ * 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
+ * of the redirect and store it (along with some other metadata) in a per-CPU
+ * struct bpf_redirect_info.
+ *
+ * 2. When the program returns the XDP_REDIRECT return code, the driver will
+ * call xdp_do_redirect() which will use the information in struct
+ * bpf_redirect_info to actually enqueue the frame into a map type-specific
+ * bulk queue structure.
+ *
+ * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
+ * which will flush all the different bulk queues, thus completing the
+ * redirect.
+ *
+ * Pointers to the map entries will be kept around for this whole sequence of
+ * steps, protected by RCU. However, there is no top-level rcu_read_lock() in
+ * the core code; instead, the RCU protection relies on everything happening
+ * inside a single NAPI poll sequence, which means it's between a pair of calls
+ * to local_bh_disable()/local_bh_enable().
+ *
+ * The map entries are marked as __rcu and the map code makes sure to
+ * dereference those pointers with rcu_dereference_check() in a way that works
+ * for both sections that to hold an rcu_read_lock() and sections that are
+ * called from NAPI without a separate rcu_read_lock(). The code below does not
+ * use RCU annotations, but relies on those in the map code.
+ */
void xdp_do_flush(void)
{
__dev_flush();
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 725d20f1b100..cc92ccb38432 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -113,8 +113,13 @@ static void mem_allocator_disconnect(void *allocator)
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
+ int type = xdp_rxq->mem.type;
int id = xdp_rxq->mem.id;
+ /* Reset mem info to defaults */
+ xdp_rxq->mem.id = 0;
+ xdp_rxq->mem.type = 0;
+
if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
WARN(1, "Missing register, driver bug");
return;
@@ -123,7 +128,7 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
if (id == 0)
return;
- if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
+ if (type == MEM_TYPE_PAGE_POOL) {
rcu_read_lock();
xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
page_pool_destroy(xa->page_pool);
@@ -144,10 +149,6 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
xdp_rxq->dev = NULL;
-
- /* Reset mem info to defaults */
- xdp_rxq->mem.id = 0;
- xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);