-rw-r--r--  drivers/net/veth.c                                             |   6
-rw-r--r--  drivers/net/virtio_net.c                                       |   2
-rw-r--r--  include/linux/bpf.h                                            |  12
-rw-r--r--  kernel/bpf/btf.c                                               |  16
-rw-r--r--  kernel/bpf/devmap.c                                            |  29
-rw-r--r--  kernel/bpf/dispatcher.c                                        |   4
-rw-r--r--  kernel/bpf/inode.c                                             |   3
-rw-r--r--  kernel/bpf/trampoline.c                                        |  80
-rw-r--r--  kernel/extable.c                                               |   7
-rw-r--r--  net/core/flow_dissector.c                                      |  11
-rw-r--r--  tools/bpf/bpftool/btf.c                                        |  27
-rw-r--r--  tools/bpf/runqslower/Makefile                                  |   6
-rw-r--r--  tools/lib/bpf/libbpf.c                                         |  99
-rw-r--r--  tools/lib/bpf/libbpf.h                                         |   6
-rw-r--r--  tools/testing/selftests/bpf/Makefile                           |  11
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fentry_test.c           |   2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c         |   2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fexit_test.c            |   2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/select_reuseport.c      |  44
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/trampoline_count.c      | 112
-rw-r--r--  tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c |   6
-rw-r--r--  tools/testing/selftests/bpf/progs/test_trampoline_count.c      |  21
-rwxr-xr-x  tools/testing/selftests/bpf/test_flow_dissector.sh             |  14
-rw-r--r--  tools/testing/selftests/bpf/test_sockmap.c                     |  15
24 files changed, 433 insertions, 104 deletions
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 1c89017beebb..8cdc4415fa70 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -377,6 +377,7 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
unsigned int max_len;
struct veth_rq *rq;
+ rcu_read_lock();
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
ret = -EINVAL;
goto drop;
@@ -418,11 +419,14 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
if (flags & XDP_XMIT_FLUSH)
__veth_xdp_flush(rq);
- if (likely(!drops))
+ if (likely(!drops)) {
+ rcu_read_unlock();
return n;
+ }
ret = n - drops;
drop:
+ rcu_read_unlock();
atomic64_add(drops, &priv->dropped);
return ret;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c458cd313281..2fe7a3188282 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -501,7 +501,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicates XDP resources have been successfully allocated.
*/
- xdp_prog = rcu_dereference(rq->xdp_prog);
+ xdp_prog = rcu_access_pointer(rq->xdp_prog);
if (!xdp_prog)
return -ENXIO;
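The rcu_dereference() to rcu_access_pointer() switch above matches how the pointer is used: virtnet_xdp_xmit() only tests whether an XDP program is attached and never dereferences the result, so no RCU read-side section needs to be implied. A minimal sketch of the distinction, using a hypothetical RCU-managed 'cfg' pointer rather than the virtio code:

	/* rcu_access_pointer(): only the pointer value is inspected, never
	 * dereferenced, so it is legal outside rcu_read_lock() and avoids a
	 * false lockdep dependency. rcu_dereference(): the pointee will be
	 * touched, so it must sit inside an RCU read-side critical section.
	 */
	if (!rcu_access_pointer(dev->cfg))	/* NULL test only */
		return -ENXIO;

	rcu_read_lock();
	cfg = rcu_dereference(dev->cfg);	/* dereference needs protection */
	if (cfg)
		mtu = cfg->mtu;
	rcu_read_unlock();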
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a9687861fd7e..8e9ad3943cd9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
int bpf_trampoline_link_prog(struct bpf_prog *prog);
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
void bpf_trampoline_put(struct bpf_trampoline *tr);
-void *bpf_jit_alloc_exec_page(void);
#define BPF_DISPATCHER_INIT(name) { \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
.func = &name##func, \
@@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void);
#define BPF_DISPATCHER_PTR(name) (&name)
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
+struct bpf_image {
+ struct latch_tree_node tnode;
+ unsigned char data[];
+};
+#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
+bool is_bpf_image_address(unsigned long address);
+void *bpf_image_alloc(void);
#else
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
@@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
struct bpf_prog *from,
struct bpf_prog *to) {}
+static inline bool is_bpf_image_address(unsigned long address)
+{
+ return false;
+}
#endif
struct bpf_func_info_aux {
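For orientation, the new struct bpf_image puts a latch_tree_node header at the front of the executable page and hands callers the trailing data[] area, which is why BPF_IMAGE_SIZE is PAGE_SIZE minus the header. A sketch of the implied layout and the container_of() round trip (the same recovery trampoline.c performs below):

	/* Page layout implied by struct bpf_image:
	 *
	 *   page start  -> struct latch_tree_node tnode   (tree linkage)
	 *   image->data -> BPF_IMAGE_SIZE bytes           (JITed code)
	 *   page end
	 */
	void *data = bpf_image_alloc();		/* callers see only image->data */
	struct bpf_image *image = container_of(data, struct bpf_image, data);
	/* 'image' now points back at the page start, e.g. for freeing */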
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 32963b6d5a9c..b7c1660fb594 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3669,6 +3669,19 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
}
}
+static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
+{
+ /* t comes in already as a pointer */
+ t = btf_type_by_id(btf, t->type);
+
+ /* allow const */
+ if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
+ t = btf_type_by_id(btf, t->type);
+
+ /* char, signed char, unsigned char */
+ return btf_type_is_int(t) && t->size == 1;
+}
+
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
@@ -3735,6 +3748,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
*/
return true;
+ if (is_string_ptr(btf, t))
+ return true;
+
/* this is a pointer to another type */
info->reg_type = PTR_TO_BTF_ID;
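is_string_ptr() lets btf_ctx_access() treat a char-pointer argument as a readable scalar instead of forcing PTR_TO_BTF_ID, so tracing programs can hand such arguments to string helpers. A hedged sketch of a consumer (hypothetical program; assumes the usual bpf_helpers.h/bpf_trace_helpers.h macros and the bpf_probe_read_kernel_str() helper):

	SEC("fentry/__set_task_comm")
	int BPF_PROG(log_comm, struct task_struct *tsk, const char *buf, bool exec)
	{
		char name[16] = {};

		/* 'buf' is a const char * ctx argument admitted by is_string_ptr() */
		bpf_probe_read_kernel_str(name, sizeof(name), buf);
		bpf_printk("comm: %s\n", name);
		return 0;
	}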
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index de630f980282..58bdca5d978a 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -190,10 +190,12 @@ static void dev_map_free(struct bpf_map *map)
/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
* so the programs (can be more than one that used this map) were
- * disconnected from events. Wait for outstanding critical sections in
- * these programs to complete. The rcu critical section only guarantees
- * no further reads against netdev_map. It does __not__ ensure pending
- * flush operations (if any) are complete.
+ * disconnected from events. The following synchronize_rcu() guarantees
+ * that any outstanding rcu read critical sections have completed and
+ * also waits for preempt-disable regions (NAPI being the relevant
+ * context here), so we are certain there will be no further reads
+ * against the netdev_map and all flush operations are complete. Flush
+ * operations can only be done from NAPI context for this reason.
*/
spin_lock(&dev_map_lock);
@@ -263,7 +265,8 @@ struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
struct hlist_head *head = dev_map_index_hash(dtab, key);
struct bpf_dtab_netdev *dev;
- hlist_for_each_entry_rcu(dev, head, index_hlist)
+ hlist_for_each_entry_rcu(dev, head, index_hlist,
+ lockdep_is_held(&dtab->index_lock))
if (dev->idx == key)
return dev;
@@ -363,16 +366,17 @@ error:
* from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
* net device can be torn down. On devmap tear down we ensure the flush list
* is empty before completing to ensure all flush operations have completed.
+ * When drivers update the bpf program they may need to ensure any flush ops
+ * are also complete. Using synchronize_rcu() or call_rcu() will suffice for
+ * this because both wait for NAPI context to exit.
*/
void __dev_flush(void)
{
struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
struct xdp_dev_bulk_queue *bq, *tmp;
- rcu_read_lock();
list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
bq_xmit_all(bq, XDP_XMIT_FLUSH);
- rcu_read_unlock();
}
/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
@@ -502,12 +506,11 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
/* Use call_rcu() here to ensure any rcu critical sections have
- * completed, but this does not guarantee a flush has happened
- * yet. Because driver side rcu_read_lock/unlock only protects the
- * running XDP program. However, for pending flush operations the
- * dev and ctx are stored in another per cpu map. And additionally,
- * the driver tear down ensures all soft irqs are complete before
- * removing the net device in the case of dev_put equals zero.
+ * completed, as well as any flush operations, because call_rcu()
+ * waits for the preempt-disable region (NAPI in this context) to
+ * complete. Additionally, the driver tear down ensures all soft
+ * irqs are complete before the net device is removed in the case
+ * where dev_put() drops the reference count to zero.
*/
old_dev = xchg(&dtab->netdev_map[k], NULL);
if (old_dev)
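The RCU reasoning in both comments leans on flushes running only from NAPI, i.e. inside a preempt-disabled softirq region that mainline RCU counts as a read-side critical section. A sketch of the driver-side pattern this assumes (hypothetical poll function, not taken from any driver here):

	static int example_napi_poll(struct napi_struct *napi, int budget)
	{
		int work = 0;

		/* ... run XDP, possibly bq_enqueue()ing frames toward a devmap ... */

		xdp_do_flush_map();		/* drains this CPU's flush list */

		if (work < budget)
			napi_complete_done(napi, work);
		return work;			/* in softirq context throughout */
	}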
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index 204ee61a3904..b3e5b214fed8 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
noff = 0;
} else {
old = d->image + d->image_off;
- noff = d->image_off ^ (PAGE_SIZE / 2);
+ noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
}
new = d->num_progs ? d->image + noff : NULL;
@@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
mutex_lock(&d->mutex);
if (!d->image) {
- d->image = bpf_jit_alloc_exec_page();
+ d->image = bpf_image_alloc();
if (!d->image)
goto out;
}
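The dispatcher keeps two images in one allocation and assembles updates in the inactive half before switching, so the constant change simply keeps both halves inside the data[] area that follows the new latch-tree header. A sketch of the XOR toggle:

	/* image_off is either 0 or BPF_IMAGE_SIZE / 2; XOR flips to the other
	 * half, where the next dispatcher image is written while the current
	 * one stays live.
	 */
	noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);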
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index e11059b99f18..bd2fd8eab470 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -196,6 +196,7 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
void *key = map_iter(m)->key;
void *prev_key;
+ (*pos)++;
if (map_iter(m)->done)
return NULL;
@@ -208,8 +209,6 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
map_iter(m)->done = true;
return NULL;
}
-
- ++(*pos);
return key;
}
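Moving the (*pos)++ to the top makes map_seq_next() follow the seq_file contract: ->next must advance the position on every call, including the one that ends iteration, otherwise the core can call back into the iterator at the same position and duplicate the last element. A sketch of the general shape (hypothetical lookup helper):

	static void *example_seq_next(struct seq_file *m, void *v, loff_t *pos)
	{
		(*pos)++;			/* always advance, even at EOF */
		return example_lookup(m, v);	/* NULL terminates the walk */
	}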
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index eb64c245052b..6b264a92064b 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
+#include <linux/rbtree_latch.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
+static struct latch_tree_root image_tree __cacheline_aligned;
-/* serializes access to trampoline_table */
+/* serializes access to trampoline_table and image_tree */
static DEFINE_MUTEX(trampoline_mutex);
-void *bpf_jit_alloc_exec_page(void)
+static void *bpf_jit_alloc_exec_page(void)
{
void *image;
@@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void)
return image;
}
+static __always_inline bool image_tree_less(struct latch_tree_node *a,
+ struct latch_tree_node *b)
+{
+ struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
+ struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
+
+ return ia < ib;
+}
+
+static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
+{
+ void *image = container_of(n, struct bpf_image, tnode);
+
+ if (addr < image)
+ return -1;
+ if (addr >= image + PAGE_SIZE)
+ return 1;
+
+ return 0;
+}
+
+static const struct latch_tree_ops image_tree_ops = {
+ .less = image_tree_less,
+ .comp = image_tree_comp,
+};
+
+static void *__bpf_image_alloc(bool lock)
+{
+ struct bpf_image *image;
+
+ image = bpf_jit_alloc_exec_page();
+ if (!image)
+ return NULL;
+
+ if (lock)
+ mutex_lock(&trampoline_mutex);
+ latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
+ if (lock)
+ mutex_unlock(&trampoline_mutex);
+ return image->data;
+}
+
+void *bpf_image_alloc(void)
+{
+ return __bpf_image_alloc(true);
+}
+
+bool is_bpf_image_address(unsigned long addr)
+{
+ bool ret;
+
+ rcu_read_lock();
+ ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
+ rcu_read_unlock();
+
+ return ret;
+}
+
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
struct bpf_trampoline *tr;
@@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
goto out;
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
- image = bpf_jit_alloc_exec_page();
+ image = __bpf_image_alloc(false);
if (!image) {
kfree(tr);
tr = NULL;
@@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
}
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86. Pick a number to fit into PAGE_SIZE / 2
+ * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
*/
#define BPF_MAX_TRAMP_PROGS 40
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
- void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
- void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+ void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
+ void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
*/
synchronize_rcu_tasks();
- err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+ err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
&tr->func.model, flags,
fentry, fentry_cnt,
fexit, fexit_cnt,
@@ -284,6 +344,8 @@ out:
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
+ struct bpf_image *image;
+
if (!tr)
return;
mutex_lock(&trampoline_mutex);
@@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
goto out;
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
goto out;
+ image = container_of(tr->image, struct bpf_image, data);
+ latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
/* wait for tasks to get out of trampoline before freeing it */
synchronize_rcu_tasks();
- bpf_jit_free_exec(tr->image);
+ bpf_jit_free_exec(image);
hlist_del(&tr->hlist);
kfree(tr);
out:
diff --git a/kernel/extable.c b/kernel/extable.c
index f6920a11e28a..a0024f27d3a1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr)
* triggers a stack trace, or a WARN() that happens during
* coming back from idle, or cpu on or offlining.
*
- * is_module_text_address() as well as the kprobe slots
- * and is_bpf_text_address() require RCU to be watching.
+ * is_module_text_address() as well as the kprobe slots,
+ * is_bpf_text_address() and is_bpf_image_address() require
+ * RCU to be watching.
*/
no_rcu = !rcu_is_watching();
@@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr)
goto out;
if (is_bpf_text_address(addr))
goto out;
+ if (is_bpf_image_address(addr))
+ goto out;
ret = 0;
out:
if (no_rcu)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index f560b4902060..a1670dff0629 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -834,10 +834,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
struct flow_dissector *flow_dissector,
void *target_container)
{
+ struct flow_dissector_key_ports *key_ports = NULL;
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
- struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_tags *key_tags;
key_control = skb_flow_dissector_target(flow_dissector,
@@ -876,10 +876,17 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
- if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
+ else if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE))
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE,
+ target_container);
+
+ if (key_ports) {
key_ports->src = flow_keys->sport;
key_ports->dst = flow_keys->dport;
}
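On the BPF side nothing changes: a flow-dissector program still fills one sport/dport pair in struct bpf_flow_keys, and __skb_flow_bpf_to_target() now routes that pair into whichever ports key the caller requested, trying FLOW_DISSECTOR_KEY_PORTS first and falling back to the range key. A hedged sketch of the program side (hypothetical, header parsing elided):

	SEC("flow_dissector")
	int dissect(struct __sk_buff *skb)
	{
		struct bpf_flow_keys *keys = skb->flow_keys;
		__be16 sport = 0, dport = 0;

		/* ... parse L3/L4 headers into sport/dport ... */

		keys->sport = sport;	/* consumed by either ports key above */
		keys->dport = dport;
		return BPF_OK;
	}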
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 4ba90d81b6a1..b3745ed711ba 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -77,6 +77,20 @@ static const char *btf_var_linkage_str(__u32 linkage)
}
}
+static const char *btf_func_linkage_str(const struct btf_type *t)
+{
+ switch (btf_vlen(t)) {
+ case BTF_FUNC_STATIC:
+ return "static";
+ case BTF_FUNC_GLOBAL:
+ return "global";
+ case BTF_FUNC_EXTERN:
+ return "extern";
+ default:
+ return "(unknown)";
+ }
+}
+
static const char *btf_str(const struct btf *btf, __u32 off)
{
if (!off)
@@ -231,12 +245,17 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
printf(" fwd_kind=%s", fwd_kind);
break;
}
- case BTF_KIND_FUNC:
- if (json_output)
+ case BTF_KIND_FUNC: {
+ const char *linkage = btf_func_linkage_str(t);
+
+ if (json_output) {
jsonw_uint_field(w, "type_id", t->type);
- else
- printf(" type_id=%u", t->type);
+ jsonw_string_field(w, "linkage", linkage);
+ } else {
+ printf(" type_id=%u linkage=%s", t->type, linkage);
+ }
break;
+ }
case BTF_KIND_FUNC_PROTO: {
const struct btf_param *p = (const void *)(t + 1);
__u16 vlen = BTF_INFO_VLEN(t->info);
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index faf5418609ea..0c021352beed 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
OUTPUT := .output
-CLANG := clang
-LLC := llc
-LLVM_STRIP := llvm-strip
+CLANG ?= clang
+LLC ?= llc
+LLVM_STRIP ?= llvm-strip
DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
LIBBPF_SRC := $(abspath ../../lib/bpf)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ae34b681ae82..514b1a524abb 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -345,7 +345,6 @@ struct bpf_object {
bool loaded;
bool has_pseudo_calls;
- bool relaxed_core_relocs;
/*
* Information when doing elf related work. Only valid if fd
@@ -3870,7 +3869,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
if (strncmp(local_name, targ_name, local_essent_len) == 0) {
pr_debug("[%d] %s: found candidate [%d] %s\n",
local_type_id, local_name, i, targ_name);
- new_ids = realloc(cand_ids->data, cand_ids->len + 1);
+ new_ids = reallocarray(cand_ids->data,
+ cand_ids->len + 1,
+ sizeof(*cand_ids->data));
if (!new_ids) {
err = -ENOMEM;
goto err_out;
@@ -4238,25 +4239,38 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
*/
static int bpf_core_reloc_insn(struct bpf_program *prog,
const struct bpf_field_reloc *relo,
+ int relo_idx,
const struct bpf_core_spec *local_spec,
const struct bpf_core_spec *targ_spec)
{
- bool failed = false, validate = true;
__u32 orig_val, new_val;
struct bpf_insn *insn;
+ bool validate = true;
int insn_idx, err;
__u8 class;
if (relo->insn_off % sizeof(struct bpf_insn))
return -EINVAL;
insn_idx = relo->insn_off / sizeof(struct bpf_insn);
+ insn = &prog->insns[insn_idx];
+ class = BPF_CLASS(insn->code);
if (relo->kind == BPF_FIELD_EXISTS) {
orig_val = 1; /* can't generate EXISTS relo w/o local field */
new_val = targ_spec ? 1 : 0;
} else if (!targ_spec) {
- failed = true;
- new_val = (__u32)-1;
+ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+ bpf_program__title(prog, false), relo_idx, insn_idx);
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not dead code), the
+ * verifier will complain with the following message:
+ * invalid func unknown#195896080
+ */
+ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+ return 0;
} else {
err = bpf_core_calc_field_relo(prog, relo, local_spec,
&orig_val, &validate);
@@ -4268,50 +4282,47 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
return err;
}
- insn = &prog->insns[insn_idx];
- class = BPF_CLASS(insn->code);
-
switch (class) {
case BPF_ALU:
case BPF_ALU64:
if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
- if (!failed && validate && insn->imm != orig_val) {
- pr_warn("prog '%s': unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
- bpf_program__title(prog, false), insn_idx,
- insn->imm, orig_val, new_val);
+ if (validate && insn->imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+ bpf_program__title(prog, false), relo_idx,
+ insn_idx, insn->imm, orig_val, new_val);
return -EINVAL;
}
orig_val = insn->imm;
insn->imm = new_val;
- pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n",
- bpf_program__title(prog, false), insn_idx,
- failed ? " w/ failed reloc" : "", orig_val, new_val);
+ pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+ bpf_program__title(prog, false), relo_idx, insn_idx,
+ orig_val, new_val);
break;
case BPF_LDX:
case BPF_ST:
case BPF_STX:
- if (!failed && validate && insn->off != orig_val) {
- pr_warn("prog '%s': unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n",
- bpf_program__title(prog, false), insn_idx,
- insn->off, orig_val, new_val);
+ if (validate && insn->off != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n",
+ bpf_program__title(prog, false), relo_idx,
+ insn_idx, insn->off, orig_val, new_val);
return -EINVAL;
}
if (new_val > SHRT_MAX) {
- pr_warn("prog '%s': insn #%d (LD/LDX/ST/STX) value too big: %u\n",
- bpf_program__title(prog, false), insn_idx,
- new_val);
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
+ bpf_program__title(prog, false), relo_idx,
+ insn_idx, new_val);
return -ERANGE;
}
orig_val = insn->off;
insn->off = new_val;
- pr_debug("prog '%s': patched insn #%d (LD/LDX/ST/STX)%s off %u -> %u\n",
- bpf_program__title(prog, false), insn_idx,
- failed ? " w/ failed reloc" : "", orig_val, new_val);
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
+ bpf_program__title(prog, false), relo_idx, insn_idx,
+ orig_val, new_val);
break;
default:
- pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
- bpf_program__title(prog, false),
+ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
+ bpf_program__title(prog, false), relo_idx,
insn_idx, insn->code, insn->src_reg, insn->dst_reg,
insn->off, insn->imm);
return -EINVAL;
@@ -4510,24 +4521,33 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
}
/*
- * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is
- * requested, it's expected that we might not find any candidates.
- * In this case, if field wasn't found in any candidate, the list of
- * candidates shouldn't change at all, we'll just handle relocating
- * appropriately, depending on relo's kind.
+ * For a BPF_FIELD_EXISTS relo, or when the BPF program has its own
+ * field existence checks or kernel version/config checks, it's
+ * expected that we might not find any candidates. In this case, if a
+ * field wasn't found in any candidate, the list of candidates
+ * shouldn't change at all, we'll just handle relocating
+ * appropriately, depending on relo's kind.
*/
if (j > 0)
cand_ids->len = j;
- if (j == 0 && !prog->obj->relaxed_core_relocs &&
- relo->kind != BPF_FIELD_EXISTS) {
- pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
- prog_name, relo_idx, local_id, local_name, spec_str);
- return -ESRCH;
- }
+ /*
+ * If no candidates were found, it might be either a programmer error
+ * or an expected case, depending on whether the instruction with the
+ * relocation is guarded in some way that makes it unreachable (dead
+ * code) if the relocation can't be resolved. This is handled in
+ * bpf_core_reloc_insn() uniformly by replacing that instruction with
+ * a BPF helper call insn (using an invalid helper ID). If that
+ * instruction is indeed unreachable, it will be ignored and eliminated
+ * by the verifier. If it was an error, the verifier will complain and
+ * point to the specific instruction number in its log.
+ */
+ if (j == 0)
+ pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
+ prog_name, relo_idx, local_id, local_name, spec_str);
/* bpf_core_reloc_insn should know how to handle missing targ_spec */
- err = bpf_core_reloc_insn(prog, relo, &local_spec,
+ err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec,
j ? &targ_spec : NULL);
if (err) {
pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
@@ -5057,7 +5077,6 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
if (IS_ERR(obj))
return obj;
- obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
kconfig = OPTS_GET(opts, kconfig, NULL);
if (kconfig) {
obj->kconfig = strdup(kconfig);
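The poison value above (0xbad2310, rendered by the verifier as invalid func unknown#195896080) is what makes "always non-strict" safe: a program that guards the access gets the poisoned instruction dead-code-eliminated, while an unguarded use fails verification with a pointer to the exact insn. A sketch of the guarded pattern (assumes bpf_core_read.h's bpf_core_field_exists() and BPF_CORE_READ(); the field choice is illustrative):

	static __always_inline int read_tgid(struct task_struct *task)
	{
		/* relocation guard: this branch is provably dead on kernels
		 * where the field is absent, so the poisoned insn never runs
		 */
		if (bpf_core_field_exists(task->tgid))
			return BPF_CORE_READ(task, tgid);
		return -1;
	}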
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 2a5e3b087002..3fe12c9d1f92 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -77,7 +77,11 @@ struct bpf_object_open_opts {
const char *object_name;
/* parse map definitions non-strictly, allowing extra attributes/data */
bool relaxed_maps;
- /* process CO-RE relocations non-strictly, allowing them to fail */
+ /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures.
+ * Value is ignored. Relocations are always processed non-strictly.
+ * Non-relocatable instructions are replaced with invalid ones to
+ * prevent accidental errors.
+ */
bool relaxed_core_relocs;
/* maps that set the 'pinning' attribute in their definition will have
* their pin_path attribute set to a file in this directory, and be
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 5f41f5bd8033..257a1aaaa37d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -155,16 +155,17 @@ $(OUTPUT)/test_sysctl: cgroup_helpers.c
DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
-$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BUILD_DIR)/bpftool
- $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
- OUTPUT=$(BUILD_DIR)/bpftool/ \
+$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
+ $(BPFOBJ) | $(BUILD_DIR)/bpftool
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
+ OUTPUT=$(BUILD_DIR)/bpftool/ \
prefix= DESTDIR=$(SCRATCH_DIR)/ install
-$(BPFOBJ): $(wildcard $(BPFDIR)/*.c $(BPFDIR)/*.h $(BPFDIR)/Makefile) \
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
| $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
- DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
$(call msg,MKDIR,,$@)
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index e1a379f5f7d2..5cc06021f27d 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -9,7 +9,7 @@ void test_fentry_test(void)
struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
int err, pkt_fd, i;
- __u32 duration, retval;
+ __u32 duration = 0, retval;
__u64 *result;
pkt_skel = test_pkt_access__open_and_load();
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index db5c74d2ce6d..cde463af7071 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -11,7 +11,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
int err, pkt_fd, i;
struct bpf_link **link = NULL;
struct bpf_program **prog = NULL;
- __u32 duration, retval;
+ __u32 duration = 0, retval;
struct bpf_map *data_map;
const int zero = 0;
u64 *result = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index f99013222c74..d2c3655dd7a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -13,7 +13,7 @@ void test_fexit_test(void)
int err, pkt_fd, kfree_skb_fd, i;
struct bpf_link *link[6] = {};
struct bpf_program *prog[6];
- __u32 duration, retval;
+ __u32 duration = 0, retval;
struct bpf_map *data_map;
const int zero = 0;
u64 result[6];
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 2c37ae7dc214..098bcae5f827 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -33,7 +33,7 @@
#define REUSEPORT_ARRAY_SIZE 32
static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
-static enum result expected_results[NR_RESULTS];
+static __u32 expected_results[NR_RESULTS];
static int sk_fds[REUSEPORT_ARRAY_SIZE];
static int reuseport_array = -1, outer_map = -1;
static int select_by_skb_data_prog;
@@ -316,6 +316,26 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
expected.len, result.len, get_linum());
}
+static const char *result_to_str(enum result res)
+{
+ switch (res) {
+ case DROP_ERR_INNER_MAP:
+ return "DROP_ERR_INNER_MAP";
+ case DROP_ERR_SKB_DATA:
+ return "DROP_ERR_SKB_DATA";
+ case DROP_ERR_SK_SELECT_REUSEPORT:
+ return "DROP_ERR_SK_SELECT_REUSEPORT";
+ case DROP_MISC:
+ return "DROP_MISC";
+ case PASS:
+ return "PASS";
+ case PASS_ERR_SK_SELECT_REUSEPORT:
+ return "PASS_ERR_SK_SELECT_REUSEPORT";
+ default:
+ return "UNKNOWN";
+ }
+}
+
static void check_results(void)
{
__u32 results[NR_RESULTS];
@@ -351,10 +371,10 @@ static void check_results(void)
printf(", %u", expected_results[i]);
printf("]\n");
- RET_IF(expected_results[broken] != results[broken],
- "unexpected result",
- "expected_results[%u] != results[%u] bpf_prog_linum:%ld\n",
- broken, broken, get_linum());
+ printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken),
+ get_linum());
+
+ CHECK_FAIL(true);
}
static int send_data(int type, sa_family_t family, void *data, size_t len,
@@ -677,7 +697,19 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
static void cleanup_per_test(bool no_inner_map)
{
- int i, err;
+ int i, err, zero = 0;
+
+ memset(expected_results, 0, sizeof(expected_results));
+
+ for (i = 0; i < NR_RESULTS; i++) {
+ err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
+ RET_IF(err, "reset elem in result_map",
+ "i:%u err:%d errno:%d\n", i, err, errno);
+ }
+
+ err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
+ RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n",
+ err, errno);
for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
close(sk_fds[i]);
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
new file mode 100644
index 000000000000..1235f3d1cc50
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/prctl.h>
+#include <test_progs.h>
+
+#define MAX_TRAMP_PROGS 40
+
+struct inst {
+ struct bpf_object *obj;
+ struct bpf_link *link_fentry;
+ struct bpf_link *link_fexit;
+};
+
+static int test_task_rename(void)
+{
+ int fd, duration = 0, err;
+ char buf[] = "test_overhead";
+
+ fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+ if (CHECK(fd < 0, "open /proc", "err %d", errno))
+ return -1;
+ err = write(fd, buf, sizeof(buf));
+ if (err < 0) {
+ CHECK(err < 0, "task rename", "err %d", errno);
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ return 0;
+}
+
+static struct bpf_link *load(struct bpf_object *obj, const char *name)
+{
+ struct bpf_program *prog;
+ int duration = 0;
+
+ prog = bpf_object__find_program_by_title(obj, name);
+ if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
+ return ERR_PTR(-EINVAL);
+ return bpf_program__attach_trace(prog);
+}
+
+void test_trampoline_count(void)
+{
+ const char *fentry_name = "fentry/__set_task_comm";
+ const char *fexit_name = "fexit/__set_task_comm";
+ const char *object = "test_trampoline_count.o";
+ struct inst inst[MAX_TRAMP_PROGS] = { 0 };
+ int err, i = 0, duration = 0;
+ struct bpf_object *obj;
+ struct bpf_link *link;
+ char comm[16] = {};
+
+ /* attach 'allowed' 40 trampoline programs */
+ for (i = 0; i < MAX_TRAMP_PROGS; i++) {
+ obj = bpf_object__open_file(object, NULL);
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ goto cleanup;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d\n", err))
+ goto cleanup;
+ inst[i].obj = obj;
+
+ if (rand() % 2) {
+ link = load(obj, fentry_name);
+ if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ inst[i].link_fentry = link;
+ } else {
+ link = load(obj, fexit_name);
+ if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ inst[i].link_fexit = link;
+ }
+ }
+
+ /* and try 1 extra.. */
+ obj = bpf_object__open_file(object, NULL);
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ goto cleanup;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d\n", err))
+ goto cleanup_extra;
+
+ /* ..that needs to fail */
+ link = load(obj, fentry_name);
+ if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
+ bpf_link__destroy(link);
+ goto cleanup_extra;
+ }
+
+ /* with E2BIG error */
+ CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+
+ /* and finally execute the probe */
+ if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
+ goto cleanup_extra;
+ CHECK_FAIL(test_task_rename());
+ CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));
+
+cleanup_extra:
+ bpf_object__close(obj);
+cleanup:
+ while (--i >= 0) {
+ bpf_link__destroy(inst[i].link_fentry);
+ bpf_link__destroy(inst[i].link_fexit);
+ bpf_object__close(inst[i].obj);
+ }
+}
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index d69a1f2bbbfd..26e77dcc7e91 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -113,6 +113,12 @@ int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
data_check.skb_ports[0] = th->source;
data_check.skb_ports[1] = th->dest;
+ if (th->fin)
+ /* The connection is being torn down at the end of a
+ * test. It can't contain a cmd, so return early.
+ */
+ return SK_PASS;
+
if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
GOTO_DONE(DROP_ERR_SKB_DATA);
if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
new file mode 100644
index 000000000000..e51e6e3a81c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include "bpf_trace_helpers.h"
+
+struct task_struct;
+
+SEC("fentry/__set_task_comm")
+int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
+{
+ return 0;
+}
+
+SEC("fexit/__set_task_comm")
+int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
index a8485ae103d1..174b72a64a4c 100755
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -139,6 +139,20 @@ echo "Testing IPv4 + GRE..."
tc filter del dev lo ingress pref 1337
+echo "Testing port range..."
+# Drops all IP/UDP packets coming from port 8-10
+tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
+ udp src_port 8-10 action drop
+
+# Send 10 IPv4/UDP packets from port 7. Filter should not drop any.
+./test_flow_dissector -i 4 -f 7
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -f 9 -F
+# Send 10 IPv4/UDP packets from port 11. Filter should not drop any.
+./test_flow_dissector -i 4 -f 11
+
+tc filter del dev lo ingress pref 1337
+
echo "Testing IPv6..."
# Drops all IPv6/UDP packets coming from port 9
tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 4a851513c842..779e11da979c 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -331,7 +331,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
FILE *file;
int i, fp;
- file = fopen(".sendpage_tst.tmp", "w+");
+ file = tmpfile();
if (!file) {
perror("create file for sendpage");
return 1;
@@ -340,13 +340,8 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
fwrite(&k, sizeof(char), 1, file);
fflush(file);
fseek(file, 0, SEEK_SET);
- fclose(file);
- fp = open(".sendpage_tst.tmp", O_RDONLY);
- if (fp < 0) {
- perror("reopen file for sendpage");
- return 1;
- }
+ fp = fileno(file);
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
@@ -354,11 +349,11 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
if (!drop && sent < 0) {
perror("send loop error");
- close(fp);
+ fclose(file);
return sent;
} else if (drop && sent >= 0) {
printf("sendpage loop error expected: %i\n", sent);
- close(fp);
+ fclose(file);
return -EIO;
}
@@ -366,7 +361,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
s->bytes_sent += sent;
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
- close(fp);
+ fclose(file);
return 0;
}
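The tmpfile()/fileno() combination above replaces the create-close-reopen dance and stops the test from littering the tree with .sendpage_tst.tmp: the file is unlinked from birth and vanishes when the single FILE handle is closed. A sketch of the pattern in isolation (hypothetical sock/len values):

	FILE *file = tmpfile();		/* anonymous, already unlinked */
	int fd = fileno(file);		/* raw descriptor for sendfile() */

	/* write the test payload via 'file', rewind, then: */
	sendfile(sock, fd, NULL, len);	/* kernel-side copy from temp file */
	fclose(file);			/* releases fd too; no close(fd) needed */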