summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-03-14 06:52:03 +0300
committerDavid S. Miller <davem@davemloft.net>2020-03-14 06:52:03 +0300
commit44ef976ab3c4ccd6c886714e5349caa53c477010 (patch)
treefad7059aad1e1ac040e59a2f4870400bc8e9e30a /kernel
parent48f5d5cb80b4e414cb97dd7dba43b0370bdee130 (diff)
parent832165d225f71040a2c1fc2407752e462d00de1f (diff)
downloadlinux-44ef976ab3c4ccd6c886714e5349caa53c477010.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2020-03-13 The following pull-request contains BPF updates for your *net-next* tree. We've added 86 non-merge commits during the last 12 day(s) which contain a total of 107 files changed, 5771 insertions(+), 1700 deletions(-). The main changes are: 1) Add modify_return attach type which allows to attach to a function via BPF trampoline and is run after the fentry and before the fexit programs and can pass a return code to the original caller, from KP Singh. 2) Generalize BPF's kallsyms handling and add BPF trampoline and dispatcher objects to be visible in /proc/kallsyms so they can be annotated in stack traces, from Jiri Olsa. 3) Extend BPF sockmap to allow for UDP next to existing TCP support in order to enable this for BPF based socket dispatch, from Lorenz Bauer. 4) Introduce a new bpftool 'prog profile' command which attaches to existing BPF programs via fentry and fexit hooks and reads out hardware counters during that period, from Song Liu. Example usage: bpftool prog profile id 337 duration 3 cycles instructions llc_misses 4228 run_cnt 3403698 cycles (84.08%) 3525294 instructions # 1.04 insn per cycle (84.05%) 13 llc_misses # 3.69 LLC misses per million isns (83.50%) 5) Batch of improvements to libbpf, bpftool and BPF selftests. Also addition of a new bpf_link abstraction to keep in particular BPF tracing programs attached even when the application owning them exits, from Andrii Nakryiko. 6) New bpf_get_ns_current_pid_tgid() helper for tracing to perform PID filtering and which returns the PID as seen by the init namespace, from Carlos Neira. 7) Refactor of RISC-V JIT code to move out common pieces and addition of a new RV32G BPF JIT compiler, from Luke Nelson. 8) Add gso_size context member to __sk_buff in order to be able to know whether a given skb is GSO or not, from Willem de Bruijn. 
9) Add a new bpf_xdp_output() helper which reuses XDP's existing perf RB output implementation but can be called from tracepoint programs, from Eelco Chaudron. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/bpf_struct_ops.c10
-rw-r--r--kernel/bpf/btf.c27
-rw-r--r--kernel/bpf/core.c121
-rw-r--r--kernel/bpf/dispatcher.c5
-rw-r--r--kernel/bpf/helpers.c45
-rw-r--r--kernel/bpf/inode.c42
-rw-r--r--kernel/bpf/syscall.c306
-rw-r--r--kernel/bpf/trampoline.c152
-rw-r--r--kernel/bpf/verifier.c29
-rw-r--r--kernel/events/core.c9
-rw-r--r--kernel/extable.c2
-rw-r--r--kernel/trace/bpf_trace.c13
12 files changed, 542 insertions, 219 deletions
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index c498f0fffb40..ca5cc8cdb6eb 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -320,6 +320,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
struct bpf_struct_ops_value *uvalue, *kvalue;
const struct btf_member *member;
const struct btf_type *t = st_ops->type;
+ struct bpf_tramp_progs *tprogs = NULL;
void *udata, *kdata;
int prog_fd, err = 0;
void *image;
@@ -343,6 +344,10 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (uvalue->state || refcount_read(&uvalue->refcnt))
return -EINVAL;
+ tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
+ if (!tprogs)
+ return -ENOMEM;
+
uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;
@@ -425,10 +430,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto reset_unlock;
}
+ tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
+ tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
err = arch_prepare_bpf_trampoline(image,
st_map->image + PAGE_SIZE,
&st_ops->func_models[i], 0,
- &prog, 1, NULL, 0, NULL);
+ tprogs, NULL);
if (err < 0)
goto reset_unlock;
@@ -469,6 +476,7 @@ reset_unlock:
memset(uvalue, 0, map->value_size);
memset(kvalue, 0, map->value_size);
unlock:
+ kfree(tprogs);
mutex_unlock(&st_map->lock);
return err;
}
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 787140095e58..50080add2ab9 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3710,13 +3710,26 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
nr_args--;
}
- if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
- arg == nr_args) {
- if (!t)
- /* Default prog with 5 args. 6th arg is retval. */
- return true;
- /* function return type */
- t = btf_type_by_id(btf, t->type);
+ if (arg == nr_args) {
+ if (prog->expected_attach_type == BPF_TRACE_FEXIT) {
+ if (!t)
+ return true;
+ t = btf_type_by_id(btf, t->type);
+ } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
+ /* For now the BPF_MODIFY_RETURN can only be attached to
+ * functions that return an int.
+ */
+ if (!t)
+ return false;
+
+ t = btf_type_skip_modifiers(btf, t->type, NULL);
+ if (!btf_type_is_int(t)) {
+ bpf_log(log,
+ "ret type %s not allowed for fmod_ret\n",
+ btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return false;
+ }
+ }
} else if (arg >= nr_args) {
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
tname, arg + 1);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 973a20d49749..914f3463aa41 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -97,7 +97,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
- INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
+ INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
return fp;
}
@@ -523,22 +523,22 @@ int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
int bpf_jit_harden __read_mostly;
long bpf_jit_limit __read_mostly;
-static __always_inline void
-bpf_get_prog_addr_region(const struct bpf_prog *prog,
- unsigned long *symbol_start,
- unsigned long *symbol_end)
+static void
+bpf_prog_ksym_set_addr(struct bpf_prog *prog)
{
const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
unsigned long addr = (unsigned long)hdr;
WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
- *symbol_start = addr;
- *symbol_end = addr + hdr->pages * PAGE_SIZE;
+ prog->aux->ksym.start = (unsigned long) prog->bpf_func;
+ prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE;
}
-void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+static void
+bpf_prog_ksym_set_name(struct bpf_prog *prog)
{
+ char *sym = prog->aux->ksym.name;
const char *end = sym + KSYM_NAME_LEN;
const struct btf_type *type;
const char *func_name;
@@ -572,36 +572,27 @@ void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
*sym = 0;
}
-static __always_inline unsigned long
-bpf_get_prog_addr_start(struct latch_tree_node *n)
+static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
{
- unsigned long symbol_start, symbol_end;
- const struct bpf_prog_aux *aux;
-
- aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
-
- return symbol_start;
+ return container_of(n, struct bpf_ksym, tnode)->start;
}
static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
struct latch_tree_node *b)
{
- return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
+ return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
}
static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
{
unsigned long val = (unsigned long)key;
- unsigned long symbol_start, symbol_end;
- const struct bpf_prog_aux *aux;
+ const struct bpf_ksym *ksym;
- aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
+ ksym = container_of(n, struct bpf_ksym, tnode);
- if (val < symbol_start)
+ if (val < ksym->start)
return -1;
- if (val >= symbol_end)
+ if (val >= ksym->end)
return 1;
return 0;
@@ -616,20 +607,29 @@ static DEFINE_SPINLOCK(bpf_lock);
static LIST_HEAD(bpf_kallsyms);
static struct latch_tree_root bpf_tree __cacheline_aligned;
-static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
+void bpf_ksym_add(struct bpf_ksym *ksym)
{
- WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
- list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
- latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+ spin_lock_bh(&bpf_lock);
+ WARN_ON_ONCE(!list_empty(&ksym->lnode));
+ list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
+ latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
+ spin_unlock_bh(&bpf_lock);
}
-static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
+static void __bpf_ksym_del(struct bpf_ksym *ksym)
{
- if (list_empty(&aux->ksym_lnode))
+ if (list_empty(&ksym->lnode))
return;
- latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
- list_del_rcu(&aux->ksym_lnode);
+ latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
+ list_del_rcu(&ksym->lnode);
+}
+
+void bpf_ksym_del(struct bpf_ksym *ksym)
+{
+ spin_lock_bh(&bpf_lock);
+ __bpf_ksym_del(ksym);
+ spin_unlock_bh(&bpf_lock);
}
static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
@@ -639,8 +639,8 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
{
- return list_empty(&fp->aux->ksym_lnode) ||
- fp->aux->ksym_lnode.prev == LIST_POISON2;
+ return list_empty(&fp->aux->ksym.lnode) ||
+ fp->aux->ksym.lnode.prev == LIST_POISON2;
}
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
@@ -649,9 +649,11 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp)
!capable(CAP_SYS_ADMIN))
return;
- spin_lock_bh(&bpf_lock);
- bpf_prog_ksym_node_add(fp->aux);
- spin_unlock_bh(&bpf_lock);
+ bpf_prog_ksym_set_addr(fp);
+ bpf_prog_ksym_set_name(fp);
+ fp->aux->ksym.prog = true;
+
+ bpf_ksym_add(&fp->aux->ksym);
}
void bpf_prog_kallsyms_del(struct bpf_prog *fp)
@@ -659,33 +661,30 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)
if (!bpf_prog_kallsyms_candidate(fp))
return;
- spin_lock_bh(&bpf_lock);
- bpf_prog_ksym_node_del(fp->aux);
- spin_unlock_bh(&bpf_lock);
+ bpf_ksym_del(&fp->aux->ksym);
}
-static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
+static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
{
struct latch_tree_node *n;
n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
- return n ?
- container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
- NULL;
+ return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
}
const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
{
- unsigned long symbol_start, symbol_end;
- struct bpf_prog *prog;
+ struct bpf_ksym *ksym;
char *ret = NULL;
rcu_read_lock();
- prog = bpf_prog_kallsyms_find(addr);
- if (prog) {
- bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
- bpf_get_prog_name(prog, sym);
+ ksym = bpf_ksym_find(addr);
+ if (ksym) {
+ unsigned long symbol_start = ksym->start;
+ unsigned long symbol_end = ksym->end;
+
+ strncpy(sym, ksym->name, KSYM_NAME_LEN);
ret = sym;
if (size)
@@ -703,19 +702,28 @@ bool is_bpf_text_address(unsigned long addr)
bool ret;
rcu_read_lock();
- ret = bpf_prog_kallsyms_find(addr) != NULL;
+ ret = bpf_ksym_find(addr) != NULL;
rcu_read_unlock();
return ret;
}
+static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+{
+ struct bpf_ksym *ksym = bpf_ksym_find(addr);
+
+ return ksym && ksym->prog ?
+ container_of(ksym, struct bpf_prog_aux, ksym)->prog :
+ NULL;
+}
+
const struct exception_table_entry *search_bpf_extables(unsigned long addr)
{
const struct exception_table_entry *e = NULL;
struct bpf_prog *prog;
rcu_read_lock();
- prog = bpf_prog_kallsyms_find(addr);
+ prog = bpf_prog_ksym_find(addr);
if (!prog)
goto out;
if (!prog->aux->num_exentries)
@@ -730,7 +738,7 @@ out:
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym)
{
- struct bpf_prog_aux *aux;
+ struct bpf_ksym *ksym;
unsigned int it = 0;
int ret = -ERANGE;
@@ -738,13 +746,13 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
return ret;
rcu_read_lock();
- list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
+ list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
if (it++ != symnum)
continue;
- bpf_get_prog_name(aux->prog, sym);
+ strncpy(sym, ksym->name, KSYM_NAME_LEN);
- *value = (unsigned long)aux->prog->bpf_func;
+ *value = ksym->start;
*type = BPF_SYM_ELF_TYPE;
ret = 0;
@@ -2149,6 +2157,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index b3e5b214fed8..2444bd15cc2d 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
noff = 0;
} else {
old = d->image + d->image_off;
- noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
+ noff = d->image_off ^ (PAGE_SIZE / 2);
}
new = d->num_progs ? d->image + noff : NULL;
@@ -140,9 +140,10 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
mutex_lock(&d->mutex);
if (!d->image) {
- d->image = bpf_image_alloc();
+ d->image = bpf_jit_alloc_exec_page();
if (!d->image)
goto out;
+ bpf_image_ksym_add(d->image, &d->ksym);
}
prev_num_progs = d->num_progs;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index d8b7b110a1c5..01878db15eaf 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -12,6 +12,8 @@
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_ns.h>
#include "../../lib/kstrtox.h"
@@ -499,3 +501,46 @@ const struct bpf_func_proto bpf_strtoul_proto = {
.arg4_type = ARG_PTR_TO_LONG,
};
#endif
+
+BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
+ struct bpf_pidns_info *, nsdata, u32, size)
+{
+ struct task_struct *task = current;
+ struct pid_namespace *pidns;
+ int err = -EINVAL;
+
+ if (unlikely(size != sizeof(struct bpf_pidns_info)))
+ goto clear;
+
+ if (unlikely((u64)(dev_t)dev != dev))
+ goto clear;
+
+ if (unlikely(!task))
+ goto clear;
+
+ pidns = task_active_pid_ns(task);
+ if (unlikely(!pidns)) {
+ err = -ENOENT;
+ goto clear;
+ }
+
+ if (!ns_match(&pidns->ns, (dev_t)dev, ino))
+ goto clear;
+
+ nsdata->pid = task_pid_nr_ns(task, pidns);
+ nsdata->tgid = task_tgid_nr_ns(task, pidns);
+ return 0;
+clear:
+ memset((void *)nsdata, 0, (size_t) size);
+ return err;
+}
+
+const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
+ .func = bpf_get_ns_current_pid_tgid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 5e40e7fccc21..95087d9f4ed3 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -25,6 +25,7 @@ enum bpf_type {
BPF_TYPE_UNSPEC = 0,
BPF_TYPE_PROG,
BPF_TYPE_MAP,
+ BPF_TYPE_LINK,
};
static void *bpf_any_get(void *raw, enum bpf_type type)
@@ -36,6 +37,9 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
case BPF_TYPE_MAP:
bpf_map_inc_with_uref(raw);
break;
+ case BPF_TYPE_LINK:
+ bpf_link_inc(raw);
+ break;
default:
WARN_ON_ONCE(1);
break;
@@ -53,6 +57,9 @@ static void bpf_any_put(void *raw, enum bpf_type type)
case BPF_TYPE_MAP:
bpf_map_put_with_uref(raw);
break;
+ case BPF_TYPE_LINK:
+ bpf_link_put(raw);
+ break;
default:
WARN_ON_ONCE(1);
break;
@@ -63,20 +70,32 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
{
void *raw;
- *type = BPF_TYPE_MAP;
raw = bpf_map_get_with_uref(ufd);
- if (IS_ERR(raw)) {
+ if (!IS_ERR(raw)) {
+ *type = BPF_TYPE_MAP;
+ return raw;
+ }
+
+ raw = bpf_prog_get(ufd);
+ if (!IS_ERR(raw)) {
*type = BPF_TYPE_PROG;
- raw = bpf_prog_get(ufd);
+ return raw;
}
- return raw;
+ raw = bpf_link_get_from_fd(ufd);
+ if (!IS_ERR(raw)) {
+ *type = BPF_TYPE_LINK;
+ return raw;
+ }
+
+ return ERR_PTR(-EINVAL);
}
static const struct inode_operations bpf_dir_iops;
static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops = { };
+static const struct inode_operations bpf_link_iops = { };
static struct inode *bpf_get_inode(struct super_block *sb,
const struct inode *dir,
@@ -114,6 +133,8 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
*type = BPF_TYPE_PROG;
else if (inode->i_op == &bpf_map_iops)
*type = BPF_TYPE_MAP;
+ else if (inode->i_op == &bpf_link_iops)
+ *type = BPF_TYPE_LINK;
else
return -EACCES;
@@ -335,6 +356,12 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
&bpffs_map_fops : &bpffs_obj_fops);
}
+static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
+{
+ return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
+ &bpffs_obj_fops);
+}
+
static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
@@ -411,6 +438,9 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw,
case BPF_TYPE_MAP:
ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
break;
+ case BPF_TYPE_LINK:
+ ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
+ break;
default:
ret = -EPERM;
}
@@ -487,6 +517,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
ret = bpf_prog_new_fd(raw);
else if (type == BPF_TYPE_MAP)
ret = bpf_map_new_fd(raw, f_flags);
+ else if (type == BPF_TYPE_LINK)
+ ret = bpf_link_new_fd(raw);
else
return -ENOENT;
@@ -504,6 +536,8 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
if (inode->i_op == &bpf_map_iops)
return ERR_PTR(-EINVAL);
+ if (inode->i_op == &bpf_link_iops)
+ return ERR_PTR(-EINVAL);
if (inode->i_op != &bpf_prog_iops)
return ERR_PTR(-EACCES);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c536c65256ad..85567a6ea5f9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2173,84 +2173,274 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
-static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
+struct bpf_link {
+ atomic64_t refcnt;
+ const struct bpf_link_ops *ops;
+ struct bpf_prog *prog;
+ struct work_struct work;
+};
+
+void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
+ struct bpf_prog *prog)
{
- struct bpf_prog *prog = filp->private_data;
+ atomic64_set(&link->refcnt, 1);
+ link->ops = ops;
+ link->prog = prog;
+}
- WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
- bpf_prog_put(prog);
+/* Clean up bpf_link and corresponding anon_inode file and FD. After
+ * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
+ * anon_inode's release() call. This helper manages marking bpf_link as
+ * defunct, releases anon_inode file and puts reserved FD.
+ */
+static void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
+ int link_fd)
+{
+ link->prog = NULL;
+ fput(link_file);
+ put_unused_fd(link_fd);
+}
+
+void bpf_link_inc(struct bpf_link *link)
+{
+ atomic64_inc(&link->refcnt);
+}
+
+/* bpf_link_free is guaranteed to be called from process context */
+static void bpf_link_free(struct bpf_link *link)
+{
+ if (link->prog) {
+ /* detach BPF program, clean up used resources */
+ link->ops->release(link);
+ bpf_prog_put(link->prog);
+ }
+ /* free bpf_link and its containing memory */
+ link->ops->dealloc(link);
+}
+
+static void bpf_link_put_deferred(struct work_struct *work)
+{
+ struct bpf_link *link = container_of(work, struct bpf_link, work);
+
+ bpf_link_free(link);
+}
+
+/* bpf_link_put can be called from atomic context, but ensures that resources
+ * are freed from process context
+ */
+void bpf_link_put(struct bpf_link *link)
+{
+ if (!atomic64_dec_and_test(&link->refcnt))
+ return;
+
+ if (in_atomic()) {
+ INIT_WORK(&link->work, bpf_link_put_deferred);
+ schedule_work(&link->work);
+ } else {
+ bpf_link_free(link);
+ }
+}
+
+static int bpf_link_release(struct inode *inode, struct file *filp)
+{
+ struct bpf_link *link = filp->private_data;
+
+ bpf_link_put(link);
return 0;
}
-static const struct file_operations bpf_tracing_prog_fops = {
- .release = bpf_tracing_prog_release,
+#ifdef CONFIG_PROC_FS
+static const struct bpf_link_ops bpf_raw_tp_lops;
+static const struct bpf_link_ops bpf_tracing_link_lops;
+static const struct bpf_link_ops bpf_xdp_link_lops;
+
+static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+ const struct bpf_link *link = filp->private_data;
+ const struct bpf_prog *prog = link->prog;
+ char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+ const char *link_type;
+
+ if (link->ops == &bpf_raw_tp_lops)
+ link_type = "raw_tracepoint";
+ else if (link->ops == &bpf_tracing_link_lops)
+ link_type = "tracing";
+ else
+ link_type = "unknown";
+
+ bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
+ seq_printf(m,
+ "link_type:\t%s\n"
+ "prog_tag:\t%s\n"
+ "prog_id:\t%u\n",
+ link_type,
+ prog_tag,
+ prog->aux->id);
+}
+#endif
+
+const struct file_operations bpf_link_fops = {
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = bpf_link_show_fdinfo,
+#endif
+ .release = bpf_link_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
};
+int bpf_link_new_fd(struct bpf_link *link)
+{
+ return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
+}
+
+/* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but
+ * instead of immediately installing fd in fdtable, just reserve it and
+ * return. Caller then needs to either install it with fd_install(fd, file) or
+ * release with put_unused_fd(fd).
+ * This is useful for cases when bpf_link attachment/detachment are
+ * complicated and expensive operations and should be delayed until all the fd
+ * reservation and anon_inode creation succeeds.
+ */
+struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd)
+{
+ struct file *file;
+ int fd;
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0)
+ return ERR_PTR(fd);
+
+ file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ return file;
+ }
+
+ *reserved_fd = fd;
+ return file;
+}
+
+struct bpf_link *bpf_link_get_from_fd(u32 ufd)
+{
+ struct fd f = fdget(ufd);
+ struct bpf_link *link;
+
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+ if (f.file->f_op != &bpf_link_fops) {
+ fdput(f);
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = f.file->private_data;
+ bpf_link_inc(link);
+ fdput(f);
+
+ return link;
+}
+
+struct bpf_tracing_link {
+ struct bpf_link link;
+};
+
+static void bpf_tracing_link_release(struct bpf_link *link)
+{
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
+}
+
+static void bpf_tracing_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_tracing_link *tr_link =
+ container_of(link, struct bpf_tracing_link, link);
+
+ kfree(tr_link);
+}
+
+static const struct bpf_link_ops bpf_tracing_link_lops = {
+ .release = bpf_tracing_link_release,
+ .dealloc = bpf_tracing_link_dealloc,
+};
+
static int bpf_tracing_prog_attach(struct bpf_prog *prog)
{
- int tr_fd, err;
+ struct bpf_tracing_link *link;
+ struct file *link_file;
+ int link_fd, err;
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT &&
+ prog->expected_attach_type != BPF_MODIFY_RETURN &&
prog->type != BPF_PROG_TYPE_EXT) {
err = -EINVAL;
goto out_put_prog;
}
- err = bpf_trampoline_link_prog(prog);
- if (err)
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out_put_prog;
+ }
+ bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
+
+ link_file = bpf_link_new_file(&link->link, &link_fd);
+ if (IS_ERR(link_file)) {
+ kfree(link);
+ err = PTR_ERR(link_file);
goto out_put_prog;
+ }
- tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
- prog, O_CLOEXEC);
- if (tr_fd < 0) {
- WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
- err = tr_fd;
+ err = bpf_trampoline_link_prog(prog);
+ if (err) {
+ bpf_link_cleanup(&link->link, link_file, link_fd);
goto out_put_prog;
}
- return tr_fd;
+
+ fd_install(link_fd, link_file);
+ return link_fd;
out_put_prog:
bpf_prog_put(prog);
return err;
}
-struct bpf_raw_tracepoint {
+struct bpf_raw_tp_link {
+ struct bpf_link link;
struct bpf_raw_event_map *btp;
- struct bpf_prog *prog;
};
-static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
+static void bpf_raw_tp_link_release(struct bpf_link *link)
{
- struct bpf_raw_tracepoint *raw_tp = filp->private_data;
+ struct bpf_raw_tp_link *raw_tp =
+ container_of(link, struct bpf_raw_tp_link, link);
- if (raw_tp->prog) {
- bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
- bpf_prog_put(raw_tp->prog);
- }
+ bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
bpf_put_raw_tracepoint(raw_tp->btp);
+}
+
+static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_raw_tp_link *raw_tp =
+ container_of(link, struct bpf_raw_tp_link, link);
+
kfree(raw_tp);
- return 0;
}
-static const struct file_operations bpf_raw_tp_fops = {
- .release = bpf_raw_tracepoint_release,
- .read = bpf_dummy_read,
- .write = bpf_dummy_write,
+static const struct bpf_link_ops bpf_raw_tp_lops = {
+ .release = bpf_raw_tp_link_release,
+ .dealloc = bpf_raw_tp_link_dealloc,
};
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{
- struct bpf_raw_tracepoint *raw_tp;
+ struct bpf_raw_tp_link *link;
struct bpf_raw_event_map *btp;
+ struct file *link_file;
struct bpf_prog *prog;
const char *tp_name;
char buf[128];
- int tp_fd, err;
+ int link_fd, err;
if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
return -EINVAL;
@@ -2297,29 +2487,30 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
goto out_put_prog;
}
- raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
- if (!raw_tp) {
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
err = -ENOMEM;
goto out_put_btp;
}
- raw_tp->btp = btp;
- raw_tp->prog = prog;
+ bpf_link_init(&link->link, &bpf_raw_tp_lops, prog);
+ link->btp = btp;
- err = bpf_probe_register(raw_tp->btp, prog);
- if (err)
- goto out_free_tp;
+ link_file = bpf_link_new_file(&link->link, &link_fd);
+ if (IS_ERR(link_file)) {
+ kfree(link);
+ err = PTR_ERR(link_file);
+ goto out_put_btp;
+ }
- tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
- O_CLOEXEC);
- if (tp_fd < 0) {
- bpf_probe_unregister(raw_tp->btp, prog);
- err = tp_fd;
- goto out_free_tp;
+ err = bpf_probe_register(link->btp, prog);
+ if (err) {
+ bpf_link_cleanup(&link->link, link_file, link_fd);
+ goto out_put_btp;
}
- return tp_fd;
-out_free_tp:
- kfree(raw_tp);
+ fd_install(link_fd, link_file);
+ return link_fd;
+
out_put_btp:
bpf_put_raw_tracepoint(btp);
out_put_prog:
@@ -3266,15 +3457,21 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
if (err)
goto out;
- if (file->f_op == &bpf_raw_tp_fops) {
- struct bpf_raw_tracepoint *raw_tp = file->private_data;
- struct bpf_raw_event_map *btp = raw_tp->btp;
+ if (file->f_op == &bpf_link_fops) {
+ struct bpf_link *link = file->private_data;
- err = bpf_task_fd_query_copy(attr, uattr,
- raw_tp->prog->aux->id,
- BPF_FD_TYPE_RAW_TRACEPOINT,
- btp->tp->name, 0, 0);
- goto put_file;
+ if (link->ops == &bpf_raw_tp_lops) {
+ struct bpf_raw_tp_link *raw_tp =
+ container_of(link, struct bpf_raw_tp_link, link);
+ struct bpf_raw_event_map *btp = raw_tp->btp;
+
+ err = bpf_task_fd_query_copy(attr, uattr,
+ raw_tp->link.prog->aux->id,
+ BPF_FD_TYPE_RAW_TRACEPOINT,
+ btp->tp->name, 0, 0);
+ goto put_file;
+ }
+ goto out_not_supp;
}
event = perf_get_event(file);
@@ -3294,6 +3491,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
goto put_file;
}
+out_not_supp:
err = -ENOTSUPP;
put_file:
fput(file);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 704fa787fec0..f30bca2a4d01 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -5,6 +5,7 @@
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
+#include <linux/perf_event.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -17,12 +18,11 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
-static struct latch_tree_root image_tree __cacheline_aligned;
-/* serializes access to trampoline_table and image_tree */
+/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);
-static void *bpf_jit_alloc_exec_page(void)
+void *bpf_jit_alloc_exec_page(void)
{
void *image;
@@ -38,62 +38,28 @@ static void *bpf_jit_alloc_exec_page(void)
return image;
}
-static __always_inline bool image_tree_less(struct latch_tree_node *a,
- struct latch_tree_node *b)
+void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
- struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
- struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
-
- return ia < ib;
-}
-
-static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
-{
- void *image = container_of(n, struct bpf_image, tnode);
-
- if (addr < image)
- return -1;
- if (addr >= image + PAGE_SIZE)
- return 1;
-
- return 0;
-}
-
-static const struct latch_tree_ops image_tree_ops = {
- .less = image_tree_less,
- .comp = image_tree_comp,
-};
-
-static void *__bpf_image_alloc(bool lock)
-{
- struct bpf_image *image;
-
- image = bpf_jit_alloc_exec_page();
- if (!image)
- return NULL;
-
- if (lock)
- mutex_lock(&trampoline_mutex);
- latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
- if (lock)
- mutex_unlock(&trampoline_mutex);
- return image->data;
+ ksym->start = (unsigned long) data;
+ ksym->end = ksym->start + PAGE_SIZE;
+ bpf_ksym_add(ksym);
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
+ PAGE_SIZE, false, ksym->name);
}
-void *bpf_image_alloc(void)
+void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
- return __bpf_image_alloc(true);
+ bpf_ksym_del(ksym);
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
+ PAGE_SIZE, true, ksym->name);
}
-bool is_bpf_image_address(unsigned long addr)
+static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
{
- bool ret;
-
- rcu_read_lock();
- ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
- rcu_read_unlock();
+ struct bpf_ksym *ksym = &tr->ksym;
- return ret;
+ snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
+ bpf_image_ksym_add(tr->image, ksym);
}
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
@@ -116,7 +82,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
goto out;
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
- image = __bpf_image_alloc(false);
+ image = bpf_jit_alloc_exec_page();
if (!image) {
kfree(tr);
tr = NULL;
@@ -131,6 +97,8 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
for (i = 0; i < BPF_TRAMP_MAX; i++)
INIT_HLIST_HEAD(&tr->progs_hlist[i]);
tr->image = image;
+ INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
+ bpf_trampoline_ksym_add(tr);
out:
mutex_unlock(&trampoline_mutex);
return tr;
@@ -190,40 +158,50 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
return ret;
}
-/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
- */
-#define BPF_MAX_TRAMP_PROGS 40
+/* Snapshot the programs currently attached to @tr.
+ *
+ * Allocates an array of BPF_TRAMP_MAX zeroed bpf_tramp_progs entries, one per
+ * attach kind, and fills each with that kind's program count and the programs
+ * found on the corresponding hlist, in list order. *total is reset to 0 up
+ * front and ends up holding the sum of all per-kind counts.
+ *
+ * Returns the array (the caller in this file releases it with kfree()) or
+ * ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static struct bpf_tramp_progs *
+bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
+{
+	struct bpf_tramp_progs *snap;
+	const struct bpf_prog_aux *pos;
+	int type, slot;
+
+	*total = 0;
+
+	snap = kcalloc(BPF_TRAMP_MAX, sizeof(*snap), GFP_KERNEL);
+	if (!snap)
+		return ERR_PTR(-ENOMEM);
+
+	for (type = 0; type < BPF_TRAMP_MAX; type++) {
+		snap[type].nr_progs = tr->progs_cnt[type];
+		*total += tr->progs_cnt[type];
+
+		slot = 0;
+		hlist_for_each_entry(pos, &tr->progs_hlist[type], tramp_hlist)
+			snap[type].progs[slot++] = pos->prog;
+	}
+
+	return snap;
+}
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
- void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
- void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
- struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
- int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
- int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
- struct bpf_prog **progs, **fentry, **fexit;
+ void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
+ void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+ struct bpf_tramp_progs *tprogs;
u32 flags = BPF_TRAMP_F_RESTORE_REGS;
- struct bpf_prog_aux *aux;
- int err;
+ int err, total;
+
+ tprogs = bpf_trampoline_get_progs(tr, &total);
+ if (IS_ERR(tprogs))
+ return PTR_ERR(tprogs);
- if (fentry_cnt + fexit_cnt == 0) {
+ if (total == 0) {
err = unregister_fentry(tr, old_image);
tr->selector = 0;
goto out;
}
- /* populate fentry progs */
- fentry = progs = progs_to_run;
- hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
- *progs++ = aux->prog;
-
- /* populate fexit progs */
- fexit = progs;
- hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
- *progs++ = aux->prog;
-
- if (fexit_cnt)
+ if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
+ tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
/* Though the second half of trampoline page is unused a task could be
@@ -232,12 +210,11 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
* preempted task. Hence wait for tasks to voluntarily schedule or go
* to userspace.
*/
+
synchronize_rcu_tasks();
- err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
- &tr->func.model, flags,
- fentry, fentry_cnt,
- fexit, fexit_cnt,
+ err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+ &tr->func.model, flags, tprogs,
tr->func.addr);
if (err < 0)
goto out;
@@ -252,6 +229,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
goto out;
tr->selector++;
out:
+ kfree(tprogs);
return err;
}
@@ -260,6 +238,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
switch (t) {
case BPF_TRACE_FENTRY:
return BPF_TRAMP_FENTRY;
+ case BPF_MODIFY_RETURN:
+ return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT:
return BPF_TRAMP_FEXIT;
default:
@@ -344,8 +324,6 @@ out:
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
- struct bpf_image *image;
-
if (!tr)
return;
mutex_lock(&trampoline_mutex);
@@ -356,11 +334,10 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
goto out;
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
goto out;
- image = container_of(tr->image, struct bpf_image, data);
- latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
+ bpf_image_ksym_del(&tr->ksym);
/* wait for tasks to get out of trampoline before freeing it */
synchronize_rcu_tasks();
- bpf_jit_free_exec(image);
+ bpf_jit_free_exec(tr->image);
hlist_del(&tr->hlist);
kfree(tr);
out:
@@ -375,6 +352,7 @@ out:
* call __bpf_prog_exit
*/
u64 notrace __bpf_prog_enter(void)
+ __acquires(RCU)
{
u64 start = 0;
@@ -386,6 +364,7 @@ u64 notrace __bpf_prog_enter(void)
}
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+ __releases(RCU)
{
struct bpf_prog_stats *stats;
@@ -409,8 +388,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_prog **fentry_progs, int fentry_cnt,
- struct bpf_prog **fexit_progs, int fexit_cnt,
+ struct bpf_tramp_progs *tprogs,
void *orig_call)
{
return -ENOTSUPP;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 289383edfc8c..745f3cfdf3b2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19,6 +19,7 @@
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
+#include <linux/error-injection.h>
#include "disasm.h"
@@ -3649,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (func_id != BPF_FUNC_perf_event_read &&
func_id != BPF_FUNC_perf_event_output &&
func_id != BPF_FUNC_skb_output &&
- func_id != BPF_FUNC_perf_event_read_value)
+ func_id != BPF_FUNC_perf_event_read_value &&
+ func_id != BPF_FUNC_xdp_output)
goto error;
break;
case BPF_MAP_TYPE_STACK_TRACE:
@@ -3739,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_perf_event_output:
case BPF_FUNC_perf_event_read_value:
case BPF_FUNC_skb_output:
+ case BPF_FUNC_xdp_output:
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
goto error;
break;
@@ -9800,6 +9803,26 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
return 0;
}
+#define SECURITY_PREFIX "security_"
+
+/* Decide whether the attach target of env->prog may have its return value
+ * modified. The target is accepted when its address is on the error
+ * injection list or when its name starts with SECURITY_PREFIX; otherwise the
+ * rejection is logged through verbose() and -EINVAL is returned.
+ */
+static int check_attach_modify_return(struct bpf_verifier_env *env)
+{
+	const struct bpf_prog_aux *aux = env->prog->aux;
+	unsigned long target = (unsigned long) aux->trampoline->func.addr;
+
+	/* The name-prefix test is expected to go away once the KRSI effort
+	 * introduces the LSM_HOOK macro for cleaning up lsm_hooks.h.
+	 */
+	if (within_error_injection_list(target))
+		return 0;
+	if (strncmp(SECURITY_PREFIX, aux->attach_func_name,
+		    sizeof(SECURITY_PREFIX) - 1) == 0)
+		return 0;
+
+	verbose(env, "fmod_ret attach_btf_id %u (%s) is not modifiable\n",
+		aux->attach_btf_id, aux->attach_func_name);
+	return -EINVAL;
+}
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
@@ -9950,6 +9973,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
if (!prog_extension)
return -EINVAL;
/* fallthrough */
+ case BPF_MODIFY_RETURN:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
if (!btf_type_is_func(t)) {
@@ -9999,6 +10023,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
}
tr->func.addr = (void *)addr;
prog->aux->trampoline = tr;
+
+ if (prog->expected_attach_type == BPF_MODIFY_RETURN)
+ ret = check_attach_modify_return(env);
out:
mutex_unlock(&tr->mutex);
if (ret)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bbdfac0182f4..9b89ef176247 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8255,23 +8255,22 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
enum perf_bpf_event_type type)
{
bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
- char sym[KSYM_NAME_LEN];
int i;
if (prog->aux->func_cnt == 0) {
- bpf_get_prog_name(prog, sym);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
(u64)(unsigned long)prog->bpf_func,
- prog->jited_len, unregister, sym);
+ prog->jited_len, unregister,
+ prog->aux->ksym.name);
} else {
for (i = 0; i < prog->aux->func_cnt; i++) {
struct bpf_prog *subprog = prog->aux->func[i];
- bpf_get_prog_name(subprog, sym);
+ /* Report each subprog under its own symbol name, as the removed
+ * bpf_get_prog_name(subprog, sym) call did; using prog's name
+ * here would label every subprog range with the main program.
+ */
perf_event_ksymbol(
PERF_RECORD_KSYMBOL_TYPE_BPF,
(u64)(unsigned long)subprog->bpf_func,
- subprog->jited_len, unregister, sym);
+ subprog->jited_len, unregister,
+ subprog->aux->ksym.name);
}
}
}
diff --git a/kernel/extable.c b/kernel/extable.c
index a0024f27d3a1..7681f87e89dd 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -149,8 +149,6 @@ int kernel_text_address(unsigned long addr)
goto out;
if (is_bpf_text_address(addr))
goto out;
- if (is_bpf_image_address(addr))
- goto out;
ret = 0;
out:
if (no_rcu)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 07764c761073..e619eedb5919 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -843,6 +843,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_send_signal_thread_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
+ case BPF_FUNC_get_ns_current_pid_tgid:
+ return &bpf_get_ns_current_pid_tgid_proto;
default:
return NULL;
}
@@ -1143,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
};
extern const struct bpf_func_proto bpf_skb_output_proto;
+extern const struct bpf_func_proto bpf_xdp_output_proto;
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
@@ -1218,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_NET
case BPF_FUNC_skb_output:
return &bpf_skb_output_proto;
+ case BPF_FUNC_xdp_output:
+ return &bpf_xdp_output_proto;
#endif
default:
return raw_tp_prog_func_proto(func_id, prog);
@@ -1252,6 +1257,13 @@ static bool tracing_prog_is_valid_access(int off, int size,
return btf_ctx_access(off, size, type, prog, info);
}
+/* Weak default for the tracing test_run hook: does nothing and reports
+ * -ENOTSUPP. Being __weak, a non-weak definition of the same symbol takes
+ * precedence at link time.
+ */
+int __weak
+bpf_prog_test_run_tracing(struct bpf_prog *prog, const union bpf_attr *kattr,
+			  union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
+
const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
.get_func_proto = raw_tp_prog_func_proto,
.is_valid_access = raw_tp_prog_is_valid_access,
@@ -1266,6 +1278,7 @@ const struct bpf_verifier_ops tracing_verifier_ops = {
};
const struct bpf_prog_ops tracing_prog_ops = {
+ .test_run = bpf_prog_test_run_tracing, /* the definition in this file is a __weak stub returning -ENOTSUPP */
};
static bool raw_tp_writable_prog_is_valid_access(int off, int size,