From 356ed64991c6847a0c4f2e8fa3b1133f7a14f1fc Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 14 Sep 2021 10:33:51 +0800 Subject: bpf: Handle return value of BPF_PROG_TYPE_STRUCT_OPS prog Currently if a function ptr in struct_ops has a return value, its caller will get a random return value from it, because the return value of related BPF_PROG_TYPE_STRUCT_OPS prog is just dropped. So adding a new flag BPF_TRAMP_F_RET_FENTRY_RET to tell bpf trampoline to save and return the return value of struct_ops prog if ret_size of the function ptr is greater than 0. Also restricting the flag to be used alone. Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS") Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210914023351.3664499-1-houtao1@huawei.com --- kernel/bpf/bpf_struct_ops.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index d6731c32864e..9abcc33f02cf 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -368,6 +368,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, const struct btf_type *mtype, *ptype; struct bpf_prog *prog; u32 moff; + u32 flags; moff = btf_member_bit_offset(t, member) / 8; ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); @@ -431,10 +432,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + flags = st_ops->func_models[i].ret_size > 0 ? + BPF_TRAMP_F_RET_FENTRY_RET : 0; err = arch_prepare_bpf_trampoline(NULL, image, st_map->image + PAGE_SIZE, - &st_ops->func_models[i], 0, - tprogs, NULL); + &st_ops->func_models[i], + flags, tprogs, NULL); if (err < 0) goto reset_unlock; -- cgit v1.2.3 From 8a98ae12fbefdb583a7696de719a1d57e5e940a2 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 22 Sep 2021 12:11:52 +0100 Subject: bpf: Exempt CAP_BPF from checks against bpf_jit_limit When introducing CAP_BPF, bpf_jit_charge_modmem() was not changed to treat programs with CAP_BPF as privileged for the purpose of JIT memory allocation. This means that a program without CAP_BPF can block a program with CAP_BPF from loading a program. Fix this by checking bpf_capable() in bpf_jit_charge_modmem(). Fixes: 2c78ee898d8f ("bpf: Implement CAP_BPF") Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210922111153.19843-1-lmb@cloudflare.com --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9f4636d021b1..d6b7dfdd8066 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -827,7 +827,7 @@ int bpf_jit_charge_modmem(u32 pages) { if (atomic_long_add_return(pages, &bpf_jit_current) > (bpf_jit_limit >> PAGE_SHIFT)) { - if (!capable(CAP_SYS_ADMIN)) { + if (!bpf_capable()) { atomic_long_sub(pages, &bpf_jit_current); return -EPERM; } -- cgit v1.2.3 From 78cc316e9583067884eb8bd154301dc1e9ee945c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 27 Sep 2021 14:39:20 +0200 Subject: bpf, cgroup: Assign cgroup in cgroup_sk_alloc when called from interrupt If cgroup_sk_alloc() is called from interrupt context, then just assign the root cgroup to skcd->cgroup. Prior to commit 8520e224f547 ("bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode") we would just return, and later on in sock_cgroup_ptr(), we were NULL-testing the cgroup in fast-path, and iff indeed NULL returning the root cgroup (v ?: &cgrp_dfl_root.cgrp). Rather than re-adding the NULL-test to the fast-path we can just assign it once from cgroup_sk_alloc() given v1/v2 handling has been simplified. The migration from NULL test with returning &cgrp_dfl_root.cgrp to assigning &cgrp_dfl_root.cgrp directly does /not/ change behavior for callers of sock_cgroup_ptr(). syzkaller was able to trigger a splat in the legacy netrom code base, where the RX handler in nr_rx_frame() calls nr_make_new() which calls sk_alloc() and therefore cgroup_sk_alloc() with in_interrupt() condition. Thus the NULL skcd->cgroup, where it trips over on cgroup_sk_free() side given it expects a non-NULL object. There are a few other candidates aside from netrom which have similar pattern where in their accept-like implementation, they just call to sk_alloc() and thus cgroup_sk_alloc() instead of sk_clone_lock() with the corresponding cgroup_sk_clone() which then inherits the cgroup from the parent socket. None of them are related to core protocols where BPF cgroup programs are running from. However, in future, they should follow to implement a similar inheritance mechanism. Additionally, with a !CONFIG_CGROUP_NET_PRIO and !CONFIG_CGROUP_NET_CLASSID configuration, the same issue was exposed also prior to 8520e224f547 due to commit e876ecc67db8 ("cgroup: memcg: net: do not associate sock with unrelated cgroup") which added the early in_interrupt() return back then. Fixes: 8520e224f547 ("bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode") Fixes: e876ecc67db8 ("cgroup: memcg: net: do not associate sock with unrelated cgroup") Reported-by: syzbot+df709157a4ecaf192b03@syzkaller.appspotmail.com Reported-by: syzbot+533f389d4026d86a2a95@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Tested-by: syzbot+df709157a4ecaf192b03@syzkaller.appspotmail.com Tested-by: syzbot+533f389d4026d86a2a95@syzkaller.appspotmail.com Acked-by: Tejun Heo Link: https://lore.kernel.org/bpf/20210927123921.21535-1-daniel@iogearbox.net --- kernel/cgroup/cgroup.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 8afa8690d288..570b0c97392a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6574,22 +6574,29 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) void cgroup_sk_alloc(struct sock_cgroup_data *skcd) { - /* Don't associate the sock with unrelated interrupted task's cgroup. */ - if (in_interrupt()) - return; + struct cgroup *cgroup; rcu_read_lock(); + /* Don't associate the sock with unrelated interrupted task's cgroup. */ + if (in_interrupt()) { + cgroup = &cgrp_dfl_root.cgrp; + cgroup_get(cgroup); + goto out; + } + while (true) { struct css_set *cset; cset = task_css_set(current); if (likely(cgroup_tryget(cset->dfl_cgrp))) { - skcd->cgroup = cset->dfl_cgrp; - cgroup_bpf_get(cset->dfl_cgrp); + cgroup = cset->dfl_cgrp; break; } cpu_relax(); } +out: + skcd->cgroup = cgroup; + cgroup_bpf_get(cgroup); rcu_read_unlock(); } -- cgit v1.2.3