summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2023-03-03 07:14:46 +0300
committerDaniel Borkmann <daniel@iogearbox.net>2023-03-03 19:42:20 +0300
commit6fcd486b3a0a628c41f12b3a7329a18a2c74b351 (patch)
treeac60957b666a5eac1e50dd7bb125bb7c106acb2f /kernel
parent0047d8343f6042c4feea24072ef254d47b8a33b3 (diff)
downloadlinux-6fcd486b3a0a628c41f12b3a7329a18a2c74b351.tar.xz
bpf: Refactor RCU enforcement in the verifier.
bpf_rcu_read_lock/unlock() are only available in clang compiled kernels. The lack of such a key mechanism makes it impossible for sleepable bpf programs to use RCU pointers. Allow bpf_rcu_read_lock/unlock() in GCC compiled kernels (though GCC doesn't support btf_type_tag yet) and allowlist certain field dereferences in important data structures like task_struct, cgroup, socket that are used by sleepable programs either as an RCU pointer or a fully trusted pointer (which is valid outside of RCU CS). Use BTF_TYPE_SAFE_RCU and BTF_TYPE_SAFE_TRUSTED macros for such tagging. They will be removed once GCC supports btf_type_tag. With that, refactor check_ptr_to_btf_access(). Make it strict in enforcing PTR_TRUSTED and PTR_UNTRUSTED while deprecating old PTR_TO_BTF_ID without modifier flags. There is a chance that this strict enforcement might break existing programs (especially on GCC compiled kernels), but this cleanup has to start sooner rather than later. Note PTR_TO_CTX access still yields old deprecated PTR_TO_BTF_ID. Once it's converted to strict PTR_TRUSTED or PTR_UNTRUSTED the kfuncs and helpers will be able to default to KF_TRUSTED_ARGS. KF_RCU will remain as a weaker version of KF_TRUSTED_ARGS where obj refcnt could be 0. Adjust rcu_read_lock selftest to run on gcc and clang compiled kernels. Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/bpf/20230303041446.3630-7-alexei.starovoitov@gmail.com
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c16
-rw-r--r--kernel/bpf/cpumask.c40
-rw-r--r--kernel/bpf/verifier.c178
3 files changed, 154 insertions, 80 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c5e1d6955491..a8cb09e5973b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6163,6 +6163,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const char *tname, *mname, *tag_value;
u32 vlen, elem_id, mid;
+ *flag = 0;
again:
tname = __btf_name_by_offset(btf, t->name_off);
if (!btf_type_is_struct(t)) {
@@ -6329,6 +6330,15 @@ error:
* of this field or inside of this struct
*/
if (btf_type_is_struct(mtype)) {
+ if (BTF_INFO_KIND(mtype->info) == BTF_KIND_UNION &&
+ btf_type_vlen(mtype) != 1)
+ /*
+ * walking unions yields untrusted pointers
+ * with exception of __bpf_md_ptr and other
+ * unions with a single member
+ */
+ *flag |= PTR_UNTRUSTED;
+
/* our field must be inside that union or struct */
t = mtype;
@@ -6373,7 +6383,7 @@ error:
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
- *flag = tmp_flag;
+ *flag |= tmp_flag;
return WALK_PTR;
}
}
@@ -8357,7 +8367,7 @@ out:
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off)
+ int off, const char *suffix)
{
struct btf *btf = reg->btf;
const struct btf_type *walk_type, *safe_type;
@@ -8374,7 +8384,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
tname = btf_name_by_offset(btf, walk_type->name_off);
- ret = snprintf(safe_tname, sizeof(safe_tname), "%s__safe_fields", tname);
+ ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix);
if (ret < 0)
return false;
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index 2b3fbbfebdc5..b6587ec40f1b 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -427,26 +427,26 @@ BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_cpumask_first, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_and, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_or, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_full, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_any, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_and, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_or, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU)
BTF_SET8_END(cpumask_kfunc_btf_ids)
static const struct btf_kfunc_id_set cpumask_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a095055d7ef4..c2adf3c24c64 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5073,29 +5073,76 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
return 0;
}
-#define BTF_TYPE_SAFE_NESTED(__type) __PASTE(__type, __safe_fields)
+#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
+#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
-BTF_TYPE_SAFE_NESTED(struct task_struct) {
+/*
+ * Allow list few fields as RCU trusted or full trusted.
+ * This logic doesn't allow mix tagging and will be removed once GCC supports
+ * btf_type_tag.
+ */
+
+/* RCU trusted: these fields are trusted in RCU CS and never NULL */
+BTF_TYPE_SAFE_RCU(struct task_struct) {
const cpumask_t *cpus_ptr;
struct css_set __rcu *cgroups;
+ struct task_struct __rcu *real_parent;
+ struct task_struct *group_leader;
};
-BTF_TYPE_SAFE_NESTED(struct css_set) {
+BTF_TYPE_SAFE_RCU(struct css_set) {
struct cgroup *dfl_cgrp;
};
-static bool nested_ptr_is_trusted(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg,
- int off)
+/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
+BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
+ __bpf_md_ptr(struct seq_file *, seq);
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct task_struct *, task);
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
+ struct file *file;
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct file) {
+ struct inode *f_inode;
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct dentry) {
+ /* no negative dentry-s in places where bpf can see it */
+ struct inode *d_inode;
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct socket) {
+ struct sock *sk;
+};
+
+static bool type_is_rcu(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ int off)
{
- /* If its parent is not trusted, it can't regain its trusted status. */
- if (!is_trusted_reg(reg))
- return false;
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
- BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct));
- BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct css_set));
+ return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu");
+}
- return btf_nested_type_is_trusted(&env->log, reg, off);
+static bool type_is_trusted(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ int off)
+{
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
+
+ return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted");
}
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
@@ -5181,49 +5228,58 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (ret < 0)
return ret;
- /* If this is an untrusted pointer, all pointers formed by walking it
- * also inherit the untrusted flag.
- */
- if (type_flag(reg->type) & PTR_UNTRUSTED)
- flag |= PTR_UNTRUSTED;
+ if (ret != PTR_TO_BTF_ID) {
+ /* just mark; */
- /* By default any pointer obtained from walking a trusted pointer is no
- * longer trusted, unless the field being accessed has explicitly been
- * marked as inheriting its parent's state of trust.
- *
- * An RCU-protected pointer can also be deemed trusted if we are in an
- * RCU read region. This case is handled below.
- */
- if (nested_ptr_is_trusted(env, reg, off)) {
- flag |= PTR_TRUSTED;
- /*
- * task->cgroups is trusted. It provides a stronger guarantee
- * than __rcu tag on 'cgroups' field in 'struct task_struct'.
- * Clear MEM_RCU in such case.
+ } else if (type_flag(reg->type) & PTR_UNTRUSTED) {
+ /* If this is an untrusted pointer, all pointers formed by walking it
+ * also inherit the untrusted flag.
+ */
+ flag = PTR_UNTRUSTED;
+
+ } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
+ /* By default any pointer obtained from walking a trusted pointer is no
+ * longer trusted, unless the field being accessed has explicitly been
+ * marked as inheriting its parent's state of trust (either full or RCU).
+ * For example:
+ * 'cgroups' pointer is untrusted if task->cgroups dereference
+ * happened in a sleepable program outside of bpf_rcu_read_lock()
+ * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
+ * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
+ *
+ * A regular RCU-protected pointer with __rcu tag can also be deemed
+ * trusted if we are in an RCU CS. Such pointer can be NULL.
*/
- flag &= ~MEM_RCU;
+ if (type_is_trusted(env, reg, off)) {
+ flag |= PTR_TRUSTED;
+ } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
+ if (type_is_rcu(env, reg, off)) {
+ /* ignore __rcu tag and mark it MEM_RCU */
+ flag |= MEM_RCU;
+ } else if (flag & MEM_RCU) {
+ /* __rcu tagged pointers can be NULL */
+ flag |= PTR_MAYBE_NULL;
+ } else if (flag & (MEM_PERCPU | MEM_USER)) {
+ /* keep as-is */
+ } else {
+ /* walking unknown pointers yields untrusted pointer */
+ flag = PTR_UNTRUSTED;
+ }
+ } else {
+ /*
+ * If not in RCU CS or MEM_RCU pointer can be NULL then
+ * aggressively mark as untrusted otherwise such
+ * pointers will be plain PTR_TO_BTF_ID without flags
+ * and will be allowed to be passed into helpers for
+ * compat reasons.
+ */
+ flag = PTR_UNTRUSTED;
+ }
} else {
+ /* Old compat. Deprecated */
flag &= ~PTR_TRUSTED;
}
- if (flag & MEM_RCU) {
- /* Mark value register as MEM_RCU only if it is protected by
- * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU
- * itself can already indicate trustedness inside the rcu
- * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
- * it could be null in some cases.
- */
- if (in_rcu_cs(env) && (is_trusted_reg(reg) || is_rcu_reg(reg)))
- flag |= PTR_MAYBE_NULL;
- else
- flag &= ~MEM_RCU;
- } else if (reg->type & MEM_RCU) {
- /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
- * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
- */
- flag |= PTR_UNTRUSTED;
- }
-
if (atype == BPF_READ && value_regno >= 0)
mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
@@ -10049,10 +10105,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
- if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
- verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
- return -EACCES;
- }
if (env->cur_state->active_rcu_lock) {
struct bpf_func_state *state;
@@ -14911,8 +14963,22 @@ static int do_check(struct bpf_verifier_env *env)
* src_reg == stack|map in some other branch.
* Reject it.
*/
- verbose(env, "same insn cannot be used with different pointers\n");
- return -EINVAL;
+ if (base_type(src_reg_type) == PTR_TO_BTF_ID &&
+ base_type(*prev_src_type) == PTR_TO_BTF_ID) {
+ /*
+ * Have to support a use case when one path through
+ * the program yields TRUSTED pointer while another
+ * is UNTRUSTED. Fallback to UNTRUSTED to generate
+ * BPF_PROBE_MEM.
+ */
+ *prev_src_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ } else {
+ verbose(env,
+ "The same insn cannot be used with different pointers: %s",
+ reg_type_str(env, src_reg_type));
+ verbose(env, " != %s\n", reg_type_str(env, *prev_src_type));
+ return -EINVAL;
+ }
}
} else if (class == BPF_STX) {
@@ -17984,8 +18050,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
env->bpf_capable = bpf_capable();
- env->rcu_tag_supported = btf_vmlinux &&
- btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;