summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2022-09-11 03:27:33 +0300
committerAlexei Starovoitov <ast@kernel.org>2022-09-11 03:27:33 +0300
commitb8c62fe2025af081ea738a615c58d79d815f260f (patch)
tree772801bba2c6592be345947e6e5d165fdef6d2f5
parent57c92f11a215717bf90880828b7a23c736c3c0d9 (diff)
parente2d75e954c0a277b8fa0ddf666ddd4f9b73195f7 (diff)
downloadlinux-b8c62fe2025af081ea738a615c58d79d815f260f.tar.xz
Merge branch 'Support direct writes to nf_conn:mark'
Daniel Xu says: ==================== Support direct writes to nf_conn:mark from TC and XDP prog types. This is useful when applications want to store per-connection metadata. This is also particularly useful for applications that run both bpf and iptables/nftables because the latter can trivially access this metadata. One example use case would be if a bpf prog is responsible for advanced packet classification and iptables/nftables is later used for routing due to pre-existing/legacy code. Past discussion: - v4: https://lore.kernel.org/bpf/cover.1661192455.git.dxu@dxuuu.xyz/ - v3: https://lore.kernel.org/bpf/cover.1660951028.git.dxu@dxuuu.xyz/ - v2: https://lore.kernel.org/bpf/CAP01T74Sgn354dXGiFWFryu4vg+o8b9s9La1d9zEbC4LGvH4qg@mail.gmail.com/T/ - v1: https://lore.kernel.org/bpf/cover.1660592020.git.dxu@dxuuu.xyz/ Changes since v4: - Use exported function pointer + mutex to handle CONFIG_NF_CONNTRACK=m case Changes since v3: - Use a mutex to protect module load/unload critical section Changes since v2: - Remove use of NOT_INIT for btf_struct_access write path - Disallow nf_conn writing when nf_conntrack module not loaded - Support writing to nf_conn___init:mark Changes since v1: - Add unimplemented stub for when !CONFIG_BPF_SYSCALL ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/linux/bpf.h9
-rw-r--r--include/net/netfilter/nf_conntrack_bpf.h23
-rw-r--r--kernel/bpf/btf.c1
-rw-r--r--kernel/bpf/verifier.c4
-rw-r--r--net/core/filter.c54
-rw-r--r--net/ipv4/bpf_tcp_ca.c2
-rw-r--r--net/netfilter/nf_conntrack_bpf.c66
-rw-r--r--net/netfilter/nf_conntrack_core.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c14
11 files changed, 178 insertions, 7 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 48ae05099f36..54178b9e9c3a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2211,6 +2211,15 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id)
return ERR_PTR(-ENOTSUPP);
}
+static inline int btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off, int size,
+ enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
+{
+ return -EACCES;
+}
+
static inline const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h
index a473b56842c5..a61a93d1c6dc 100644
--- a/include/net/netfilter/nf_conntrack_bpf.h
+++ b/include/net/netfilter/nf_conntrack_bpf.h
@@ -3,13 +3,22 @@
#ifndef _NF_CONNTRACK_BPF_H
#define _NF_CONNTRACK_BPF_H
+#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/kconfig.h>
+#include <linux/mutex.h>
#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
(IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
extern int register_nf_conntrack_bpf(void);
+extern void cleanup_nf_conntrack_bpf(void);
+
+extern struct mutex nf_conn_btf_access_lock;
+extern int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, int off, int size,
+ enum bpf_access_type atype, u32 *next_btf_id,
+ enum bpf_type_flag *flag);
#else
@@ -18,6 +27,20 @@ static inline int register_nf_conntrack_bpf(void)
return 0;
}
+static inline void cleanup_nf_conntrack_bpf(void)
+{
+}
+
+static inline int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ return -EACCES;
+}
+
#endif
#endif /* _NF_CONNTRACK_BPF_H */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 9d12212fcd61..98be25d13325 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -818,6 +818,7 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
return NULL;
return btf->types[type_id];
}
+EXPORT_SYMBOL_GPL(btf_type_by_id);
/*
* Regular int is not a bit field and it must be either
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c0f175ac187a..9109e07b759a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -370,6 +370,7 @@ __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
bpf_verifier_vlog(log, fmt, args);
va_end(args);
}
+EXPORT_SYMBOL_GPL(bpf_log);
static const char *ltrim(const char *s)
{
@@ -13406,9 +13407,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code = BPF_LDX | BPF_PROBE_MEM |
BPF_SIZE((insn)->code);
env->prog->aux->num_exentries++;
- } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
- verbose(env, "Writes through BTF pointers are not allowed\n");
- return -EINVAL;
}
continue;
default:
diff --git a/net/core/filter.c b/net/core/filter.c
index e872f45399b0..4b2be211bcbe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -18,6 +18,7 @@
*/
#include <linux/atomic.h>
+#include <linux/bpf_verifier.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -8604,6 +8605,36 @@ static bool tc_cls_act_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
}
+DEFINE_MUTEX(nf_conn_btf_access_lock);
+EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);
+
+int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, int off, int size,
+ enum bpf_access_type atype, u32 *next_btf_id,
+ enum bpf_type_flag *flag);
+EXPORT_SYMBOL_GPL(nfct_bsa);
+
+static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ int ret = -EACCES;
+
+ if (atype == BPF_READ)
+ return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
+
+ mutex_lock(&nf_conn_btf_access_lock);
+ if (nfct_bsa)
+ ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag);
+ mutex_unlock(&nf_conn_btf_access_lock);
+
+ return ret;
+}
+
static bool __is_valid_xdp_access(int off, int size)
{
if (off < 0 || off >= sizeof(struct xdp_md))
@@ -8663,6 +8694,27 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+static int xdp_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ int ret = -EACCES;
+
+ if (atype == BPF_READ)
+ return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
+
+ mutex_lock(&nf_conn_btf_access_lock);
+ if (nfct_bsa)
+ ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag);
+ mutex_unlock(&nf_conn_btf_access_lock);
+
+ return ret;
+}
+
static bool sock_addr_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -10557,6 +10609,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
.gen_ld_abs = bpf_gen_ld_abs,
+ .btf_struct_access = tc_cls_act_btf_struct_access,
};
const struct bpf_prog_ops tc_cls_act_prog_ops = {
@@ -10568,6 +10621,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = {
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
.gen_prologue = bpf_noop_prologue,
+ .btf_struct_access = xdp_btf_struct_access,
};
const struct bpf_prog_ops xdp_prog_ops = {
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 85a9e500c42d..6da16ae6a962 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -124,7 +124,7 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
- return NOT_INIT;
+ return 0;
}
BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt)
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 1cd87b28c9b0..77eb8e959f61 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -6,8 +6,10 @@
* are exposed through to BPF programs is explicitly unstable.
*/
+#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>
+#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
@@ -184,6 +186,54 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
return ct;
}
+BTF_ID_LIST(btf_nf_conn_ids)
+BTF_ID(struct, nf_conn)
+BTF_ID(struct, nf_conn___init)
+
+/* Check writes into `struct nf_conn` */
+static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ const struct btf_type *ncit;
+ const struct btf_type *nct;
+ size_t end;
+
+ ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]);
+ nct = btf_type_by_id(btf, btf_nf_conn_ids[0]);
+
+ if (t != nct && t != ncit) {
+ bpf_log(log, "only read is supported\n");
+ return -EACCES;
+ }
+
+ /* `struct nf_conn` and `struct nf_conn___init` have the same layout
+ * so we are safe to simply merge offset checks here
+ */
+ switch (off) {
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ case offsetof(struct nf_conn, mark):
+ end = offsetofend(struct nf_conn, mark);
+ break;
+#endif
+ default:
+ bpf_log(log, "no write support to nf_conn at off %d\n", off);
+ return -EACCES;
+ }
+
+ if (off + size > end) {
+ bpf_log(log,
+ "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
+ off, size, end);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in nf_conntrack BTF");
@@ -449,5 +499,19 @@ int register_nf_conntrack_bpf(void)
int ret;
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
- return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
+ if (!ret) {
+ mutex_lock(&nf_conn_btf_access_lock);
+ nfct_bsa = _nf_conntrack_btf_struct_access;
+ mutex_unlock(&nf_conn_btf_access_lock);
+ }
+
+ return ret;
+}
+
+void cleanup_nf_conntrack_bpf(void)
+{
+ mutex_lock(&nf_conn_btf_access_lock);
+ nfct_bsa = NULL;
+ mutex_unlock(&nf_conn_btf_access_lock);
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index da65c6e8eeeb..0195f60fc43b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2512,6 +2512,7 @@ static int kill_all(struct nf_conn *i, void *data)
void nf_conntrack_cleanup_start(void)
{
+ cleanup_nf_conntrack_bpf();
conntrack_gc_work.exiting = true;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index 544bf90ac2a7..ab9117ae7545 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -17,6 +17,7 @@ struct {
{ "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" },
{ "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" },
{ "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" },
+ { "write_not_allowlisted_field", "no write support to nf_conn at off" },
};
enum {
@@ -113,6 +114,7 @@ static void test_bpf_nf_ct(int mode)
ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
/* expected status is IPS_SEEN_REPLY */
ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update ");
+ ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value");
ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
end:
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 2722441850cc..b5e7079701e8 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -23,6 +23,7 @@ int test_insert_entry = -EAFNOSUPPORT;
int test_succ_lookup = -ENOENT;
u32 test_delta_timeout = 0;
u32 test_status = 0;
+u32 test_insert_lookup_mark = 0;
__be32 saddr = 0;
__be16 sport = 0;
__be32 daddr = 0;
@@ -144,6 +145,7 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
bpf_ct_set_timeout(ct, 10000);
bpf_ct_set_status(ct, IPS_CONFIRMED);
+ ct->mark = 77;
ct_ins = bpf_ct_insert_entry(ct);
if (ct_ins) {
@@ -157,6 +159,7 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
test_delta_timeout = ct_lk->timeout - bpf_jiffies64();
test_delta_timeout /= CONFIG_HZ;
test_status = IPS_SEEN_REPLY;
+ test_insert_lookup_mark = ct_lk->mark;
bpf_ct_change_status(ct_lk, IPS_SEEN_REPLY);
bpf_ct_release(ct_lk);
test_succ_lookup = 0;
@@ -175,8 +178,10 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
sizeof(opts_def));
if (ct) {
test_exist_lookup = 0;
- if (ct->mark == 42)
- test_exist_lookup_mark = 43;
+ if (ct->mark == 42) {
+ ct->mark++;
+ test_exist_lookup_mark = ct->mark;
+ }
bpf_ct_release(ct);
} else {
test_exist_lookup = opts_def.error;
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
index bf79af15c808..0e4759ab38ff 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
@@ -70,6 +70,20 @@ int lookup_insert(struct __sk_buff *ctx)
}
SEC("?tc")
+int write_not_allowlisted_field(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct->status = 0xF00;
+ return 0;
+}
+
+SEC("?tc")
int set_timeout_after_insert(struct __sk_buff *ctx)
{
struct bpf_ct_opts___local opts = {};