From bf3849755ac606f2a04808b6b706a16867d1e1b8 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Sat, 28 Jan 2023 01:06:20 +0100 Subject: bpf: Use ARG_CONST_SIZE_OR_ZERO for 3rd argument of bpf_tcp_raw_gen_syncookie_ipv{4,6}() These functions already check that th_len < sizeof(*th), and propagating the lower bound (th_len > 0) may be challenging in complex code, e.g. as is the case with xdp_synproxy test on s390x [1]. Switch to ARG_CONST_SIZE_OR_ZERO in order to make the verifier accept code where it cannot prove that th_len > 0. [1] https://lore.kernel.org/bpf/CAEf4Bzb3uiSHtUbgVWmkWuJ5Sw1UZd4c_iuS4QXtUkXmTTtXuQ@mail.gmail.com/ Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230128000650.1516334-2-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index d8f9b53f3db6..0039cf16713e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7536,7 +7536,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = { .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, .arg1_size = sizeof(struct iphdr), .arg2_type = ARG_PTR_TO_MEM, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph, @@ -7568,7 +7568,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = { .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, .arg1_size = sizeof(struct ipv6hdr), .arg2_type = ARG_PTR_TO_MEM, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph, -- cgit v1.2.3 From be6b5c10ecc4014446e5c807d6a69c5a7cc1c497 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Sat, 28 Jan 2023 01:06:33 +0100 Subject: selftests/bpf: Add a sign-extension test for kfuncs s390x ABI requires the caller to zero- or sign-extend the arguments. 
eBPF already deals with zero-extension (by definition of its ABI), but not with sign-extension. Add a test to cover that potentially problematic area. Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230128000650.1516334-15-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 9 +++++++++ tools/testing/selftests/bpf/prog_tests/kfunc_call.c | 1 + tools/testing/selftests/bpf/progs/kfunc_call_test.c | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'net') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 8da0d73b368e..7dbefa4fd2eb 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -550,6 +550,14 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk) return sk; } +long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) +{ + /* Provoke the compiler to assume that the caller has sign-extended a, + * b and c on platforms where this is required (e.g. s390x). + */ + return (long)a + (long)b + (long)c + d; +} + struct prog_test_member1 { int a; }; @@ -746,6 +754,7 @@ BTF_SET8_START(test_sk_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) BTF_ID_FLAGS(func, bpf_kfunc_call_test2) BTF_ID_FLAGS(func, bpf_kfunc_call_test3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test4) BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 5af1ee8f0e6e..bb4cd82a788a 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -72,6 +72,7 @@ static struct kfunc_test_params kfunc_tests[] = { /* success cases */ TC_TEST(kfunc_call_test1, 12), TC_TEST(kfunc_call_test2, 3), + TC_TEST(kfunc_call_test4, -1234), TC_TEST(kfunc_call_test_ref_btf_id, 0), 
TC_TEST(kfunc_call_test_get_mem, 42), SYSCALL_TEST(kfunc_syscall_test, 0), diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index f636e50be259..d91c58d06d38 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -3,6 +3,7 @@ #include #include +extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, __u32 c, __u64 d) __ksym; @@ -17,6 +18,23 @@ extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +SEC("tc") +int kfunc_call_test4(struct __sk_buff *skb) +{ + struct bpf_sock *sk = skb->sk; + long tmp; + + if (!sk) + return -1; + + sk = bpf_sk_fullsock(sk); + if (!sk) + return -1; + + tmp = bpf_kfunc_call_test4(-3, -30, -200, -1000); + return (tmp >> 32) + tmp; +} + SEC("tc") int kfunc_call_test2(struct __sk_buff *skb) { -- cgit v1.2.3 From 400031e05adfcef9e80eca80bdfc3f4b63658be4 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 1 Feb 2023 11:30:15 -0600 Subject: bpf: Add __bpf_kfunc tag to all kfuncs Now that we have the __bpf_kfunc tag, we should add it to all existing kfuncs to ensure that they'll never be elided in LTO builds. 
Signed-off-by: David Vernet Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20230201173016.342758-4-void@manifault.com --- kernel/bpf/cpumask.c | 60 +++++++++++----------- kernel/bpf/helpers.c | 38 +++++++------- kernel/cgroup/rstat.c | 4 +- kernel/kexec_core.c | 3 +- kernel/trace/bpf_trace.c | 8 +-- net/bpf/test_run.c | 55 ++++++++++---------- net/core/xdp.c | 5 +- net/ipv4/tcp_bbr.c | 16 +++--- net/ipv4/tcp_cong.c | 10 ++-- net/ipv4/tcp_cubic.c | 12 ++--- net/ipv4/tcp_dctcp.c | 12 ++--- net/netfilter/nf_conntrack_bpf.c | 20 ++++---- net/netfilter/nf_nat_bpf.c | 6 +-- net/xfrm/xfrm_interface_bpf.c | 7 +-- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 2 +- 15 files changed, 130 insertions(+), 128 deletions(-) (limited to 'net') diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 6bbb67dfc998..52b981512a35 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -48,7 +48,7 @@ __diag_ignore_all("-Wmissing-prototypes", * bpf_cpumask_create() allocates memory using the BPF memory allocator, and * will not block. It may return NULL if no memory is available. */ -struct bpf_cpumask *bpf_cpumask_create(void) +__bpf_kfunc struct bpf_cpumask *bpf_cpumask_create(void) { struct bpf_cpumask *cpumask; @@ -74,7 +74,7 @@ struct bpf_cpumask *bpf_cpumask_create(void) * must either be embedded in a map as a kptr, or freed with * bpf_cpumask_release(). */ -struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) +__bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) { refcount_inc(&cpumask->usage); return cpumask; @@ -90,7 +90,7 @@ struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) * kptr, or freed with bpf_cpumask_release(). This function may return NULL if * no BPF cpumask was found in the specified map value. 
*/ -struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp) +__bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp) { struct bpf_cpumask *cpumask; @@ -116,7 +116,7 @@ struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp) * reference of the BPF cpumask has been released, it is subsequently freed in * an RCU callback in the BPF memory allocator. */ -void bpf_cpumask_release(struct bpf_cpumask *cpumask) +__bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask) { if (!cpumask) return; @@ -135,7 +135,7 @@ void bpf_cpumask_release(struct bpf_cpumask *cpumask) * Find the index of the first nonzero bit of the cpumask. A struct bpf_cpumask * pointer may be safely passed to this function. */ -u32 bpf_cpumask_first(const struct cpumask *cpumask) +__bpf_kfunc u32 bpf_cpumask_first(const struct cpumask *cpumask) { return cpumask_first(cpumask); } @@ -148,7 +148,7 @@ u32 bpf_cpumask_first(const struct cpumask *cpumask) * Find the index of the first unset bit of the cpumask. A struct bpf_cpumask * pointer may be safely passed to this function. */ -u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) +__bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) { return cpumask_first_zero(cpumask); } @@ -158,7 +158,7 @@ u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) * @cpu: The CPU to be set in the cpumask. * @cpumask: The BPF cpumask in which a bit is being set. */ -void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) +__bpf_kfunc void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) { if (!cpu_valid(cpu)) return; @@ -171,7 +171,7 @@ void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) * @cpu: The CPU to be cleared from the cpumask. * @cpumask: The BPF cpumask in which a bit is being cleared. 
*/ -void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) +__bpf_kfunc void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) { if (!cpu_valid(cpu)) return; @@ -188,7 +188,7 @@ void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) * * true - @cpu is set in the cpumask * * false - @cpu was not set in the cpumask, or @cpu is an invalid cpu. */ -bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) +__bpf_kfunc bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) { if (!cpu_valid(cpu)) return false; @@ -205,7 +205,7 @@ bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) * * true - @cpu is set in the cpumask * * false - @cpu was not set in the cpumask, or @cpu is invalid. */ -bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) +__bpf_kfunc bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) { if (!cpu_valid(cpu)) return false; @@ -223,7 +223,7 @@ bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) * * true - @cpu is set in the cpumask * * false - @cpu was not set in the cpumask, or @cpu is invalid. */ -bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) +__bpf_kfunc bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) { if (!cpu_valid(cpu)) return false; @@ -235,7 +235,7 @@ bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) * bpf_cpumask_setall() - Set all of the bits in a BPF cpumask. * @cpumask: The BPF cpumask having all of its bits set. */ -void bpf_cpumask_setall(struct bpf_cpumask *cpumask) +__bpf_kfunc void bpf_cpumask_setall(struct bpf_cpumask *cpumask) { cpumask_setall((struct cpumask *)cpumask); } @@ -244,7 +244,7 @@ void bpf_cpumask_setall(struct bpf_cpumask *cpumask) * bpf_cpumask_clear() - Clear all of the bits in a BPF cpumask. * @cpumask: The BPF cpumask being cleared. 
*/ -void bpf_cpumask_clear(struct bpf_cpumask *cpumask) +__bpf_kfunc void bpf_cpumask_clear(struct bpf_cpumask *cpumask) { cpumask_clear((struct cpumask *)cpumask); } @@ -261,9 +261,9 @@ void bpf_cpumask_clear(struct bpf_cpumask *cpumask) * * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. */ -bool bpf_cpumask_and(struct bpf_cpumask *dst, - const struct cpumask *src1, - const struct cpumask *src2) +__bpf_kfunc bool bpf_cpumask_and(struct bpf_cpumask *dst, + const struct cpumask *src1, + const struct cpumask *src2) { return cpumask_and((struct cpumask *)dst, src1, src2); } @@ -276,9 +276,9 @@ bool bpf_cpumask_and(struct bpf_cpumask *dst, * * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. */ -void bpf_cpumask_or(struct bpf_cpumask *dst, - const struct cpumask *src1, - const struct cpumask *src2) +__bpf_kfunc void bpf_cpumask_or(struct bpf_cpumask *dst, + const struct cpumask *src1, + const struct cpumask *src2) { cpumask_or((struct cpumask *)dst, src1, src2); } @@ -291,9 +291,9 @@ void bpf_cpumask_or(struct bpf_cpumask *dst, * * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. */ -void bpf_cpumask_xor(struct bpf_cpumask *dst, - const struct cpumask *src1, - const struct cpumask *src2) +__bpf_kfunc void bpf_cpumask_xor(struct bpf_cpumask *dst, + const struct cpumask *src1, + const struct cpumask *src2) { cpumask_xor((struct cpumask *)dst, src1, src2); } @@ -309,7 +309,7 @@ void bpf_cpumask_xor(struct bpf_cpumask *dst, * * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. */ -bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) +__bpf_kfunc bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) { return cpumask_equal(src1, src2); } @@ -325,7 +325,7 @@ bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) * * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. 
*/ -bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) +__bpf_kfunc bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) { return cpumask_intersects(src1, src2); } @@ -341,7 +341,7 @@ bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *sr * * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. */ -bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) +__bpf_kfunc bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) { return cpumask_subset(src1, src2); } @@ -356,7 +356,7 @@ bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) * * A struct bpf_cpumask pointer may be safely passed to @cpumask. */ -bool bpf_cpumask_empty(const struct cpumask *cpumask) +__bpf_kfunc bool bpf_cpumask_empty(const struct cpumask *cpumask) { return cpumask_empty(cpumask); } @@ -371,7 +371,7 @@ bool bpf_cpumask_empty(const struct cpumask *cpumask) * * A struct bpf_cpumask pointer may be safely passed to @cpumask. */ -bool bpf_cpumask_full(const struct cpumask *cpumask) +__bpf_kfunc bool bpf_cpumask_full(const struct cpumask *cpumask) { return cpumask_full(cpumask); } @@ -383,7 +383,7 @@ bool bpf_cpumask_full(const struct cpumask *cpumask) * * A struct bpf_cpumask pointer may be safely passed to @src. */ -void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) +__bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) { cpumask_copy((struct cpumask *)dst, src); } @@ -398,7 +398,7 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) * * A struct bpf_cpumask pointer may be safely passed to @src. 
*/ -u32 bpf_cpumask_any(const struct cpumask *cpumask) +__bpf_kfunc u32 bpf_cpumask_any(const struct cpumask *cpumask) { return cpumask_any(cpumask); } @@ -415,7 +415,7 @@ u32 bpf_cpumask_any(const struct cpumask *cpumask) * * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. */ -u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) +__bpf_kfunc u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) { return cpumask_any_and(src1, src2); } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 458db2db2f81..2dae44581922 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1776,7 +1776,7 @@ __diag_push(); __diag_ignore_all("-Wmissing-prototypes", "Global functions as their definitions will be in vmlinux BTF"); -void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) +__bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) { struct btf_struct_meta *meta = meta__ign; u64 size = local_type_id__k; @@ -1790,7 +1790,7 @@ void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) return p; } -void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) +__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) { struct btf_struct_meta *meta = meta__ign; void *p = p__alloc; @@ -1811,12 +1811,12 @@ static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *hea tail ? 
list_add_tail(n, h) : list_add(n, h); } -void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) +__bpf_kfunc void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) { return __bpf_list_add(node, head, false); } -void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) +__bpf_kfunc void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) { return __bpf_list_add(node, head, true); } @@ -1834,12 +1834,12 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai return (struct bpf_list_node *)n; } -struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) +__bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) { return __bpf_list_del(head, false); } -struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) +__bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) { return __bpf_list_del(head, true); } @@ -1850,7 +1850,7 @@ struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) * bpf_task_release(). * @p: The task on which a reference is being acquired. */ -struct task_struct *bpf_task_acquire(struct task_struct *p) +__bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p) { return get_task_struct(p); } @@ -1861,7 +1861,7 @@ struct task_struct *bpf_task_acquire(struct task_struct *p) * released by calling bpf_task_release(). * @p: The task on which a reference is being acquired. */ -struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) +__bpf_kfunc struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) { /* For the time being this function returns NULL, as it's not currently * possible to safely acquire a reference to a task with RCU protection @@ -1913,7 +1913,7 @@ struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) * be released by calling bpf_task_release(). 
* @pp: A pointer to a task kptr on which a reference is being acquired. */ -struct task_struct *bpf_task_kptr_get(struct task_struct **pp) +__bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp) { /* We must return NULL here until we have clarity on how to properly * leverage RCU for ensuring a task's lifetime. See the comment above @@ -1926,7 +1926,7 @@ struct task_struct *bpf_task_kptr_get(struct task_struct **pp) * bpf_task_release - Release the reference acquired on a task. * @p: The task on which a reference is being released. */ -void bpf_task_release(struct task_struct *p) +__bpf_kfunc void bpf_task_release(struct task_struct *p) { if (!p) return; @@ -1941,7 +1941,7 @@ void bpf_task_release(struct task_struct *p) * calling bpf_cgroup_release(). * @cgrp: The cgroup on which a reference is being acquired. */ -struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) +__bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) { cgroup_get(cgrp); return cgrp; @@ -1953,7 +1953,7 @@ struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) * be released by calling bpf_cgroup_release(). * @cgrpp: A pointer to a cgroup kptr on which a reference is being acquired. */ -struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp) +__bpf_kfunc struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp) { struct cgroup *cgrp; @@ -1985,7 +1985,7 @@ struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp) * drops to 0. * @cgrp: The cgroup on which a reference is being released. */ -void bpf_cgroup_release(struct cgroup *cgrp) +__bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp) { if (!cgrp) return; @@ -2000,7 +2000,7 @@ void bpf_cgroup_release(struct cgroup *cgrp) * @cgrp: The cgroup for which we're performing a lookup. * @level: The level of ancestor to look up. 
*/ -struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) +__bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) { struct cgroup *ancestor; @@ -2019,7 +2019,7 @@ struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) * stored in a map, or released with bpf_task_release(). * @pid: The pid of the task being looked up. */ -struct task_struct *bpf_task_from_pid(s32 pid) +__bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) { struct task_struct *p; @@ -2032,22 +2032,22 @@ struct task_struct *bpf_task_from_pid(s32 pid) return p; } -void *bpf_cast_to_kern_ctx(void *obj) +__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) { return obj; } -void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k) +__bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k) { return obj__ign; } -void bpf_rcu_read_lock(void) +__bpf_kfunc void bpf_rcu_read_lock(void) { rcu_read_lock(); } -void bpf_rcu_read_unlock(void) +__bpf_kfunc void bpf_rcu_read_unlock(void) { rcu_read_unlock(); } diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 793ecff29038..831f1f472bb8 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -26,7 +26,7 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu) * rstat_cpu->updated_children list. See the comment on top of * cgroup_rstat_cpu definition for details. */ -void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) +__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) { raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu); unsigned long flags; @@ -231,7 +231,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) * * This function may block. 
*/ -void cgroup_rstat_flush(struct cgroup *cgrp) +__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp) { might_sleep(); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 969e8f52f7da..b1cf259854ca 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -975,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs) } STACK_FRAME_NON_STANDARD(__crash_kexec); -void crash_kexec(struct pt_regs *regs) +__bpf_kfunc void crash_kexec(struct pt_regs *regs) { int old_cpu, this_cpu; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b1eff2efd3b4..ff1458e541a8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1236,7 +1236,7 @@ __diag_ignore_all("-Wmissing-prototypes", * Return: a bpf_key pointer with a valid key pointer if the key is found, a * NULL pointer otherwise. */ -struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) +__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) { key_ref_t key_ref; struct bpf_key *bkey; @@ -1285,7 +1285,7 @@ struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) * Return: a bpf_key pointer with an invalid key pointer set from the * pre-determined ID on success, a NULL pointer otherwise */ -struct bpf_key *bpf_lookup_system_key(u64 id) +__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id) { struct bpf_key *bkey; @@ -1309,7 +1309,7 @@ struct bpf_key *bpf_lookup_system_key(u64 id) * Decrement the reference count of the key inside *bkey*, if the pointer * is valid, and free *bkey*. */ -void bpf_key_put(struct bpf_key *bkey) +__bpf_kfunc void bpf_key_put(struct bpf_key *bkey) { if (bkey->has_ref) key_put(bkey->key); @@ -1329,7 +1329,7 @@ void bpf_key_put(struct bpf_key *bkey) * * Return: 0 on success, a negative value on error. 
*/ -int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, +__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, struct bpf_dynptr_kern *sig_ptr, struct bpf_key *trusted_keyring) { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 7dbefa4fd2eb..af9827c4b351 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -484,7 +484,7 @@ out: __diag_push(); __diag_ignore_all("-Wmissing-prototypes", "Global functions as their definitions will be in vmlinux BTF"); -int noinline bpf_fentry_test1(int a) +__bpf_kfunc int bpf_fentry_test1(int a) { return a + 1; } @@ -529,23 +529,23 @@ int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg) return (long)arg->a; } -int noinline bpf_modify_return_test(int a, int *b) +__bpf_kfunc int bpf_modify_return_test(int a, int *b) { *b += 1; return a + *b; } -u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) +__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) { return a + b + c + d; } -int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) +__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) { return a + b; } -struct sock * noinline bpf_kfunc_call_test3(struct sock *sk) +__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) { return sk; } @@ -582,21 +582,21 @@ static struct prog_test_ref_kfunc prog_test_struct = { .cnt = REFCOUNT_INIT(1), }; -noinline struct prog_test_ref_kfunc * +__bpf_kfunc struct prog_test_ref_kfunc * bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) { refcount_inc(&prog_test_struct.cnt); return &prog_test_struct; } -noinline struct prog_test_member * +__bpf_kfunc struct prog_test_member * bpf_kfunc_call_memb_acquire(void) { WARN_ON_ONCE(1); return NULL; } -noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) +__bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { if (!p) return; @@ -604,11 +604,11 @@ noinline void 
bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) refcount_dec(&p->cnt); } -noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p) +__bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } -noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) +__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) { WARN_ON_ONCE(1); } @@ -621,12 +621,14 @@ static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const i return (int *)p; } -noinline int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) +__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, + const int rdwr_buf_size) { return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); } -noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) +__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) { return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); } @@ -636,16 +638,17 @@ noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, * Acquire functions must return struct pointers, so these ones are * failing. 
*/ -noinline int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) +__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) { return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); } -noinline void bpf_kfunc_call_int_mem_release(int *p) +__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) { } -noinline struct prog_test_ref_kfunc * +__bpf_kfunc struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b) { struct prog_test_ref_kfunc *p = READ_ONCE(*pp); @@ -694,47 +697,47 @@ struct prog_test_fail3 { char arr2[]; }; -noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) +__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) { } -noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) +__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) { } -noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) +__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) { } -noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) +__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) { } -noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) +__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) { } -noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) +__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) { } -noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) +__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) { } -noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) { } -noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) { } -noinline void 
bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) +__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) { } -noinline void bpf_kfunc_call_test_destructive(void) +__bpf_kfunc void bpf_kfunc_call_test_destructive(void) { } diff --git a/net/core/xdp.c b/net/core/xdp.c index a5a7ecf6391c..787fb9f92b36 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -4,6 +4,7 @@ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. */ #include +#include #include #include #include @@ -722,7 +723,7 @@ __diag_ignore_all("-Wmissing-prototypes", * * Returns 0 on success or ``-errno`` on error. */ -int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) +__bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) { return -EOPNOTSUPP; } @@ -734,7 +735,7 @@ int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) * * Returns 0 on success or ``-errno`` on error. */ -int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash) +__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash) { return -EOPNOTSUPP; } diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index d2c470524e58..146792cd26fe 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -295,7 +295,7 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) } /* override sysctl_tcp_min_tso_segs */ -static u32 bbr_min_tso_segs(struct sock *sk) +__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) { return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 
1 : 2; } @@ -328,7 +328,7 @@ static void bbr_save_cwnd(struct sock *sk) bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp)); } -static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +__bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); @@ -1023,7 +1023,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) bbr_update_gains(sk); } -static void bbr_main(struct sock *sk, const struct rate_sample *rs) +__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs) { struct bbr *bbr = inet_csk_ca(sk); u32 bw; @@ -1035,7 +1035,7 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs) bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); } -static void bbr_init(struct sock *sk) +__bpf_kfunc static void bbr_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); @@ -1077,7 +1077,7 @@ static void bbr_init(struct sock *sk) cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); } -static u32 bbr_sndbuf_expand(struct sock *sk) +__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) { /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ return 3; @@ -1086,7 +1086,7 @@ static u32 bbr_sndbuf_expand(struct sock *sk) /* In theory BBR does not need to undo the cwnd since it does not * always reduce cwnd on losses (see bbr_main()). Keep it for now. */ -static u32 bbr_undo_cwnd(struct sock *sk) +__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); @@ -1097,7 +1097,7 @@ static u32 bbr_undo_cwnd(struct sock *sk) } /* Entering loss recovery, so save cwnd for when we exit or undo recovery. 
*/ -static u32 bbr_ssthresh(struct sock *sk) +__bpf_kfunc static u32 bbr_ssthresh(struct sock *sk) { bbr_save_cwnd(sk); return tcp_sk(sk)->snd_ssthresh; @@ -1125,7 +1125,7 @@ static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, return 0; } -static void bbr_set_state(struct sock *sk, u8 new_state) +__bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state) { struct bbr *bbr = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index d3cae40749e8..db8b4b488c31 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -403,7 +403,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and * returns the leftover acks to adjust cwnd in congestion avoidance mode. */ -u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) +__bpf_kfunc u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh); @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w), * for every packet that was ACKed. */ -void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) +__bpf_kfunc void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { /* If credits accumulated at a higher w, apply them gently now. */ if (tp->snd_cwnd_cnt >= w) { @@ -443,7 +443,7 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. 
*/ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) +__bpf_kfunc void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -462,7 +462,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); /* Slow start threshold is half the congestion window (min 2) */ -u32 tcp_reno_ssthresh(struct sock *sk) +__bpf_kfunc u32 tcp_reno_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); @@ -470,7 +470,7 @@ u32 tcp_reno_ssthresh(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); -u32 tcp_reno_undo_cwnd(struct sock *sk) +__bpf_kfunc u32 tcp_reno_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 768c10c1f649..0fd78ecb67e7 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -126,7 +126,7 @@ static inline void bictcp_hystart_reset(struct sock *sk) ca->sample_cnt = 0; } -static void cubictcp_init(struct sock *sk) +__bpf_kfunc static void cubictcp_init(struct sock *sk) { struct bictcp *ca = inet_csk_ca(sk); @@ -139,7 +139,7 @@ static void cubictcp_init(struct sock *sk) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } -static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) +__bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { struct bictcp *ca = inet_csk_ca(sk); @@ -321,7 +321,7 @@ tcp_friendliness: ca->cnt = max(ca->cnt, 2U); } -static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) +__bpf_kfunc static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); @@ -338,7 +338,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) tcp_cong_avoid_ai(tp, ca->cnt, acked); } -static u32 cubictcp_recalc_ssthresh(struct sock *sk) +__bpf_kfunc static u32 cubictcp_recalc_ssthresh(struct 
sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); @@ -355,7 +355,7 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk) return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U); } -static void cubictcp_state(struct sock *sk, u8 new_state) +__bpf_kfunc static void cubictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) { bictcp_reset(inet_csk_ca(sk)); @@ -445,7 +445,7 @@ static void hystart_update(struct sock *sk, u32 delay) } } -static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) +__bpf_kfunc static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index e0a2ca7456ff..bb23bb5b387a 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -75,7 +75,7 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) ca->old_delivered_ce = tp->delivered_ce; } -static void dctcp_init(struct sock *sk) +__bpf_kfunc static void dctcp_init(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); @@ -104,7 +104,7 @@ static void dctcp_init(struct sock *sk) INET_ECN_dontxmit(sk); } -static u32 dctcp_ssthresh(struct sock *sk) +__bpf_kfunc static u32 dctcp_ssthresh(struct sock *sk) { struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -113,7 +113,7 @@ static u32 dctcp_ssthresh(struct sock *sk) return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U); } -static void dctcp_update_alpha(struct sock *sk, u32 flags) +__bpf_kfunc static void dctcp_update_alpha(struct sock *sk, u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); @@ -169,7 +169,7 @@ static void dctcp_react_to_loss(struct sock *sk) tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U); } -static void dctcp_state(struct sock *sk, u8 new_state) +__bpf_kfunc static void 
dctcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Recovery && new_state != inet_csk(sk)->icsk_ca_state) @@ -179,7 +179,7 @@ static void dctcp_state(struct sock *sk, u8 new_state) */ } -static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) +__bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) { struct dctcp *ca = inet_csk_ca(sk); @@ -229,7 +229,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, return 0; } -static u32 dctcp_cwnd_undo(struct sock *sk) +__bpf_kfunc static u32 dctcp_cwnd_undo(struct sock *sk) { const struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 24002bc61e07..34913521c385 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -249,7 +249,7 @@ __diag_ignore_all("-Wmissing-prototypes", * @opts__sz - Length of the bpf_ct_opts structure * Must be NF_BPF_CT_OPTS_SZ (12) */ -struct nf_conn___init * +__bpf_kfunc struct nf_conn___init * bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) { @@ -283,7 +283,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, * @opts__sz - Length of the bpf_ct_opts structure * Must be NF_BPF_CT_OPTS_SZ (12) */ -struct nf_conn * +__bpf_kfunc struct nf_conn * bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) { @@ -316,7 +316,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, * @opts__sz - Length of the bpf_ct_opts structure * Must be NF_BPF_CT_OPTS_SZ (12) */ -struct nf_conn___init * +__bpf_kfunc struct nf_conn___init * bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) { @@ -351,7 +351,7 @@ bpf_skb_ct_alloc(struct 
__sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, * @opts__sz - Length of the bpf_ct_opts structure * Must be NF_BPF_CT_OPTS_SZ (12) */ -struct nf_conn * +__bpf_kfunc struct nf_conn * bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) { @@ -376,7 +376,7 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, * @nfct - Pointer to referenced nf_conn___init object, obtained * using bpf_xdp_ct_alloc or bpf_skb_ct_alloc. */ -struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) +__bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) { struct nf_conn *nfct = (struct nf_conn *)nfct_i; int err; @@ -400,7 +400,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) * @nf_conn - Pointer to referenced nf_conn object, obtained using * bpf_xdp_ct_lookup or bpf_skb_ct_lookup. */ -void bpf_ct_release(struct nf_conn *nfct) +__bpf_kfunc void bpf_ct_release(struct nf_conn *nfct) { if (!nfct) return; @@ -417,7 +417,7 @@ void bpf_ct_release(struct nf_conn *nfct) * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. * @timeout - Timeout in msecs. */ -void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout) +__bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout) { __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout)); } @@ -432,7 +432,7 @@ void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout) * bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup. * @timeout - New timeout in msecs. */ -int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout) +__bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout) { return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout)); } @@ -447,7 +447,7 @@ int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout) * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. * @status - New status value. 
*/ -int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status) +__bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status) { return nf_ct_change_status_common((struct nf_conn *)nfct, status); } @@ -462,7 +462,7 @@ int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status) * bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup. * @status - New status value. */ -int bpf_ct_change_status(struct nf_conn *nfct, u32 status) +__bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) { return nf_ct_change_status_common(nfct, status); } diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c index 0fa5a0bbb0ff..141ee7783223 100644 --- a/net/netfilter/nf_nat_bpf.c +++ b/net/netfilter/nf_nat_bpf.c @@ -30,9 +30,9 @@ __diag_ignore_all("-Wmissing-prototypes", * interpreted as select a random port. * @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST */ -int bpf_ct_set_nat_info(struct nf_conn___init *nfct, - union nf_inet_addr *addr, int port, - enum nf_nat_manip_type manip) +__bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct, + union nf_inet_addr *addr, int port, + enum nf_nat_manip_type manip) { struct nf_conn *ct = (struct nf_conn *)nfct; u16 proto = nf_ct_l3num(ct); diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c index 1ef2162cebcf..d74f3fd20f2b 100644 --- a/net/xfrm/xfrm_interface_bpf.c +++ b/net/xfrm/xfrm_interface_bpf.c @@ -39,8 +39,7 @@ __diag_ignore_all("-Wmissing-prototypes", * @to - Pointer to memory to which the metadata will be copied * Cannot be NULL */ -__used noinline -int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to) +__bpf_kfunc int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to) { struct sk_buff *skb = (struct sk_buff *)skb_ctx; struct xfrm_md_info *info; @@ -62,9 +61,7 @@ int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to) * @from - Pointer to memory from which the 
metadata will be copied * Cannot be NULL */ -__used noinline -int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, - const struct bpf_xfrm_info *from) +__bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bpf_xfrm_info *from) { struct sk_buff *skb = (struct sk_buff *)skb_ctx; struct metadata_dst *md_dst; diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 5085fea3cac5..46500636d8cd 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -59,7 +59,7 @@ bpf_testmod_test_struct_arg_5(void) { return bpf_testmod_test_struct_arg_result; } -noinline void +__bpf_kfunc void bpf_testmod_test_mod_kfunc(int i) { *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; -- cgit v1.2.3 From 6aed15e330bfec6a423f40582b2a8b53d9ce1757 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 1 Feb 2023 11:30:16 -0600 Subject: selftests/bpf: Add testcase for static kfunc with unused arg kfuncs are allowed to be static, or not use one or more of their arguments. For example, bpf_xdp_metadata_rx_hash() in net/core/xdp.c is meant to be implemented by drivers, with the default implementation just returning -EOPNOTSUPP. As described in [0], such kfuncs can have their arguments elided, which can cause BTF encoding to be skipped. The new __bpf_kfunc macro should address this, and this patch adds a selftest which verifies that a static kfunc with at least one unused argument can still be encoded and invoked by a BPF program. 
Signed-off-by: David Vernet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230201173016.342758-5-void@manifault.com --- net/bpf/test_run.c | 6 ++++++ tools/testing/selftests/bpf/prog_tests/kfunc_call.c | 1 + tools/testing/selftests/bpf/progs/kfunc_call_test.c | 11 +++++++++++ 3 files changed, 18 insertions(+) (limited to 'net') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index af9827c4b351..e6f773d12045 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -741,6 +741,11 @@ __bpf_kfunc void bpf_kfunc_call_test_destructive(void) { } +__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) +{ + return arg; +} + __diag_pop(); BTF_SET8_START(bpf_test_modify_return_ids) @@ -779,6 +784,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index bb4cd82a788a..a543742cd7bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -77,6 +77,7 @@ static struct kfunc_test_params kfunc_tests[] = { TC_TEST(kfunc_call_test_get_mem, 42), SYSCALL_TEST(kfunc_syscall_test, 0), SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0), + TC_TEST(kfunc_call_test_static_unused_arg, 0), }; struct syscall_test_args { diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index d91c58d06d38..7daa8f5720b9 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -17,6 +17,7 
@@ extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; SEC("tc") int kfunc_call_test4(struct __sk_buff *skb) @@ -181,4 +182,14 @@ int kfunc_call_test_get_mem(struct __sk_buff *skb) return ret; } +SEC("tc") +int kfunc_call_test_static_unused_arg(struct __sk_buff *skb) +{ + + u32 expected = 5, actual; + + actual = bpf_kfunc_call_test_static_unused_arg(expected, 0xdeadbeef); + return actual != expected ? -1 : 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From d3d854fd6a1d97157f790604e07f6386e8df8fe4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Feb 2023 11:24:17 +0100 Subject: netdev-genl: create a simple family for netdev stuff Add a Netlink spec-compatible family for netdevs. This is a very simple implementation without much thought going into it. It allows us to reap all the benefits of Netlink specs, one can use the generic client to issue the commands: $ ./cli.py --spec netdev.yaml --dump dev_get [{'ifindex': 1, 'xdp-features': set()}, {'ifindex': 2, 'xdp-features': {'basic', 'ndo-xmit', 'redirect'}}, {'ifindex': 3, 'xdp-features': {'rx-sg'}}] the generic python library does not have flags-by-name support, yet, but we also don't have to carry strings in the messages, as user space can get the names from the spec. 
Acked-by: Jesper Dangaard Brouer Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Co-developed-by: Marek Majtyka Signed-off-by: Marek Majtyka Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/327ad9c9868becbe1e601b580c962549c8cd81f2.1675245258.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- Documentation/netlink/specs/netdev.yaml | 100 ++++++++++++++++++ include/linux/netdevice.h | 3 + include/net/xdp.h | 3 + include/uapi/linux/netdev.h | 59 +++++++++++ net/core/Makefile | 3 +- net/core/dev.c | 1 + net/core/netdev-genl-gen.c | 48 +++++++++ net/core/netdev-genl-gen.h | 23 ++++ net/core/netdev-genl.c | 179 ++++++++++++++++++++++++++++++++ tools/include/uapi/linux/netdev.h | 59 +++++++++++ 10 files changed, 477 insertions(+), 1 deletion(-) create mode 100644 Documentation/netlink/specs/netdev.yaml create mode 100644 include/uapi/linux/netdev.h create mode 100644 net/core/netdev-genl-gen.c create mode 100644 net/core/netdev-genl-gen.h create mode 100644 net/core/netdev-genl.c create mode 100644 tools/include/uapi/linux/netdev.h (limited to 'net') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml new file mode 100644 index 000000000000..b4dcdae54ffd --- /dev/null +++ b/Documentation/netlink/specs/netdev.yaml @@ -0,0 +1,100 @@ +name: netdev + +doc: + netdev configuration over generic netlink. + +definitions: + - + type: flags + name: xdp-act + entries: + - + name: basic + doc: + XDP features set supported by all drivers + (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX) + - + name: redirect + doc: + The netdev supports XDP_REDIRECT + - + name: ndo-xmit + doc: + This feature informs if netdev implements ndo_xdp_xmit callback. + - + name: xsk-zerocopy + doc: + This feature informs if netdev supports AF_XDP in zero copy mode. + - + name: hw-offload + doc: + This feature informs if netdev supports XDP hw offloading.
+ - + name: rx-sg + doc: + This feature informs if netdev implements non-linear XDP buffer + support in the driver napi callback. + - + name: ndo-xmit-sg + doc: + This feature informs if netdev implements non-linear XDP buffer + support in ndo_xdp_xmit callback. + +attribute-sets: + - + name: dev + attributes: + - + name: ifindex + doc: netdev ifindex + type: u32 + value: 1 + checks: + min: 1 + - + name: pad + type: pad + - + name: xdp-features + doc: Bitmask of enabled xdp-features. + type: u64 + enum: xdp-act + enum-as-flags: true + +operations: + list: + - + name: dev-get + doc: Get / dump information about a netdev. + value: 1 + attribute-set: dev + do: + request: + attributes: + - ifindex + reply: &dev-all + attributes: + - ifindex + - xdp-features + dump: + reply: *dev-all + - + name: dev-add-ntf + doc: Notification about device appearing. + notify: dev-get + mcgrp: mgmt + - + name: dev-del-ntf + doc: Notification about device disappearing. + notify: dev-get + mcgrp: mgmt + - + name: dev-change-ntf + doc: Notification about device configuration being changed. 
+ notify: dev-get + mcgrp: mgmt + +mcast-groups: + list: + - + name: mgmt diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2466afa25078..0f7967591288 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -2055,6 +2056,7 @@ struct net_device { /* Read-mostly cache-line for fast-path access */ unsigned int flags; + xdp_features_t xdp_features; unsigned long long priv_flags; const struct net_device_ops *netdev_ops; const struct xdp_metadata_ops *xdp_metadata_ops; @@ -2839,6 +2841,7 @@ enum netdev_cmd { NETDEV_OFFLOAD_XSTATS_DISABLE, NETDEV_OFFLOAD_XSTATS_REPORT_USED, NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, + NETDEV_XDP_FEAT_CHANGE, }; const char *netdev_cmd_to_name(enum netdev_cmd cmd); diff --git a/include/net/xdp.h b/include/net/xdp.h index 91292aa13bc0..8d1c86914f4c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -7,6 +7,7 @@ #define __LINUX_NET_XDP_H__ #include /* skb_shared_info */ +#include /** * DOC: XDP RX-queue information @@ -43,6 +44,8 @@ enum xdp_mem_type { MEM_TYPE_MAX, }; +typedef u32 xdp_features_t; + /* XDP flags for ndo_xdp_xmit */ #define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ #define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h new file mode 100644 index 000000000000..9ee459872600 --- /dev/null +++ b/include/uapi/linux/netdev.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_NETDEV_H +#define _UAPI_LINUX_NETDEV_H + +#define NETDEV_FAMILY_NAME "netdev" +#define NETDEV_FAMILY_VERSION 1 + +/** + * enum netdev_xdp_act + * @NETDEV_XDP_ACT_BASIC: XDP feautues set supported by all drivers + * (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX) + * @NETDEV_XDP_ACT_REDIRECT: The netdev 
supports XDP_REDIRECT + * @NETDEV_XDP_ACT_NDO_XMIT: This feature informs if netdev implements + * ndo_xdp_xmit callback. + * @NETDEV_XDP_ACT_XSK_ZEROCOPY: This feature informs if netdev supports AF_XDP + * in zero copy mode. + * @NETDEV_XDP_ACT_HW_OFFLOAD: This feature informs if netdev supports XDP hw + * oflloading. + * @NETDEV_XDP_ACT_RX_SG: This feature informs if netdev implements non-linear + * XDP buffer support in the driver napi callback. + * @NETDEV_XDP_ACT_NDO_XMIT_SG: This feature informs if netdev implements + * non-linear XDP buffer support in ndo_xdp_xmit callback. + */ +enum netdev_xdp_act { + NETDEV_XDP_ACT_BASIC = 1, + NETDEV_XDP_ACT_REDIRECT = 2, + NETDEV_XDP_ACT_NDO_XMIT = 4, + NETDEV_XDP_ACT_XSK_ZEROCOPY = 8, + NETDEV_XDP_ACT_HW_OFFLOAD = 16, + NETDEV_XDP_ACT_RX_SG = 32, + NETDEV_XDP_ACT_NDO_XMIT_SG = 64, +}; + +enum { + NETDEV_A_DEV_IFINDEX = 1, + NETDEV_A_DEV_PAD, + NETDEV_A_DEV_XDP_FEATURES, + + __NETDEV_A_DEV_MAX, + NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) +}; + +enum { + NETDEV_CMD_DEV_GET = 1, + NETDEV_CMD_DEV_ADD_NTF, + NETDEV_CMD_DEV_DEL_NTF, + NETDEV_CMD_DEV_CHANGE_NTF, + + __NETDEV_CMD_MAX, + NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) +}; + +#define NETDEV_MCGRP_MGMT "mgmt" + +#endif /* _UAPI_LINUX_NETDEV_H */ diff --git a/net/core/Makefile b/net/core/Makefile index 10edd66a8a37..8f367813bc68 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -12,7 +12,8 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ - fib_notifier.o xdp.o flow_offload.o gro.o + fib_notifier.o xdp.o flow_offload.o gro.o \ + netdev-genl.o netdev-genl-gen.o obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o diff --git a/net/core/dev.c b/net/core/dev.c index f72f5c4ee7e2..9ac0eeb2c8cd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1614,6 +1614,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd 
cmd) N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE) N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA) + N(XDP_FEAT_CHANGE) } #undef N return "UNKNOWN_NETDEV_EVENT"; diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c new file mode 100644 index 000000000000..48812ec843f5 --- /dev/null +++ b/net/core/netdev-genl-gen.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN kernel source */ + +#include +#include + +#include "netdev-genl-gen.h" + +#include + +/* NETDEV_CMD_DEV_GET - do */ +static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = { + [NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + +/* Ops table for netdev */ +static const struct genl_split_ops netdev_nl_ops[2] = { + { + .cmd = NETDEV_CMD_DEV_GET, + .doit = netdev_nl_dev_get_doit, + .policy = netdev_dev_get_nl_policy, + .maxattr = NETDEV_A_DEV_IFINDEX, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_DEV_GET, + .dumpit = netdev_nl_dev_get_dumpit, + .flags = GENL_CMD_CAP_DUMP, + }, +}; + +static const struct genl_multicast_group netdev_nl_mcgrps[] = { + [NETDEV_NLGRP_MGMT] = { "mgmt", }, +}; + +struct genl_family netdev_nl_family __ro_after_init = { + .name = NETDEV_FAMILY_NAME, + .version = NETDEV_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = netdev_nl_ops, + .n_split_ops = ARRAY_SIZE(netdev_nl_ops), + .mcgrps = netdev_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps), +}; diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h new file mode 100644 index 000000000000..b16dc7e026bb --- /dev/null +++ b/net/core/netdev-genl-gen.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml 
*/ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_NETDEV_GEN_H +#define _LINUX_NETDEV_GEN_H + +#include +#include + +#include + +int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); + +enum { + NETDEV_NLGRP_MGMT, +}; + +extern struct genl_family netdev_nl_family; + +#endif /* _LINUX_NETDEV_GEN_H */ diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c new file mode 100644 index 000000000000..a4270fafdf11 --- /dev/null +++ b/net/core/netdev-genl.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include + +#include "netdev-genl-gen.h" + +static int +netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, + u32 portid, u32 seq, int flags, u32 cmd) +{ + void *hdr; + + hdr = genlmsg_put(rsp, portid, seq, &netdev_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, + netdev->xdp_features, NETDEV_A_DEV_PAD)) { + genlmsg_cancel(rsp, hdr); + return -EINVAL; + } + + genlmsg_end(rsp, hdr); + + return 0; +} + +static void +netdev_genl_dev_notify(struct net_device *netdev, int cmd) +{ + struct sk_buff *ntf; + + if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev), + NETDEV_NLGRP_MGMT)) + return; + + ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!ntf) + return; + + if (netdev_nl_dev_fill(netdev, ntf, 0, 0, 0, cmd)) { + nlmsg_free(ntf); + return; + } + + genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf, + 0, NETDEV_NLGRP_MGMT, GFP_KERNEL); +} + +int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *netdev; + struct sk_buff *rsp; + u32 ifindex; + int err; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); + + rsp = 
genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + rtnl_lock(); + + netdev = __dev_get_by_index(genl_info_net(info), ifindex); + if (netdev) + err = netdev_nl_dev_fill(netdev, rsp, info->snd_portid, + info->snd_seq, 0, info->genlhdr->cmd); + else + err = -ENODEV; + + rtnl_unlock(); + + if (err) + goto err_free_msg; + + return genlmsg_reply(rsp, info); + +err_free_msg: + nlmsg_free(rsp); + return err; +} + +int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + int idx = 0, s_idx; + int h, s_h; + int err; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + + rtnl_lock(); + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + struct hlist_head *head; + + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(netdev, head, index_hlist) { + if (idx < s_idx) + goto cont; + err = netdev_nl_dev_fill(netdev, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NETDEV_CMD_DEV_GET); + if (err < 0) + break; +cont: + idx++; + } + } + + rtnl_unlock(); + + if (err != -EMSGSIZE) + return err; + + cb->args[1] = idx; + cb->args[0] = h; + cb->seq = net->dev_base_seq; + + return skb->len; +} + +static int netdev_genl_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_REGISTER: + netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF); + break; + case NETDEV_UNREGISTER: + netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF); + break; + case NETDEV_XDP_FEAT_CHANGE: + netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block netdev_genl_nb = { + .notifier_call = netdev_genl_netdevice_event, +}; + +static int __init netdev_genl_init(void) +{ + int err; + + err = register_netdevice_notifier(&netdev_genl_nb); + if (err) + return err; + + err = 
genl_register_family(&netdev_nl_family); + if (err) + goto err_unreg_ntf; + + return 0; + +err_unreg_ntf: + unregister_netdevice_notifier(&netdev_genl_nb); + return err; +} + +subsys_initcall(netdev_genl_init); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h new file mode 100644 index 000000000000..9ee459872600 --- /dev/null +++ b/tools/include/uapi/linux/netdev.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_NETDEV_H +#define _UAPI_LINUX_NETDEV_H + +#define NETDEV_FAMILY_NAME "netdev" +#define NETDEV_FAMILY_VERSION 1 + +/** + * enum netdev_xdp_act + * @NETDEV_XDP_ACT_BASIC: XDP feautues set supported by all drivers + * (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX) + * @NETDEV_XDP_ACT_REDIRECT: The netdev supports XDP_REDIRECT + * @NETDEV_XDP_ACT_NDO_XMIT: This feature informs if netdev implements + * ndo_xdp_xmit callback. + * @NETDEV_XDP_ACT_XSK_ZEROCOPY: This feature informs if netdev supports AF_XDP + * in zero copy mode. + * @NETDEV_XDP_ACT_HW_OFFLOAD: This feature informs if netdev supports XDP hw + * oflloading. + * @NETDEV_XDP_ACT_RX_SG: This feature informs if netdev implements non-linear + * XDP buffer support in the driver napi callback. + * @NETDEV_XDP_ACT_NDO_XMIT_SG: This feature informs if netdev implements + * non-linear XDP buffer support in ndo_xdp_xmit callback. 
+ */ +enum netdev_xdp_act { + NETDEV_XDP_ACT_BASIC = 1, + NETDEV_XDP_ACT_REDIRECT = 2, + NETDEV_XDP_ACT_NDO_XMIT = 4, + NETDEV_XDP_ACT_XSK_ZEROCOPY = 8, + NETDEV_XDP_ACT_HW_OFFLOAD = 16, + NETDEV_XDP_ACT_RX_SG = 32, + NETDEV_XDP_ACT_NDO_XMIT_SG = 64, +}; + +enum { + NETDEV_A_DEV_IFINDEX = 1, + NETDEV_A_DEV_PAD, + NETDEV_A_DEV_XDP_FEATURES, + + __NETDEV_A_DEV_MAX, + NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) +}; + +enum { + NETDEV_CMD_DEV_GET = 1, + NETDEV_CMD_DEV_ADD_NTF, + NETDEV_CMD_DEV_DEL_NTF, + NETDEV_CMD_DEV_CHANGE_NTF, + + __NETDEV_CMD_MAX, + NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) +}; + +#define NETDEV_MCGRP_MGMT "mgmt" + +#endif /* _UAPI_LINUX_NETDEV_H */ -- cgit v1.2.3 From 66c0e13ad236c74ea88c7c1518f3cef7f372e3da Mon Sep 17 00:00:00 2001 From: Marek Majtyka Date: Wed, 1 Feb 2023 11:24:18 +0100 Subject: drivers: net: turn on XDP features A summary of the flags being set for various drivers is given below. Note that XDP_F_REDIRECT_TARGET and XDP_F_FRAG_TARGET are features that can be turned off and on at runtime. This means that these flags may be set and unset under RTNL lock protection by the driver. Hence, READ_ONCE must be used by code loading the flag value. Also, these flags are not used for synchronization against the availability of XDP resources on a device. It is merely a hint, and hence the read may race with the actual teardown of XDP resources on the device. This may change in the future, e.g. operations taking a reference on the XDP resources of the driver, and in turn inhibiting turning off this flag. However, for now, it can only be used as a hint to check whether device supports becoming a redirection target. Turn 'hw-offload' feature flag on for: - netronome (nfp) - netdevsim. Turn 'native' and 'zerocopy' features flags on for: - intel (i40e, ice, ixgbe, igc) - mellanox (mlx5). 
- stmmac - netronome (nfp) Turn 'native' features flags on for: - amazon (ena) - broadcom (bnxt) - freescale (dpaa, dpaa2, enetc) - funeth - intel (igb) - marvell (mvneta, mvpp2, octeontx2) - mellanox (mlx4) - mtk_eth_soc - qlogic (qede) - sfc - socionext (netsec) - ti (cpsw) - tap - tsnep - veth - xen - virtio_net. Turn 'basic' (tx, pass, aborted and drop) features flags on for: - netronome (nfp) - cavium (thunder) - hyperv. Turn 'redirect_target' feature flag on for: - amazon (ena) - broadcom (bnxt) - freescale (dpaa, dpaa2) - intel (i40e, ice, igb, ixgbe) - ti (cpsw) - marvell (mvneta, mvpp2) - sfc - socionext (netsec) - qlogic (qede) - mellanox (mlx5) - tap - veth - virtio_net - xen Reviewed-by: Gerhard Engleder Reviewed-by: Simon Horman Acked-by: Stanislav Fomichev Acked-by: Jakub Kicinski Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Marek Majtyka Link: https://lore.kernel.org/r/3eca9fafb308462f7edb1f58e451d59209aa07eb.1675245258.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++++ drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 5 +++++ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 ++ drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 ++ drivers/net/ethernet/engleder/tsnep_main.c | 4 ++++ drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 ++++ drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 4 ++++ drivers/net/ethernet/freescale/enetc/enetc_pf.c | 3 +++ drivers/net/ethernet/fungible/funeth/funeth_main.c | 6 ++++++ drivers/net/ethernet/intel/i40e/i40e_main.c | 10 ++++++++-- drivers/net/ethernet/intel/ice/ice_main.c | 5 +++++ drivers/net/ethernet/intel/igb/igb_main.c | 9 ++++++++- drivers/net/ethernet/intel/igc/igc_main.c | 3 +++ drivers/net/ethernet/intel/igc/igc_xdp.c | 5 +++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |
6 ++++++ drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1 + drivers/net/ethernet/marvell/mvneta.c | 3 +++ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++++ drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 8 ++++++-- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++++++++ drivers/net/ethernet/microsoft/mana/mana_en.c | 2 ++ drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 5 +++++ drivers/net/ethernet/qlogic/qede/qede_main.c | 3 +++ drivers/net/ethernet/sfc/efx.c | 4 ++++ drivers/net/ethernet/sfc/siena/efx.c | 4 ++++ drivers/net/ethernet/socionext/netsec.c | 3 +++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ drivers/net/ethernet/ti/cpsw.c | 4 ++++ drivers/net/ethernet/ti/cpsw_new.c | 4 ++++ drivers/net/hyperv/netvsc_drv.c | 2 ++ drivers/net/netdevsim/netdev.c | 1 + drivers/net/tun.c | 5 +++++ drivers/net/veth.c | 4 ++++ drivers/net/virtio_net.c | 4 ++++ drivers/net/xen-netfront.c | 2 ++ include/net/xdp.h | 12 ++++++++++++ net/core/xdp.c | 18 ++++++++++++++++++ 40 files changed, 184 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index e8ad5ea31aff..d3999db7c6a2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -597,7 +597,9 @@ static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) if (rc) return rc; } + xdp_features_set_redirect_target(netdev, false); } else if (old_bpf_prog) { + xdp_features_clear_redirect_target(netdev); rc = ena_destroy_and_free_all_xdp_queues(adapter); if (rc) return rc; @@ -4103,6 +4105,8 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter, /* Set offload features */ ena_set_dev_offloads(feat, netdev); + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; + adapter->max_mtu = 
feat->dev_attr.max_mtu; netdev->max_mtu = adapter->max_mtu; netdev->min_mtu = ENA_MIN_MTU; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 06508eebb585..d6d6d5d37ff3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -384,6 +384,11 @@ void aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN; self->ndev->max_mtu = aq_hw_caps->mtu - ETH_FCS_LEN - ETH_HLEN; + self->ndev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG; } void aq_nic_set_tx_ring(struct aq_nic_s *self, unsigned int idx, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 240a7e8a7652..a1b4356dfb6c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13686,6 +13686,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_set_tso_max_size(dev, GSO_MAX_SIZE); + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG; + #ifdef CONFIG_BNXT_SRIOV init_waitqueue_head(&bp->sriov_cfg_wait); #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 36d5202c0aee..5843c93b1711 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -422,9 +422,11 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) if (prog) { bnxt_set_rx_skb_mode(bp, true); + xdp_features_set_redirect_target(dev, true); } else { int rx, tx; + xdp_features_clear_redirect_target(dev); bnxt_set_rx_skb_mode(bp, false); bnxt_get_max_rings(bp, &rx, &tx, true); if (rx > 1) { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c 
index f2f95493ec89..8b25313c7f6b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2218,6 +2218,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &nicvf_netdev_ops; netdev->watchdog_timeo = NICVF_TX_TIMEOUT; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + /* MTU range: 64 - 9200 */ netdev->min_mtu = NIC_HW_MIN_FRS; netdev->max_mtu = NIC_HW_MAX_FRS; diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index c3cf427a9409..6982aaa928b5 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -1926,6 +1926,10 @@ static int tsnep_probe(struct platform_device *pdev) netdev->features = NETIF_F_SG; netdev->hw_features = netdev->features | NETIF_F_LOOPBACK; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG; + /* carrier off reporting is important to ethtool even BEFORE open */ netif_carrier_off(netdev); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 027fff9f7db0..9318a2554056 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -244,6 +244,10 @@ static int dpaa_netdev_init(struct net_device *net_dev, net_dev->features |= net_dev->hw_features; net_dev->vlan_features = net_dev->features; + net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + if (is_valid_ether_addr(mac_addr)) { memcpy(net_dev->perm_addr, mac_addr, net_dev->addr_len); eth_hw_addr_set(net_dev, mac_addr); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 2e79d18fc3c7..746ccfde7255 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ 
b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4596,6 +4596,10 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO; net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS; net_dev->hw_features = net_dev->features; + net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_NDO_XMIT; if (priv->dpni_attrs.vlan_filter_entries) net_dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 7facc7d5261e..6b54071d4ecc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -807,6 +807,9 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features |= NETIF_F_RXHASH; ndev->priv_flags |= IFF_UNICAST_FLT; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG; if (si->hw_features & ENETC_SI_F_PSFP && !enetc_psfp_enable(priv)) { priv->active_offloads |= ENETC_F_QCI; diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c index b4cce30e526a..df86770731ad 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_main.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c @@ -1160,6 +1160,11 @@ static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp) WRITE_ONCE(rxqs[i]->xdp_prog, prog); } + if (prog) + xdp_features_set_redirect_target(dev, true); + else + xdp_features_clear_redirect_target(dev); + dev->max_mtu = prog ? 
XDP_MAX_MTU : FUN_MAX_MTU; old_prog = xchg(&fp->xdp_prog, prog); if (old_prog) @@ -1765,6 +1770,7 @@ static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid) netdev->vlan_features = netdev->features & VLAN_FEAT; netdev->mpls_features = netdev->vlan_features; netdev->hw_enc_features = netdev->hw_features; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = FUN_MAX_MTU; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 53d0083e35da..8a79cc18c428 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13339,9 +13339,11 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, old_prog = xchg(&vsi->xdp_prog, prog); if (need_reset) { - if (!prog) + if (!prog) { + xdp_features_clear_redirect_target(vsi->netdev); /* Wait until ndo_xsk_wakeup completes. */ synchronize_rcu(); + } i40e_reset_and_rebuild(pf, true, true); } @@ -13362,11 +13364,13 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, /* Kick start the NAPI context if there is an AF_XDP socket open * on that queue id. This so that receiving will start. 
*/ - if (need_reset && prog) + if (need_reset && prog) { for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->xdp_rings[i]->xsk_pool) (void)i40e_xsk_wakeup(vsi->netdev, i, XDP_WAKEUP_RX); + xdp_features_set_redirect_target(vsi->netdev, true); + } return 0; } @@ -13783,6 +13787,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; netdev->features &= ~NETIF_F_HW_TC; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 26a8910a41ff..074b0e6d0e2d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -22,6 +22,7 @@ #include "ice_eswitch.h" #include "ice_tc_lib.h" #include "ice_vsi_vlan_ops.h" +#include #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" static const char ice_driver_string[] = DRV_SUMMARY; @@ -2912,11 +2913,13 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); } + xdp_features_set_redirect_target(vsi->netdev, false); /* reallocate Rx queues that are used for zero-copy */ xdp_ring_err = ice_realloc_zc_buf(vsi, true); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed"); } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { + xdp_features_clear_redirect_target(vsi->netdev); xdp_ring_err = ice_destroy_xdp_rings(vsi); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); @@ -3459,6 +3462,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) np->vsi = vsi; ice_set_netdev_features(netdev); + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; ice_set_ops(netdev); diff --git 
a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3c0c35ecea10..0e11a082f7a1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2871,8 +2871,14 @@ static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf) bpf_prog_put(old_prog); /* bpf is just replaced, RXQ and MTU are already setup */ - if (!need_reset) + if (!need_reset) { return 0; + } else { + if (prog) + xdp_features_set_redirect_target(dev, true); + else + xdp_features_clear_redirect_target(dev); + } if (running) igb_open(dev); @@ -3317,6 +3323,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_SUPP_NOFCS; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e86b15efaeb8..8b572cd2c350 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6533,6 +6533,9 @@ static int igc_probe(struct pci_dev *pdev, netdev->mpls_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= netdev->vlan_features; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; + /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c index aeeb34e64610..e27af72aada8 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.c +++ b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -29,6 +29,11 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, if (old_prog) bpf_prog_put(old_prog); + if (prog) + xdp_features_set_redirect_target(dev, true); + else + xdp_features_clear_redirect_target(dev); + if (if_running) 
igc_open(dev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 43a44c1e1576..af4c12b6059f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10301,6 +10301,8 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (err) return -EINVAL; + if (!prog) + xdp_features_clear_redirect_target(dev); } else { for (i = 0; i < adapter->num_rx_queues; i++) { WRITE_ONCE(adapter->rx_ring[i]->xdp_prog, @@ -10321,6 +10323,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (adapter->xdp_ring[i]->xsk_pool) (void)ixgbe_xsk_wakeup(adapter->netdev, i, XDP_WAKEUP_RX); + xdp_features_set_redirect_target(dev, true); } return 0; @@ -11018,6 +11021,9 @@ skip_sriov: netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; + /* MTU range: 68 - 9710 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index ea0a230c1153..a44e4bd56142 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4634,6 +4634,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC; /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f8925cac61e4..dc2989103a77 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5612,6 +5612,9 @@ static int mvneta_probe(struct platform_device *pdev) NETIF_F_TSO 
| NETIF_F_RXCSUM; dev->hw_features |= dev->features; dev->vlan_features |= dev->features; + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_set_tso_max_segs(dev, MVNETA_MAX_TSO_SEGS); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 4da45c5abba5..9b4ecbe4f36d 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6866,6 +6866,10 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->vlan_features |= features; netif_set_tso_max_segs(dev, MVPP2_MAX_TSO_SEGS); + + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + dev->priv_flags |= IFF_UNICAST_FLT; /* MTU range: 68 - 9704 */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index c1ea60bc2630..179433d0a54a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2512,10 +2512,13 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog) /* Network stack and XDP shared same rx queues. * Use separate tx queues for XDP and network stack. 
*/ - if (pf->xdp_prog) + if (pf->xdp_prog) { pf->hw.xdp_queues = pf->hw.rx_queues; - else + xdp_features_set_redirect_target(dev, false); + } else { pf->hw.xdp_queues = 0; + xdp_features_clear_redirect_target(dev); + } pf->hw.tot_tx_queues += pf->hw.xdp_queues; @@ -2878,6 +2881,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) netdev->watchdog_timeo = OTX2_TX_TIMEOUT; netdev->netdev_ops = &otx2_netdev_ops; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; netdev->min_mtu = OTX2_MIN_MTU; netdev->max_mtu = otx2_get_max_mtu(pf); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 801deac58bf7..ac54b6f2bb5c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4447,6 +4447,12 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) register_netdevice_notifier(&mac->device_notifier); } + if (mtk_page_pool_enabled(eth)) + eth->netdev[id]->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG; + return 0; free_netdev: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index af4c4858f397..e11bc0ac880e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3416,6 +3416,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->rss_hash_fn = ETH_RSS_HASH_TOP; } + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; + /* MTU range: 68 - hw-specific max */ dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = priv->max_mtu; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0e87432ec6f1..e4996ef04d86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4780,6 
+4780,13 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) if (old_prog) bpf_prog_put(old_prog); + if (reset) { + if (prog) + xdp_features_set_redirect_target(netdev, true); + else + xdp_features_clear_redirect_target(netdev); + } + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) goto unlock; @@ -5175,6 +5182,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_RX_SG; + netdev->priv_flags |= IFF_UNICAST_FLT; netif_set_tso_max_size(netdev, GSO_MAX_SIZE); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 2f6a048dee90..6120f2b6684f 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2160,6 +2160,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->hw_features |= NETIF_F_RXHASH; ndev->features = ndev->hw_features; ndev->vlan_features = 0; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; err = register_netdev(ndev); if (err) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 18fc9971f1c8..e4825d885560 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2529,10 +2529,15 @@ static void nfp_net_netdev_init(struct nfp_net *nn) netdev->features &= ~NETIF_F_HW_VLAN_STAG_RX; nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_RXQINQ; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + if (nn->app && nn->app->type->id == NFP_APP_BPF_NIC) + netdev->xdp_features |= NETDEV_XDP_ACT_HW_OFFLOAD; + /* Finalise the netdev setup */ switch (nn->dp.ops->version) { case NFP_NFD_VER_NFD3: netdev->netdev_ops = 
&nfp_nfd3_netdev_ops; + netdev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; break; case NFP_NFD_VER_NFDK: netdev->netdev_ops = &nfp_nfdk_netdev_ops; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 953f304b8588..b6d999927e86 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -892,6 +892,9 @@ static void qede_init_ndev(struct qede_dev *edev) ndev->hw_features = hw_features; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + /* MTU range: 46 - 9600 */ ndev->min_mtu = ETH_ZLEN - ETH_HLEN; ndev->max_mtu = QEDE_MAX_JUMBO_PACKET_SIZE; diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 0556542d7a6b..18ff8d8cff42 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1078,6 +1078,10 @@ static int efx_pci_probe(struct pci_dev *pci_dev, pci_info(pci_dev, "Solarflare NIC detected\n"); + efx->net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + if (!efx->type->is_vf) efx_probe_vpd_strings(efx); diff --git a/drivers/net/ethernet/sfc/siena/efx.c b/drivers/net/ethernet/sfc/siena/efx.c index 60e5b7c8ccf9..a6ef21845224 100644 --- a/drivers/net/ethernet/sfc/siena/efx.c +++ b/drivers/net/ethernet/sfc/siena/efx.c @@ -1048,6 +1048,10 @@ static int efx_pci_probe(struct pci_dev *pci_dev, pci_info(pci_dev, "Solarflare NIC detected\n"); + efx->net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + if (!efx->type->is_vf) efx_probe_vpd_strings(efx); diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 9b46579b5a10..2d7347b71c41 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -2104,6 +2104,9 @@ static int netsec_probe(struct platform_device *pdev) NETIF_F_IP_CSUM 
| NETIF_F_IPV6_CSUM; ndev->hw_features = ndev->features; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + priv->rx_cksum_offload_flag = true; ret = netsec_register_mdio(priv, phy_addr); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b7e5af58ab75..734d84263fd2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7150,6 +7150,8 @@ int stmmac_dvr_probe(struct device *device, ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; ret = stmmac_tc_init(priv, priv); if (!ret) { diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 13c9c2d6b79b..37f0b62ec5d6 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1458,6 +1458,8 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) priv_sl2->emac_port = 1; cpsw->slaves[1].ndev = ndev; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; @@ -1635,6 +1637,8 @@ static int cpsw_probe(struct platform_device *pdev) cpsw->slaves[0].ndev = ndev; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 83596ec0c7cb..35128dd45ffc 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1405,6 +1405,10 @@ static int cpsw_create_ports(struct 
cpsw_common *cpsw) ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_NETNS_LOCAL | NETIF_F_HW_TC; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; SET_NETDEV_DEV(ndev, dev); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f9b219e6cd58..a9b139bbdb2c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2559,6 +2559,8 @@ static int netvsc_probe(struct hv_device *dev, netdev_lockdep_set_classes(net); + net->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; + /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 6db6a75ff9b9..35fa1ca98671 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -286,6 +286,7 @@ static void nsim_setup(struct net_device *dev) NETIF_F_TSO; dev->hw_features |= NETIF_F_HW_TC; dev->max_mtu = ETH_MAX_MTU; + dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD; } static int nsim_init_netdevsim(struct netdevsim *ns) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a7d17c680f4a..36620afde373 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1401,6 +1401,11 @@ static void tun_net_initialize(struct net_device *dev) eth_hw_addr_random(dev); + /* Currently tun does not support XDP, only tap does. 
*/ + dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + break; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index ba3e05832843..1bb54de7124d 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1686,6 +1686,10 @@ static void veth_setup(struct net_device *dev) dev->hw_enc_features = VETH_FEATURES; dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; netif_set_tso_max_size(dev, GSO_MAX_SIZE); + + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG; } /* diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7e1a98430190..692dff071782 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3280,7 +3280,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); } + if (!old_prog) + xdp_features_set_redirect_target(dev, false); } else { + xdp_features_clear_redirect_target(dev); vi->xdp_enabled = false; } @@ -3910,6 +3913,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; /* MTU range: 68 - 65535 */ dev->min_mtu = MIN_MTU; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 12b074286df9..47d54d8ea59d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1741,6 +1741,8 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) * negotiate with the backend regarding supported features. 
*/ netdev->features |= netdev->hw_features; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; netdev->ethtool_ops = &xennet_ethtool_ops; netdev->min_mtu = ETH_MIN_MTU; diff --git a/include/net/xdp.h b/include/net/xdp.h index 8d1c86914f4c..d517bfac937b 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -428,9 +428,21 @@ MAX_XDP_METADATA_KFUNC, #ifdef CONFIG_NET u32 bpf_xdp_metadata_kfunc_id(int id); bool bpf_dev_bound_kfunc_id(u32 btf_id); +void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg); +void xdp_features_clear_redirect_target(struct net_device *dev); #else static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; } static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; } + +static inline void +xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) +{ +} + +static inline void +xdp_features_clear_redirect_target(struct net_device *dev) +{ +} #endif #endif /* __LINUX_NET_XDP_H__ */ diff --git a/net/core/xdp.c b/net/core/xdp.c index 787fb9f92b36..26483935b7a4 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -774,3 +774,21 @@ static int __init xdp_metadata_init(void) return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set); } late_initcall(xdp_metadata_init); + +void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) +{ + dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; + if (support_sg) + dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT_SG; + + call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); +} +EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target); + +void xdp_features_clear_redirect_target(struct net_device *dev) +{ + dev->xdp_features &= ~(NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG); + call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); +} +EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target); -- cgit v1.2.3 From 0ae0cb2bb22eb8cf943fa07137068347e1b918c4 Mon Sep 17 00:00:00 2001 
From: Marek Majtyka Date: Wed, 1 Feb 2023 11:24:19 +0100 Subject: xsk: add usage of XDP features flags Change necessary condition check for XSK from ndo functions to xdp features flags. Signed-off-by: Marek Majtyka Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/45a98ec67b4556a6a22dfd85df3eb8276beeeb74.1675245258.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- net/xdp/xsk_buff_pool.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index ed6c71826d31..b2df1e0f8153 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -140,6 +140,10 @@ static void xp_disable_drv_zc(struct xsk_buff_pool *pool) } } +#define NETDEV_XDP_ACT_ZC (NETDEV_XDP_ACT_BASIC | \ + NETDEV_XDP_ACT_REDIRECT | \ + NETDEV_XDP_ACT_XSK_ZEROCOPY) + int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *netdev, u16 queue_id, u16 flags) { @@ -178,8 +182,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, /* For copy-mode, we are done. */ return 0; - if (!netdev->netdev_ops->ndo_bpf || - !netdev->netdev_ops->ndo_xsk_wakeup) { + if ((netdev->xdp_features & NETDEV_XDP_ACT_ZC) != NETDEV_XDP_ACT_ZC) { err = -EOPNOTSUPP; goto err_unreg_pool; } -- cgit v1.2.3 From b9d460c9245541b13de2369e79688f8e0acc0c3d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 1 Feb 2023 11:24:22 +0100 Subject: bpf: devmap: check XDP features in __xdp_enqueue routine Check if the destination device implements ndo_xdp_xmit callback relying on NETDEV_XDP_ACT_NDO_XMIT flags. Moreover, check if the destination device supports XDP non-linear frame in __xdp_enqueue and is_valid_dst routines. This patch allows to perform XDP_REDIRECT on non-linear XDP buffers. 
Acked-by: Jesper Dangaard Brouer Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/26a94c33520c0bfba021b3fbb2cb8c1e69bf53b8.1675245258.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 16 +++++++++++++--- net/core/filter.c | 13 +++++-------- 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index d01e4c55b376..2675fefc6cb6 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -474,7 +474,11 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, { int err; - if (!dev->netdev_ops->ndo_xdp_xmit) + if (!(dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT)) + return -EOPNOTSUPP; + + if (unlikely(!(dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT_SG) && + xdp_frame_has_frags(xdpf))) return -EOPNOTSUPP; err = xdp_ok_fwd_dev(dev, xdp_get_frame_len(xdpf)); @@ -532,8 +536,14 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf) { - if (!obj || - !obj->dev->netdev_ops->ndo_xdp_xmit) + if (!obj) + return false; + + if (!(obj->dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT)) + return false; + + if (unlikely(!(obj->dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT_SG) && + xdp_frame_has_frags(xdpf))) return false; if (xdp_ok_fwd_dev(obj->dev, xdp_get_frame_len(xdpf))) diff --git a/net/core/filter.c b/net/core/filter.c index 0039cf16713e..2ce06a72a5ba 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4318,16 +4318,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; - /* XDP_REDIRECT is not fully supported yet for xdp frags since - * not all XDP capable drivers can map non-linear xdp_frame in - * ndo_xdp_xmit. 
- */ - if (unlikely(xdp_buff_has_frags(xdp) && - map_type != BPF_MAP_TYPE_CPUMAP)) - return -EOPNOTSUPP; + if (map_type == BPF_MAP_TYPE_XSKMAP) { + /* XDP_REDIRECT of non-linear buffers is not supported for AF_XDP yet. */ + if (unlikely(xdp_buff_has_frags(xdp))) + return -EOPNOTSUPP; - if (map_type == BPF_MAP_TYPE_XSKMAP) return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); + } return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp), xdp_prog); -- cgit v1.2.3