From b4b7a4099b8ccea224577003fcf9d321bf0817b7 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 5 Jan 2024 18:48:18 +0800 Subject: selftests/bpf: Factor out get_xlated_program() helper Both test_verifier and test_progs use get_xlated_program(), so moving the helper into testing_helpers.h to reuse it. Acked-by: Eduard Zingerman Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20240105104819.3916743-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/ctx_rewrite.c | 44 -------------------- tools/testing/selftests/bpf/test_verifier.c | 47 +--------------------- tools/testing/selftests/bpf/testing_helpers.c | 42 +++++++++++++++++++ tools/testing/selftests/bpf/testing_helpers.h | 6 +++ 4 files changed, 50 insertions(+), 89 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index 4951aa978f33..3b7c57fe55a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -626,50 +626,6 @@ err: return false; } -/* Request BPF program instructions after all rewrites are applied, - * e.g. verifier.c:convert_ctx_access() is done. - */ -static int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt) -{ - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 xlated_prog_len; - __u32 buf_element_size = sizeof(struct bpf_insn); - - if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("bpf_prog_get_info_by_fd failed"); - return -1; - } - - xlated_prog_len = info.xlated_prog_len; - if (xlated_prog_len % buf_element_size) { - printf("Program length %d is not multiple of %d\n", - xlated_prog_len, buf_element_size); - return -1; - } - - *cnt = xlated_prog_len / buf_element_size; - *buf = calloc(*cnt, buf_element_size); - if (!buf) { - perror("can't allocate xlated program buffer"); - return -ENOMEM; - } - - bzero(&info, sizeof(info)); - info.xlated_prog_len = xlated_prog_len; - info.xlated_prog_insns = (__u64)(unsigned long)*buf; - if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("second bpf_prog_get_info_by_fd failed"); - goto out_free_buf; - } - - return 0; - -out_free_buf: - free(*buf); - return -1; -} - static void print_insn(void *private_data, const char *fmt, ...) { va_list args; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index f36e41435be7..87519d7fe4c6 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1341,48 +1341,6 @@ static bool cmp_str_seq(const char *log, const char *exp) return true; } -static struct bpf_insn *get_xlated_program(int fd_prog, int *cnt) -{ - __u32 buf_element_size = sizeof(struct bpf_insn); - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 xlated_prog_len; - struct bpf_insn *buf; - - if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("bpf_prog_get_info_by_fd failed"); - return NULL; - } - - xlated_prog_len = info.xlated_prog_len; - if (xlated_prog_len % buf_element_size) { - printf("Program length %d is not multiple of %d\n", - xlated_prog_len, buf_element_size); - return NULL; - } - - *cnt = xlated_prog_len / buf_element_size; - buf = calloc(*cnt, buf_element_size); - if (!buf) { - perror("can't allocate xlated program buffer"); - return NULL; - } - - bzero(&info, sizeof(info)); - info.xlated_prog_len = xlated_prog_len; - info.xlated_prog_insns = (__u64)(unsigned long)buf; - if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("second bpf_prog_get_info_by_fd failed"); - goto out_free_buf; - } - - return buf; - -out_free_buf: - free(buf); - return NULL; -} - static bool is_null_insn(struct bpf_insn *insn) { struct bpf_insn null_insn = {}; @@ -1505,7 +1463,7 @@ static void print_insn(struct bpf_insn *buf, int cnt) static bool check_xlated_program(struct bpf_test *test, int fd_prog) { struct bpf_insn *buf; - int cnt; + unsigned int cnt; bool result = true; bool check_expected = !is_null_insn(test->expected_insns); bool check_unexpected = !is_null_insn(test->unexpected_insns); @@ -1513,8 +1471,7 @@ static bool check_xlated_program(struct bpf_test *test, int fd_prog) if (!check_expected && !check_unexpected) goto out; - buf = get_xlated_program(fd_prog, &cnt); - if (!buf) { + if (get_xlated_program(fd_prog, &buf, &cnt)) { printf("FAIL: can't get xlated program\n"); result = false; goto out; diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index d2458c1b1671..53c40f62fdcb 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -387,3 +387,45 @@ int kern_sync_rcu(void) { return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); } + +int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt) +{ + __u32 buf_element_size = sizeof(struct bpf_insn); + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 xlated_prog_len; + + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_prog_get_info_by_fd failed"); + return -1; + } + + xlated_prog_len = info.xlated_prog_len; + if (xlated_prog_len % buf_element_size) { + printf("Program length %u is not multiple of %u\n", + xlated_prog_len, buf_element_size); + return -1; + } + + *cnt = xlated_prog_len / buf_element_size; + *buf = calloc(*cnt, buf_element_size); + if (!buf) { + perror("can't allocate xlated program buffer"); + return -ENOMEM; + } + + bzero(&info, sizeof(info)); + info.xlated_prog_len = xlated_prog_len; + info.xlated_prog_insns = (__u64)(unsigned long)*buf; + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_prog_get_info_by_fd failed"); + goto out_free_buf; + } + + return 0; + +out_free_buf: + free(*buf); + *buf = NULL; + return -1; +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 35284faff4f2..1ed5b9200d66 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -46,4 +46,10 @@ static inline __u64 get_time_ns(void) return (u64)t.tv_sec * 1000000000 + t.tv_nsec; } +struct bpf_insn; +/* Request BPF program instructions after all rewrites are applied, + * e.g. verifier.c:convert_ctx_access() is done. + */ +int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt); + #endif /* __TESTING_HELPERS_H */ -- cgit v1.2.3 From 17bda53e43bc41d881ca6a02b3c6f5376c55b3d3 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 5 Jan 2024 18:48:19 +0800 Subject: selftests/bpf: Test the inlining of bpf_kptr_xchg() The test uses bpf_prog_get_info_by_fd() to obtain the xlated instructions of the program first. Since these instructions have already been rewritten by the verifier, the tests then checks whether the rewritten instructions are as expected. And to ensure LLVM generates code exactly as expected, use inline assembly and a naked function. Suggested-by: Eduard Zingerman Signed-off-by: Hou Tao Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240105104819.3916743-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/kptr_xchg_inline.c | 51 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/kptr_xchg_inline.c | 48 ++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c create mode 100644 tools/testing/selftests/bpf/progs/kptr_xchg_inline.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c new file mode 100644 index 000000000000..5a4bee1cf970 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include + +#include "linux/filter.h" +#include "kptr_xchg_inline.skel.h" + +void test_kptr_xchg_inline(void) +{ + struct kptr_xchg_inline *skel; + struct bpf_insn *insn = NULL; + struct bpf_insn exp; + unsigned int cnt; + int err; + +#if !defined(__x86_64__) + test__skip(); + return; +#endif + + skel = kptr_xchg_inline__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = get_xlated_program(bpf_program__fd(skel->progs.kptr_xchg_inline), &insn, &cnt); + if (!ASSERT_OK(err, "prog insn")) + goto out; + + /* The original instructions are: + * r1 = map[id:xxx][0]+0 + * r2 = 0 + * call bpf_kptr_xchg#yyy + * + * call bpf_kptr_xchg#yyy will be inlined as: + * r0 = r2 + * r0 = atomic64_xchg((u64 *)(r1 +0), r0) + */ + if (!ASSERT_GT(cnt, 5, "insn cnt")) + goto out; + + exp = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2); + if (!ASSERT_OK(memcmp(&insn[3], &exp, sizeof(exp)), "mov")) + goto out; + + exp = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0); + if (!ASSERT_OK(memcmp(&insn[4], &exp, sizeof(exp)), "xchg")) + goto out; +out: + free(insn); + kptr_xchg_inline__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c new file mode 100644 index 000000000000..2414ac20b6d5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include +#include + +#include "bpf_experimental.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct bin_data { + char blob[32]; +}; + +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) +private(kptr) struct bin_data __kptr * ptr; + +SEC("tc") +__naked int kptr_xchg_inline(void) +{ + asm volatile ( + "r1 = %[ptr] ll;" + "r2 = 0;" + "call %[bpf_kptr_xchg];" + "if r0 == 0 goto 1f;" + "r1 = r0;" + "r2 = 0;" + "call %[bpf_obj_drop_impl];" + "1:" + "r0 = 0;" + "exit;" + : + : __imm_addr(ptr), + __imm(bpf_kptr_xchg), + __imm(bpf_obj_drop_impl) + : __clobber_all + ); +} + +/* BTF FUNC records are not generated for kfuncs referenced + * from inline assembly. These records are necessary for + * libbpf to link the program. The function below is a hack + * to ensure that BTF FUNC records are generated. + */ +void __btf_root(void) +{ + bpf_obj_drop(NULL); +} -- cgit v1.2.3 From e31f98c1af810a13395ee9ab57402d82272445af Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 4 Jan 2024 16:09:02 -0800 Subject: selftests/bpf: fix test_loader check message Seeing: process_subtest:PASS:Can't alloc specs array 0 nsec ... in verbose successful test log is very confusing. Use smaller identifier-like test tag to denote that we are asserting specs array allocation success. Now it's much less distracting: process_subtest:PASS:specs_alloc 0 nsec Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240105000909.2818934-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index f01391021218..72906d27babf 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -688,7 +688,7 @@ static void process_subtest(struct test_loader *tester, ++nr_progs; specs = calloc(nr_progs, sizeof(struct test_spec)); - if (!ASSERT_OK_PTR(specs, "Can't alloc specs array")) + if (!ASSERT_OK_PTR(specs, "specs_alloc")) return; i = 0; -- cgit v1.2.3 From 56d3e44af80c6b4c7cb89a73061ee35d4a7aee18 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 9 Jan 2024 15:17:38 -0800 Subject: selftests/bpf: detect testing prog flags support Various tests specify extra testing prog_flags when loading BPF programs, like BPF_F_TEST_RND_HI32, and more recently also BPF_F_TEST_REG_INVARIANTS. While BPF_F_TEST_RND_HI32 is old enough to not cause much problem on older kernels, BPF_F_TEST_REG_INVARIANTS is very fresh and unconditionally specifying it causes selftests to fail on even slightly outdated kernels. This breaks libbpf CI test against 4.9 and 5.15 kernels, it can break some local development (done outside of VM), etc. To prevent this, and guard against similar problems in the future, do runtime detection of supported "testing flags", and only provide those that host kernel recognizes. Acked-by: Song Liu Acked-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240109231738.575844-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/bpf_verif_scale.c | 2 +- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 2 +- tools/testing/selftests/bpf/test_loader.c | 2 +- tools/testing/selftests/bpf/test_sock_addr.c | 3 +- tools/testing/selftests/bpf/test_verifier.c | 2 +- tools/testing/selftests/bpf/testing_helpers.c | 32 ++++++++++++++++++++-- tools/testing/selftests/bpf/testing_helpers.h | 1 + 7 files changed, 37 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index e770912fc1d2..4c6ada5b270b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); + bpf_program__set_flags(prog, testing_prog_flags()); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 820d0bcfc474..eb74363f9f70 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -840,7 +840,7 @@ static int load_range_cmp_prog(struct range x, struct range y, enum op op, .log_level = 2, .log_buf = log_buf, .log_size = log_sz, - .prog_flags = BPF_F_TEST_REG_INVARIANTS, + .prog_flags = testing_prog_flags(), ); /* ; skip exit block below diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 72906d27babf..ba57601c2a4d 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -181,7 +181,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); - spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */ + spec->prog_flags = testing_prog_flags(); btf = bpf_object__btf(obj); if (!btf) { diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index b0068a9d2cfe..80c42583f597 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -19,6 +19,7 @@ #include #include "cgroup_helpers.h" +#include "testing_helpers.h" #include "bpf_util.h" #ifndef ENOTSUPP @@ -679,7 +680,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); + bpf_program__set_flags(prog, testing_prog_flags()); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 87519d7fe4c6..1a09fc34d093 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1545,7 +1545,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; + pflags = testing_prog_flags(); if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 53c40f62fdcb..106ef05586b8 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -252,6 +252,34 @@ __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) int extra_prog_load_log_flags = 0; +int testing_prog_flags(void) +{ + static int cached_flags = -1; + static int prog_flags[] = { BPF_F_TEST_RND_HI32, BPF_F_TEST_REG_INVARIANTS }; + static struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int insn_cnt = ARRAY_SIZE(insns), i, fd, flags = 0; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + if (cached_flags >= 0) + return cached_flags; + + for (i = 0; i < ARRAY_SIZE(prog_flags); i++) { + opts.prog_flags = prog_flags[i]; + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "flag-test", "GPL", + insns, insn_cnt, &opts); + if (fd >= 0) { + flags |= prog_flags[i]; + close(fd); + } + } + + cached_flags = flags; + return cached_flags; +} + int bpf_prog_test_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd) { @@ -276,7 +304,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; + flags = bpf_program__flags(prog) | testing_prog_flags(); bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +327,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS, + .prog_flags = testing_prog_flags(), .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 1ed5b9200d66..e099aa4da611 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -51,5 +51,6 @@ struct bpf_insn; * e.g. verifier.c:convert_ctx_access() is done. */ int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt); +int testing_prog_flags(void); #endif /* __TESTING_HELPERS_H */ -- cgit v1.2.3 From f067074bafd5060428211ee7bb8a3f86ff6bc58d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 11 Jan 2024 13:16:48 -0700 Subject: selftests/bpf: Update LLVM Phabricator links reviews.llvm.org was LLVM's Phabricator instances for code review. It has been abandoned in favor of GitHub pull requests. While the majority of links in the kernel sources still work because of the work Fangrui has done turning the dynamic Phabricator instance into a static archive, there are some issues with that work, so preemptively convert all the links in the kernel sources to point to the commit on GitHub. Most of the commits have the corresponding differential review link in the commit message itself so there should not be any loss of fidelity in the relevant information. Additionally, fix a typo in the xdpwall.c print ("LLMV" -> "LLVM") while in the area. Link: https://discourse.llvm.org/t/update-on-github-pull-requests/71540/172 Acked-by: Yonghong Song Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240111-bpf-update-llvm-phabricator-links-v2-1-9a7ae976bd64@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/README.rst | 32 +++++++++++----------- tools/testing/selftests/bpf/prog_tests/xdpwall.c | 2 +- .../selftests/bpf/progs/test_core_reloc_type_id.c | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 9af79c7a9b58..9b974e425af3 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -115,7 +115,7 @@ the insn 20 undoes map_value addition. It is currently impossible for the verifier to understand such speculative pointer arithmetic. Hence `this patch`__ addresses it on the compiler side. It was committed on llvm 12. -__ https://reviews.llvm.org/D85570 +__ https://github.com/llvm/llvm-project/commit/ddf1864ace484035e3cde5e83b3a31ac81e059c6 The corresponding C code @@ -165,7 +165,7 @@ This is due to a llvm BPF backend bug. `The fix`__ has been pushed to llvm 10.x release branch and will be available in 10.0.1. The patch is available in llvm 11.0.0 trunk. -__ https://reviews.llvm.org/D78466 +__ https://github.com/llvm/llvm-project/commit/3cb7e7bf959dcd3b8080986c62e10a75c7af43f0 bpf_verif_scale/loop6.bpf.o test failure with Clang 12 ====================================================== @@ -204,7 +204,7 @@ r5(w5) is eventually saved on stack at insn #24 for later use. This cause later verifier failure. The bug has been `fixed`__ in Clang 13. -__ https://reviews.llvm.org/D97479 +__ https://github.com/llvm/llvm-project/commit/1959ead525b8830cc8a345f45e1c3ef9902d3229 BPF CO-RE-based tests and Clang version ======================================= @@ -221,11 +221,11 @@ failures: - __builtin_btf_type_id() [0_, 1_, 2_]; - __builtin_preserve_type_info(), __builtin_preserve_enum_value() [3_, 4_]. -.. _0: https://reviews.llvm.org/D74572 -.. _1: https://reviews.llvm.org/D74668 -.. _2: https://reviews.llvm.org/D85174 -.. _3: https://reviews.llvm.org/D83878 -.. _4: https://reviews.llvm.org/D83242 +.. _0: https://github.com/llvm/llvm-project/commit/6b01b465388b204d543da3cf49efd6080db094a9 +.. _1: https://github.com/llvm/llvm-project/commit/072cde03aaa13a2c57acf62d79876bf79aa1919f +.. _2: https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99 +.. _3: https://github.com/llvm/llvm-project/commit/6d218b4adb093ff2e9764febbbc89f429412006c +.. _4: https://github.com/llvm/llvm-project/commit/6d6750696400e7ce988d66a1a00e1d0cb32815f8 Floating-point tests and Clang version ====================================== @@ -234,7 +234,7 @@ Certain selftests, e.g. core_reloc, require support for the floating-point types, which was introduced in `Clang 13`__. The older Clang versions will either crash when compiling these tests, or generate an incorrect BTF. -__ https://reviews.llvm.org/D83289 +__ https://github.com/llvm/llvm-project/commit/a7137b238a07d9399d3ae96c0b461571bd5aa8b2 Kernel function call test and Clang version =========================================== @@ -248,7 +248,7 @@ Without it, the error from compiling bpf selftests looks like: libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2 -__ https://reviews.llvm.org/D93563 +__ https://github.com/llvm/llvm-project/commit/886f9ff53155075bd5f1e994f17b85d1e1b7470c btf_tag test and Clang version ============================== @@ -264,8 +264,8 @@ Without them, the btf_tag selftest will be skipped and you will observe: # btf_tag:SKIP -.. _0: https://reviews.llvm.org/D111588 -.. _1: https://reviews.llvm.org/D111199 +.. _0: https://github.com/llvm/llvm-project/commit/a162b67c98066218d0d00aa13b99afb95d9bb5e6 +.. _1: https://github.com/llvm/llvm-project/commit/3466e00716e12e32fdb100e3fcfca5c2b3e8d784 Clang dependencies for static linking tests =========================================== @@ -274,7 +274,7 @@ linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to generate valid BTF information for weak variables. Please make sure you use Clang that contains the fix. -__ https://reviews.llvm.org/D100362 +__ https://github.com/llvm/llvm-project/commit/968292cb93198442138128d850fd54dc7edc0035 Clang relocation changes ======================== @@ -292,7 +292,7 @@ Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``. To fix this issue, user newer libbpf. .. Links -.. _clang reloc patch: https://reviews.llvm.org/D102712 +.. _clang reloc patch: https://github.com/llvm/llvm-project/commit/6a2ea84600ba4bd3b2733bd8f08f5115eb32164b .. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst Clang dependencies for the u32 spill test (xdpwall) @@ -304,6 +304,6 @@ from running test_progs will look like: .. code-block:: console - test_xdpwall:FAIL:Does LLVM have https://reviews.llvm.org/D109073? unexpected error: -4007 + test_xdpwall:FAIL:Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5? unexpected error: -4007 -__ https://reviews.llvm.org/D109073 +__ https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5 diff --git a/tools/testing/selftests/bpf/prog_tests/xdpwall.c b/tools/testing/selftests/bpf/prog_tests/xdpwall.c index f3927829a55a..4599154c8e9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdpwall.c +++ b/tools/testing/selftests/bpf/prog_tests/xdpwall.c @@ -9,7 +9,7 @@ void test_xdpwall(void) struct xdpwall *skel; skel = xdpwall__open_and_load(); - ASSERT_OK_PTR(skel, "Does LLMV have https://reviews.llvm.org/D109073?"); + ASSERT_OK_PTR(skel, "Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5?"); xdpwall__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c index 22aba3f6e344..6fc8b9d66e34 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c @@ -80,7 +80,7 @@ int test_core_type_id(void *ctx) * to detect whether this test has to be executed, however strange * that might look like. * - * [0] https://reviews.llvm.org/D85174 + * [0] https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99 */ #if __has_builtin(__builtin_preserve_type_info) struct core_reloc_type_id_output *out = (void *)&data.out; -- cgit v1.2.3 From 242d18514149d86b431b6f5db5a33579ea79ebad Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 8 Jan 2024 22:51:55 +0200 Subject: selftests/bpf: Fix the u64_offset_to_skb_data test The u64_offset_to_skb_data test is supposed to make a 64-bit fill, but instead makes a 16-bit one. Fix the test according to its intention and update the comments accordingly (umax is no longer 0xffff). The 16-bit fill is covered by u16_offset_to_skb_data. Signed-off-by: Maxim Mikityanskiy Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240108205209.838365-2-maxtram95@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_spill_fill.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 39fe3372e0e0..848f2930f599 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -243,7 +243,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("Spill u32 const scalars. Refill as u64. Offset to skb->data") -__failure __msg("invalid access to packet") +__failure __msg("math between pkt pointer and register with unbounded min value is not allowed") __naked void u64_offset_to_skb_data(void) { asm volatile (" \ @@ -253,13 +253,11 @@ __naked void u64_offset_to_skb_data(void) w7 = 20; \ *(u32*)(r10 - 4) = r6; \ *(u32*)(r10 - 8) = r7; \ - r4 = *(u16*)(r10 - 8); \ + r4 = *(u64*)(r10 - 8); \ r0 = r2; \ - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\ + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4= */ \ r0 += r4; \ - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\ if r0 > r3 goto l0_%=; \ - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\ r0 = *(u32*)(r2 + 0); \ l0_%=: r0 = 0; \ exit; \ -- cgit v1.2.3 From c035b3e555b5642f786fb2d089a6ddf7b00eb374 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 8 Jan 2024 22:51:57 +0200 Subject: selftests/bpf: check if imprecise stack spills confuse infinite loop detection Verify that infinite loop detection logic separates states with identical register states but different imprecise scalars spilled to stack. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240108205209.838365-4-maxtram95@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_loops1.c | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c index 71735dbf33d4..e07b43b78fd2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_loops1.c +++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c @@ -259,4 +259,28 @@ l0_%=: r2 += r1; \ " ::: __clobber_all); } +SEC("xdp") +__success +__naked void not_an_inifinite_loop(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0xff; \ + *(u64 *)(r10 - 8) = r0; \ + r0 = 0; \ +loop_%=: \ + r0 = *(u64 *)(r10 - 8); \ + if r0 > 10 goto exit_%=; \ + r0 += 1; \ + *(u64 *)(r10 - 8) = r0; \ + r0 = 0; \ + goto loop_%=; \ +exit_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From b827eee4c4d83ad92094b38d49cfec6844fb5863 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 8 Jan 2024 22:51:59 +0200 Subject: selftests/bpf: Add a test case for 32-bit spill tracking When a range check is performed on a register that was 32-bit spilled to the stack, the IDs of the two instances of the register are the same, so the range should also be the same. Signed-off-by: Maxim Mikityanskiy Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240108205209.838365-6-maxtram95@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 848f2930f599..f303ac19cf41 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -735,4 +735,35 @@ __naked void stack_load_preserves_const_precision_subreg(void) : __clobber_common); } +SEC("xdp") +__description("32-bit spilled reg range should be tracked") +__success __retval(0) +__naked void spill_32bit_range_track(void) +{ + asm volatile(" \ + call %[bpf_ktime_get_ns]; \ + /* Make r0 bounded. */ \ + r0 &= 65535; \ + /* Assign an ID to r0. */ \ + r1 = r0; \ + /* 32-bit spill r0 to stack. */ \ + *(u32*)(r10 - 8) = r0; \ + /* Boundary check on r0. */ \ + if r0 < 1 goto l0_%=; \ + /* 32-bit fill r1 from stack. */ \ + r1 = *(u32*)(r10 - 8); \ + /* r1 == r0 => r1 >= 1 always. */ \ + if r1 >= 1 goto l0_%=; \ + /* Dead branch: the verifier should prune it. \ + * Do an invalid memory access if the verifier \ + * follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 8ecfc371d829bfed75e0ef2cab45b2290b982f64 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 8 Jan 2024 22:52:02 +0200 Subject: bpf: Assign ID to scalars on spill Currently, when a scalar bounded register is spilled to the stack, its ID is preserved, but only if was already assigned, i.e. if this register was MOVed before. Assign an ID on spill if none is set, so that equal scalars could be tracked if a register is spilled to the stack and filled into another register. One test is adjusted to reflect the change in register IDs. Signed-off-by: Maxim Mikityanskiy Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240108205209.838365-9-maxtram95@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 8 +++++++- tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c | 2 +- tools/testing/selftests/bpf/verifier/precise.c | 6 +++--- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dfcbe011aff1..744fc9b778fe 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4505,9 +4505,15 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, mark_stack_slot_scratched(env, spi); if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && env->bpf_capable) { + bool reg_value_fits; + + reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size; + /* Make sure that reg had an ID to build a relation on spill. */ + if (reg_value_fits) + assign_scalar_id_before_mov(env, reg); save_register_state(env, state, spi, reg, size); /* Break the relation on a narrowing spill. */ - if (get_reg_width(reg) > BITS_PER_BYTE * size) + if (!reg_value_fits) state->stack[spi].spilled_ptr.id = 0; } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && insn->imm != 0 && env->bpf_capable) { diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c index be95570ab382..28b602ac9cbe 100644 --- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c @@ -568,7 +568,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("direct packet access: test23 (x += pkt_ptr, 4)") -__failure __msg("invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)") +__failure __msg("invalid access to packet, off=0 size=8, R5(id=3,off=0,r=0)") __flag(BPF_F_ANY_ALIGNMENT) __naked void test23_x_pkt_ptr_4(void) { diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 8a2ff81d8350..0a9293a57211 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -183,10 +183,10 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\ - mark_precise: frame0: parent state regs=r4 stack=:\ + mark_precise: frame0: parent state regs=r4 stack=-8:\ mark_precise: frame0: last_idx 6 first_idx 4\ - mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\ - mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\ + mark_precise: frame0: regs=r4 stack=-8 before 6: (b7) r0 = -1\ + mark_precise: frame0: regs=r4 stack=-8 before 5: (79) r4 = *(u64 *)(r10 -8)\ mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\ mark_precise: frame0: parent state regs=r0 stack=:\ mark_precise: frame0: last_idx 3 first_idx 3\ -- cgit v1.2.3 From 3893f0b6a0698aeeb3d27cb22baef7c4ca1a07f1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 8 Jan 2024 22:52:03 +0200 Subject: selftests/bpf: Test assigning ID to scalars on spill The previous commit implemented assigning IDs to registers holding scalars before spill. Add the test cases to check the new functionality. Signed-off-by: Maxim Mikityanskiy Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240108205209.838365-10-maxtram95@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 133 +++++++++++++++++++++ 1 file changed, 133 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index f303ac19cf41..b05aab925ee5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -766,4 +766,137 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("xdp") +__description("64-bit spill of 64-bit reg should assign ID") +__success __retval(0) +__naked void spill_64bit_of_64bit_ok(void) +{ + asm volatile (" \ + /* Roll one bit to make the register inexact. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x80000000; \ + r0 <<= 32; \ + /* 64-bit spill r0 to stack - should assign an ID. */\ + *(u64*)(r10 - 8) = r0; \ + /* 64-bit fill r1 from stack - should preserve the ID. */\ + r1 = *(u64*)(r10 - 8); \ + /* Compare r1 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. \ + */ \ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("32-bit spill of 32-bit reg should assign ID") +__success __retval(0) +__naked void spill_32bit_of_32bit_ok(void) +{ + asm volatile (" \ + /* Roll one bit to make the register inexact. */\ + call %[bpf_get_prandom_u32]; \ + w0 &= 0x80000000; \ + /* 32-bit spill r0 to stack - should assign an ID. */\ + *(u32*)(r10 - 8) = r0; \ + /* 32-bit fill r1 from stack - should preserve the ID. */\ + r1 = *(u32*)(r10 - 8); \ + /* Compare r1 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. \ + */ \ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("16-bit spill of 16-bit reg should assign ID") +__success __retval(0) +__naked void spill_16bit_of_16bit_ok(void) +{ + asm volatile (" \ + /* Roll one bit to make the register inexact. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x8000; \ + /* 16-bit spill r0 to stack - should assign an ID. */\ + *(u16*)(r10 - 8) = r0; \ + /* 16-bit fill r1 from stack - should preserve the ID. */\ + r1 = *(u16*)(r10 - 8); \ + /* Compare r1 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. \ + */ \ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("8-bit spill of 8-bit reg should assign ID") +__success __retval(0) +__naked void spill_8bit_of_8bit_ok(void) +{ + asm volatile (" \ + /* Roll one bit to make the register inexact. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x80; \ + /* 8-bit spill r0 to stack - should assign an ID. */\ + *(u8*)(r10 - 8) = r0; \ + /* 8-bit fill r1 from stack - should preserve the ID. */\ + r1 = *(u8*)(r10 - 8); \ + /* Compare r1 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. \ + */ \ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 9a4c57f52b5e0de3a6b1f40c5b656730ce33ee01 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 9 Jan 2024 21:13:48 -0800 Subject: bpf: Track aligned st store as imprecise spilled registers With patch set [1], precision backtracing supports register spill/fill to/from the stack. The patch [2] allows initial imprecise register spill with content 0. This is a common case for cpuv3 and lower for initializing the stack variables with pattern r1 = 0 *(u64 *)(r10 - 8) = r1 and the [2] has demonstrated good verification improvement. For cpuv4, the initialization could be *(u64 *)(r10 - 8) = 0 The current verifier marks the r10-8 contents with STACK_ZERO. Similar to [2], let us permit the above insn to behave like imprecise register spill which can reduce number of verified states. The change is in function check_stack_write_fixed_off(). Before this patch, spilled zero will be marked as STACK_ZERO which can provide precise values. In check_stack_write_var_off(), STACK_ZERO will be maintained if writing a const zero so later it can provide precise values if needed. The above handling of '*(u64 *)(r10 - 8) = 0' as a spill will have issues in check_stack_write_var_off() as the spill will be converted to STACK_MISC and the precise value 0 is lost. To fix this issue, if the spill slots with const zero and the BPF_ST write also with const zero, the spill slots are preserved, which can later provide precise values if needed. Without the change in check_stack_write_var_off(), the test_verifier subtest 'BPF_ST_MEM stack imm zero, variable offset' will fail. I checked cpuv3 and cpuv4 with and without this patch with veristat. There is no state change for cpuv3 since '*(u64 *)(r10 - 8) = 0' is only generated with cpuv4. For cpuv4: $ ../veristat -C old.cpuv4.csv new.cpuv4.csv -e file,prog,insns,states -f 'insns_diff!=0' File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ------------------------------------------ ------------------- --------- --------- --------------- ---------- ---------- ------------- local_storage_bench.bpf.linked3.o get_local 228 168 -60 (-26.32%) 17 14 -3 (-17.65%) pyperf600_bpf_loop.bpf.linked3.o on_event 6066 4889 -1177 (-19.40%) 403 321 -82 (-20.35%) test_cls_redirect.bpf.linked3.o cls_redirect 35483 35387 -96 (-0.27%) 2179 2177 -2 (-0.09%) test_l4lb_noinline.bpf.linked3.o balancer_ingress 4494 4522 +28 (+0.62%) 217 219 +2 (+0.92%) test_l4lb_noinline_dynptr.bpf.linked3.o balancer_ingress 1432 1455 +23 (+1.61%) 92 94 +2 (+2.17%) test_xdp_noinline.bpf.linked3.o balancer_ingress_v6 3462 3458 -4 (-0.12%) 216 216 +0 (+0.00%) verifier_iterating_callbacks.bpf.linked3.o widening 52 41 -11 (-21.15%) 4 3 -1 (-25.00%) xdp_synproxy_kern.bpf.linked3.o syncookie_tc 12412 11719 -693 (-5.58%) 345 330 -15 (-4.35%) xdp_synproxy_kern.bpf.linked3.o syncookie_xdp 12478 11794 -684 (-5.48%) 346 331 -15 (-4.34%) test_l4lb_noinline and test_l4lb_noinline_dynptr has minor regression, but pyperf600_bpf_loop and local_storage_bench gets pretty good improvement. [1] https://lore.kernel.org/all/20231205184248.1502704-1-andrii@kernel.org/ [2] https://lore.kernel.org/all/20231205184248.1502704-9-andrii@kernel.org/ Cc: Kuniyuki Iwashima Cc: Martin KaFai Lau Signed-off-by: Yonghong Song Tested-by: Kuniyuki Iwashima Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240110051348.2737007-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 17 +++++++++++++++-- tools/testing/selftests/bpf/progs/verifier_spill_fill.c | 16 ++++++++-------- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 744fc9b778fe..bd1da0d3ebce 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4516,7 +4516,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, if (!reg_value_fits) state->stack[spi].spilled_ptr.id = 0; } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && - insn->imm != 0 && env->bpf_capable) { + env->bpf_capable) { struct bpf_reg_state fake_reg = {}; __mark_reg_known(&fake_reg, insn->imm); @@ -4663,7 +4663,20 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, return -EINVAL; } - /* Erase all spilled pointers. */ + /* If writing_zero and the spi slot contains a spill of value 0, + * maintain the spill type. + */ + if (writing_zero && *stype == STACK_SPILL && + is_spilled_scalar_reg(&state->stack[spi])) { + struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr; + + if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) { + zero_used = true; + continue; + } + } + + /* Erase all other spilled pointers. */ state->stack[spi].spilled_ptr.type = NOT_INIT; /* Update the slot type. */ diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index b05aab925ee5..e38f29e73be7 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -493,14 +493,14 @@ char single_byte_buf[1] SEC(".data.single_byte_buf"); SEC("raw_tp") __log_level(2) __success -/* make sure fp-8 is all STACK_ZERO */ -__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000") +/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */ +__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0") /* but fp-16 is spilled IMPRECISE zero const reg */ __msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") -/* validate that assigning R2 from STACK_ZERO doesn't mark register +/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register * precise immediately; if necessary, it will be marked precise later */ -__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000") +__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0") /* similarly, when R2 is assigned from spilled register, it is initially * imprecise, but will be marked precise later once it is used in precise context */ @@ -518,14 +518,14 @@ __msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0") __naked void partial_stack_load_preserves_zeros(void) { asm volatile ( - /* fp-8 is all STACK_ZERO */ + /* fp-8 is value zero (represented by a zero value fake reg) */ ".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */ /* fp-16 is const zero register */ "r0 = 0;" "*(u64 *)(r10 -16) = r0;" - /* load single U8 from non-aligned STACK_ZERO slot */ + /* load single U8 from non-aligned spilled value zero slot */ "r1 = %[single_byte_buf];" "r2 = *(u8 *)(r10 -1);" "r1 += r2;" @@ -537,7 +537,7 @@ __naked void partial_stack_load_preserves_zeros(void) "r1 += r2;" "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ - /* load single U16 from non-aligned STACK_ZERO slot */ + /* load single U16 from non-aligned spilled value zero slot */ "r1 = %[single_byte_buf];" "r2 = *(u16 *)(r10 -2);" "r1 += r2;" @@ -549,7 +549,7 @@ __naked void partial_stack_load_preserves_zeros(void) "r1 += r2;" "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ - /* load single U32 from non-aligned STACK_ZERO slot */ + /* load single U32 from non-aligned spilled value zero slot */ "r1 = %[single_byte_buf];" "r2 = *(u32 *)(r10 -4);" "r1 += r2;" -- cgit v1.2.3 From 6ae99ac8b7da30c9fdb15e380624dbc41f8200c8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 9 Jan 2024 21:13:55 -0800 Subject: selftests/bpf: Add a selftest with not-8-byte aligned BPF_ST Add a selftest with a 4 bytes BPF_ST of 0 where the store is not 8-byte aligned. The goal is to ensure that STACK_ZERO is properly marked in stack slots and the STACK_ZERO value can propagate properly during the load. Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240110051355.2737232-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index e38f29e73be7..7013a9694163 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -581,6 +581,47 @@ __naked void partial_stack_load_preserves_zeros(void) : __clobber_common); } +SEC("raw_tp") +__log_level(2) +__success +/* fp-4 is STACK_ZERO */ +__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????") +__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????") +__msg("5: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)") +__naked void partial_stack_load_preserves_partial_zeros(void) +{ + asm volatile ( + /* fp-4 is value zero */ + ".8byte %[fp4_st_zero];" /* LLVM-18+: *(u32 *)(r10 -4) = 0; */ + + /* load single U8 from non-aligned stack zero slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -1);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned stack zero slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -2);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned stack zero slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -4);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(single_byte_buf), + __imm_insn(fp4_st_zero, BPF_ST_MEM(BPF_W, BPF_REG_FP, -4, 0)) + : __clobber_common); +} + char two_byte_buf[2] SEC(".data.two_byte_buf"); SEC("raw_tp") -- cgit v1.2.3 From 49c06547d5218b54fbcc6011864b8c8e3aa0b565 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 12 Jan 2024 14:01:34 -0800 Subject: bpf: Minor improvements for bpf_cmp. Few minor improvements for bpf_cmp() macro: . reduce number of args in __bpf_cmp() . rename NOFLIP to UNLIKELY . add a comment about 64-bit truncation in "i" constraint . use "ri" constraint for sizeof(rhs) <= 4 . improve error message for bpf_cmp_likely() Before: progs/iters_task_vma.c:31:7: error: variable 'ret' is uninitialized when used here [-Werror,-Wuninitialized] 31 | if (bpf_cmp_likely(seen, <==, 1000)) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../bpf/bpf_experimental.h:325:3: note: expanded from macro 'bpf_cmp_likely' 325 | ret; | ^~~ progs/iters_task_vma.c:31:7: note: variable 'ret' is declared here ../bpf/bpf_experimental.h:310:3: note: expanded from macro 'bpf_cmp_likely' 310 | bool ret; | ^ After: progs/iters_task_vma.c:31:7: error: invalid operand for instruction 31 | if (bpf_cmp_likely(seen, <==, 1000)) | ^ ../bpf/bpf_experimental.h:324:17: note: expanded from macro 'bpf_cmp_likely' 324 | asm volatile("r0 " #OP " invalid compare"); | ^ :1:5: note: instantiated into assembly here 1 | r0 <== invalid compare | ^ Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20240112220134.71209-1-alexei.starovoitov@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_experimental.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index f44875f8b367..0d749006d107 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -260,11 +260,11 @@ extern void bpf_throw(u64 cookie) __ksym; #define __is_signed_type(type) (((type)(-1)) < (type)1) -#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT) \ +#define __bpf_cmp(LHS, OP, PRED, RHS, DEFAULT) \ ({ \ __label__ l_true; \ bool ret = DEFAULT; \ - asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]" \ + asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]" \ :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \ ret = !DEFAULT; \ l_true: \ @@ -276,7 +276,7 @@ l_true: \ * __lhs OP __rhs below will catch the mistake. * Be aware that we check only __lhs to figure out the sign of compare. */ -#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \ +#define _bpf_cmp(LHS, OP, RHS, UNLIKELY) \ ({ \ typeof(LHS) __lhs = (LHS); \ typeof(RHS) __rhs = (RHS); \ @@ -285,14 +285,17 @@ l_true: \ (void)(__lhs OP __rhs); \ if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \ if (sizeof(__rhs) == 8) \ - ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \ + /* "i" will truncate 64-bit constant into s32, \ + * so we have to use extra register via "r". \ + */ \ + ret = __bpf_cmp(__lhs, #OP, "r", __rhs, UNLIKELY); \ else \ - ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \ + ret = __bpf_cmp(__lhs, #OP, "ri", __rhs, UNLIKELY); \ } else { \ if (sizeof(__rhs) == 8) \ - ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \ + ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, UNLIKELY); \ else \ - ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \ + ret = __bpf_cmp(__lhs, "s"#OP, "ri", __rhs, UNLIKELY); \ } \ ret; \ }) @@ -304,7 +307,7 @@ l_true: \ #ifndef bpf_cmp_likely #define bpf_cmp_likely(LHS, OP, RHS) \ ({ \ - bool ret; \ + bool ret = 0; \ if (__builtin_strcmp(#OP, "==") == 0) \ ret = _bpf_cmp(LHS, !=, RHS, false); \ else if (__builtin_strcmp(#OP, "!=") == 0) \ @@ -318,7 +321,7 @@ l_true: \ else if (__builtin_strcmp(#OP, ">=") == 0) \ ret = _bpf_cmp(LHS, <, RHS, false); \ else \ - (void) "bug"; \ + asm volatile("r0 " #OP " invalid compare"); \ ret; \ }) #endif -- cgit v1.2.3 From f5f30386c78105cba520e443a6a9ee945ec1d066 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 16 Jan 2024 14:19:20 +0800 Subject: bpftool: Silence build warning about calloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There exists the following warning when building bpftool: CC prog.o prog.c: In function ‘profile_open_perf_events’: prog.c:2301:24: warning: ‘calloc’ sizes specified with ‘sizeof’ in the earlier argument and not in the later argument [-Wcalloc-transposed-args] 2301 | sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); | ^~~ prog.c:2301:24: note: earlier argument should specify number of elements, later size of each element Tested with the latest upstream GCC which contains a new warning option -Wcalloc-transposed-args. The first argument to calloc is documented to be number of elements in array, while the second argument is size of each element, just switch the first and second arguments of calloc() to silence the build warning, compile tested only. Fixes: 47c09d6a9f67 ("bpftool: Introduce "prog profile" command") Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240116061920.31172-1-yangtiezhu@loongson.cn Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/prog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index feb8e305804f..9cb42a3366c0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2298,7 +2298,7 @@ static int profile_open_perf_events(struct profiler_bpf *obj) int map_fd; profile_perf_events = calloc( - sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); + obj->rodata->num_cpu * obj->rodata->num_metric, sizeof(int)); if (!profile_perf_events) { p_err("failed to allocate memory for perf_event array: %s", strerror(errno)); -- cgit v1.2.3 From d177c1be06ce28aa8c8710ac55be1b5ad3f314c6 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 10 Jan 2024 09:57:37 +0100 Subject: selftests/bpf: Fix potential premature unload in bpf_testmod It is possible for bpf_kfunc_call_test_release() to be called from bpf_map_free_deferred() when bpf_testmod is already unloaded and perf_test_stuct.cnt which it tries to decrease is no longer in memory. This patch tries to fix the issue by waiting for all references to be dropped in bpf_testmod_exit(). The issue can be triggered by running 'test_progs -t map_kptr' in 6.5, but is obscured in 6.6 by d119357d07435 ("rcu-tasks: Treat only synchronous grace periods urgently"). Fixes: 65eb006d85a2 ("bpf: Move kernel test kfuncs to bpf_testmod") Signed-off-by: Artem Savkov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Cc: Jiri Olsa Link: https://lore.kernel.org/bpf/82f55c0e-0ec8-4fe1-8d8c-b1de07558ad9@linux.dev Link: https://lore.kernel.org/bpf/20240110085737.8895-1-asavkov@redhat.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 91907b321f91..e7c9e1c7fde0 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -2,6 +2,7 @@ /* Copyright (c) 2020 Facebook */ #include #include +#include #include #include #include @@ -544,6 +545,14 @@ static int bpf_testmod_init(void) static void bpf_testmod_exit(void) { + /* Need to wait for all references to be dropped because + * bpf_kfunc_call_test_release() which currently resides in kernel can + * be called after bpf_testmod is unloaded. Once release function is + * moved into the module this wait can be removed. + */ + while (refcount_read(&prog_test_struct.cnt) > 1) + msleep(20); + return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } -- cgit v1.2.3 From a74712241b4675175cd8e3310fa206d8756ad5a1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:14 -0800 Subject: selftest: bpf: Test bpf_sk_assign_tcp_reqsk(). This commit adds a sample selftest to demonstrate how we can use bpf_sk_assign_tcp_reqsk() as the backend of SYN Proxy. The test creates IPv4/IPv6 x TCP connections and transfer messages over them on lo with BPF tc prog attached. The tc prog will process SYN and returns SYN+ACK with the following ISN and TS. In a real use case, this part will be done by other hosts. MSB LSB ISN: | 31 ... 8 | 7 6 | 5 | 4 | 3 2 1 0 | | Hash_1 | MSS | ECN | SACK | WScale | TS: | 31 ... 8 | 7 ... 0 | | Random | Hash_2 | WScale in SYN is reused in SYN+ACK. The client returns ACK, and tc prog will recalculate ISN and TS from ACK and validate SYN Cookie. If it's valid, the prog calls kfunc to allocate a reqsk for skb and configure the reqsk based on the argument created from SYN Cookie. Later, the reqsk will be processed in cookie_v[46]_check() to create a connection. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240115205514.68364-7-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_kfuncs.h | 10 + tools/testing/selftests/bpf/config | 1 + .../bpf/prog_tests/tcp_custom_syncookie.c | 150 ++++++ .../testing/selftests/bpf/progs/bpf_tracing_net.h | 16 + tools/testing/selftests/bpf/progs/test_siphash.h | 64 +++ .../bpf/progs/test_tcp_custom_syncookie.c | 572 +++++++++++++++++++++ .../bpf/progs/test_tcp_custom_syncookie.h | 140 +++++ 7 files changed, 953 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c create mode 100644 tools/testing/selftests/bpf/progs/test_siphash.h create mode 100644 tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c create mode 100644 tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index b4e78c1eb37b..23c24f852f4f 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -51,6 +51,16 @@ extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clo extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, const __u8 *sun_path, __u32 sun_path__sz) __ksym; +/* Description + * Allocate and configure a reqsk and link it with a listener and skb. + * Returns + * Error code + */ +struct sock; +struct bpf_tcp_req_attrs; +extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk, + struct bpf_tcp_req_attrs *attrs, int attrs__sz) __ksym; + void *bpf_cast_to_kern_ctx(void *) __ksym; void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index c125c441abc7..01f241ea2c67 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -81,6 +81,7 @@ CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y +CONFIG_SYN_COOKIES=y CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y CONFIG_VSOCKETS=y diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c new file mode 100644 index 000000000000..eaf441dc7e79 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#define _GNU_SOURCE +#include +#include +#include + +#include "test_progs.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "test_tcp_custom_syncookie.skel.h" + +static struct test_tcp_custom_syncookie_case { + int family, type; + char addr[16]; + char name[10]; +} test_cases[] = { + { + .name = "IPv4 TCP", + .family = AF_INET, + .type = SOCK_STREAM, + .addr = "127.0.0.1", + }, + { + .name = "IPv6 TCP", + .family = AF_INET6, + .type = SOCK_STREAM, + .addr = "::1", + }, +}; + +static int setup_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return -1; + + if (!ASSERT_OK(system("ip link set dev lo up"), "ip")) + goto err; + + if (!ASSERT_OK(write_sysctl("/proc/sys/net/ipv4/tcp_ecn", "1"), + "write_sysctl")) + goto err; + + return 0; +err: + return -1; +} + +static int setup_tc(struct test_tcp_custom_syncookie *skel) +{ + LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_attach, + .prog_fd = bpf_program__fd(skel->progs.tcp_custom_syncookie)); + + qdisc_lo.ifindex = if_nametoindex("lo"); + if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact")) + goto err; + + if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach), + "filter add dev lo ingress")) + goto err; + + return 0; +err: + return -1; +} + +#define msg "Hello World" +#define msglen 11 + +static void transfer_message(int sender, int receiver) +{ + char buf[msglen]; + int ret; + + ret = send(sender, msg, msglen, 0); + if (!ASSERT_EQ(ret, msglen, "send")) + return; + + memset(buf, 0, sizeof(buf)); + + ret = recv(receiver, buf, msglen, 0); + if (!ASSERT_EQ(ret, msglen, "recv")) + return; + + ret = strncmp(buf, msg, msglen); + if (!ASSERT_EQ(ret, 0, "strncmp")) + return; +} + +static void create_connection(struct test_tcp_custom_syncookie_case *test_case) +{ + int server, client, child; + + server = start_server(test_case->family, test_case->type, test_case->addr, 0, 0); + if (!ASSERT_NEQ(server, -1, "start_server")) + return; + + client = connect_to_fd(server, 0); + if (!ASSERT_NEQ(client, -1, "connect_to_fd")) + goto close_server; + + child = accept(server, NULL, 0); + if (!ASSERT_NEQ(child, -1, "accept")) + goto close_client; + + transfer_message(client, child); + transfer_message(child, client); + + close(child); +close_client: + close(client); +close_server: + close(server); +} + +void test_tcp_custom_syncookie(void) +{ + struct test_tcp_custom_syncookie *skel; + int i; + + if (setup_netns()) + return; + + skel = test_tcp_custom_syncookie__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (setup_tc(skel)) + goto destroy_skel; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + if (!test__start_subtest(test_cases[i].name)) + continue; + + skel->bss->handled_syn = false; + skel->bss->handled_ack = false; + + create_connection(&test_cases[i]); + + ASSERT_EQ(skel->bss->handled_syn, true, "SYN is not handled at tc."); + ASSERT_EQ(skel->bss->handled_ack, true, "ACK is not handled at tc"); + } + +destroy_skel: + system("tc qdisc del dev lo clsact"); + + test_tcp_custom_syncookie__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index e8bd4b7b5ef7..7001965d1cc3 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -51,9 +51,25 @@ #define ICSK_TIME_LOSS_PROBE 5 #define ICSK_TIME_REO_TIMEOUT 6 +#define ETH_ALEN 6 #define ETH_HLEN 14 +#define ETH_P_IP 0x0800 #define ETH_P_IPV6 0x86DD +#define NEXTHDR_TCP 6 + +#define TCPOPT_NOP 1 +#define TCPOPT_EOL 0 +#define TCPOPT_MSS 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_TIMESTAMP 8 +#define TCPOPT_SACK_PERM 4 + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_SACK_PERM 2 + #define CHECKSUM_NONE 0 #define CHECKSUM_PARTIAL 3 diff --git a/tools/testing/selftests/bpf/progs/test_siphash.h b/tools/testing/selftests/bpf/progs/test_siphash.h new file mode 100644 index 000000000000..5d3a7ec36780 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_siphash.h @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#ifndef _TEST_SIPHASH_H +#define _TEST_SIPHASH_H + +/* include/linux/bitops.h */ +static inline u64 rol64(u64 word, unsigned int shift) +{ + return (word << (shift & 63)) | (word >> ((-shift) & 63)); +} + +/* include/linux/siphash.h */ +#define SIPHASH_PERMUTATION(a, b, c, d) ( \ + (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \ + (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \ + (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \ + (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32)) + +#define SIPHASH_CONST_0 0x736f6d6570736575ULL +#define SIPHASH_CONST_1 0x646f72616e646f6dULL +#define SIPHASH_CONST_2 0x6c7967656e657261ULL +#define SIPHASH_CONST_3 0x7465646279746573ULL + +/* lib/siphash.c */ +#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3) + +#define PREAMBLE(len) \ + u64 v0 = SIPHASH_CONST_0; \ + u64 v1 = SIPHASH_CONST_1; \ + u64 v2 = SIPHASH_CONST_2; \ + u64 v3 = SIPHASH_CONST_3; \ + u64 b = ((u64)(len)) << 56; \ + v3 ^= key->key[1]; \ + v2 ^= key->key[0]; \ + v1 ^= key->key[1]; \ + v0 ^= key->key[0]; + +#define POSTAMBLE \ + v3 ^= b; \ + SIPROUND; \ + SIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + return (v0 ^ v1) ^ (v2 ^ v3); + +static inline u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key) +{ + PREAMBLE(16) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + POSTAMBLE +} +#endif diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c new file mode 100644 index 000000000000..a5501b29979a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -0,0 +1,572 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include "vmlinux.h" + +#include +#include +#include "bpf_tracing_net.h" +#include "bpf_kfuncs.h" +#include "test_siphash.h" +#include "test_tcp_custom_syncookie.h" + +/* Hash is calculated for each client and split into ISN and TS. + * + * MSB LSB + * ISN: | 31 ... 8 | 7 6 | 5 | 4 | 3 2 1 0 | + * | Hash_1 | MSS | ECN | SACK | WScale | + * + * TS: | 31 ... 8 | 7 ... 0 | + * | Random | Hash_2 | + */ +#define COOKIE_BITS 8 +#define COOKIE_MASK (((__u32)1 << COOKIE_BITS) - 1) + +enum { + /* 0xf is invalid thus means that SYN did not have WScale. */ + BPF_SYNCOOKIE_WSCALE_MASK = (1 << 4) - 1, + BPF_SYNCOOKIE_SACK = (1 << 4), + BPF_SYNCOOKIE_ECN = (1 << 5), +}; + +#define MSS_LOCAL_IPV4 65495 +#define MSS_LOCAL_IPV6 65476 + +const __u16 msstab4[] = { + 536, + 1300, + 1460, + MSS_LOCAL_IPV4, +}; + +const __u16 msstab6[] = { + 1280 - 60, /* IPV6_MIN_MTU - 60 */ + 1480 - 60, + 9000 - 60, + MSS_LOCAL_IPV6, +}; + +static siphash_key_t test_key_siphash = { + { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL } +}; + +struct tcp_syncookie { + struct __sk_buff *skb; + void *data_end; + struct ethhdr *eth; + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + struct tcphdr *tcp; + union { + char *ptr; + __be32 *ptr32; + }; + struct bpf_tcp_req_attrs attrs; + u32 cookie; + u64 first; +}; + +bool handled_syn, handled_ack; + +static int tcp_load_headers(struct tcp_syncookie *ctx) +{ + ctx->data_end = (void *)(long)ctx->skb->data_end; + ctx->eth = (struct ethhdr *)(long)ctx->skb->data; + + if (ctx->eth + 1 > ctx->data_end) + goto err; + + switch (bpf_ntohs(ctx->eth->h_proto)) { + case ETH_P_IP: + ctx->ipv4 = (struct iphdr *)(ctx->eth + 1); + + if (ctx->ipv4 + 1 > ctx->data_end) + goto err; + + if (ctx->ipv4->ihl != sizeof(*ctx->ipv4) / 4) + goto err; + + if (ctx->ipv4->version != 4) + goto err; + + if (ctx->ipv4->protocol != IPPROTO_TCP) + goto err; + + ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1); + break; + case ETH_P_IPV6: + ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1); + + if (ctx->ipv6 + 1 > ctx->data_end) + goto err; + + if (ctx->ipv6->version != 6) + goto err; + + if (ctx->ipv6->nexthdr != NEXTHDR_TCP) + goto err; + + ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1); + break; + default: + goto err; + } + + if (ctx->tcp + 1 > ctx->data_end) + goto err; + + return 0; +err: + return -1; +} + +static int tcp_reload_headers(struct tcp_syncookie *ctx) +{ + /* Without volatile, + * R3 32-bit pointer arithmetic prohibited + */ + volatile u64 data_len = ctx->skb->data_end - ctx->skb->data; + + if (ctx->tcp->doff < sizeof(*ctx->tcp) / 4) + goto err; + + /* Needed to calculate csum and parse TCP options. */ + if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0)) + goto err; + + ctx->data_end = (void *)(long)ctx->skb->data_end; + ctx->eth = (struct ethhdr *)(long)ctx->skb->data; + if (ctx->ipv4) { + ctx->ipv4 = (struct iphdr *)(ctx->eth + 1); + ctx->ipv6 = NULL; + ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1); + } else { + ctx->ipv4 = NULL; + ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1); + ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1); + } + + if ((void *)ctx->tcp + 60 > ctx->data_end) + goto err; + + return 0; +err: + return -1; +} + +static __sum16 tcp_v4_csum(struct tcp_syncookie *ctx, __wsum csum) +{ + return csum_tcpudp_magic(ctx->ipv4->saddr, ctx->ipv4->daddr, + ctx->tcp->doff * 4, IPPROTO_TCP, csum); +} + +static __sum16 tcp_v6_csum(struct tcp_syncookie *ctx, __wsum csum) +{ + return csum_ipv6_magic(&ctx->ipv6->saddr, &ctx->ipv6->daddr, + ctx->tcp->doff * 4, IPPROTO_TCP, csum); +} + +static int tcp_validate_header(struct tcp_syncookie *ctx) +{ + s64 csum; + + if (tcp_reload_headers(ctx)) + goto err; + + csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0); + if (csum < 0) + goto err; + + if (ctx->ipv4) { + /* check tcp_v4_csum(csum) is 0 if not on lo. */ + + csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl * 4, 0); + if (csum < 0) + goto err; + + if (csum_fold(csum) != 0) + goto err; + } else if (ctx->ipv6) { + /* check tcp_v6_csum(csum) is 0 if not on lo. */ + } + + return 0; +err: + return -1; +} + +static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) +{ + char opcode, opsize; + + if (ctx->ptr + 1 > ctx->data_end) + goto stop; + + opcode = *ctx->ptr++; + + if (opcode == TCPOPT_EOL) + goto stop; + + if (opcode == TCPOPT_NOP) + goto next; + + if (ctx->ptr + 1 > ctx->data_end) + goto stop; + + opsize = *ctx->ptr++; + + if (opsize < 2) + goto stop; + + switch (opcode) { + case TCPOPT_MSS: + if (opsize == TCPOLEN_MSS && ctx->tcp->syn && + ctx->ptr + (TCPOLEN_MSS - 2) < ctx->data_end) + ctx->attrs.mss = get_unaligned_be16(ctx->ptr); + break; + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW && ctx->tcp->syn && + ctx->ptr + (TCPOLEN_WINDOW - 2) < ctx->data_end) { + ctx->attrs.wscale_ok = 1; + ctx->attrs.snd_wscale = *ctx->ptr; + } + break; + case TCPOPT_TIMESTAMP: + if (opsize == TCPOLEN_TIMESTAMP && + ctx->ptr + (TCPOLEN_TIMESTAMP - 2) < ctx->data_end) { + ctx->attrs.rcv_tsval = get_unaligned_be32(ctx->ptr); + ctx->attrs.rcv_tsecr = get_unaligned_be32(ctx->ptr + 4); + + if (ctx->tcp->syn && ctx->attrs.rcv_tsecr) + ctx->attrs.tstamp_ok = 0; + else + ctx->attrs.tstamp_ok = 1; + } + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn && + ctx->ptr + (TCPOLEN_SACK_PERM - 2) < ctx->data_end) + ctx->attrs.sack_ok = 1; + break; + } + + ctx->ptr += opsize - 2; +next: + return 0; +stop: + return 1; +} + +static void tcp_parse_options(struct tcp_syncookie *ctx) +{ + ctx->ptr = (char *)(ctx->tcp + 1); + + bpf_loop(40, tcp_parse_option, ctx, 0); +} + +static int tcp_validate_sysctl(struct tcp_syncookie *ctx) +{ + if ((ctx->ipv4 && ctx->attrs.mss != MSS_LOCAL_IPV4) || + (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6)) + goto err; + + if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7) + goto err; + + if (!ctx->attrs.tstamp_ok) + goto err; + + if (!ctx->attrs.sack_ok) + goto err; + + if (!ctx->tcp->ece || !ctx->tcp->cwr) + goto err; + + return 0; +err: + return -1; +} + +static void tcp_prepare_cookie(struct tcp_syncookie *ctx) +{ + u32 seq = bpf_ntohl(ctx->tcp->seq); + u64 first = 0, second; + int mssind = 0; + u32 hash; + + if (ctx->ipv4) { + for (mssind = ARRAY_SIZE(msstab4) - 1; mssind; mssind--) + if (ctx->attrs.mss >= msstab4[mssind]) + break; + + ctx->attrs.mss = msstab4[mssind]; + + first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr; + } else if (ctx->ipv6) { + for (mssind = ARRAY_SIZE(msstab6) - 1; mssind; mssind--) + if (ctx->attrs.mss >= msstab6[mssind]) + break; + + ctx->attrs.mss = msstab6[mssind]; + + first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 | + ctx->ipv6->daddr.in6_u.u6_addr32[0]; + } + + second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest; + hash = siphash_2u64(first, second, &test_key_siphash); + + if (ctx->attrs.tstamp_ok) { + ctx->attrs.rcv_tsecr = bpf_get_prandom_u32(); + ctx->attrs.rcv_tsecr &= ~COOKIE_MASK; + ctx->attrs.rcv_tsecr |= hash & COOKIE_MASK; + } + + hash &= ~COOKIE_MASK; + hash |= mssind << 6; + + if (ctx->attrs.wscale_ok) + hash |= ctx->attrs.snd_wscale & BPF_SYNCOOKIE_WSCALE_MASK; + + if (ctx->attrs.sack_ok) + hash |= BPF_SYNCOOKIE_SACK; + + if (ctx->attrs.tstamp_ok && ctx->tcp->ece && ctx->tcp->cwr) + hash |= BPF_SYNCOOKIE_ECN; + + ctx->cookie = hash; +} + +static void tcp_write_options(struct tcp_syncookie *ctx) +{ + ctx->ptr32 = (__be32 *)(ctx->tcp + 1); + + *ctx->ptr32++ = bpf_htonl(TCPOPT_MSS << 24 | TCPOLEN_MSS << 16 | + ctx->attrs.mss); + + if (ctx->attrs.wscale_ok) + *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 | + TCPOPT_WINDOW << 16 | + TCPOLEN_WINDOW << 8 | + ctx->attrs.snd_wscale); + + if (ctx->attrs.tstamp_ok) { + if (ctx->attrs.sack_ok) + *ctx->ptr32++ = bpf_htonl(TCPOPT_SACK_PERM << 24 | + TCPOLEN_SACK_PERM << 16 | + TCPOPT_TIMESTAMP << 8 | + TCPOLEN_TIMESTAMP); + else + *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 | + TCPOPT_NOP << 16 | + TCPOPT_TIMESTAMP << 8 | + TCPOLEN_TIMESTAMP); + + *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsecr); + *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsval); + } else if (ctx->attrs.sack_ok) { + *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 | + TCPOPT_NOP << 16 | + TCPOPT_SACK_PERM << 8 | + TCPOLEN_SACK_PERM); + } +} + +static int tcp_handle_syn(struct tcp_syncookie *ctx) +{ + s64 csum; + + if (tcp_validate_header(ctx)) + goto err; + + tcp_parse_options(ctx); + + if (tcp_validate_sysctl(ctx)) + goto err; + + tcp_prepare_cookie(ctx); + tcp_write_options(ctx); + + swap(ctx->tcp->source, ctx->tcp->dest); + ctx->tcp->check = 0; + ctx->tcp->ack_seq = bpf_htonl(bpf_ntohl(ctx->tcp->seq) + 1); + ctx->tcp->seq = bpf_htonl(ctx->cookie); + ctx->tcp->doff = ((long)ctx->ptr32 - (long)ctx->tcp) >> 2; + ctx->tcp->ack = 1; + if (!ctx->attrs.tstamp_ok || !ctx->tcp->ece || !ctx->tcp->cwr) + ctx->tcp->ece = 0; + ctx->tcp->cwr = 0; + + csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0); + if (csum < 0) + goto err; + + if (ctx->ipv4) { + swap(ctx->ipv4->saddr, ctx->ipv4->daddr); + ctx->tcp->check = tcp_v4_csum(ctx, csum); + + ctx->ipv4->check = 0; + ctx->ipv4->tos = 0; + ctx->ipv4->tot_len = bpf_htons((long)ctx->ptr32 - (long)ctx->ipv4); + ctx->ipv4->id = 0; + ctx->ipv4->ttl = 64; + + csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, sizeof(*ctx->ipv4), 0); + if (csum < 0) + goto err; + + ctx->ipv4->check = csum_fold(csum); + } else if (ctx->ipv6) { + swap(ctx->ipv6->saddr, ctx->ipv6->daddr); + ctx->tcp->check = tcp_v6_csum(ctx, csum); + + *(__be32 *)ctx->ipv6 = bpf_htonl(0x60000000); + ctx->ipv6->payload_len = bpf_htons((long)ctx->ptr32 - (long)ctx->tcp); + ctx->ipv6->hop_limit = 64; + } + + swap_array(ctx->eth->h_source, ctx->eth->h_dest); + + if (bpf_skb_change_tail(ctx->skb, (long)ctx->ptr32 - (long)ctx->eth, 0)) + goto err; + + return bpf_redirect(ctx->skb->ifindex, 0); +err: + return TC_ACT_SHOT; +} + +static int tcp_validate_cookie(struct tcp_syncookie *ctx) +{ + u32 cookie = bpf_ntohl(ctx->tcp->ack_seq) - 1; + u32 seq = bpf_ntohl(ctx->tcp->seq) - 1; + u64 first = 0, second; + int mssind; + u32 hash; + + if (ctx->ipv4) + first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr; + else if (ctx->ipv6) + first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 | + ctx->ipv6->daddr.in6_u.u6_addr32[0]; + + second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest; + hash = siphash_2u64(first, second, &test_key_siphash); + + if (ctx->attrs.tstamp_ok) + hash -= ctx->attrs.rcv_tsecr & COOKIE_MASK; + else + hash &= ~COOKIE_MASK; + + hash -= cookie & ~COOKIE_MASK; + if (hash) + goto err; + + mssind = (cookie & (3 << 6)) >> 6; + if (ctx->ipv4) { + if (mssind > ARRAY_SIZE(msstab4)) + goto err; + + ctx->attrs.mss = msstab4[mssind]; + } else { + if (mssind > ARRAY_SIZE(msstab6)) + goto err; + + ctx->attrs.mss = msstab6[mssind]; + } + + ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK; + ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale; + ctx->attrs.wscale_ok = ctx->attrs.snd_wscale == BPF_SYNCOOKIE_WSCALE_MASK; + ctx->attrs.sack_ok = cookie & BPF_SYNCOOKIE_SACK; + ctx->attrs.ecn_ok = cookie & BPF_SYNCOOKIE_ECN; + + return 0; +err: + return -1; +} + +static int tcp_handle_ack(struct tcp_syncookie *ctx) +{ + struct bpf_sock_tuple tuple; + struct bpf_sock *skc; + int ret = TC_ACT_OK; + struct sock *sk; + u32 tuple_size; + + if (ctx->ipv4) { + tuple.ipv4.saddr = ctx->ipv4->saddr; + tuple.ipv4.daddr = ctx->ipv4->daddr; + tuple.ipv4.sport = ctx->tcp->source; + tuple.ipv4.dport = ctx->tcp->dest; + tuple_size = sizeof(tuple.ipv4); + } else if (ctx->ipv6) { + __builtin_memcpy(tuple.ipv6.saddr, &ctx->ipv6->saddr, sizeof(tuple.ipv6.saddr)); + __builtin_memcpy(tuple.ipv6.daddr, &ctx->ipv6->daddr, sizeof(tuple.ipv6.daddr)); + tuple.ipv6.sport = ctx->tcp->source; + tuple.ipv6.dport = ctx->tcp->dest; + tuple_size = sizeof(tuple.ipv6); + } else { + goto out; + } + + skc = bpf_skc_lookup_tcp(ctx->skb, &tuple, tuple_size, -1, 0); + if (!skc) + goto out; + + if (skc->state != TCP_LISTEN) + goto release; + + sk = (struct sock *)bpf_skc_to_tcp_sock(skc); + if (!sk) + goto err; + + if (tcp_validate_header(ctx)) + goto err; + + tcp_parse_options(ctx); + + if (tcp_validate_cookie(ctx)) + goto err; + + ret = bpf_sk_assign_tcp_reqsk(ctx->skb, sk, &ctx->attrs, sizeof(ctx->attrs)); + if (ret < 0) + goto err; + +release: + bpf_sk_release(skc); +out: + return ret; + +err: + ret = TC_ACT_SHOT; + goto release; +} + +SEC("tc") +int tcp_custom_syncookie(struct __sk_buff *skb) +{ + struct tcp_syncookie ctx = { + .skb = skb, + }; + + if (tcp_load_headers(&ctx)) + return TC_ACT_OK; + + if (ctx.tcp->rst) + return TC_ACT_OK; + + if (ctx.tcp->syn) { + if (ctx.tcp->ack) + return TC_ACT_OK; + + handled_syn = true; + + return tcp_handle_syn(&ctx); + } + + handled_ack = true; + + return tcp_handle_ack(&ctx); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h new file mode 100644 index 000000000000..29a6a53cf229 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#ifndef _TEST_TCP_SYNCOOKIE_H +#define _TEST_TCP_SYNCOOKIE_H + +#define __packed __attribute__((__packed__)) +#define __force + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define swap(a, b) \ + do { \ + typeof(a) __tmp = (a); \ + (a) = (b); \ + (b) = __tmp; \ + } while (0) + +#define swap_array(a, b) \ + do { \ + typeof(a) __tmp[sizeof(a)]; \ + __builtin_memcpy(__tmp, a, sizeof(a)); \ + __builtin_memcpy(a, b, sizeof(a)); \ + __builtin_memcpy(b, __tmp, sizeof(a)); \ + } while (0) + +/* asm-generic/unaligned.h */ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) + +static inline u16 get_unaligned_be16(const void *p) +{ + return bpf_ntohs(__get_unaligned_t(__be16, p)); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return bpf_ntohl(__get_unaligned_t(__be32, p)); +} + +/* lib/checksum.c */ +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum) +{ + unsigned long long s = (__force u32)sum; + + s += (__force u32)saddr; + s += (__force u32)daddr; +#ifdef __BIG_ENDIAN + s += proto + len; +#else + s += (proto + len) << 8; +#endif + return (__force __wsum)from64to32(s); +} + +/* asm-generic/checksum.h */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/* net/ipv6/ip6_checksum.c */ +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + int carry; + __u32 ulen; + __u32 uproto; + __u32 sum = (__force u32)csum; + + sum += (__force u32)saddr->in6_u.u6_addr32[0]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[0]); + sum += carry; + + sum += (__force u32)saddr->in6_u.u6_addr32[1]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[1]); + sum += carry; + + sum += (__force u32)saddr->in6_u.u6_addr32[2]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[2]); + sum += carry; + + sum += (__force u32)saddr->in6_u.u6_addr32[3]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[3]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[0]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[0]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[1]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[1]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[2]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[2]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[3]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[3]); + sum += carry; + + ulen = (__force u32)bpf_htonl((__u32)len); + sum += ulen; + carry = (sum < ulen); + sum += carry; + + uproto = (__force u32)bpf_htonl(proto); + sum += uproto; + carry = (sum < uproto); + sum += carry; + + return csum_fold((__force __wsum)sum); +} +#endif -- cgit v1.2.3 From 091f2bf60d52ac205c48dffcb8646ed9299078c9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 17 Jan 2024 10:16:11 +0100 Subject: bpf: Sync uapi bpf.h header for the tooling infra Both commit 91051f003948 ("tcp: Dump bound-only sockets in inet_diag.") and commit 985b8ea9ec7e ("bpf, docs: Fix bpf_redirect_peer header doc") missed the tooling header sync. Fix it. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7f24d898efbb..a00f8a5623e1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4839,9 +4839,9 @@ union bpf_attr { * going through the CPU's backlog queue. * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types at the ingress - * hook and for veth device types. The peer device must reside in a - * different network namespace. + * currently only supported for tc BPF program types at the + * ingress hook and for veth and netkit target device types. The + * peer device must reside in a different network namespace. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. @@ -6904,6 +6904,7 @@ enum { BPF_TCP_LISTEN, BPF_TCP_CLOSING, /* Now a valid state */ BPF_TCP_NEW_SYN_RECV, + BPF_TCP_BOUND_INACTIVE, BPF_TCP_MAX_STATES /* Leave at the end! */ }; -- cgit v1.2.3 From f04deb90e516e8e48bf8693397529bc942a9e80b Mon Sep 17 00:00:00 2001 From: Andrey Grafin Date: Wed, 17 Jan 2024 16:06:18 +0300 Subject: libbpf: Apply map_set_def_max_entries() for inner_maps on creation This patch allows to auto create BPF_MAP_TYPE_ARRAY_OF_MAPS and BPF_MAP_TYPE_HASH_OF_MAPS with values of BPF_MAP_TYPE_PERF_EVENT_ARRAY by bpf_object__load(). Previous behaviour created a zero filled btf_map_def for inner maps and tried to use it for a map creation but the linux kernel forbids to create a BPF_MAP_TYPE_PERF_EVENT_ARRAY map with max_entries=0. Fixes: 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support") Signed-off-by: Andrey Grafin Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240117130619.9403-1-conquistador@yandex-team.ru Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index afd09571c482..b8b00da62907 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -70,6 +70,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); +static int map_set_def_max_entries(struct bpf_map *map); static const char * const attach_type_name[] = { [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", @@ -5172,6 +5173,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (bpf_map_type__is_map_in_map(def->type)) { if (map->inner_map) { + err = map_set_def_max_entries(map->inner_map); + if (err) + return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { pr_warn("map '%s': failed to create inner map: %d\n", -- cgit v1.2.3 From 40628f9fff73adecac77a9aa390f8016724cad99 Mon Sep 17 00:00:00 2001 From: Andrey Grafin Date: Wed, 17 Jan 2024 16:06:19 +0300 Subject: selftest/bpf: Add map_in_maps with BPF_MAP_TYPE_PERF_EVENT_ARRAY values Check that bpf_object__load() successfully creates map_in_maps with BPF_MAP_TYPE_PERF_EVENT_ARRAY values. These changes cover fix in the previous patch "libbpf: Apply map_set_def_max_entries() for inner_maps on creation". A command line output is: - w/o fix $ sudo ./test_maps libbpf: map 'mim_array_pe': failed to create inner map: -22 libbpf: map 'mim_array_pe': failed to create: Invalid argument(-22) libbpf: failed to load object './test_map_in_map.bpf.o' Failed to load test prog - with fix $ sudo ./test_maps ... test_maps: OK, 0 SKIPPED Fixes: 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support") Signed-off-by: Andrey Grafin Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240117130619.9403-2-conquistador@yandex-team.ru Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/test_map_in_map.c | 26 ++++++++++++++++++++++ tools/testing/selftests/bpf/test_maps.c | 6 ++++- 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index f416032ba858..b295f9b721bf 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -21,6 +21,32 @@ struct { __type(value, __u32); } mim_hash SEC(".maps"); +/* The following three maps are used to test + * perf_event_array map can be an inner + * map of hash/array_of_maps. + */ +struct perf_event_array { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, __u32); + __type(value, __u32); +} inner_map0 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_array_pe SEC(".maps") = { + .values = {&inner_map0}}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_hash_pe SEC(".maps") = { + .values = {&inner_map0}}; + SEC("xdp") int xdp_mimtest0(struct xdp_md *ctx) { diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 767e0693df10..dfbab214f4d1 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1190,7 +1190,11 @@ static void test_map_in_map(void) goto out_map_in_map; } - bpf_object__load(obj); + err = bpf_object__load(obj); + if (err) { + printf("Failed to load test prog\n"); + goto out_map_in_map; + } map = bpf_object__find_map_by_name(obj, "mim_array"); if (!map) { -- cgit v1.2.3 From 29f868887a7dd3efc6faecc6fc91b28fc25cf5b0 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 19 Jan 2024 18:25:29 +0800 Subject: selftests/bpf: Enable kptr_xchg_inline test for arm64 Now arm64 bpf jit has enable bpf_jit_supports_ptr_xchg(), so enable the test for arm64 as well. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20240119102529.99581-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c index 5a4bee1cf970..15144943e88b 100644 --- a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c +++ b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c @@ -13,7 +13,7 @@ void test_kptr_xchg_inline(void) unsigned int cnt; int err; -#if !defined(__x86_64__) +#if !(defined(__x86_64__) || defined(__aarch64__)) test__skip(); return; #endif -- cgit v1.2.3 From bc308d011ab8cc61bf1be15a2920bcd7d7b9b9d3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jan 2024 13:02:01 -0800 Subject: libbpf: call dup2() syscall directly We've ran into issues with using dup2() API in production setting, where libbpf is linked into large production environment and ends up calling unintended custom implementations of dup2(). These custom implementations don't provide atomic FD replacement guarantees of dup2() syscall, leading to subtle and hard to debug issues. To prevent this in the future and guarantee that no libc implementation will do their own custom non-atomic dup2() implementation, call dup2() syscall directly with syscall(SYS_dup2). Note that some architectures don't seem to provide dup2 and have dup3 instead. Try to detect and pick best syscall. Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240119210201.1295511-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_internal.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 27e4e320e1a6..58c547d473e0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "relo_core.h" @@ -555,6 +556,15 @@ static inline int ensure_good_fd(int fd) return fd; } +static inline int sys_dup2(int oldfd, int newfd) +{ +#ifdef __NR_dup2 + return syscall(__NR_dup2, oldfd, newfd); +#else + return syscall(__NR_dup3, oldfd, newfd, 0); +#endif +} + /* Point *fixed_fd* to the same file that *tmp_fd* points to. * Regardless of success, *tmp_fd* is closed. * Whatever *fixed_fd* pointed to is closed silently. @@ -563,7 +573,7 @@ static inline int reuse_fd(int fixed_fd, int tmp_fd) { int err; - err = dup2(tmp_fd, fixed_fd); + err = sys_dup2(tmp_fd, fixed_fd); err = err < 0 ? -errno : 0; close(tmp_fd); /* clean up temporary FD */ return err; -- cgit v1.2.3 From edb799035dd7d41c3e81e1bef83e2a2120b08abb Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Tue, 23 Jan 2024 21:17:29 +0100 Subject: bpf: avoid VLAs in progs/test_xdp_dynptr.c VLAs are not supported by either the BPF port of clang nor GCC. The selftest test_xdp_dynptr.c contains the following code: const size_t tcphdr_sz = sizeof(struct tcphdr); const size_t udphdr_sz = sizeof(struct udphdr); const size_t ethhdr_sz = sizeof(struct ethhdr); const size_t iphdr_sz = sizeof(struct iphdr); const size_t ipv6hdr_sz = sizeof(struct ipv6hdr); [...] static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr) { __u8 eth_buffer[ethhdr_sz + iphdr_sz + ethhdr_sz]; __u8 iph_buffer_tcp[iphdr_sz + tcphdr_sz]; __u8 iph_buffer_udp[iphdr_sz + udphdr_sz]; [...] } The eth_buffer, iph_buffer_tcp and other automatics are fixed size only if the compiler optimizes away the constant global variables. clang does this, but GCC does not, turning these automatics into variable length arrays. This patch removes the global variables and turns these values into preprocessor constants. This makes the selftest to build properly with GCC. Tested in bpf-next master. No regressions. Signed-off-by: Jose E. Marchesi Cc: Yonghong Song Cc: Eduard Zingerman Cc: david.faust@oracle.com Cc: cupertino.miranda@oracle.com Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240123201729.16173-1-jose.marchesi@oracle.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_xdp_dynptr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c index 78c368e71797..67a77944ef29 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -18,11 +18,11 @@ #include "test_iptunnel_common.h" #include "bpf_kfuncs.h" -const size_t tcphdr_sz = sizeof(struct tcphdr); -const size_t udphdr_sz = sizeof(struct udphdr); -const size_t ethhdr_sz = sizeof(struct ethhdr); -const size_t iphdr_sz = sizeof(struct iphdr); -const size_t ipv6hdr_sz = sizeof(struct ipv6hdr); +#define tcphdr_sz sizeof(struct tcphdr) +#define udphdr_sz sizeof(struct udphdr) +#define ethhdr_sz sizeof(struct ethhdr) +#define iphdr_sz sizeof(struct iphdr) +#define ipv6hdr_sz sizeof(struct ipv6hdr) struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); -- cgit v1.2.3 From 756e34da5380e4c0ed2cfbe5259e1b015567a099 Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Tue, 23 Jan 2024 21:56:24 +0100 Subject: bpf: fix constraint in test_tcpbpf_kern.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC emits a warning: progs/test_tcpbpf_kern.c:60:9: error: ‘op’ is used uninitialized [-Werror=uninitialized] when an uninialized op is used with a "+r" constraint. The + modifier means a read-write operand, but that operand in the selftest is just written to. This patch changes the selftest to use a "=r" constraint. This pacifies GCC. Tested in bpf-next master. No regressions. Signed-off-by: Jose E. Marchesi Cc: Yonghong Song Cc: Eduard Zingerman Cc: david.faust@oracle.com Cc: cupertino.miranda@oracle.com Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240123205624.14746-1-jose.marchesi@oracle.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index cf7ed8cbb1fe..a3f3f43fc195 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -59,7 +59,7 @@ int bpf_testcb(struct bpf_sock_ops *skops) asm volatile ( "%[op] = *(u32 *)(%[skops] +96)" - : [op] "+r"(op) + : [op] "=r"(op) : [skops] "r"(skops) :); -- cgit v1.2.3 From bbc094b3052647c188d6f155f5c09cb9492ce106 Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Tue, 23 Jan 2024 19:13:09 +0100 Subject: bpf: Use r constraint instead of p constraint in selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the BPF selftests use the "p" constraint in inline assembly snippets, for input operands for MOV (rN = rM) instructions. This is mainly done via the __imm_ptr macro defined in tools/testing/selftests/bpf/progs/bpf_misc.h: #define __imm_ptr(name) [name]"p"(&name) Example: int consume_first_item_only(void *ctx) { struct bpf_iter_num iter; asm volatile ( /* create iterator */ "r1 = %[iter];" [...] : : __imm_ptr(iter) : CLOBBERS); [...] } The "p" constraint is a tricky one. It is documented in the GCC manual section "Simple Constraints": An operand that is a valid memory address is allowed. This is for ``load address'' and ``push address'' instructions. p in the constraint must be accompanied by address_operand as the predicate in the match_operand. This predicate interprets the mode specified in the match_operand as the mode of the memory reference for which the address would be valid. There are two problems: 1. It is questionable whether that constraint was ever intended to be used in inline assembly templates, because its behavior really depends on compiler internals. A "memory address" is not the same than a "memory operand" or a "memory reference" (constraint "m"), and in fact its usage in the template above results in an error in both x86_64-linux-gnu and bpf-unkonwn-none: foo.c: In function ‘bar’: foo.c:6:3: error: invalid 'asm': invalid expression as operand 6 | asm volatile ("r1 = %[jorl]" : : [jorl]"p"(&jorl)); | ^~~ I would assume the same happens with aarch64, riscv, and most/all other targets in GCC, that do not accept operands of the form A + B that are not wrapped either in a const or in a memory reference. To avoid that error, the usage of the "p" constraint in internal GCC instruction templates is supposed to be complemented by the 'a' modifier, like in: asm volatile ("r1 = %a[jorl]" : : [jorl]"p"(&jorl)); Internally documented (in GCC's final.cc) as: %aN means expect operand N to be a memory address (not a memory reference!) and print a reference to that address. That works because when the modifier 'a' is found, GCC prints an "operand address", which is not the same than an "operand". But... 2. Even if we used the internal 'a' modifier (we shouldn't) the 'rN = rM' instruction really requires a register argument. In cases involving automatics, like in the examples above, we easily end with: bar: #APP r1 = r10-4 #NO_APP In other cases we could conceibly also end with a 64-bit label that may overflow the 32-bit immediate operand of `rN = imm32' instructions: r1 = foo All of which is clearly wrong. clang happens to do "the right thing" in the current usage of __imm_ptr in the BPF tests, because even with -O2 it seems to "reload" the fp-relative address of the automatic to a register like in: bar: r1 = r10 r1 += -4 #APP r1 = r1 #NO_APP Which is what GCC would generate with -O0. Whether this is by chance or by design, the compiler shouln't be expected to do that reload driven by the "p" constraint. This patch changes the usage of the "p" constraint in the BPF selftests macros to use the "r" constraint instead. If a register is what is required, we should let the compiler know. Previous discussion in bpf@vger: https://lore.kernel.org/bpf/87h6p5ebpb.fsf@oracle.com/T/#ef0df83d6975c34dff20bf0dd52e078f5b8ca2767 Tested in bpf-next master. No regressions. Signed-off-by: Jose E. Marchesi Cc: Yonghong Song Cc: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240123181309.19853-1-jose.marchesi@oracle.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_misc.h | 2 +- tools/testing/selftests/bpf/progs/iters.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 2fd59970c43a..fb2f5513e29e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -80,7 +80,7 @@ #define __imm(name) [name]"i"(name) #define __imm_const(name, expr) [name]"i"(expr) #define __imm_addr(name) [name]"i"(&name) -#define __imm_ptr(name) [name]"p"(&name) +#define __imm_ptr(name) [name]"r"(&name) #define __imm_insn(name, expr) [name]"i"(*(long *)&(expr)) /* Magic constants used with __retval() */ diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index fe971992e635..225f02dd66d0 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -78,8 +78,8 @@ int iter_err_unsafe_asm_loop(const void *ctx) "*(u32 *)(r1 + 0) = r6;" /* invalid */ : : [it]"r"(&it), - [small_arr]"p"(small_arr), - [zero]"p"(zero), + [small_arr]"r"(small_arr), + [zero]"r"(zero), __imm(bpf_iter_num_new), __imm(bpf_iter_num_next), __imm(bpf_iter_num_destroy) -- cgit v1.2.3 From d5c16492c66fbfca85f36e42363d32212df5927b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:04:58 +0100 Subject: bpf: Add cookie to perf_event bpf_link_info records At the moment we don't store cookie for perf_event probes, while we do that for the rest of the probes. Adding cookie fields to struct bpf_link_info perf event probe records: perf_event.uprobe perf_event.kprobe perf_event.tracepoint perf_event.perf_event And the code to store that in bpf_link_info struct. Signed-off-by: Jiri Olsa Acked-by: Song Liu Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20240119110505.400573-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 6 ++++++ kernel/bpf/syscall.c | 4 ++++ tools/include/uapi/linux/bpf.h | 6 ++++++ 3 files changed, 16 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a00f8a5623e1..181e74433272 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6582,6 +6582,7 @@ struct bpf_link_info { __aligned_u64 file_name; /* in/out */ __u32 name_len; __u32 offset; /* offset from file_name */ + __u64 cookie; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6589,14 +6590,19 @@ struct bpf_link_info { __u32 offset; /* offset from func_name */ __u64 addr; __u64 missed; + __u64 cookie; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ __u32 name_len; + __u32 :32; + __u64 cookie; } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */ struct { __u64 config; __u32 type; + __u32 :32; + __u64 cookie; } event; /* BPF_PERF_EVENT_EVENT */ }; } perf_event; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a1f18681721c..13193aaafb64 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3501,6 +3501,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, if (!kallsyms_show_value(current_cred())) addr = 0; info->perf_event.kprobe.addr = addr; + info->perf_event.kprobe.cookie = event->bpf_cookie; return 0; } #endif @@ -3526,6 +3527,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, else info->perf_event.type = BPF_PERF_EVENT_UPROBE; info->perf_event.uprobe.offset = offset; + info->perf_event.uprobe.cookie = event->bpf_cookie; return 0; } #endif @@ -3553,6 +3555,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; + info->perf_event.tracepoint.cookie = event->bpf_cookie; return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL); } @@ -3561,6 +3564,7 @@ static int bpf_perf_link_fill_perf_event(const struct perf_event *event, { info->perf_event.event.type = event->attr.type; info->perf_event.event.config = event->attr.config; + info->perf_event.event.cookie = event->bpf_cookie; info->perf_event.type = BPF_PERF_EVENT_EVENT; return 0; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a00f8a5623e1..181e74433272 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6582,6 +6582,7 @@ struct bpf_link_info { __aligned_u64 file_name; /* in/out */ __u32 name_len; __u32 offset; /* offset from file_name */ + __u64 cookie; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6589,14 +6590,19 @@ struct bpf_link_info { __u32 offset; /* offset from func_name */ __u64 addr; __u64 missed; + __u64 cookie; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ __u32 name_len; + __u32 :32; + __u64 cookie; } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */ struct { __u64 config; __u32 type; + __u32 :32; + __u64 cookie; } event; /* BPF_PERF_EVENT_EVENT */ }; } perf_event; -- cgit v1.2.3 From 9fd112b1f82b587ffb12fb67dd032f551fdb571a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:04:59 +0100 Subject: bpf: Store cookies in kprobe_multi bpf_link_info data Storing cookies in kprobe_multi bpf_link_info data. The cookies field is optional and if provided it needs to be an array of __u64 with kprobe_multi.count length. Acked-by: Yafang Shao Signed-off-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20240119110505.400573-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + kernel/trace/bpf_trace.c | 15 +++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 17 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 181e74433272..287d05732668 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6563,6 +6563,7 @@ struct bpf_link_info { __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; __u64 missed; + __aligned_u64 cookies; } kprobe_multi; struct { __aligned_u64 path; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 7ac6c52b25eb..c98c20abaf99 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2679,6 +2679,7 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link) static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) { + u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies); u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs); struct bpf_kprobe_multi_link *kmulti_link; u32 ucount = info->kprobe_multi.count; @@ -2686,6 +2687,8 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, if (!uaddrs ^ !ucount) return -EINVAL; + if (ucookies && !ucount) + return -EINVAL; kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); info->kprobe_multi.count = kmulti_link->cnt; @@ -2699,6 +2702,18 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, else ucount = kmulti_link->cnt; + if (ucookies) { + if (kmulti_link->cookies) { + if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64))) + return -EFAULT; + } else { + for (i = 0; i < ucount; i++) { + if (put_user(0, ucookies + i)) + return -EFAULT; + } + } + } + if (kallsyms_show_value(current_cred())) { if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64))) return -EFAULT; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 181e74433272..287d05732668 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6563,6 +6563,7 @@ struct bpf_link_info { __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; __u64 missed; + __aligned_u64 cookies; } kprobe_multi; struct { __aligned_u64 path; -- cgit v1.2.3 From 2adb2e0fcdf3c6d8e28a5a9c33e458e1037ae5ad Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:05:00 +0100 Subject: bpftool: Fix wrong free call in do_show_link The error path frees wrong array, it should be ref_ctr_offsets. Acked-by: Yafang Shao Reviewed-by: Quentin Monnet Fixes: a7795698f8b6 ("bpftool: Add support to display uprobe_multi links") Signed-off-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20240119110505.400573-4-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index cb46667a6b2e..35b6859dd7c3 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -977,7 +977,7 @@ again: cookies = calloc(count, sizeof(__u64)); if (!cookies) { p_err("mem alloc failed"); - free(cookies); + free(ref_ctr_offsets); free(offsets); close(fd); return -ENOMEM; -- cgit v1.2.3 From 59a89706c40c153a74a3a9570b4d696cf9eebb0b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:05:01 +0100 Subject: selftests/bpf: Add cookies check for kprobe_multi fill_link_info test Adding cookies check for kprobe_multi fill_link_info test, plus tests for invalid values related to cookies. Signed-off-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20240119110505.400573-5-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/fill_link_info.c | 48 ++++++++++++++++------ 1 file changed, 36 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index d4b1901f7879..2c2a02010c0b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -19,6 +19,7 @@ static const char *kmulti_syms[] = { }; #define KMULTI_CNT ARRAY_SIZE(kmulti_syms) static __u64 kmulti_addrs[KMULTI_CNT]; +static __u64 kmulti_cookies[] = { 3, 1, 2 }; #define KPROBE_FUNC "bpf_fentry_test1" static __u64 kprobe_addr; @@ -195,11 +196,11 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, bpf_link__destroy(link); } -static int verify_kmulti_link_info(int fd, bool retprobe) +static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies) { + __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT]; struct bpf_link_info info; __u32 len = sizeof(info); - __u64 addrs[KMULTI_CNT]; int flags, i, err; memset(&info, 0, sizeof(info)); @@ -221,18 +222,22 @@ again: if (!info.kprobe_multi.addrs) { info.kprobe_multi.addrs = ptr_to_u64(addrs); + info.kprobe_multi.cookies = ptr_to_u64(cookies); goto again; } - for (i = 0; i < KMULTI_CNT; i++) + for (i = 0; i < KMULTI_CNT; i++) { ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs"); + ASSERT_EQ(cookies[i], has_cookies ? kmulti_cookies[i] : 0, + "kmulti_cookies_value"); + } return 0; } static void verify_kmulti_invalid_user_buffer(int fd) { + __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT]; struct bpf_link_info info; __u32 len = sizeof(info); - __u64 addrs[KMULTI_CNT]; int err, i; memset(&info, 0, sizeof(info)); @@ -266,7 +271,20 @@ static void verify_kmulti_invalid_user_buffer(int fd) info.kprobe_multi.count = KMULTI_CNT; info.kprobe_multi.addrs = 0x1; /* invalid addr */ err = bpf_link_get_info_by_fd(fd, &info, &len); - ASSERT_EQ(err, -EFAULT, "invalid_buff"); + ASSERT_EQ(err, -EFAULT, "invalid_buff_addrs"); + + info.kprobe_multi.count = KMULTI_CNT; + info.kprobe_multi.addrs = ptr_to_u64(addrs); + info.kprobe_multi.cookies = 0x1; /* invalid addr */ + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "invalid_buff_cookies"); + + /* cookies && !count */ + info.kprobe_multi.count = 0; + info.kprobe_multi.addrs = ptr_to_u64(NULL); + info.kprobe_multi.cookies = ptr_to_u64(cookies); + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "invalid_cookies_count"); } static int symbols_cmp_r(const void *a, const void *b) @@ -278,13 +296,15 @@ static int symbols_cmp_r(const void *a, const void *b) } static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, - bool retprobe, bool invalid) + bool retprobe, bool cookies, + bool invalid) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct bpf_link *link; int link_fd, err; opts.syms = kmulti_syms; + opts.cookies = cookies ? kmulti_cookies : NULL; opts.cnt = KMULTI_CNT; opts.retprobe = retprobe; link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts); @@ -293,7 +313,7 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, link_fd = bpf_link__fd(link); if (!invalid) { - err = verify_kmulti_link_info(link_fd, retprobe); + err = verify_kmulti_link_info(link_fd, retprobe, cookies); ASSERT_OK(err, "verify_kmulti_link_info"); } else { verify_kmulti_invalid_user_buffer(link_fd); @@ -523,12 +543,16 @@ void test_fill_link_info(void) qsort(kmulti_syms, KMULTI_CNT, sizeof(kmulti_syms[0]), symbols_cmp_r); for (i = 0; i < KMULTI_CNT; i++) kmulti_addrs[i] = ksym_get_addr(kmulti_syms[i]); - if (test__start_subtest("kprobe_multi_link_info")) - test_kprobe_multi_fill_link_info(skel, false, false); - if (test__start_subtest("kretprobe_multi_link_info")) - test_kprobe_multi_fill_link_info(skel, true, false); + if (test__start_subtest("kprobe_multi_link_info")) { + test_kprobe_multi_fill_link_info(skel, false, false, false); + test_kprobe_multi_fill_link_info(skel, false, true, false); + } + if (test__start_subtest("kretprobe_multi_link_info")) { + test_kprobe_multi_fill_link_info(skel, true, false, false); + test_kprobe_multi_fill_link_info(skel, true, true, false); + } if (test__start_subtest("kprobe_multi_invalid_ubuff")) - test_kprobe_multi_fill_link_info(skel, true, true); + test_kprobe_multi_fill_link_info(skel, true, true, true); if (test__start_subtest("uprobe_multi_link_info")) test_uprobe_multi_fill_link_info(skel, false, false); -- cgit v1.2.3 From d74179708473c649c653f1db280e29875a532e99 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:05:02 +0100 Subject: selftests/bpf: Add cookies check for perf_event fill_link_info test Now that we get cookies for perf_event probes, adding tests for cookie for kprobe/uprobe/tracepoint. The perf_event test needs to be added completely and is coming in following change. Signed-off-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20240119110505.400573-6-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/fill_link_info.c | 26 +++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 2c2a02010c0b..20e105001bc3 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -32,6 +32,8 @@ static noinline void uprobe_func(void) asm volatile (""); } +#define PERF_EVENT_COOKIE 0xdeadbeef + static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr, ssize_t offset, ssize_t entry_offset) { @@ -63,6 +65,8 @@ again: ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset, "kprobe_addr"); + ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie"); + if (!info.perf_event.kprobe.func_name) { ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len"); info.perf_event.kprobe.func_name = ptr_to_u64(&buf); @@ -82,6 +86,8 @@ again: goto again; } + ASSERT_EQ(info.perf_event.tracepoint.cookie, PERF_EVENT_COOKIE, "tracepoint_cookie"); + err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME, strlen(TP_NAME)); ASSERT_EQ(err, 0, "cmp_tp_name"); @@ -97,6 +103,8 @@ again: goto again; } + ASSERT_EQ(info.perf_event.uprobe.cookie, PERF_EVENT_COOKIE, "uprobe_cookie"); + err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE, strlen(UPROBE_FILE)); ASSERT_EQ(err, 0, "cmp_file_name"); @@ -140,6 +148,7 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, .attach_mode = PROBE_ATTACH_MODE_LINK, .retprobe = type == BPF_PERF_EVENT_KRETPROBE, + .bpf_cookie = PERF_EVENT_COOKIE, ); ssize_t entry_offset = 0; struct bpf_link *link; @@ -164,10 +173,13 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, static void test_tp_fill_link_info(struct test_fill_link_info *skel) { + DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts, + .bpf_cookie = PERF_EVENT_COOKIE, + ); struct bpf_link *link; int link_fd, err; - link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); + link = bpf_program__attach_tracepoint_opts(skel->progs.tp_run, TP_CAT, TP_NAME, &opts); if (!ASSERT_OK_PTR(link, "attach_tp")) return; @@ -180,13 +192,17 @@ static void test_tp_fill_link_info(struct test_fill_link_info *skel) static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, enum bpf_perf_event_type type) { + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, + .retprobe = type == BPF_PERF_EVENT_URETPROBE, + .bpf_cookie = PERF_EVENT_COOKIE, + ); struct bpf_link *link; int link_fd, err; - link = bpf_program__attach_uprobe(skel->progs.uprobe_run, - type == BPF_PERF_EVENT_URETPROBE, - 0, /* self pid */ - UPROBE_FILE, uprobe_offset); + link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run, + 0, /* self pid */ + UPROBE_FILE, uprobe_offset, + &opts); if (!ASSERT_OK_PTR(link, "attach_uprobe")) return; -- cgit v1.2.3 From b7896486688af36e3bc5e27a6d5369cc5dcbcf69 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:05:03 +0100 Subject: selftests/bpf: Add fill_link_info test for perf event Adding fill_link_info test for perf event and testing we get its values back through the bpf_link_info interface. Signed-off-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20240119110505.400573-7-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/fill_link_info.c | 40 ++++++++++++++++++++++ .../selftests/bpf/progs/test_fill_link_info.c | 6 ++++ 2 files changed, 46 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 20e105001bc3..f3932941bbaa 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -109,6 +109,11 @@ again: strlen(UPROBE_FILE)); ASSERT_EQ(err, 0, "cmp_file_name"); break; + case BPF_PERF_EVENT_EVENT: + ASSERT_EQ(info.perf_event.event.type, PERF_TYPE_SOFTWARE, "event_type"); + ASSERT_EQ(info.perf_event.event.config, PERF_COUNT_SW_PAGE_FAULTS, "event_config"); + ASSERT_EQ(info.perf_event.event.cookie, PERF_EVENT_COOKIE, "event_cookie"); + break; default: err = -1; break; @@ -189,6 +194,39 @@ static void test_tp_fill_link_info(struct test_fill_link_info *skel) bpf_link__destroy(link); } +static void test_event_fill_link_info(struct test_fill_link_info *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts, + .bpf_cookie = PERF_EVENT_COOKIE, + ); + struct bpf_link *link; + int link_fd, err, pfd; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_PAGE_FAULTS, + .freq = 1, + .sample_freq = 1, + .size = sizeof(struct perf_event_attr), + }; + + pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */, + -1 /* group id */, 0 /* flags */); + if (!ASSERT_GE(pfd, 0, "perf_event_open")) + return; + + link = bpf_program__attach_perf_event_opts(skel->progs.event_run, pfd, &opts); + if (!ASSERT_OK_PTR(link, "attach_event")) + goto error; + + link_fd = bpf_link__fd(link); + err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_EVENT, 0, 0, 0); + ASSERT_OK(err, "verify_perf_link_info"); + bpf_link__destroy(link); + +error: + close(pfd); +} + static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, enum bpf_perf_event_type type) { @@ -549,6 +587,8 @@ void test_fill_link_info(void) test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, true); if (test__start_subtest("tracepoint_link_info")) test_tp_fill_link_info(skel); + if (test__start_subtest("event_link_info")) + test_event_fill_link_info(skel); uprobe_offset = get_uprobe_offset(&uprobe_func); if (test__start_subtest("uprobe_link_info")) diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c index 69509f8bb680..6afa834756e9 100644 --- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -33,6 +33,12 @@ int BPF_PROG(tp_run) return 0; } +SEC("perf_event") +int event_run(void *ctx) +{ + return 0; +} + SEC("kprobe.multi") int BPF_PROG(kmulti_run) { -- cgit v1.2.3 From 54258324b934aa8552c239c443272ec7aea55285 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:05:04 +0100 Subject: bpftool: Display cookie for perf event link probes Displaying cookie for perf event link probes, in plain mode: # bpftool link 17: perf_event prog 90 kprobe ffffffff82b1c2b0 bpf_fentry_test1 cookie 3735928559 18: perf_event prog 90 kretprobe ffffffff82b1c2b0 bpf_fentry_test1 cookie 3735928559 20: perf_event prog 92 tracepoint sched_switch cookie 3735928559 21: perf_event prog 93 event software:page-faults cookie 3735928559 22: perf_event prog 91 uprobe /proc/self/exe+0xd703c cookie 3735928559 And in json mode: # bpftool link -j | jq { "id": 30, "type": "perf_event", "prog_id": 160, "retprobe": false, "addr": 18446744071607272112, "func": "bpf_fentry_test1", "offset": 0, "missed": 0, "cookie": 3735928559 } { "id": 33, "type": "perf_event", "prog_id": 162, "tracepoint": "sched_switch", "cookie": 3735928559 } { "id": 34, "type": "perf_event", "prog_id": 163, "event_type": "software", "event_config": "page-faults", "cookie": 3735928559 } { "id": 35, "type": "perf_event", "prog_id": 161, "retprobe": false, "file": "/proc/self/exe", "offset": 880700, "cookie": 3735928559 } Reviewed-by: Quentin Monnet Signed-off-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20240119110505.400573-8-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/link.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 35b6859dd7c3..b66a1598b87c 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -334,6 +334,7 @@ show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr) u64_to_ptr(info->perf_event.kprobe.func_name)); jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset); jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed); + jsonw_uint_field(wtr, "cookie", info->perf_event.kprobe.cookie); } static void @@ -343,6 +344,7 @@ show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_string_field(wtr, "file", u64_to_ptr(info->perf_event.uprobe.file_name)); jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset); + jsonw_uint_field(wtr, "cookie", info->perf_event.uprobe.cookie); } static void @@ -350,6 +352,7 @@ show_perf_event_tracepoint_json(struct bpf_link_info *info, json_writer_t *wtr) { jsonw_string_field(wtr, "tracepoint", u64_to_ptr(info->perf_event.tracepoint.tp_name)); + jsonw_uint_field(wtr, "cookie", info->perf_event.tracepoint.cookie); } static char *perf_config_hw_cache_str(__u64 config) @@ -426,6 +429,8 @@ show_perf_event_event_json(struct bpf_link_info *info, json_writer_t *wtr) else jsonw_uint_field(wtr, "event_config", config); + jsonw_uint_field(wtr, "cookie", info->perf_event.event.cookie); + if (type == PERF_TYPE_HW_CACHE && perf_config) free((void *)perf_config); } @@ -754,6 +759,8 @@ static void show_perf_event_kprobe_plain(struct bpf_link_info *info) printf("+%#x", info->perf_event.kprobe.offset); if (info->perf_event.kprobe.missed) printf(" missed %llu", info->perf_event.kprobe.missed); + if (info->perf_event.kprobe.cookie) + printf(" cookie %llu", info->perf_event.kprobe.cookie); printf(" "); } @@ -770,6 +777,8 @@ static void show_perf_event_uprobe_plain(struct bpf_link_info *info) else printf("\n\tuprobe "); printf("%s+%#x ", buf, info->perf_event.uprobe.offset); + if (info->perf_event.uprobe.cookie) + printf("cookie %llu ", info->perf_event.uprobe.cookie); } static void show_perf_event_tracepoint_plain(struct bpf_link_info *info) @@ -781,6 +790,8 @@ static void show_perf_event_tracepoint_plain(struct bpf_link_info *info) return; printf("\n\ttracepoint %s ", buf); + if (info->perf_event.tracepoint.cookie) + printf("cookie %llu ", info->perf_event.tracepoint.cookie); } static void show_perf_event_event_plain(struct bpf_link_info *info) @@ -802,6 +813,9 @@ static void show_perf_event_event_plain(struct bpf_link_info *info) else printf("%llu ", config); + if (info->perf_event.event.cookie) + printf("cookie %llu ", info->perf_event.event.cookie); + if (type == PERF_TYPE_HW_CACHE && perf_config) free((void *)perf_config); } -- cgit v1.2.3 From b0dc037399b19a777d569dbd9e2e9bbd62f3b3b1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:05:05 +0100 Subject: bpftool: Display cookie for kprobe multi link Displaying cookies for kprobe multi link, in plain mode: # bpftool link ... 1397: kprobe_multi prog 47532 kretprobe.multi func_cnt 3 addr cookie func [module] ffffffff82b370c0 3 bpf_fentry_test1 ffffffff82b39780 1 bpf_fentry_test2 ffffffff82b397a0 2 bpf_fentry_test3 And in json mode: # bpftool link -j | jq ... { "id": 1397, "type": "kprobe_multi", "prog_id": 47532, "retprobe": true, "func_cnt": 3, "missed": 0, "funcs": [ { "addr": 18446744071607382208, "func": "bpf_fentry_test1", "module": null, "cookie": 3 }, { "addr": 18446744071607392128, "func": "bpf_fentry_test2", "module": null, "cookie": 1 }, { "addr": 18446744071607392160, "func": "bpf_fentry_test3", "module": null, "cookie": 2 } ] } Cookie is attached to specific address, and because we sort addresses before printing, we need to sort cookies the same way, hence adding the struct addr_cookie to keep and sort them together. Also adding missing dd.sym_count check to show_kprobe_multi_json. Signed-off-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20240119110505.400573-9-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/link.c | 78 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index b66a1598b87c..afde9d0c2ea1 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -249,18 +249,44 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info) return err; } -static int cmp_u64(const void *A, const void *B) +struct addr_cookie { + __u64 addr; + __u64 cookie; +}; + +static int cmp_addr_cookie(const void *A, const void *B) +{ + const struct addr_cookie *a = A, *b = B; + + if (a->addr == b->addr) + return 0; + return a->addr < b->addr ? -1 : 1; +} + +static struct addr_cookie * +get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count) { - const __u64 *a = A, *b = B; + struct addr_cookie *data; + __u32 i; - return *a - *b; + data = calloc(count, sizeof(data[0])); + if (!data) { + p_err("mem alloc failed"); + return NULL; + } + for (i = 0; i < count; i++) { + data[i].addr = addrs[i]; + data[i].cookie = cookies[i]; + } + qsort(data, count, sizeof(data[0]), cmp_addr_cookie); + return data; } static void show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) { + struct addr_cookie *data; __u32 i, j = 0; - __u64 *addrs; jsonw_bool_field(json_wtr, "retprobe", info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN); @@ -268,14 +294,20 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed); jsonw_name(json_wtr, "funcs"); jsonw_start_array(json_wtr); - addrs = u64_to_ptr(info->kprobe_multi.addrs); - qsort(addrs, info->kprobe_multi.count, sizeof(addrs[0]), cmp_u64); + data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs), + u64_to_ptr(info->kprobe_multi.cookies), + info->kprobe_multi.count); + if (!data) + return; /* Load it once for all. */ if (!dd.sym_count) kernel_syms_load(&dd); + if (!dd.sym_count) + goto error; + for (i = 0; i < dd.sym_count; i++) { - if (dd.sym_mapping[i].address != addrs[j]) + if (dd.sym_mapping[i].address != data[j].addr) continue; jsonw_start_object(json_wtr); jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address); @@ -287,11 +319,14 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) } else { jsonw_string_field(json_wtr, "module", dd.sym_mapping[i].module); } + jsonw_uint_field(json_wtr, "cookie", data[j].cookie); jsonw_end_object(json_wtr); if (j++ == info->kprobe_multi.count) break; } jsonw_end_array(json_wtr); +error: + free(data); } static __u64 *u64_to_arr(__u64 val) @@ -675,8 +710,8 @@ void netfilter_dump_plain(const struct bpf_link_info *info) static void show_kprobe_multi_plain(struct bpf_link_info *info) { + struct addr_cookie *data; __u32 i, j = 0; - __u64 *addrs; if (!info->kprobe_multi.count) return; @@ -688,21 +723,24 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info) printf("func_cnt %u ", info->kprobe_multi.count); if (info->kprobe_multi.missed) printf("missed %llu ", info->kprobe_multi.missed); - addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs); - qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64); + data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs), + u64_to_ptr(info->kprobe_multi.cookies), + info->kprobe_multi.count); + if (!data) + return; /* Load it once for all. */ if (!dd.sym_count) kernel_syms_load(&dd); if (!dd.sym_count) - return; + goto error; - printf("\n\t%-16s %s", "addr", "func [module]"); + printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]"); for (i = 0; i < dd.sym_count; i++) { - if (dd.sym_mapping[i].address != addrs[j]) + if (dd.sym_mapping[i].address != data[j].addr) continue; - printf("\n\t%016lx %s", - dd.sym_mapping[i].address, dd.sym_mapping[i].name); + printf("\n\t%016lx %-16llx %s", + dd.sym_mapping[i].address, data[j].cookie, dd.sym_mapping[i].name); if (dd.sym_mapping[i].module[0] != '\0') printf(" [%s] ", dd.sym_mapping[i].module); else @@ -711,6 +749,8 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info) if (j++ == info->kprobe_multi.count) break; } +error: + free(data); } static void show_uprobe_multi_plain(struct bpf_link_info *info) @@ -966,6 +1006,14 @@ again: return -ENOMEM; } info.kprobe_multi.addrs = ptr_to_u64(addrs); + cookies = calloc(count, sizeof(__u64)); + if (!cookies) { + p_err("mem alloc failed"); + free(addrs); + close(fd); + return -ENOMEM; + } + info.kprobe_multi.cookies = ptr_to_u64(cookies); goto again; } } -- cgit v1.2.3 From 1338b93346587a2a6ac79bbcf55ef5b357745573 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 19 Jan 2024 14:49:57 -0800 Subject: bpf: pass btf object id in bpf_map_info. Include btf object id (btf_obj_id) in bpf_map_info so that tools (ex: bpftools struct_ops dump) know the correct btf from the kernel to look up type information of struct_ops types. Since struct_ops types can be defined and registered in a module. The type information of a struct_ops type are defined in the btf of the module defining it. The userspace tools need to know which btf is for the module defining a struct_ops type. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240119225005.668602-7-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 4 ++++ include/uapi/linux/bpf.h | 2 +- kernel/bpf/bpf_struct_ops.c | 7 +++++++ kernel/bpf/syscall.c | 2 ++ tools/include/uapi/linux/bpf.h | 2 +- 5 files changed, 15 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7fc95e7babab..29fcae9fa8ed 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1732,6 +1732,7 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); #else static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(u32 type_id) { @@ -1759,6 +1760,9 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ +} #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 287d05732668..a380047c86af 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6487,7 +6487,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; - __u32 :32; /* alignment pad */ + __u32 btf_vmlinux_id; __u64 map_extra; } __attribute__((aligned(8))); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 5ddcca4c4fba..5e98af4fc2e2 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -947,3 +947,10 @@ err_out: kfree(link); return err; } + +void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ + struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; + + info->btf_vmlinux_id = btf_obj_id(st_map->btf); +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 13193aaafb64..55b458429705 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4691,6 +4691,8 @@ static int bpf_map_get_info_by_fd(struct file *file, info.btf_value_type_id = map->btf_value_type_id; } info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; + if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) + bpf_map_struct_ops_info_fill(&info, map); if (bpf_map_is_offloaded(map)) { err = bpf_map_offload_info_fill(&info, map); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 287d05732668..a380047c86af 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6487,7 +6487,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; - __u32 :32; /* alignment pad */ + __u32 btf_vmlinux_id; __u64 map_extra; } __attribute__((aligned(8))); -- cgit v1.2.3 From fcc2c1fb0651477c8ed78a3a293c175ccd70697a Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 19 Jan 2024 14:49:59 -0800 Subject: bpf: pass attached BTF to the bpf_struct_ops subsystem Pass the fd of a btf from the userspace to the bpf() syscall, and then convert the fd into a btf. The btf is generated from the module that defines the target BPF struct_ops type. In order to inform the kernel about the module that defines the target struct_ops type, the userspace program needs to provide a btf fd for the respective module's btf. This btf contains essential information on the types defined within the module, including the target struct_ops type. A btf fd must be provided to the kernel for struct_ops maps and for the bpf programs attached to those maps. In the case of the bpf programs, the attach_btf_obj_fd parameter is passed as part of the bpf_attr and is converted into a btf. This btf is then stored in the prog->aux->attach_btf field. Here, it just let the verifier access attach_btf directly. In the case of struct_ops maps, a btf fd is passed as value_type_btf_obj_fd of bpf_attr. The bpf_struct_ops_map_alloc() function converts the fd to a btf and stores it as st_map->btf. A flag BPF_F_VTYPE_BTF_OBJ_FD is added for map_flags to indicate that the value of value_type_btf_obj_fd is set. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240119225005.668602-9-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 8 ++++++ kernel/bpf/bpf_struct_ops.c | 65 ++++++++++++++++++++++++++++++------------ kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 9 ++++-- tools/include/uapi/linux/bpf.h | 8 ++++++ 5 files changed, 70 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a380047c86af..1fef6d5a1330 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1330,6 +1330,9 @@ enum { /* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ BPF_F_PATH_FD = (1U << 14), + +/* Flag for value_type_btf_obj_fd, the fd is available */ + BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15), }; /* Flags for BPF_PROG_QUERY. */ @@ -1403,6 +1406,11 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + + __s32 value_type_btf_obj_fd; /* fd pointing to a BTF + * type data for + * btf_vmlinux_value_type_id. + */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 7505f515aac3..3b8d689ece5d 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -641,6 +641,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map) bpf_jit_uncharge_modmem(PAGE_SIZE); } bpf_map_area_free(st_map->uvalue); + btf_put(st_map->btf); bpf_map_area_free(st_map); } @@ -669,7 +670,8 @@ static void bpf_struct_ops_map_free(struct bpf_map *map) static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) { if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 || - (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id) + (attr->map_flags & ~(BPF_F_LINK | BPF_F_VTYPE_BTF_OBJ_FD)) || + !attr->btf_vmlinux_value_type_id) return -EINVAL; return 0; } @@ -681,15 +683,36 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) struct bpf_struct_ops_map *st_map; const struct btf_type *t, *vt; struct bpf_map *map; + struct btf *btf; int ret; - st_ops_desc = bpf_struct_ops_find_value(btf_vmlinux, attr->btf_vmlinux_value_type_id); - if (!st_ops_desc) - return ERR_PTR(-ENOTSUPP); + if (attr->map_flags & BPF_F_VTYPE_BTF_OBJ_FD) { + /* The map holds btf for its whole life time. */ + btf = btf_get_by_fd(attr->value_type_btf_obj_fd); + if (IS_ERR(btf)) + return ERR_CAST(btf); + if (!btf_is_module(btf)) { + btf_put(btf); + return ERR_PTR(-EINVAL); + } + } else { + btf = bpf_get_btf_vmlinux(); + if (IS_ERR(btf)) + return ERR_CAST(btf); + btf_get(btf); + } + + st_ops_desc = bpf_struct_ops_find_value(btf, attr->btf_vmlinux_value_type_id); + if (!st_ops_desc) { + ret = -ENOTSUPP; + goto errout; + } vt = st_ops_desc->value_type; - if (attr->value_size != vt->size) - return ERR_PTR(-EINVAL); + if (attr->value_size != vt->size) { + ret = -EINVAL; + goto errout; + } t = st_ops_desc->type; @@ -700,17 +723,17 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) (vt->size - sizeof(struct bpf_struct_ops_value)); st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE); - if (!st_map) - return ERR_PTR(-ENOMEM); + if (!st_map) { + ret = -ENOMEM; + goto errout; + } st_map->st_ops_desc = st_ops_desc; map = &st_map->map; ret = bpf_jit_charge_modmem(PAGE_SIZE); - if (ret) { - __bpf_struct_ops_map_free(map); - return ERR_PTR(ret); - } + if (ret) + goto errout_free; st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE); if (!st_map->image) { @@ -719,24 +742,30 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) * here. */ bpf_jit_uncharge_modmem(PAGE_SIZE); - __bpf_struct_ops_map_free(map); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto errout_free; } st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); st_map->links = bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *), NUMA_NO_NODE); if (!st_map->uvalue || !st_map->links) { - __bpf_struct_ops_map_free(map); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto errout_free; } - - st_map->btf = btf_vmlinux; + st_map->btf = btf; mutex_init(&st_map->lock); bpf_map_init_from_attr(map, attr); return map; + +errout_free: + __bpf_struct_ops_map_free(map); +errout: + btf_put(btf); + + return ERR_PTR(ret); } static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 55b458429705..f8124b3229e2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1123,7 +1123,7 @@ free_map_tab: return ret; } -#define BPF_MAP_CREATE_LAST_FIELD map_extra +#define BPF_MAP_CREATE_LAST_FIELD value_type_btf_obj_fd /* called via syscall */ static int map_create(union bpf_attr *attr) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2a0fd2ccdb11..6081512deb79 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20290,6 +20290,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) const struct btf_member *member; struct bpf_prog *prog = env->prog; u32 btf_id, member_idx; + struct btf *btf; const char *mname; if (!prog->gpl_compatible) { @@ -20297,8 +20298,10 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) return -EINVAL; } + btf = prog->aux->attach_btf ?: bpf_get_btf_vmlinux(); + btf_id = prog->aux->attach_btf_id; - st_ops_desc = bpf_struct_ops_find(btf_vmlinux, btf_id); + st_ops_desc = bpf_struct_ops_find(btf, btf_id); if (!st_ops_desc) { verbose(env, "attach_btf_id %u is not a supported struct\n", btf_id); @@ -20315,8 +20318,8 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) } member = &btf_type_member(t)[member_idx]; - mname = btf_name_by_offset(btf_vmlinux, member->name_off); - func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, + mname = btf_name_by_offset(btf, member->name_off); + func_proto = btf_type_resolve_func_ptr(btf, member->type, NULL); if (!func_proto) { verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a380047c86af..1fef6d5a1330 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1330,6 +1330,9 @@ enum { /* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ BPF_F_PATH_FD = (1U << 14), + +/* Flag for value_type_btf_obj_fd, the fd is available */ + BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15), }; /* Flags for BPF_PROG_QUERY. */ @@ -1403,6 +1406,11 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + + __s32 value_type_btf_obj_fd; /* fd pointing to a BTF + * type data for + * btf_vmlinux_value_type_id. + */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit v1.2.3 From 9e926acda0c2e21bca431a1818665ddcd6939755 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 19 Jan 2024 14:50:03 -0800 Subject: libbpf: Find correct module BTFs for struct_ops maps and progs. Locate the module BTFs for struct_ops maps and progs and pass them to the kernel. This ensures that the kernel correctly resolves type IDs from the appropriate module BTFs. For the map of a struct_ops object, the FD of the module BTF is set to bpf_map to keep a reference to the module BTF. The FD is passed to the kernel as value_type_btf_obj_fd when the struct_ops object is loaded. For a bpf_struct_ops prog, attach_btf_obj_fd of bpf_prog is the FD of a module BTF in the kernel. Signed-off-by: Kui-Feng Lee Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240119225005.668602-13-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/bpf.c | 4 +++- tools/lib/bpf/bpf.h | 4 +++- tools/lib/bpf/libbpf.c | 41 +++++++++++++++++++++++++++++++---------- tools/lib/bpf/libbpf_probes.c | 1 + 4 files changed, 38 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9dc9625651dc..3a35472a17c5 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -169,7 +169,8 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + const size_t attr_sz = offsetofend(union bpf_attr, + value_type_btf_obj_fd); union bpf_attr attr; int fd; @@ -191,6 +192,7 @@ int bpf_map_create(enum bpf_map_type map_type, attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); + attr.value_type_btf_obj_fd = OPTS_GET(opts, value_type_btf_obj_fd, -1); attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); attr.map_flags = OPTS_GET(opts, map_flags, 0); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index d0f53772bdc0..53b0bee053ad 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -51,8 +51,10 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; + __s32 value_type_btf_obj_fd; + size_t:0; }; -#define bpf_map_create_opts__last_field map_ifindex +#define bpf_map_create_opts__last_field value_type_btf_obj_fd LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b8b00da62907..0569b4973a4f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -528,6 +528,7 @@ struct bpf_map { struct bpf_map_def def; __u32 numa_node; __u32 btf_var_idx; + int mod_btf_fd; __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 btf_vmlinux_value_type_id; @@ -931,22 +932,29 @@ find_member_by_name(const struct btf *btf, const struct btf_type *t, return NULL; } +static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, + __u16 kind, struct btf **res_btf, + struct module_btf **res_mod_btf); + #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, const char *name, __u32 kind); static int -find_struct_ops_kern_types(const struct btf *btf, const char *tname, +find_struct_ops_kern_types(struct bpf_object *obj, const char *tname, + struct module_btf **mod_btf, const struct btf_type **type, __u32 *type_id, const struct btf_type **vtype, __u32 *vtype_id, const struct btf_member **data_member) { const struct btf_type *kern_type, *kern_vtype; const struct btf_member *kern_data_member; + struct btf *btf; __s32 kern_vtype_id, kern_type_id; __u32 i; - kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT, + &btf, mod_btf); if (kern_type_id < 0) { pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname); @@ -1000,14 +1008,16 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map) } /* Init the map's fields that depend on kern_btf */ -static int bpf_map__init_kern_struct_ops(struct bpf_map *map, - const struct btf *btf, - const struct btf *kern_btf) +static int bpf_map__init_kern_struct_ops(struct bpf_map *map) { const struct btf_member *member, *kern_member, *kern_data_member; const struct btf_type *type, *kern_type, *kern_vtype; __u32 i, kern_type_id, kern_vtype_id, kern_data_off; + struct bpf_object *obj = map->obj; + const struct btf *btf = obj->btf; struct bpf_struct_ops *st_ops; + const struct btf *kern_btf; + struct module_btf *mod_btf; void *data, *kern_data; const char *tname; int err; @@ -1015,16 +1025,19 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, st_ops = map->st_ops; type = st_ops->type; tname = st_ops->tname; - err = find_struct_ops_kern_types(kern_btf, tname, + err = find_struct_ops_kern_types(obj, tname, &mod_btf, &kern_type, &kern_type_id, &kern_vtype, &kern_vtype_id, &kern_data_member); if (err) return err; + kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux; + pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", map->name, st_ops->type_id, kern_type_id, kern_vtype_id); + map->mod_btf_fd = mod_btf ? mod_btf->fd : -1; map->def.value_size = kern_vtype->size; map->btf_vmlinux_value_type_id = kern_vtype_id; @@ -1100,6 +1113,8 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, return -ENOTSUP; } + if (mod_btf) + prog->attach_btf_obj_fd = mod_btf->fd; prog->attach_btf_id = kern_type_id; prog->expected_attach_type = kern_member_idx; @@ -1142,8 +1157,7 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) if (!bpf_map__is_struct_ops(map)) continue; - err = bpf_map__init_kern_struct_ops(map, obj->btf, - obj->btf_vmlinux); + err = bpf_map__init_kern_struct_ops(map); if (err) return err; } @@ -5162,8 +5176,13 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; - if (bpf_map__is_struct_ops(map)) + if (bpf_map__is_struct_ops(map)) { create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; + if (map->mod_btf_fd >= 0) { + create_attr.value_type_btf_obj_fd = map->mod_btf_fd; + create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; + } + } if (obj->btf && btf__fd(obj->btf) >= 0) { create_attr.btf_fd = btf__fd(obj->btf); @@ -9970,7 +9989,9 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac *btf_obj_fd = 0; *btf_type_id = 1; } else { - err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + err = find_kernel_btf_id(prog->obj, attach_name, + attach_type, btf_obj_fd, + btf_type_id); } if (err) { pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9c4db90b92b6..98373d126d9d 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -326,6 +326,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ opts.btf_vmlinux_value_type_id = 1; + opts.value_type_btf_obj_fd = -1; exp_err = -524; /* -ENOTSUPP */ break; case BPF_MAP_TYPE_BLOOM_FILTER: -- cgit v1.2.3 From 0253e0590e2dc46996534371d56b5297099aed4e Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 19 Jan 2024 14:50:05 -0800 Subject: selftests/bpf: test case for register_bpf_struct_ops(). Create a new struct_ops type called bpf_testmod_ops within the bpf_testmod module. When a struct_ops object is registered, the bpf_testmod module will invoke test_2 from the module. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240119225005.668602-15-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 66 +++++++++++++++++++ .../selftests/bpf/bpf_testmod/bpf_testmod.h | 5 ++ .../bpf/prog_tests/test_struct_ops_module.c | 75 ++++++++++++++++++++++ .../selftests/bpf/progs/struct_ops_module.c | 30 +++++++++ 4 files changed, 176 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_module.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index e7c9e1c7fde0..8befaf17d454 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ +#include #include #include #include @@ -521,11 +522,75 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) BTF_SET8_END(bpf_testmod_check_kfunc_ids) +static int bpf_testmod_ops_init(struct btf *btf) +{ + return 0; +} + +static bool bpf_testmod_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static int bpf_testmod_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { .owner = THIS_MODULE, .set = &bpf_testmod_check_kfunc_ids, }; +static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { + .is_valid_access = bpf_testmod_ops_is_valid_access, +}; + +static int bpf_dummy_reg(void *kdata) +{ + struct bpf_testmod_ops *ops = kdata; + int r; + + r = ops->test_2(4, 3); + + return 0; +} + +static void bpf_dummy_unreg(void *kdata) +{ +} + +static int bpf_testmod_test_1(void) +{ + return 0; +} + +static int bpf_testmod_test_2(int a, int b) +{ + return 0; +} + +static struct bpf_testmod_ops __bpf_testmod_ops = { + .test_1 = bpf_testmod_test_1, + .test_2 = bpf_testmod_test_2, +}; + +struct bpf_struct_ops bpf_bpf_testmod_ops = { + .verifier_ops = &bpf_testmod_verifier_ops, + .init = bpf_testmod_ops_init, + .init_member = bpf_testmod_ops_init_member, + .reg = bpf_dummy_reg, + .unreg = bpf_dummy_unreg, + .cfi_stubs = &__bpf_testmod_ops, + .name = "bpf_testmod_ops", + .owner = THIS_MODULE, +}; + extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) @@ -536,6 +601,7 @@ static int bpf_testmod_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); + ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index f32793efe095..ca5435751c79 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -28,4 +28,9 @@ struct bpf_iter_testmod_seq { int cnt; }; +struct bpf_testmod_ops { + int (*test_1)(void); + int (*test_2)(int a, int b); +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c new file mode 100644 index 000000000000..8d833f0c7580 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include + +#include "struct_ops_module.skel.h" + +static void check_map_info(struct bpf_map_info *info) +{ + struct bpf_btf_info btf_info; + char btf_name[256]; + u32 btf_info_len = sizeof(btf_info); + int err, fd; + + fd = bpf_btf_get_fd_by_id(info->btf_vmlinux_id); + if (!ASSERT_GE(fd, 0, "get_value_type_btf_obj_fd")) + return; + + memset(&btf_info, 0, sizeof(btf_info)); + btf_info.name = ptr_to_u64(btf_name); + btf_info.name_len = sizeof(btf_name); + err = bpf_btf_get_info_by_fd(fd, &btf_info, &btf_info_len); + if (!ASSERT_OK(err, "get_value_type_btf_obj_info")) + goto cleanup; + + if (!ASSERT_EQ(strcmp(btf_name, "bpf_testmod"), 0, "get_value_type_btf_obj_name")) + goto cleanup; + +cleanup: + close(fd); +} + +static void test_struct_ops_load(void) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + struct struct_ops_module *skel; + struct bpf_map_info info = {}; + struct bpf_link *link; + int err; + u32 len; + + skel = struct_ops_module__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "struct_ops_module_open")) + return; + + err = struct_ops_module__load(skel); + if (!ASSERT_OK(err, "struct_ops_module_load")) + goto cleanup; + + len = sizeof(info); + err = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.testmod_1), &info, + &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + ASSERT_OK_PTR(link, "attach_test_mod_1"); + + /* test_2() will be called from bpf_dummy_reg() in bpf_testmod.c */ + ASSERT_EQ(skel->bss->test_2_result, 7, "test_2_result"); + + bpf_link__destroy(link); + + check_map_info(&info); + +cleanup: + struct_ops_module__destroy(skel); +} + +void serial_test_struct_ops_module(void) +{ + if (test__start_subtest("test_struct_ops_load")) + test_struct_ops_load(); +} + diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c new file mode 100644 index 000000000000..e44ac55195ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +int test_2_result = 0; + +SEC("struct_ops/test_1") +int BPF_PROG(test_1) +{ + return 0xdeadbeef; +} + +SEC("struct_ops/test_2") +int BPF_PROG(test_2, int a, int b) +{ + test_2_result = a + b; + return a + b; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; + -- cgit v1.2.3 From 15b4f88dcc0a751f790bfea5ef9dcc6385c62236 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 23 Jan 2024 17:03:50 +0800 Subject: selftests/bpf: Move is_jit_enabled() into testing_helpers Currently, is_jit_enabled() is only used in test_progs, move it into testing_helpers so that it can be used in test_verifier. While at it, remove the second argument "0" of open() as Hou Tao suggested. Signed-off-by: Tiezhu Yang Signed-off-by: Andrii Nakryiko Acked-by: Hou Tao Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20240123090351.2207-2-yangtiezhu@loongson.cn --- tools/testing/selftests/bpf/test_progs.c | 18 ------------------ tools/testing/selftests/bpf/testing_helpers.c | 18 ++++++++++++++++++ tools/testing/selftests/bpf/testing_helpers.h | 1 + 3 files changed, 19 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 1b9387890148..808550986f30 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -547,24 +547,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) return bpf_map__fd(map); } -static bool is_jit_enabled(void) -{ - const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; - bool enabled = false; - int sysctl_fd; - - sysctl_fd = open(jit_sysctl, 0, O_RDONLY); - if (sysctl_fd != -1) { - char tmpc; - - if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) - enabled = (tmpc != '0'); - close(sysctl_fd); - } - - return enabled; -} - int compare_map_keys(int map1_fd, int map2_fd) { __u32 key, next_key; diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 106ef05586b8..a59e56d804ee 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -457,3 +457,21 @@ out_free_buf: *buf = NULL; return -1; } + +bool is_jit_enabled(void) +{ + const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; + bool enabled = false; + int sysctl_fd; + + sysctl_fd = open(jit_sysctl, O_RDONLY); + if (sysctl_fd != -1) { + char tmpc; + + if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) + enabled = (tmpc != '0'); + close(sysctl_fd); + } + + return enabled; +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index e099aa4da611..d14de81727e6 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -52,5 +52,6 @@ struct bpf_insn; */ int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt); int testing_prog_flags(void); +bool is_jit_enabled(void); #endif /* __TESTING_HELPERS_H */ -- cgit v1.2.3 From 0b50478fd8774f42721f4297293b711e17bc4b7b Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 23 Jan 2024 17:03:51 +0800 Subject: selftests/bpf: Skip callback tests if jit is disabled in test_verifier If CONFIG_BPF_JIT_ALWAYS_ON is not set and bpf_jit_enable is 0, there exist 6 failed tests. [root@linux bpf]# echo 0 > /proc/sys/net/core/bpf_jit_enable [root@linux bpf]# echo 0 > /proc/sys/kernel/unprivileged_bpf_disabled [root@linux bpf]# ./test_verifier | grep FAIL #106/p inline simple bpf_loop call FAIL #107/p don't inline bpf_loop call, flags non-zero FAIL #108/p don't inline bpf_loop call, callback non-constant FAIL #109/p bpf_loop_inline and a dead func FAIL #110/p bpf_loop_inline stack locations for loop vars FAIL #111/p inline bpf_loop call in a big program FAIL Summary: 768 PASSED, 15 SKIPPED, 6 FAILED The test log shows that callbacks are not allowed in non-JITed programs, interpreter doesn't support them yet, thus these tests should be skipped if jit is disabled. Add an explicit flag F_NEEDS_JIT_ENABLED to those tests to mark that they require JIT enabled in bpf_loop_inline.c, check the flag and jit_disabled at the beginning of do_test_single() to handle this case. With this patch: [root@linux bpf]# echo 0 > /proc/sys/net/core/bpf_jit_enable [root@linux bpf]# echo 0 > /proc/sys/kernel/unprivileged_bpf_disabled [root@linux bpf]# ./test_verifier | grep FAIL Summary: 768 PASSED, 21 SKIPPED, 0 FAILED Suggested-by: Andrii Nakryiko Signed-off-by: Tiezhu Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240123090351.2207-3-yangtiezhu@loongson.cn --- tools/testing/selftests/bpf/test_verifier.c | 11 +++++++++++ tools/testing/selftests/bpf/verifier/bpf_loop_inline.c | 6 ++++++ 2 files changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 1a09fc34d093..e1a1dfe8d7fa 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -67,6 +67,7 @@ #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) +#define F_NEEDS_JIT_ENABLED (1 << 2) /* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */ #define ADMIN_CAPS (1ULL << CAP_NET_ADMIN | \ @@ -74,6 +75,7 @@ 1ULL << CAP_BPF) #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled" static bool unpriv_disabled = false; +static bool jit_disabled; static int skips; static bool verbose = false; static int verif_log_level = 0; @@ -1524,6 +1526,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv, __u32 pflags; int i, err; + if ((test->flags & F_NEEDS_JIT_ENABLED) && jit_disabled) { + printf("SKIP (requires BPF JIT)"); + skips++; + sched_yield(); + return; + } + fd_prog = -1; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; @@ -1844,6 +1853,8 @@ int main(int argc, char **argv) return EXIT_FAILURE; } + jit_disabled = !is_jit_enabled(); + /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c index a535d41dc20d..59125b22ae39 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c +++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c @@ -57,6 +57,7 @@ .expected_insns = { PSEUDO_CALL_INSN() }, .unexpected_insns = { HELPER_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .runs = 0, .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } }, @@ -90,6 +91,7 @@ .expected_insns = { HELPER_CALL_INSN() }, .unexpected_insns = { PSEUDO_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .runs = 0, .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, @@ -127,6 +129,7 @@ .expected_insns = { HELPER_CALL_INSN() }, .unexpected_insns = { PSEUDO_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .runs = 0, .func_info = { @@ -165,6 +168,7 @@ .expected_insns = { PSEUDO_CALL_INSN() }, .unexpected_insns = { HELPER_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .runs = 0, .func_info = { @@ -235,6 +239,7 @@ }, .unexpected_insns = { HELPER_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .func_info = { { 0, MAIN_TYPE }, @@ -252,6 +257,7 @@ .unexpected_insns = { HELPER_CALL_INSN() }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, .func_info_cnt = 2, BTF_TYPES -- cgit v1.2.3 From d47b9f68d2899b390a3655f2365f332a63396adf Mon Sep 17 00:00:00 2001 From: Dima Tisnek Date: Sun, 21 Jan 2024 15:01:26 +0900 Subject: libbpf: Correct bpf_core_read.h comment wrt bpf_core_relo struct Past commit ([0]) removed the last vestiges of struct bpf_field_reloc, it's called struct bpf_core_relo now. [0] 28b93c64499a ("libbpf: Clean up and improve CO-RE reloc logging") Signed-off-by: Dima Tisnek Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20240121060126.15650-1-dimaqq@gmail.com --- tools/lib/bpf/bpf_core_read.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 7325a12692a3..5aec301e9585 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -268,7 +268,7 @@ enum bpf_enum_value_kind { * a relocation, which records BTF type ID describing root struct/union and an * accessor string which describes exact embedded field that was used to take * an address. See detailed description of this relocation format and - * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. + * semantics in comments to struct bpf_core_relo in include/uapi/linux/bpf.h. * * This relocation allows libbpf to adjust BPF instruction to use correct * actual field offset, based on target kernel BTF type that matches original -- cgit v1.2.3 From 177f1d083a19af58f4b1206d299ed73689249fd8 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 19 Jan 2024 22:05:17 -0800 Subject: selftests/bpf: Fix the flaky tc_redirect_dtime test BPF CI has been reporting the tc_redirect_dtime test failing from time to time: test_inet_dtime:PASS:setns src 0 nsec (network_helpers.c:253: errno: No route to host) Failed to connect to server close_netns:PASS:setns 0 nsec test_inet_dtime:FAIL:connect_to_fd unexpected connect_to_fd: actual -1 < expected 0 test_tcp_clear_dtime:PASS:tcp ip6 clear dtime ingress_fwdns_p100 0 nsec The connect_to_fd failure (EHOSTUNREACH) is from the test_tcp_clear_dtime() test and it is the very first IPv6 traffic after setting up all the links, addresses, and routes. The symptom is this first connect() is always slow. In my setup, it could take ~3s. After some tracing and tcpdump, the slowness is mostly spent in the neighbor solicitation in the "ns_fwd" namespace while the "ns_src" and "ns_dst" are fine. I forced the kernel to drop the neighbor solicitation messages. I can then reproduce EHOSTUNREACH. What actually happen could be: - the neighbor advertisement came back a little slow. - the "ns_fwd" namespace concluded a neighbor discovery failure and triggered the ndisc_error_report() => ip6_link_failure() => icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0) - the client's connect() reports EHOSTUNREACH after receiving the ICMPV6_DEST_UNREACH message. The neigh table of both "ns_src" and "ns_dst" namespace has already been manually populated but not the "ns_fwd" namespace. This patch fixes it by manually populating the neigh table also in the "ns_fwd" namespace. Although the namespace configuration part had been existed before the tc_redirect_dtime test, still Fixes-tagging the patch when the tc_redirect_dtime test was added since it is the only test hitting it so far. Fixes: c803475fd8dd ("bpf: selftests: test skb->tstamp in redirect_neigh") Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240120060518.3604920-1-martin.lau@linux.dev --- tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 518f143c5b0f..610887157fd8 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -188,6 +188,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; char src_fwd_addr[IFADDR_STR_LEN+1] = {}; + char src_addr[IFADDR_STR_LEN + 1] = {}; int err; if (result->dev_mode == MODE_VETH) { @@ -208,6 +209,9 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (get_ifaddr("src_fwd", src_fwd_addr)) goto fail; + if (get_ifaddr("src", src_addr)) + goto fail; + result->ifindex_src = if_nametoindex("src"); if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src")) goto fail; @@ -270,6 +274,13 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global"); SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global"); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr); + SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr); + SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST); + SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST); + } + close_netns(nstoken); /** setup in 'dst' namespace */ -- cgit v1.2.3 From ce6f6cffaeaa0a3bcdafcae7fe03c68c3afae631 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 19 Jan 2024 22:05:18 -0800 Subject: selftests/bpf: Wait for the netstamp_needed_key static key to be turned on After the previous patch that speeded up the test (by avoiding neigh discovery in IPv6), the BPF CI occasionally hits this error: rcv tstamp unexpected pkt rcv tstamp: actual 0 == expected 0 The test complains about the cmsg returned from the recvmsg() does not have the rcv timestamp. Setting skb->tstamp or not is controlled by a kernel static key "netstamp_needed_key". The static key is enabled whenever this is at least one sk with the SOCK_TIMESTAMP set. The test_redirect_dtime does use setsockopt() to turn on the SOCK_TIMESTAMP for the reading sk. In the kernel net_enable_timestamp() has a delay to enable the "netstamp_needed_key" when CONFIG_JUMP_LABEL is set. This potential delay is the likely reason for packet missing rcv timestamp occasionally. This patch is to create udp sockets with SOCK_TIMESTAMP set. It sends and receives some packets until the received packet has a rcv timestamp. It currently retries at most 5 times with 1s in between. This should be enough to wait for the "netstamp_needed_key". It then holds on to the socket and only closes it at the end of the test. This guarantees that the test has the "netstamp_needed_key" key turned on from the beginning. To simplify the udp sockets setup, they are sending/receiving packets in the same netns (ns_dst is used) and communicate over the "lo" dev. Hence, the patch enables the "lo" dev in the ns_dst. Fixes: c803475fd8dd ("bpf: selftests: test skb->tstamp in redirect_neigh") Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240120060518.3604920-2-martin.lau@linux.dev --- .../testing/selftests/bpf/prog_tests/tc_redirect.c | 79 ++++++++++++++++++++-- 1 file changed, 75 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 610887157fd8..dbe06aeaa2b2 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -291,6 +291,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS(fail, "ip addr add " IP4_DST "/32 dev dst"); SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad"); SYS(fail, "ip link set dev dst up"); + SYS(fail, "ip link set dev lo up"); SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global"); SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global"); @@ -468,7 +469,7 @@ static int set_forwarding(bool enable) return 0; } -static void rcv_tstamp(int fd, const char *expected, size_t s) +static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp) { struct __kernel_timespec pkt_ts = {}; char ctl[CMSG_SPACE(sizeof(pkt_ts))]; @@ -489,7 +490,7 @@ static void rcv_tstamp(int fd, const char *expected, size_t s) ret = recvmsg(fd, &msg, 0); if (!ASSERT_EQ(ret, s, "recvmsg")) - return; + return -1; ASSERT_STRNEQ(data, expected, s, "expected rcv data"); cmsg = CMSG_FIRSTHDR(&msg); @@ -498,6 +499,12 @@ static void rcv_tstamp(int fd, const char *expected, size_t s) memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts)); pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec; + if (tstamp) { + /* caller will check the tstamp itself */ + *tstamp = pkt_ns; + return 0; + } + ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp"); ret = clock_gettime(CLOCK_REALTIME, &now_ts); @@ -507,6 +514,60 @@ static void rcv_tstamp(int fd, const char *expected, size_t s) if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp")) ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC, "check rcv tstamp"); + return 0; +} + +static void rcv_tstamp(int fd, const char *expected, size_t s) +{ + __rcv_tstamp(fd, expected, s, NULL); +} + +static int wait_netstamp_needed_key(void) +{ + int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n; + char buf[] = "testing testing"; + struct nstoken *nstoken; + __u64 tstamp = 0; + + nstoken = open_netns(NS_DST); + if (!nstoken) + return -1; + + srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0); + if (!ASSERT_GE(srv_fd, 0, "start_server")) + goto done; + + err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + &opt, sizeof(opt)); + if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)")) + goto done; + + cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS); + if (!ASSERT_GE(cli_fd, 0, "connect_to_fd")) + goto done; + +again: + n = write(cli_fd, buf, sizeof(buf)); + if (!ASSERT_EQ(n, sizeof(buf), "send to server")) + goto done; + err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp); + if (!ASSERT_OK(err, "__rcv_tstamp")) + goto done; + if (!tstamp && nretries++ < 5) { + sleep(1); + printf("netstamp_needed_key retry#%d\n", nretries); + goto again; + } + +done: + if (!tstamp && srv_fd != -1) { + close(srv_fd); + srv_fd = -1; + } + if (cli_fd != -1) + close(cli_fd); + close_netns(nstoken); + return srv_fd; } static void snd_tstamp(int fd, char *b, size_t s) @@ -843,11 +904,20 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) { struct test_tc_dtime *skel; struct nstoken *nstoken; - int err; + int hold_tstamp_fd, err; + + /* Hold a sk with the SOCK_TIMESTAMP set to ensure there + * is no delay in the kernel net_enable_timestamp(). + * This ensures the following tests must have + * non zero rcv tstamp in the recvmsg(). + */ + hold_tstamp_fd = wait_netstamp_needed_key(); + if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key")) + return; skel = test_tc_dtime__open(); if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) - return; + goto done; skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; @@ -892,6 +962,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) done: test_tc_dtime__destroy(skel); + close(hold_tstamp_fd); } static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) -- cgit v1.2.3 From c9f115564561af63db662791e9a35fcf1dfefd2a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 24 Jan 2024 14:44:18 -0800 Subject: libbpf: Ensure undefined bpf_attr field stays 0 The commit 9e926acda0c2 ("libbpf: Find correct module BTFs for struct_ops maps and progs.") sets a newly added field (value_type_btf_obj_fd) to -1 in libbpf when the caller of the libbpf's bpf_map_create did not define this field by passing a NULL "opts" or passing in a "opts" that does not cover this new field. OPT_HAS(opts, field) is used to decide if the field is defined or not: ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field)) Once OPTS_HAS decided the field is not defined, that field should be set to 0. For this particular new field (value_type_btf_obj_fd), its corresponding map_flags "BPF_F_VTYPE_BTF_OBJ_FD" is not set. Thus, the kernel does not treat it as an fd field. Fixes: 9e926acda0c2 ("libbpf: Find correct module BTFs for struct_ops maps and progs.") Reported-by: Andrii Nakryiko Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240124224418.2905133-1-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 3a35472a17c5..b133acfe08fb 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -192,7 +192,7 @@ int bpf_map_create(enum bpf_map_type map_type, attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); - attr.value_type_btf_obj_fd = OPTS_GET(opts, value_type_btf_obj_fd, -1); + attr.value_type_btf_obj_fd = OPTS_GET(opts, value_type_btf_obj_fd, 0); attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); attr.map_flags = OPTS_GET(opts, map_flags, 0); -- cgit v1.2.3 From 35f96de04127d332a5c5e8a155d31f452f88c76d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:00 -0800 Subject: bpf: Introduce BPF token object Add new kind of BPF kernel object, BPF token. BPF token is meant to allow delegating privileged BPF functionality, like loading a BPF program or creating a BPF map, from privileged process to a *trusted* unprivileged process, all while having a good amount of control over which privileged operations could be performed using provided BPF token. This is achieved through mounting BPF FS instance with extra delegation mount options, which determine what operations are delegatable, and also constraining it to the owning user namespace (as mentioned in the previous patch). BPF token itself is just a derivative from BPF FS and can be created through a new bpf() syscall command, BPF_TOKEN_CREATE, which accepts BPF FS FD, which can be attained through open() API by opening BPF FS mount point. Currently, BPF token "inherits" delegated command, map types, prog type, and attach type bit sets from BPF FS as is. In the future, having an BPF token as a separate object with its own FD, we can allow to further restrict BPF token's allowable set of things either at the creation time or after the fact, allowing the process to guard itself further from unintentionally trying to load undesired kind of BPF programs. But for now we keep things simple and just copy bit sets as is. When BPF token is created from BPF FS mount, we take reference to the BPF super block's owning user namespace, and then use that namespace for checking all the {CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN} capabilities that are normally only checked against init userns (using capable()), but now we check them using ns_capable() instead (if BPF token is provided). See bpf_token_capable() for details. Such setup means that BPF token in itself is not sufficient to grant BPF functionality. User namespaced process has to *also* have necessary combination of capabilities inside that user namespace. So while previously CAP_BPF was useless when granted within user namespace, now it gains a meaning and allows container managers and sys admins to have a flexible control over which processes can and need to use BPF functionality within the user namespace (i.e., container in practice). And BPF FS delegation mount options and derived BPF tokens serve as a per-container "flag" to grant overall ability to use bpf() (plus further restrict on which parts of bpf() syscalls are treated as namespaced). Note also, BPF_TOKEN_CREATE command itself requires ns_capable(CAP_BPF) within the BPF FS owning user namespace, rounding up the ns_capable() story of BPF token. Also creating BPF token in init user namespace is currently not supported, given BPF token doesn't have any effect in init user namespace anyways. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Christian Brauner Link: https://lore.kernel.org/bpf/20240124022127.2379740-4-andrii@kernel.org --- include/linux/bpf.h | 41 ++++++++ include/uapi/linux/bpf.h | 37 +++++++ kernel/bpf/Makefile | 2 +- kernel/bpf/inode.c | 12 ++- kernel/bpf/syscall.c | 17 ++++ kernel/bpf/token.c | 217 +++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 37 +++++++ 7 files changed, 357 insertions(+), 6 deletions(-) create mode 100644 kernel/bpf/token.c (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 28374cec49df..d9ff7ce547b4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -52,6 +52,10 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -1621,6 +1625,13 @@ struct bpf_mount_opts { u64 delegate_attachs; }; +struct bpf_token { + struct work_struct work; + atomic64_t refcnt; + struct user_namespace *userns; + u64 allowed_cmds; +}; + struct bpf_struct_ops_value; struct btf_member; @@ -2109,6 +2120,7 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2243,6 +2255,8 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; +bool bpf_token_capable(const struct bpf_token *token, int cap); + static inline bool bpf_allow_ptr_leaks(void) { return perfmon_capable(); @@ -2277,8 +2291,17 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); +void bpf_token_inc(struct bpf_token *token); +void bpf_token_put(struct bpf_token *token); +int bpf_token_create(union bpf_attr *attr); +struct bpf_token *bpf_token_get_from_fd(u32 ufd); + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); +struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, + umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2638,6 +2661,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } +static inline bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +static inline void bpf_token_inc(struct bpf_token *token) +{ +} + +static inline void bpf_token_put(struct bpf_token *token) +{ +} + +static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void __dev_flush(void) { } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1fef6d5a1330..b9dc0cca172c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -1722,6 +1754,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index f526b7573e97..4ce95acfcaa7 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 70b748f6228c..565be1f3f1ea 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -99,9 +99,9 @@ static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_link_iops = { }; -static struct inode *bpf_get_inode(struct super_block *sb, - const struct inode *dir, - umode_t mode) +struct inode *bpf_get_inode(struct super_block *sb, + const struct inode *dir, + umode_t mode) { struct inode *inode; @@ -603,6 +603,7 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) struct inode *inode = d_inode(root); umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX; struct bpf_mount_opts *opts = root->d_sb->s_fs_info; + u64 mask; if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", @@ -613,7 +614,8 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) if (mode != S_IRWXUGO) seq_printf(m, ",mode=%o", mode); - if (opts->delegate_cmds == ~0ULL) + mask = (1ULL << __MAX_BPF_CMD) - 1; + if ((opts->delegate_cmds & mask) == mask) seq_printf(m, ",delegate_cmds=any"); else if (opts->delegate_cmds) seq_printf(m, ",delegate_cmds=0x%llx", opts->delegate_cmds); @@ -646,7 +648,7 @@ static void bpf_free_inode(struct inode *inode) free_inode_nonrcu(inode); } -static const struct super_operations bpf_super_ops = { +const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d5f1edee2d50..3ecba592b8ed 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5426,6 +5426,20 @@ out_prog_put: return ret; } +#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd + +static int token_create(union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_TOKEN_CREATE)) + return -EINVAL; + + /* no flags are supported yet */ + if (attr->token_create.flags) + return -EINVAL; + + return bpf_token_create(attr); +} + static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; @@ -5559,6 +5573,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) case BPF_PROG_BIND_MAP: err = bpf_prog_bind_map(&attr); break; + case BPF_TOKEN_CREATE: + err = token_create(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c new file mode 100644 index 000000000000..bdb6fe697568 --- /dev/null +++ b/kernel/bpf/token.c @@ -0,0 +1,217 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool bpf_ns_capable(struct user_namespace *ns, int cap) +{ + return ns_capable(ns, cap) || (cap != CAP_SYS_ADMIN && ns_capable(ns, CAP_SYS_ADMIN)); +} + +bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + struct user_namespace *userns; + + /* BPF token allows ns_capable() level of capabilities */ + userns = token ? token->userns : &init_user_ns; + if (!bpf_ns_capable(userns, cap)) + return false; + return true; +} + +void bpf_token_inc(struct bpf_token *token) +{ + atomic64_inc(&token->refcnt); +} + +static void bpf_token_free(struct bpf_token *token) +{ + put_user_ns(token->userns); + kfree(token); +} + +static void bpf_token_put_deferred(struct work_struct *work) +{ + struct bpf_token *token = container_of(work, struct bpf_token, work); + + bpf_token_free(token); +} + +void bpf_token_put(struct bpf_token *token) +{ + if (!token) + return; + + if (!atomic64_dec_and_test(&token->refcnt)) + return; + + INIT_WORK(&token->work, bpf_token_put_deferred); + schedule_work(&token->work); +} + +static int bpf_token_release(struct inode *inode, struct file *filp) +{ + struct bpf_token *token = filp->private_data; + + bpf_token_put(token); + return 0; +} + +static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) +{ + struct bpf_token *token = filp->private_data; + u64 mask; + + BUILD_BUG_ON(__MAX_BPF_CMD >= 64); + mask = (1ULL << __MAX_BPF_CMD) - 1; + if ((token->allowed_cmds & mask) == mask) + seq_printf(m, "allowed_cmds:\tany\n"); + else + seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); +} + +#define BPF_TOKEN_INODE_NAME "bpf-token" + +static const struct inode_operations bpf_token_iops = { }; + +static const struct file_operations bpf_token_fops = { + .release = bpf_token_release, + .show_fdinfo = bpf_token_show_fdinfo, +}; + +int bpf_token_create(union bpf_attr *attr) +{ + struct bpf_mount_opts *mnt_opts; + struct bpf_token *token = NULL; + struct user_namespace *userns; + struct inode *inode; + struct file *file; + struct path path; + struct fd f; + umode_t mode; + int err, fd; + + f = fdget(attr->token_create.bpffs_fd); + if (!f.file) + return -EBADF; + + path = f.file->f_path; + path_get(&path); + fdput(f); + + if (path.dentry != path.mnt->mnt_sb->s_root) { + err = -EINVAL; + goto out_path; + } + if (path.mnt->mnt_sb->s_op != &bpf_super_ops) { + err = -EINVAL; + goto out_path; + } + err = path_permission(&path, MAY_ACCESS); + if (err) + goto out_path; + + userns = path.dentry->d_sb->s_user_ns; + /* + * Enforce that creators of BPF tokens are in the same user + * namespace as the BPF FS instance. This makes reasoning about + * permissions a lot easier and we can always relax this later. + */ + if (current_user_ns() != userns) { + err = -EPERM; + goto out_path; + } + if (!ns_capable(userns, CAP_BPF)) { + err = -EPERM; + goto out_path; + } + + /* Creating BPF token in init_user_ns doesn't make much sense. */ + if (current_user_ns() == &init_user_ns) { + err = -EOPNOTSUPP; + goto out_path; + } + + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); + inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_path; + } + + inode->i_op = &bpf_token_iops; + inode->i_fop = &bpf_token_fops; + clear_nlink(inode); /* make sure it is unlinked */ + + file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops); + if (IS_ERR(file)) { + iput(inode); + err = PTR_ERR(file); + goto out_path; + } + + token = kzalloc(sizeof(*token), GFP_USER); + if (!token) { + err = -ENOMEM; + goto out_file; + } + + atomic64_set(&token->refcnt, 1); + + /* remember bpffs owning userns for future ns_capable() checks */ + token->userns = get_user_ns(userns); + + mnt_opts = path.dentry->d_sb->s_fs_info; + token->allowed_cmds = mnt_opts->delegate_cmds; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + err = fd; + goto out_token; + } + + file->private_data = token; + fd_install(fd, file); + + path_put(&path); + return fd; + +out_token: + bpf_token_free(token); +out_file: + fput(file); +out_path: + path_put(&path); + return err; +} + +struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + struct fd f = fdget(ufd); + struct bpf_token *token; + + if (!f.file) + return ERR_PTR(-EBADF); + if (f.file->f_op != &bpf_token_fops) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + token = f.file->private_data; + bpf_token_inc(token); + fdput(f); + + return token; +} + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +{ + if (!token) + return false; + return token->allowed_cmds & (1ULL << cmd); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1fef6d5a1330..b9dc0cca172c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -1722,6 +1754,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From a177fc2bf6fd83704854feaf7aae926b1df4f0b9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:01 -0800 Subject: bpf: Add BPF token support to BPF_MAP_CREATE command Allow providing token_fd for BPF_MAP_CREATE command to allow controlled BPF map creation from unprivileged process through delegated BPF token. New BPF_F_TOKEN_FD flag is added to specify together with BPF token FD for BPF_MAP_CREATE command. Wire through a set of allowed BPF map types to BPF token, derived from BPF FS at BPF token creation time. This, in combination with allowed_cmds allows to create a narrowly-focused BPF token (controlled by privileged agent) with a restrictive set of BPF maps that application can attempt to create. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-5-andrii@kernel.org --- include/linux/bpf.h | 2 + include/uapi/linux/bpf.h | 8 +++ kernel/bpf/inode.c | 3 +- kernel/bpf/syscall.c | 59 +++++++++++++++++----- kernel/bpf/token.c | 16 ++++++ tools/include/uapi/linux/bpf.h | 8 +++ .../selftests/bpf/prog_tests/libbpf_probes.c | 2 + .../testing/selftests/bpf/prog_tests/libbpf_str.c | 3 ++ 8 files changed, 86 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d9ff7ce547b4..8252452d0c4d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1630,6 +1630,7 @@ struct bpf_token { atomic64_t refcnt; struct user_namespace *userns; u64 allowed_cmds; + u64 allowed_maps; }; struct bpf_struct_ops_value; @@ -2297,6 +2298,7 @@ int bpf_token_create(union bpf_attr *attr); struct bpf_token *bpf_token_get_from_fd(u32 ufd); bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b9dc0cca172c..c78cab8b462d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -983,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1365,6 +1366,9 @@ enum { /* Flag for value_type_btf_obj_fd, the fd is available */ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15), + +/* BPF token FD is passed in a corresponding command's token_fd field */ + BPF_F_TOKEN_FD = (1U << 16), }; /* Flags for BPF_PROG_QUERY. */ @@ -1443,6 +1447,10 @@ union bpf_attr { * type data for * btf_vmlinux_value_type_id. */ + /* BPF token FD to use with BPF_MAP_CREATE operation. + * If provided, map_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 565be1f3f1ea..034b7e4d8f19 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -620,7 +620,8 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) else if (opts->delegate_cmds) seq_printf(m, ",delegate_cmds=0x%llx", opts->delegate_cmds); - if (opts->delegate_maps == ~0ULL) + mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; + if ((opts->delegate_maps & mask) == mask) seq_printf(m, ",delegate_maps=any"); else if (opts->delegate_maps) seq_printf(m, ",delegate_maps=0x%llx", opts->delegate_maps); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3ecba592b8ed..b13a4bdcd3a0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1011,8 +1011,8 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } -static int map_check_btf(struct bpf_map *map, const struct btf *btf, - u32 btf_key_id, u32 btf_value_id) +static int map_check_btf(struct bpf_map *map, struct bpf_token *token, + const struct btf *btf, u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; u32 key_size, value_size; @@ -1040,7 +1040,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, if (!IS_ERR_OR_NULL(map->record)) { int i; - if (!bpf_capable()) { + if (!bpf_token_capable(token, CAP_BPF)) { ret = -EPERM; goto free_map_tab; } @@ -1128,14 +1128,16 @@ static bool bpf_net_capable(void) return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); } -#define BPF_MAP_CREATE_LAST_FIELD value_type_btf_obj_fd +#define BPF_MAP_CREATE_LAST_FIELD map_token_fd /* called via syscall */ static int map_create(union bpf_attr *attr) { const struct bpf_map_ops *ops; + struct bpf_token *token = NULL; int numa_node = bpf_map_attr_numa_node(attr); u32 map_type = attr->map_type; struct bpf_map *map; + bool token_flag; int f_flags; int err; @@ -1143,6 +1145,12 @@ static int map_create(union bpf_attr *attr) if (err) return -EINVAL; + /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it + * to avoid per-map type checks tripping on unknown flag + */ + token_flag = attr->map_flags & BPF_F_TOKEN_FD; + attr->map_flags &= ~BPF_F_TOKEN_FD; + if (attr->btf_vmlinux_value_type_id) { if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || attr->btf_key_type_id || attr->btf_value_type_id) @@ -1183,14 +1191,32 @@ static int map_create(union bpf_attr *attr) if (!ops->map_mem_usage) return -EINVAL; + if (token_flag) { + token = bpf_token_get_from_fd(attr->map_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + + /* if current token doesn't grant map creation permissions, + * then we can't use this token, so ignore it and rely on + * system-wide capabilities checks + */ + if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || + !bpf_token_allow_map_type(token, attr->map_type)) { + bpf_token_put(token); + token = NULL; + } + } + + err = -EPERM; + /* Intent here is for unprivileged_bpf_disabled to block BPF map * creation for unprivileged users; other actions depend * on fd availability and access to bpffs, so are dependent on * object creation success. Even with unprivileged BPF disabled, * capability checks are still carried out. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) - return -EPERM; + if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) + goto put_token; /* check privileged map type permissions */ switch (map_type) { @@ -1223,25 +1249,27 @@ static int map_create(union bpf_attr *attr) case BPF_MAP_TYPE_LRU_PERCPU_HASH: case BPF_MAP_TYPE_STRUCT_OPS: case BPF_MAP_TYPE_CPUMAP: - if (!bpf_capable()) - return -EPERM; + if (!bpf_token_capable(token, CAP_BPF)) + goto put_token; break; case BPF_MAP_TYPE_SOCKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP_HASH: case BPF_MAP_TYPE_XSKMAP: - if (!bpf_net_capable()) - return -EPERM; + if (!bpf_token_capable(token, CAP_NET_ADMIN)) + goto put_token; break; default: WARN(1, "unsupported map type %d", map_type); - return -EPERM; + goto put_token; } map = ops->map_alloc(attr); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto put_token; + } map->ops = ops; map->map_type = map_type; @@ -1278,7 +1306,7 @@ static int map_create(union bpf_attr *attr) map->btf = btf; if (attr->btf_value_type_id) { - err = map_check_btf(map, btf, attr->btf_key_type_id, + err = map_check_btf(map, token, btf, attr->btf_key_type_id, attr->btf_value_type_id); if (err) goto free_map; @@ -1299,6 +1327,7 @@ static int map_create(union bpf_attr *attr) goto free_map_sec; bpf_map_save_memcg(map); + bpf_token_put(token); err = bpf_map_new_fd(map, f_flags); if (err < 0) { @@ -1319,6 +1348,8 @@ free_map_sec: free_map: btf_put(map->btf); map->ops->map_free(map); +put_token: + bpf_token_put(token); return err; } diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c index bdb6fe697568..bc86be4ca567 100644 --- a/kernel/bpf/token.c +++ b/kernel/bpf/token.c @@ -73,6 +73,13 @@ static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) seq_printf(m, "allowed_cmds:\tany\n"); else seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); + + BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64); + mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; + if ((token->allowed_maps & mask) == mask) + seq_printf(m, "allowed_maps:\tany\n"); + else + seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); } #define BPF_TOKEN_INODE_NAME "bpf-token" @@ -168,6 +175,7 @@ int bpf_token_create(union bpf_attr *attr) mnt_opts = path.dentry->d_sb->s_fs_info; token->allowed_cmds = mnt_opts->delegate_cmds; + token->allowed_maps = mnt_opts->delegate_maps; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { @@ -215,3 +223,11 @@ bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) return false; return token->allowed_cmds & (1ULL << cmd); } + +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type) +{ + if (!token || type >= __MAX_BPF_MAP_TYPE) + return false; + + return token->allowed_maps & (1ULL << type); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b9dc0cca172c..c78cab8b462d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -983,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1365,6 +1366,9 @@ enum { /* Flag for value_type_btf_obj_fd, the fd is available */ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15), + +/* BPF token FD is passed in a corresponding command's token_fd field */ + BPF_F_TOKEN_FD = (1U << 16), }; /* Flags for BPF_PROG_QUERY. */ @@ -1443,6 +1447,10 @@ union bpf_attr { * type data for * btf_vmlinux_value_type_id. */ + /* BPF token FD to use with BPF_MAP_CREATE operation. + * If provided, map_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 9f766ddd946a..573249a2814d 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -68,6 +68,8 @@ void test_libbpf_probe_map_types(void) if (map_type == BPF_MAP_TYPE_UNSPEC) continue; + if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0) + continue; if (!test__start_subtest(map_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index eb34d612d6f8..1f328c0d8aff 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -132,6 +132,9 @@ static void test_libbpf_bpf_map_type_str(void) const char *map_type_str; char buf[256]; + if (map_type == __MAX_BPF_MAP_TYPE) + continue; + map_type_name = btf__str_by_offset(btf, e->name_off); map_type_str = libbpf_bpf_map_type_str(map_type); ASSERT_OK_PTR(map_type_str, map_type_name); -- cgit v1.2.3 From 9ea7c4bf17e39d463eb4782f948f401d9764b1b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:02 -0800 Subject: bpf: Add BPF token support to BPF_BTF_LOAD command Accept BPF token FD in BPF_BTF_LOAD command to allow BTF data loading through delegated BPF token. BPF_F_TOKEN_FD flag has to be specified when passing BPF token FD. Given BPF_BTF_LOAD command didn't have flags field before, we also add btf_flags field. BTF loading is a pretty straightforward operation, so as long as BPF token is created with allow_cmds granting BPF_BTF_LOAD command, kernel proceeds to parsing BTF data and creating BTF object. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-6-andrii@kernel.org --- include/uapi/linux/bpf.h | 5 +++++ kernel/bpf/syscall.c | 23 +++++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 5 +++++ 3 files changed, 31 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c78cab8b462d..cb2c888e3bb4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1632,6 +1632,11 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; + __u32 btf_flags; + /* BPF token FD to use with BPF_BTF_LOAD operation. + * If provided, btf_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 btf_token_fd; }; struct { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b13a4bdcd3a0..45b3a55896eb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4831,15 +4831,34 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } -#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size +#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { + struct bpf_token *token = NULL; + if (CHECK_ATTR(BPF_BTF_LOAD)) return -EINVAL; - if (!bpf_capable()) + if (attr->btf_flags & ~BPF_F_TOKEN_FD) + return -EINVAL; + + if (attr->btf_flags & BPF_F_TOKEN_FD) { + token = bpf_token_get_from_fd(attr->btf_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { + bpf_token_put(token); + token = NULL; + } + } + + if (!bpf_token_capable(token, CAP_BPF)) { + bpf_token_put(token); return -EPERM; + } + + bpf_token_put(token); return btf_new_fd(attr, uattr, uattr_size); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c78cab8b462d..cb2c888e3bb4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1632,6 +1632,11 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; + __u32 btf_flags; + /* BPF token FD to use with BPF_BTF_LOAD operation. + * If provided, btf_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 btf_token_fd; }; struct { -- cgit v1.2.3 From caf8f28e036c4ba1e823355da6c0c01c39e70ab9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:03 -0800 Subject: bpf: Add BPF token support to BPF_PROG_LOAD command Add basic support of BPF token to BPF_PROG_LOAD. BPF_F_TOKEN_FD flag should be set in prog_flags field when providing prog_token_fd. Wire through a set of allowed BPF program types and attach types, derived from BPF FS at BPF token creation time. Then make sure we perform bpf_token_capable() checks everywhere where it's relevant. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-7-andrii@kernel.org --- include/linux/bpf.h | 6 ++ include/uapi/linux/bpf.h | 5 ++ kernel/bpf/core.c | 1 + kernel/bpf/inode.c | 6 +- kernel/bpf/syscall.c | 90 ++++++++++++++++------ kernel/bpf/token.c | 27 +++++++ tools/include/uapi/linux/bpf.h | 5 ++ .../selftests/bpf/prog_tests/libbpf_probes.c | 2 + .../testing/selftests/bpf/prog_tests/libbpf_str.c | 3 + 9 files changed, 118 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8252452d0c4d..d0bf37e3f166 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1489,6 +1489,7 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1631,6 +1632,8 @@ struct bpf_token { struct user_namespace *userns; u64 allowed_cmds; u64 allowed_maps; + u64 allowed_progs; + u64 allowed_attachs; }; struct bpf_struct_ops_value; @@ -2299,6 +2302,9 @@ struct bpf_token *bpf_token_get_from_fd(u32 ufd); bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type); int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cb2c888e3bb4..d96708380e52 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1028,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1520,6 +1521,10 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; + /* BPF token FD to use with BPF_PROG_LOAD operation. + * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index fbb1d95a9b44..00dccba29769 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2779,6 +2779,7 @@ void bpf_prog_free(struct bpf_prog *fp) if (aux->dst_prog) bpf_prog_put(aux->dst_prog); + bpf_token_put(aux->token); INIT_WORK(&aux->work, bpf_prog_free_deferred); schedule_work(&aux->work); } diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 034b7e4d8f19..5fb10da5717f 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -626,12 +626,14 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) else if (opts->delegate_maps) seq_printf(m, ",delegate_maps=0x%llx", opts->delegate_maps); - if (opts->delegate_progs == ~0ULL) + mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; + if ((opts->delegate_progs & mask) == mask) seq_printf(m, ",delegate_progs=any"); else if (opts->delegate_progs) seq_printf(m, ",delegate_progs=0x%llx", opts->delegate_progs); - if (opts->delegate_attachs == ~0ULL) + mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; + if ((opts->delegate_attachs & mask) == mask) seq_printf(m, ",delegate_attachs=any"); else if (opts->delegate_attachs) seq_printf(m, ",delegate_attachs=0x%llx", opts->delegate_attachs); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 45b3a55896eb..61b4bf4cc287 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2626,13 +2626,15 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD log_true_size +#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; struct btf *attach_btf = NULL; + struct bpf_token *token = NULL; + bool bpf_cap; int err; char license[128]; @@ -2646,13 +2648,35 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_TEST_RND_HI32 | BPF_F_XDP_HAS_FRAGS | BPF_F_XDP_DEV_BOUND_ONLY | - BPF_F_TEST_REG_INVARIANTS)) + BPF_F_TEST_REG_INVARIANTS | + BPF_F_TOKEN_FD)) return -EINVAL; + bpf_prog_load_fixup_attach_type(attr); + + if (attr->prog_flags & BPF_F_TOKEN_FD) { + token = bpf_token_get_from_fd(attr->prog_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + /* if current token doesn't grant prog loading permissions, + * then we can't use this token, so ignore it and rely on + * system-wide capabilities checks + */ + if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || + !bpf_token_allow_prog_type(token, attr->prog_type, + attr->expected_attach_type)) { + bpf_token_put(token); + token = NULL; + } + } + + bpf_cap = bpf_token_capable(token, CAP_BPF); + err = -EPERM; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && - !bpf_capable()) - return -EPERM; + !bpf_cap) + goto put_token; /* Intent here is for unprivileged_bpf_disabled to block BPF program * creation for unprivileged users; other actions depend @@ -2661,21 +2685,23 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) * capability checks are still carried out for these * and other operations. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) - return -EPERM; + if (sysctl_unprivileged_bpf_disabled && !bpf_cap) + goto put_token; if (attr->insn_cnt == 0 || - attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) - return -E2BIG; + attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { + err = -E2BIG; + goto put_token; + } if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && - !bpf_capable()) - return -EPERM; + !bpf_cap) + goto put_token; - if (is_net_admin_prog_type(type) && !bpf_net_capable()) - return -EPERM; - if (is_perfmon_prog_type(type) && !perfmon_capable()) - return -EPERM; + if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) + goto put_token; + if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) + goto put_token; /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog * or btf, we need to check which one it is @@ -2685,27 +2711,33 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (IS_ERR(dst_prog)) { dst_prog = NULL; attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); - if (IS_ERR(attach_btf)) - return -EINVAL; + if (IS_ERR(attach_btf)) { + err = -EINVAL; + goto put_token; + } if (!btf_is_kernel(attach_btf)) { /* attaching through specifying bpf_prog's BTF * objects directly might be supported eventually */ btf_put(attach_btf); - return -ENOTSUPP; + err = -ENOTSUPP; + goto put_token; } } } else if (attr->attach_btf_id) { /* fall back to vmlinux BTF, if BTF type ID is specified */ attach_btf = bpf_get_btf_vmlinux(); - if (IS_ERR(attach_btf)) - return PTR_ERR(attach_btf); - if (!attach_btf) - return -EINVAL; + if (IS_ERR(attach_btf)) { + err = PTR_ERR(attach_btf); + goto put_token; + } + if (!attach_btf) { + err = -EINVAL; + goto put_token; + } btf_get(attach_btf); } - bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attach_btf, attr->attach_btf_id, dst_prog)) { @@ -2713,7 +2745,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - return -EINVAL; + err = -EINVAL; + goto put_token; } /* plain bpf_prog allocation */ @@ -2723,7 +2756,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - return -ENOMEM; + err = -EINVAL; + goto put_token; } prog->expected_attach_type = attr->expected_attach_type; @@ -2734,6 +2768,10 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; + /* move token into prog->aux, reuse taken refcnt */ + prog->aux->token = token; + token = NULL; + err = security_bpf_prog_alloc(prog->aux); if (err) goto free_prog; @@ -2851,6 +2889,8 @@ free_prog: if (prog->aux->attach_btf) btf_put(prog->aux->attach_btf); bpf_prog_free(prog); +put_token: + bpf_token_put(token); return err; } @@ -3858,7 +3898,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, case BPF_PROG_TYPE_SK_LOOKUP: return attach_type == prog->expected_attach_type ? 0 : -EINVAL; case BPF_PROG_TYPE_CGROUP_SKB: - if (!bpf_net_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) /* cg-skb progs can be loaded by unpriv user. * check permissions at attach time. */ diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c index bc86be4ca567..c13c73788d8c 100644 --- a/kernel/bpf/token.c +++ b/kernel/bpf/token.c @@ -80,6 +80,20 @@ static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) seq_printf(m, "allowed_maps:\tany\n"); else seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); + + BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64); + mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; + if ((token->allowed_progs & mask) == mask) + seq_printf(m, "allowed_progs:\tany\n"); + else + seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs); + + BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64); + mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; + if ((token->allowed_attachs & mask) == mask) + seq_printf(m, "allowed_attachs:\tany\n"); + else + seq_printf(m, "allowed_attachs:\t0x%llx\n", token->allowed_attachs); } #define BPF_TOKEN_INODE_NAME "bpf-token" @@ -176,6 +190,8 @@ int bpf_token_create(union bpf_attr *attr) mnt_opts = path.dentry->d_sb->s_fs_info; token->allowed_cmds = mnt_opts->delegate_cmds; token->allowed_maps = mnt_opts->delegate_maps; + token->allowed_progs = mnt_opts->delegate_progs; + token->allowed_attachs = mnt_opts->delegate_attachs; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { @@ -231,3 +247,14 @@ bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type t return token->allowed_maps & (1ULL << type); } + +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type) +{ + if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE) + return false; + + return (token->allowed_progs & (1ULL << prog_type)) && + (token->allowed_attachs & (1ULL << attach_type)); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index cb2c888e3bb4..d96708380e52 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1028,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1520,6 +1521,10 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; + /* BPF token FD to use with BPF_PROG_LOAD operation. + * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 573249a2814d..4ed46ed58a7b 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -30,6 +30,8 @@ void test_libbpf_probe_prog_types(void) if (prog_type == BPF_PROG_TYPE_UNSPEC) continue; + if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0) + continue; if (!test__start_subtest(prog_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 1f328c0d8aff..62ea855ec4d0 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -189,6 +189,9 @@ static void test_libbpf_bpf_prog_type_str(void) const char *prog_type_str; char buf[256]; + if (prog_type == __MAX_BPF_PROG_TYPE) + continue; + prog_type_name = btf__str_by_offset(btf, e->name_off); prog_type_str = libbpf_bpf_prog_type_str(prog_type); ASSERT_OK_PTR(prog_type_str, prog_type_name); -- cgit v1.2.3 From 639ecd7d6247c48a0175f5b458b648f5d4b6dc34 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:09 -0800 Subject: libbpf: Add bpf_token_create() API Add low-level wrapper API for BPF_TOKEN_CREATE command in bpf() syscall. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-13-andrii@kernel.org --- tools/lib/bpf/bpf.c | 17 +++++++++++++++++ tools/lib/bpf/bpf.h | 24 ++++++++++++++++++++++++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 42 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index b133acfe08fb..54978198147c 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1289,3 +1289,20 @@ int bpf_prog_bind_map(int prog_fd, int map_fd, ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } + +int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, token_create); + union bpf_attr attr; + int fd; + + if (!OPTS_VALID(opts, bpf_token_create_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.token_create.bpffs_fd = bpffs_fd; + attr.token_create.flags = OPTS_GET(opts, flags, 0); + + fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); + return libbpf_err_errno(fd); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 53b0bee053ad..0a9f456698fe 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -642,6 +642,30 @@ struct bpf_test_run_opts { LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts); +struct bpf_token_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; + size_t :0; +}; +#define bpf_token_create_opts__last_field flags + +/** + * @brief **bpf_token_create()** creates a new instance of BPF token derived + * from specified BPF FS mount point. + * + * BPF token created with this API can be passed to bpf() syscall for + * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc. + * + * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token + * instance. + * @param opts optional BPF token creation options, can be NULL + * + * @return BPF token FD > 0, on success; negative error code, otherwise (errno + * is also set to the error code) + */ +LIBBPF_API int bpf_token_create(int bpffs_fd, + struct bpf_token_create_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 91c5aef7dae7..d9e1f57534fa 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -411,4 +411,5 @@ LIBBPF_1.3.0 { } LIBBPF_1.2.0; LIBBPF_1.4.0 { + bpf_token_create; } LIBBPF_1.3.0; -- cgit v1.2.3 From 364f848375af311150210a1ad3c5bcb800b65b48 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:10 -0800 Subject: libbpf: Add BPF token support to bpf_map_create() API Add ability to provide token_fd for BPF_MAP_CREATE command through bpf_map_create() API. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-14-andrii@kernel.org --- tools/lib/bpf/bpf.c | 5 +++-- tools/lib/bpf/bpf.h | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 54978198147c..9b11ca25e977 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -169,8 +169,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, - value_type_btf_obj_fd); + const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd); union bpf_attr attr; int fd; @@ -200,6 +199,8 @@ int bpf_map_create(enum bpf_map_type map_type, attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + attr.map_token_fd = OPTS_GET(opts, token_fd, 0); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 0a9f456698fe..6af90d50960b 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -52,9 +52,11 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; __s32 value_type_btf_obj_fd; - size_t:0; + + __u32 token_fd; + size_t :0; }; -#define bpf_map_create_opts__last_field value_type_btf_obj_fd +#define bpf_map_create_opts__last_field token_fd LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, -- cgit v1.2.3 From a3d63e85253b6c9b6aa34b99208e835358a91320 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:11 -0800 Subject: libbpf: Add BPF token support to bpf_btf_load() API Allow user to specify token_fd for bpf_btf_load() API that wraps kernel's BPF_BTF_LOAD command. This allows loading BTF from unprivileged process as long as it has BPF token allowing BPF_BTF_LOAD command, which can be created and delegated by privileged process. Wire through new btf_flags as well, so that user can provide BPF_F_TOKEN_FD flag, if necessary. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-15-andrii@kernel.org --- tools/lib/bpf/bpf.c | 6 +++++- tools/lib/bpf/bpf.h | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9b11ca25e977..9ad7e39fed20 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1185,7 +1185,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1210,6 +1210,10 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; + + attr.btf_flags = OPTS_GET(opts, btf_flags, 0); + attr.btf_token_fd = OPTS_GET(opts, token_fd, 0); + /* log_level == 0 and log_buf != NULL means "try loading without * log_buf, but retry with log_buf and log_level=1 on error", which is * consistent across low-level and high-level BTF and program loading diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6af90d50960b..7fe28330c36e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -134,9 +134,12 @@ struct bpf_btf_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; + + __u32 btf_flags; + __u32 token_fd; size_t :0; }; -#define bpf_btf_load_opts__last_field log_true_size +#define bpf_btf_load_opts__last_field token_fd LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts); -- cgit v1.2.3 From 404cbc149c3866e6ec2bfe1bce52c8864e1f81fc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:12 -0800 Subject: libbpf: Add BPF token support to bpf_prog_load() API Wire through token_fd into bpf_prog_load(). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-16-andrii@kernel.org --- tools/lib/bpf/bpf.c | 3 ++- tools/lib/bpf/bpf.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9ad7e39fed20..42cde79a67c7 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -235,7 +235,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -264,6 +264,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.prog_flags = OPTS_GET(opts, prog_flags, 0); attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); + attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 7fe28330c36e..1441f642c563 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -106,9 +106,10 @@ struct bpf_prog_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; + __u32 token_fd; size_t :0; }; -#define bpf_prog_load_opts__last_field log_true_size +#define bpf_prog_load_opts__last_field token_fd LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, -- cgit v1.2.3 From fcb9597ff7d1f7c772c1237dd2d04dd44e622501 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:13 -0800 Subject: selftests/bpf: Add BPF token-enabled tests Add a selftest that attempts to conceptually replicate intended BPF token use cases inside user namespaced container. Child process is forked. It is then put into its own userns and mountns. Child creates BPF FS context object. This ensures child userns is captured as the owning userns for this instance of BPF FS. Given setting delegation mount options is privileged operation, we ensure that child cannot set them. This context is passed back to privileged parent process through Unix socket, where parent sets up delegation options, creates, and mounts it as a detached mount. This mount FD is passed back to the child to be used for BPF token creation, which allows otherwise privileged BPF operations to succeed inside userns. We validate that all of token-enabled privileged commands (BPF_BTF_LOAD, BPF_MAP_CREATE, and BPF_PROG_LOAD) work as intended. They should only succeed inside the userns if a) BPF token is provided with proper allowed sets of commands and types; and b) namespaces CAP_BPF and other privileges are set. Lacking a) or b) should lead to -EPERM failures. Based on suggested workflow by Christian Brauner ([0]). [0] https://lore.kernel.org/bpf/20230704-hochverdient-lehne-eeb9eeef785e@brauner/ Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-17-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/token.c | 683 +++++++++++++++++++++++++ 1 file changed, 683 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/token.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c new file mode 100644 index 000000000000..5394a0c880a9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -0,0 +1,683 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#define _GNU_SOURCE +#include +#include +#include "cap_helpers.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline int sys_mount(const char *dev_name, const char *dir_name, + const char *type, unsigned long flags, + const void *data) +{ + return syscall(__NR_mount, dev_name, dir_name, type, flags, data); +} + +static inline int sys_fsopen(const char *fsname, unsigned flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fspick(int dfd, const char *path, unsigned flags) +{ + return syscall(__NR_fspick, dfd, path, flags); +} + +static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) +{ + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); +} + +static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) +{ + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); +} + +static int drop_priv_caps(__u64 *old_caps) +{ + return cap_disable_effective((1ULL << CAP_BPF) | + (1ULL << CAP_PERFMON) | + (1ULL << CAP_NET_ADMIN) | + (1ULL << CAP_SYS_ADMIN), old_caps); +} + +static int restore_priv_caps(__u64 old_caps) +{ + return cap_enable_effective(old_caps, NULL); +} + +static int set_delegate_mask(int fs_fd, const char *key, __u64 mask) +{ + char buf[32]; + int err; + + snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, + mask == ~0ULL ? "any" : buf, 0); + if (err < 0) + err = -errno; + return err; +} + +#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0) + +struct bpffs_opts { + __u64 cmds; + __u64 maps; + __u64 progs; + __u64 attachs; +}; + +static int create_bpffs_fd(void) +{ + int fs_fd; + + /* create VFS context */ + fs_fd = sys_fsopen("bpf", 0); + ASSERT_GE(fs_fd, 0, "fs_fd"); + + return fs_fd; +} + +static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) +{ + int mnt_fd, err; + + /* set up token delegation mount options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds); + if (!ASSERT_OK(err, "fs_cfg_cmds")) + return err; + err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps); + if (!ASSERT_OK(err, "fs_cfg_maps")) + return err; + err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs); + if (!ASSERT_OK(err, "fs_cfg_progs")) + return err; + err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs); + if (!ASSERT_OK(err, "fs_cfg_attachs")) + return err; + + /* instantiate FS object */ + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + if (err < 0) + return -errno; + + /* create O_PATH fd for detached mount */ + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (err < 0) + return -errno; + + return mnt_fd; +} + +/* send FD over Unix domain (AF_UNIX) socket */ +static int sendfd(int sockfd, int fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1] = { fd }, err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + + err = sendmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "sendmsg")) + return -EINVAL; + + return 0; +} + +/* receive FD over Unix domain (AF_UNIX) socket */ +static int recvfd(int sockfd, int *fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1], err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + err = recvmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "recvmsg")) + return -EINVAL; + + cmsg = CMSG_FIRSTHDR(&msg); + if (!ASSERT_OK_PTR(cmsg, "cmsg_null") || + !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") || + !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") || + !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type")) + return -EINVAL; + + memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); + *fd = fds[0]; + + return 0; +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static int write_file(const char *path, const void *buf, size_t count) +{ + int fd; + ssize_t ret; + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); + if (fd < 0) + return -1; + + ret = write_nointr(fd, buf, count); + close(fd); + if (ret < 0 || (size_t)ret != count) + return -1; + + return 0; +} + +static int create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + char map[100]; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER)) + return -1; + + if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && + errno != ENOENT) + return -1; + + snprintf(map, sizeof(map), "0 %d 1", uid); + if (write_file("/proc/self/uid_map", map, strlen(map))) + return -1; + + + snprintf(map, sizeof(map), "0 %d 1", gid); + if (write_file("/proc/self/gid_map", map, strlen(map))) + return -1; + + if (setgid(0)) + return -1; + + if (setuid(0)) + return -1; + + return 0; +} + +typedef int (*child_callback_fn)(int); + +static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1; + + /* setup userns with root mappings */ + err = create_and_enter_userns(); + if (!ASSERT_OK(err, "create_and_enter_userns")) + goto cleanup; + + /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */ + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "create_mountns")) + goto cleanup; + + err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (!ASSERT_OK(err, "remount_root")) + goto cleanup; + + fs_fd = create_bpffs_fd(); + if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); + err = set_delegate_mask(fs_fd, "delegate_maps", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); + err = set_delegate_mask(fs_fd, "delegate_progs", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); + + /* pass BPF FS context object to parent */ + err = sendfd(sock_fd, fs_fd); + if (!ASSERT_OK(err, "send_fs_fd")) + goto cleanup; + zclose(fs_fd); + + /* avoid mucking around with mount namespaces and mounting at + * well-known path, just get detach-mounted BPF FS fd back from parent + */ + err = recvfd(sock_fd, &mnt_fd); + if (!ASSERT_OK(err, "recv_mnt_fd")) + goto cleanup; + + /* try to fspick() BPF FS and try to add some delegation options */ + fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH); + if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot reconfigure to set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_maps", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_progs", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_attachs", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR); + if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) { + err = -EINVAL; + goto cleanup; + } + + /* do custom test logic with customly set up BPF FS instance */ + err = callback(bpffs_fd); + if (!ASSERT_OK(err, "test_callback")) + goto cleanup; + + err = 0; +cleanup: + zclose(sock_fd); + zclose(mnt_fd); + zclose(fs_fd); + zclose(bpffs_fd); + + exit(-err); +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) +{ + int fs_fd = -1, mnt_fd = -1, err; + + err = recvfd(sock_fd, &fs_fd); + if (!ASSERT_OK(err, "recv_bpffs_fd")) + goto cleanup; + + mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); + if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + /* pass BPF FS context object to parent */ + err = sendfd(sock_fd, mnt_fd); + if (!ASSERT_OK(err, "send_mnt_fd")) + goto cleanup; + zclose(mnt_fd); + + err = wait_for_pid(child_pid); + ASSERT_OK(err, "waitpid_child"); + +cleanup: + zclose(sock_fd); + zclose(fs_fd); + zclose(mnt_fd); + + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb) +{ + int sock_fds[2] = { -1, -1 }; + int child_pid = 0, err; + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds); + if (!ASSERT_OK(err, "socketpair")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GE(child_pid, 0, "fork")) + goto cleanup; + + if (child_pid == 0) { + zclose(sock_fds[0]); + return child(sock_fds[1], bpffs_opts, cb); + + } else { + zclose(sock_fds[1]); + return parent(child_pid, bpffs_opts, sock_fds[0]); + } + +cleanup: + zclose(sock_fds[0]); + zclose(sock_fds[1]); + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static int userns_map_create(int mnt_fd) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + int err, token_fd = -1, map_fd = -1; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no token, no CAP_BPF -> fail */ + map_opts.map_flags = 0; + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* token without CAP_BPF -> fail */ + map_opts.map_flags = BPF_F_TOKEN_FD; + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* CAP_BPF without token -> fail */ + map_opts.map_flags = 0; + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* finally, namespaced CAP_BPF + token -> success */ + map_opts.map_flags = BPF_F_TOKEN_FD; + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) { + err = -EINVAL; + goto cleanup; + } + +cleanup: + zclose(token_fd); + zclose(map_fd); + return err; +} + +static int userns_btf_load(int mnt_fd) +{ + LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); + int err, token_fd = -1, btf_fd = -1; + const void *raw_btf_data; + struct btf *btf = NULL; + __u32 raw_btf_size; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* setup a trivial BTF data to load to the kernel */ + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + goto cleanup; + + ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); + + raw_btf_data = btf__raw_data(btf, &raw_btf_size); + if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) + goto cleanup; + + /* no token + no CAP_BPF -> failure */ + btf_opts.btf_flags = 0; + btf_opts.token_fd = 0; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) + goto cleanup; + + /* token + no CAP_BPF -> failure */ + btf_opts.btf_flags = BPF_F_TOKEN_FD; + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) + goto cleanup; + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* token + CAP_BPF -> success */ + btf_opts.btf_flags = BPF_F_TOKEN_FD; + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) + goto cleanup; + + err = 0; +cleanup: + btf__free(btf); + zclose(btf_fd); + zclose(token_fd); + return err; +} + +static int userns_prog_load(int mnt_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); + int err, token_fd = -1, prog_fd = -1; + struct bpf_insn insns[] = { + /* bpf_jiffies64() requires CAP_BPF */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + /* bpf_get_current_task() requires CAP_PERFMON */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task), + /* r0 = 0; exit; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = ARRAY_SIZE(insns); + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* validate we can successfully load BPF program with token; this + * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF) + * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have + * BPF token wired properly in a bunch of places in the kernel + */ + prog_opts.prog_flags = BPF_F_TOKEN_FD; + prog_opts.token_fd = token_fd; + prog_opts.expected_attach_type = BPF_XDP; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_GT(prog_fd, 0, "prog_fd")) { + err = -EPERM; + goto cleanup; + } + + /* no token + caps -> failure */ + prog_opts.prog_flags = 0; + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no caps + token -> failure */ + prog_opts.prog_flags = BPF_F_TOKEN_FD; + prog_opts.token_fd = token_fd; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + /* no caps + no token -> definitely a failure */ + prog_opts.prog_flags = 0; + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = 0; +cleanup: + zclose(prog_fd); + zclose(token_fd); + return err; +} + +void test_token(void) +{ + if (test__start_subtest("map_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_MAP_CREATE, + .maps = 1ULL << BPF_MAP_TYPE_STACK, + }; + + subtest_userns(&opts, userns_map_create); + } + if (test__start_subtest("btf_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_BTF_LOAD, + }; + + subtest_userns(&opts, userns_btf_load); + } + if (test__start_subtest("prog_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_PROG_LOAD, + .progs = 1ULL << BPF_PROG_TYPE_XDP, + .attachs = 1ULL << BPF_XDP, + }; + + subtest_userns(&opts, userns_prog_load); + } +} -- cgit v1.2.3 From 0350f9d99ee538f2ccf179f0216e704a5f39b317 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:17 -0800 Subject: selftests/bpf: Utilize string values for delegate_xxx mount options Use both hex-based and string-based way to specify delegate mount options for BPF FS. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-21-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/token.c | 52 ++++++++++++++++---------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 5394a0c880a9..185ed2f79315 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -55,14 +55,22 @@ static int restore_priv_caps(__u64 old_caps) return cap_enable_effective(old_caps, NULL); } -static int set_delegate_mask(int fs_fd, const char *key, __u64 mask) +static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str) { char buf[32]; int err; - snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + if (!mask_str) { + if (mask == ~0ULL) { + mask_str = "any"; + } else { + snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + mask_str = buf; + } + } + err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, - mask == ~0ULL ? "any" : buf, 0); + mask_str, 0); if (err < 0) err = -errno; return err; @@ -75,6 +83,10 @@ struct bpffs_opts { __u64 maps; __u64 progs; __u64 attachs; + const char *cmds_str; + const char *maps_str; + const char *progs_str; + const char *attachs_str; }; static int create_bpffs_fd(void) @@ -93,16 +105,16 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) int mnt_fd, err; /* set up token delegation mount options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds); + err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); if (!ASSERT_OK(err, "fs_cfg_cmds")) return err; - err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps); + err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str); if (!ASSERT_OK(err, "fs_cfg_maps")) return err; - err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs); + err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str); if (!ASSERT_OK(err, "fs_cfg_progs")) return err; - err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs); + err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str); if (!ASSERT_OK(err, "fs_cfg_attachs")) return err; @@ -284,13 +296,13 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba } /* ensure unprivileged child cannot set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1); + err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); - err = set_delegate_mask(fs_fd, "delegate_maps", 0x1); + err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); - err = set_delegate_mask(fs_fd, "delegate_progs", 0x1); + err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); - err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); /* pass BPF FS context object to parent */ @@ -314,22 +326,22 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba } /* ensure unprivileged child cannot reconfigure to set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { err = -EINVAL; goto cleanup; } - err = set_delegate_mask(fs_fd, "delegate_maps", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { err = -EINVAL; goto cleanup; } - err = set_delegate_mask(fs_fd, "delegate_progs", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { err = -EINVAL; goto cleanup; } - err = set_delegate_mask(fs_fd, "delegate_attachs", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { err = -EINVAL; goto cleanup; @@ -658,8 +670,8 @@ void test_token(void) { if (test__start_subtest("map_token")) { struct bpffs_opts opts = { - .cmds = 1ULL << BPF_MAP_CREATE, - .maps = 1ULL << BPF_MAP_TYPE_STACK, + .cmds_str = "map_create", + .maps_str = "stack", }; subtest_userns(&opts, userns_map_create); @@ -673,9 +685,9 @@ void test_token(void) } if (test__start_subtest("prog_token")) { struct bpffs_opts opts = { - .cmds = 1ULL << BPF_PROG_LOAD, - .progs = 1ULL << BPF_PROG_TYPE_XDP, - .attachs = 1ULL << BPF_XDP, + .cmds_str = "PROG_LOAD", + .progs_str = "XDP", + .attachs_str = "xdp", }; subtest_userns(&opts, userns_prog_load); -- cgit v1.2.3 From ea4d587354eb5e32dfa93cebb055b072f518b193 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:18 -0800 Subject: libbpf: Split feature detectors definitions from cached results Split a list of supported feature detectors with their corresponding callbacks from actual cached supported/missing values. This will allow to have more flexible per-token or per-object feature detectors in subsequent refactorings. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-22-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0569b4973a4f..227f9f63e259 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4958,12 +4958,17 @@ enum kern_feature_result { FEAT_MISSING = 2, }; +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; +}; + typedef int (*feature_probe_fn)(void); +static struct kern_feature_cache feature_cache; + static struct kern_feature_desc { const char *desc; feature_probe_fn probe; - enum kern_feature_result res; } feature_probes[__FEAT_CNT] = { [FEAT_PROG_NAME] = { "BPF program name", probe_kern_prog_name, @@ -5031,6 +5036,7 @@ static struct kern_feature_desc { bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; + struct kern_feature_cache *cache = &feature_cache; int ret; if (obj && obj->gen_loader) @@ -5039,19 +5045,19 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) */ return true; - if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { ret = feat->probe(); if (ret > 0) { - WRITE_ONCE(feat->res, FEAT_SUPPORTED); + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); } else if (ret == 0) { - WRITE_ONCE(feat->res, FEAT_MISSING); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } else { pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(feat->res, FEAT_MISSING); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } } - return READ_ONCE(feat->res) == FEAT_SUPPORTED; + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) -- cgit v1.2.3 From d6dd1d49367ab03832b3c4b6f8211765d488c82b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:19 -0800 Subject: libbpf: Further decouple feature checking logic from bpf_object Add feat_supported() helper that accepts feature cache instead of bpf_object. This allows low-level code in bpf.c to not know or care about higher-level concept of bpf_object, yet it will be able to utilize custom feature checking in cases where BPF token might influence the outcome. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-23-andrii@kernel.org --- tools/lib/bpf/bpf.c | 6 +++--- tools/lib/bpf/libbpf.c | 22 +++++++++++++++------- tools/lib/bpf/libbpf_internal.h | 5 ++++- 3 files changed, 22 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 42cde79a67c7..1ebf224b8c56 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -146,7 +146,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -181,7 +181,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -266,7 +266,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.kern_version = OPTS_GET(opts, kern_version, 0); attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 227f9f63e259..4d791660bb56 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5033,17 +5033,14 @@ static struct kern_feature_desc { }, }; -bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; - struct kern_feature_cache *cache = &feature_cache; int ret; - if (obj && obj->gen_loader) - /* To generate loader program assume the latest kernel - * to avoid doing extra prog_load, map_create syscalls. - */ - return true; + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { ret = feat->probe(); @@ -5060,6 +5057,17 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; } +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +{ + if (obj && obj->gen_loader) + /* To generate loader program assume the latest kernel + * to avoid doing extra prog_load, map_create syscalls. + */ + return true; + + return feat_supported(NULL, feat_id); +} + static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { struct bpf_map_info map_info; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 58c547d473e0..622892fc841d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -361,8 +361,11 @@ enum kern_feature_id { __FEAT_CNT, }; -int probe_memcg_account(void); +struct kern_feature_cache; +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); + +int probe_memcg_account(void); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); -- cgit v1.2.3 From 05f9cdd55d61cf9c6283fd3dc0edc7cad09bd7fe Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:20 -0800 Subject: libbpf: Move feature detection code into its own file It's quite a lot of well isolated code, so it seems like a good candidate to move it out of libbpf.c to reduce its size. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-24-andrii@kernel.org --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/elf.c | 2 - tools/lib/bpf/features.c | 463 ++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.c | 463 +--------------------------------------- tools/lib/bpf/libbpf_internal.h | 14 +- tools/lib/bpf/str_error.h | 3 + 6 files changed, 481 insertions(+), 466 deletions(-) create mode 100644 tools/lib/bpf/features.c (limited to 'tools') diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 2d0c282c8588..b6619199a706 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o + usdt.o zip.o elf.o features.o diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index b02faec748a5..c92e02394159 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,8 +11,6 @@ #include "libbpf_internal.h" #include "str_error.h" -#define STRERR_BUFSIZE 128 - /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c new file mode 100644 index 000000000000..a4664526ab7f --- /dev/null +++ b/tools/lib/bpf/features.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "str_error.h" + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func_global(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_datasec(void) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_float(void) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_decl_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_type_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(fd); +} + +static int probe_prog_bind_map(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. + */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), NULL); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. */ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(void) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_cache feature_cache; + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; + + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { + ret = feat->probe(); + if (ret > 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } + } + + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4d791660bb56..a2b767bc0c5b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4596,467 +4596,6 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(void) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func_global(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_datasec(void) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_float(void) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_array_mmap(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); -} - -static int probe_prog_bind_map(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. */ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(void) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(void) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_syscall_wrapper(void); - -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -struct kern_feature_cache { - enum kern_feature_result res[__FEAT_CNT]; -}; - -typedef int (*feature_probe_fn)(void); - -static struct kern_feature_cache feature_cache; - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) -{ - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - - /* assume global feature cache, unless custom one is provided */ - if (!cache) - cache = &feature_cache; - - if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(); - if (ret > 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } - } - - return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; -} - bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { if (obj && obj->gen_loader) @@ -11067,7 +10606,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -static int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(void) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 622892fc841d..fa25e1232bc8 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -361,10 +361,20 @@ enum kern_feature_id { __FEAT_CNT, }; -struct kern_feature_cache; +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; +}; + bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +int probe_kern_syscall_wrapper(void); int probe_memcg_account(void); int bump_rlimit_memlock(void); @@ -626,4 +636,6 @@ int elf_resolve_syms_offsets(const char *binary_path, int cnt, int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, unsigned long **poffsets, size_t *pcnt); +int probe_fd(int fd); + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index a139334d57b6..626d7ffb03d6 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -2,5 +2,8 @@ #ifndef __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H +#define STRERR_BUFSIZE 128 + char *libbpf_strerror_r(int err, char *dst, int len); + #endif /* __LIBBPF_STR_ERROR_H */ -- cgit v1.2.3 From f3dcee938f485cf403ba2acf1f1548afe637c904 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:21 -0800 Subject: libbpf: Wire up token_fd into feature probing logic Adjust feature probing callbacks to take into account optional token_fd. In unprivileged contexts, some feature detectors would fail to detect kernel support just because BPF program, BPF map, or BTF object can't be loaded due to privileged nature of those operations. So when BPF object is loaded with BPF token, this token should be used for feature probing. This patch is setting support for this scenario, but we don't yet pass non-zero token FD. This will be added in the next patch. We also switched BPF cookie detector from using kprobe program to tracepoint one, as tracepoint is somewhat less dangerous BPF program type and has higher likelihood of being allowed through BPF token in the future. This change has no effect on detection behavior. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-25-andrii@kernel.org --- tools/lib/bpf/bpf.c | 5 +- tools/lib/bpf/features.c | 116 +++++++++++++++++++++++++++------------- tools/lib/bpf/libbpf.c | 4 +- tools/lib/bpf/libbpf_internal.h | 8 +-- tools/lib/bpf/libbpf_probes.c | 11 ++-- 5 files changed, 97 insertions(+), 47 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1ebf224b8c56..97ec005c3c47 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(void) +int probe_memcg_account(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { @@ -120,6 +120,9 @@ int probe_memcg_account(void) attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); + attr.prog_token_fd = token_fd; + if (token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index a4664526ab7f..5a5c766bf615 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -20,7 +20,7 @@ int probe_fd(int fd) return fd >= 0; } -static int probe_kern_prog_name(void) +static int probe_kern_prog_name(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, prog_name); struct bpf_insn insns[] = { @@ -35,6 +35,9 @@ static int probe_kern_prog_name(void) attr.license = ptr_to_u64("GPL"); attr.insns = ptr_to_u64(insns); attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + attr.prog_token_fd = token_fd; + if (token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); /* make sure loading with name works */ @@ -42,7 +45,7 @@ static int probe_kern_prog_name(void) return probe_fd(ret); } -static int probe_kern_global_data(void) +static int probe_kern_global_data(int token_fd) { char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { @@ -51,9 +54,17 @@ static int probe_kern_global_data(void) BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int ret, map, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -64,12 +75,12 @@ static int probe_kern_global_data(void) insns[0].imm = map; - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); close(map); return probe_fd(ret); } -static int probe_kern_btf(void) +static int probe_kern_btf(int token_fd) { static const char strs[] = "\0int"; __u32 types[] = { @@ -78,10 +89,10 @@ static int probe_kern_btf(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_func(void) +static int probe_kern_btf_func(int token_fd) { static const char strs[] = "\0int\0x\0a"; /* void x(int a) {} */ @@ -96,10 +107,10 @@ static int probe_kern_btf_func(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_func_global(void) +static int probe_kern_btf_func_global(int token_fd) { static const char strs[] = "\0int\0x\0a"; /* static void x(int a) {} */ @@ -114,10 +125,10 @@ static int probe_kern_btf_func_global(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_datasec(void) +static int probe_kern_btf_datasec(int token_fd) { static const char strs[] = "\0x\0.data"; /* static int a; */ @@ -133,10 +144,10 @@ static int probe_kern_btf_datasec(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_float(void) +static int probe_kern_btf_float(int token_fd) { static const char strs[] = "\0float"; __u32 types[] = { @@ -145,10 +156,10 @@ static int probe_kern_btf_float(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_decl_tag(void) +static int probe_kern_btf_decl_tag(int token_fd) { static const char strs[] = "\0tag"; __u32 types[] = { @@ -162,10 +173,10 @@ static int probe_kern_btf_decl_tag(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_type_tag(void) +static int probe_kern_btf_type_tag(int token_fd) { static const char strs[] = "\0tag"; __u32 types[] = { @@ -178,21 +189,28 @@ static int probe_kern_btf_type_tag(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_array_mmap(void) +static int probe_kern_array_mmap(int token_fd) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_MMAPABLE | (token_fd ? BPF_F_TOKEN_FD : 0), + .token_fd = token_fd, + ); int fd; fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); return probe_fd(fd); } -static int probe_kern_exp_attach_type(void) +static int probe_kern_exp_attach_type(int token_fd) { - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -208,8 +226,12 @@ static int probe_kern_exp_attach_type(void) return probe_fd(fd); } -static int probe_kern_probe_read_kernel(void) +static int probe_kern_probe_read_kernel(int token_fd) { + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); struct bpf_insn insns[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ @@ -220,20 +242,28 @@ static int probe_kern_probe_read_kernel(void) }; int fd, insn_cnt = ARRAY_SIZE(insns); - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); return probe_fd(fd); } -static int probe_prog_bind_map(void) +static int probe_prog_bind_map(int token_fd) { char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -242,7 +272,7 @@ static int probe_prog_bind_map(void) return ret; } - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); if (prog < 0) { close(map); return 0; @@ -256,7 +286,7 @@ static int probe_prog_bind_map(void) return ret >= 0; } -static int probe_module_btf(void) +static int probe_module_btf(int token_fd) { static const char strs[] = "\0int"; __u32 types[] = { @@ -268,7 +298,7 @@ static int probe_module_btf(void) char name[16]; int fd, err; - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); if (fd < 0) return 0; /* BTF not supported at all */ @@ -285,16 +315,20 @@ static int probe_module_btf(void) return !err; } -static int probe_perf_link(void) +static int probe_perf_link(int token_fd) { struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int prog_fd, link_fd, err; prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); + insns, ARRAY_SIZE(insns), &opts); if (prog_fd < 0) return -errno; @@ -311,10 +345,12 @@ static int probe_perf_link(void) return link_fd < 0 && err == -EBADF; } -static int probe_uprobe_multi_link(void) +static int probe_uprobe_multi_link(int token_fd) { LIBBPF_OPTS(bpf_prog_load_opts, load_opts, .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, ); LIBBPF_OPTS(bpf_link_create_opts, link_opts); struct bpf_insn insns[] = { @@ -344,19 +380,23 @@ static int probe_uprobe_multi_link(void) return link_fd < 0 && err == -EBADF; } -static int probe_kern_bpf_cookie(void) +static int probe_kern_bpf_cookie(int token_fd) { struct bpf_insn insns[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int ret, insn_cnt = ARRAY_SIZE(insns); - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); return probe_fd(ret); } -static int probe_kern_btf_enum64(void) +static int probe_kern_btf_enum64(int token_fd) { static const char strs[] = "\0enum64"; __u32 types[] = { @@ -364,10 +404,10 @@ static int probe_kern_btf_enum64(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -typedef int (*feature_probe_fn)(void); +typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -448,7 +488,7 @@ bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_ cache = &feature_cache; if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(); + ret = feat->probe(cache->token_fd); if (ret > 0) { WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); } else if (ret == 0) { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a2b767bc0c5b..a1d100df0c71 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6440,7 +6440,7 @@ static int probe_kern_arg_ctx_tag(void) if (cached_result >= 0) return cached_result; - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), 0); if (btf_fd < 0) return 0; @@ -10606,7 +10606,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(int token_fd) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index fa25e1232bc8..28fabed1cd8f 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -369,19 +369,21 @@ enum kern_feature_result { struct kern_feature_cache { enum kern_feature_result res[__FEAT_CNT]; + int token_fd; }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); -int probe_kern_syscall_wrapper(void); -int probe_memcg_account(void); +int probe_kern_syscall_wrapper(int token_fd); +int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len); + const char *str_sec, size_t str_len, + int token_fd); int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 98373d126d9d..ee9b1dbea9eb 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len) + const char *str_sec, size_t str_len, + int token_fd) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -229,6 +230,10 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; + LIBBPF_OPTS(bpf_btf_load_opts, opts, + .token_fd = token_fd, + .btf_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int btf_fd, btf_len; __u8 *raw_btf; @@ -241,7 +246,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); free(raw_btf); return btf_fd; @@ -271,7 +276,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); + strs, sizeof(strs), 0); } static int probe_map_create(enum bpf_map_type map_type) -- cgit v1.2.3 From 6b434b61b4d9e0e59f2947ce0f58f6fb4de048d8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:22 -0800 Subject: libbpf: Wire up BPF token support at BPF object level Add BPF token support to BPF object-level functionality. BPF token is supported by BPF object logic either as an explicitly provided BPF token from outside (through BPF FS path), or implicitly (unless prevented through bpf_object_open_opts). Implicit mode is assumed to be the most common one for user namespaced unprivileged workloads. The assumption is that privileged container manager sets up default BPF FS mount point at /sys/fs/bpf with BPF token delegation options (delegate_{cmds,maps,progs,attachs} mount options). BPF object during loading will attempt to create BPF token from /sys/fs/bpf location, and pass it for all relevant operations (currently, map creation, BTF load, and program load). In this implicit mode, if BPF token creation fails due to whatever reason (BPF FS is not mounted, or kernel doesn't support BPF token, etc), this is not considered an error. BPF object loading sequence will proceed with no BPF token. In explicit BPF token mode, user provides explicitly custom BPF FS mount point path. In such case, BPF object will attempt to create BPF token from provided BPF FS location. If BPF token creation fails, that is considered a critical error and BPF object load fails with an error. Libbpf provides a way to disable implicit BPF token creation, if it causes any troubles (BPF token is designed to be completely optional and shouldn't cause any problems even if provided, but in the world of BPF LSM, custom security logic can be installed that might change outcome depending on the presence of BPF token). To disable libbpf's default BPF token creation behavior user should provide either invalid BPF token FD (negative), or empty bpf_token_path option. BPF token presence can influence libbpf's feature probing, so if BPF object has associated BPF token, feature probing is instructed to use BPF object-specific feature detection cache and token FD. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-26-andrii@kernel.org --- tools/lib/bpf/btf.c | 10 +++- tools/lib/bpf/libbpf.c | 102 +++++++++++++++++++++++++++++++++++++--- tools/lib/bpf/libbpf.h | 13 ++++- tools/lib/bpf/libbpf_internal.h | 17 ++++++- 4 files changed, 131 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ee95fd379d4d..ec92b87cae01 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1367,6 +1369,10 @@ retry_load: opts.log_level = log_level; } + opts.token_fd = token_fd; + if (token_fd) + opts.btf_flags |= BPF_F_TOKEN_FD; + btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1394,7 +1400,7 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a1d100df0c71..cefa607be335 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,6 +59,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" + #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. It may break @@ -695,6 +697,10 @@ struct bpf_object { struct usdt_manager *usdt_man; + struct kern_feature_cache *feat_cache; + char *token_path; + int token_fd; + char path[]; }; @@ -2231,7 +2237,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = "/sys/fs/bpf"; + path = BPF_FS_DEFAULT_PATH; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3240,7 +3246,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0); + obj->log_level ? 1 : 0, obj->token_fd); } if (sanitize) { if (!err) { @@ -4561,6 +4567,58 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } +static int bpf_object_prepare_token(struct bpf_object *obj) +{ + const char *bpffs_path; + int bpffs_fd = -1, token_fd, err; + bool mandatory; + enum libbpf_print_level level; + + /* token is explicitly prevented */ + if (obj->token_path && obj->token_path[0] == '\0') { + pr_debug("object '%s': token is prevented, skipping...\n", obj->name); + return 0; + } + + mandatory = obj->token_path != NULL; + level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; + + bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; + bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); + if (bpffs_fd < 0) { + err = -errno; + __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", + obj->name, err, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? err : 0; + } + + token_fd = bpf_token_create(bpffs_fd, 0); + close(bpffs_fd); + if (token_fd < 0) { + if (!mandatory && token_fd == -ENOENT) { + pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", + obj->name, bpffs_path); + return 0; + } + __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", + obj->name, token_fd, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? token_fd : 0; + } + + obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); + if (!obj->feat_cache) { + close(token_fd); + return -ENOMEM; + } + + obj->token_fd = token_fd; + obj->feat_cache->token_fd = token_fd; + + return 0; +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4570,6 +4628,10 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = obj->token_fd, + .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0, + ); if (obj->gen_loader) return 0; @@ -4579,9 +4641,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4604,6 +4666,9 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) */ return true; + if (obj->token_fd) + return feat_supported(obj->feat_cache, feat_id); + return feat_supported(NULL, feat_id); } @@ -4728,6 +4793,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; + create_attr.token_fd = obj->token_fd; + if (obj->token_fd) + create_attr.map_flags |= BPF_F_TOKEN_FD; if (bpf_map__is_struct_ops(map)) { create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -7049,6 +7117,10 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.prog_flags = prog->prog_flags; load_attr.fd_array = obj->fd_array; + load_attr.token_fd = obj->token_fd; + if (obj->token_fd) + load_attr.prog_flags |= BPF_F_TOKEN_FD; + /* adjust load_attr if sec_def provides custom preload callback */ if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); @@ -7494,7 +7566,7 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path; + const char *obj_name, *kconfig, *btf_tmp_path, *token_path; struct bpf_object *obj; char tmp_name[64]; int err; @@ -7531,6 +7603,10 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); + token_path = OPTS_GET(opts, bpf_token_path, NULL); + if (token_path && strlen(token_path) >= PATH_MAX) + return ERR_PTR(-ENAMETOOLONG); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7539,6 +7615,14 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; + if (token_path) { + obj->token_path = strdup(token_path); + if (!obj->token_path) { + err = -ENOMEM; + goto out; + } + } + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -8049,7 +8133,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object__probe_loading(obj); + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_maps(obj); @@ -8584,6 +8669,11 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); + zfree(&obj->feat_cache); + zfree(&obj->token_path); + if (obj->token_fd > 0) + close(obj->token_fd); + free(obj); } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f..535ae15ed493 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,10 +177,21 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; + /* Path to BPF FS mount point to derive BPF token from. + * + * Created BPF token will be used for all bpf() syscall operations + * that accept BPF token (e.g., map creation, BTF and program loads, + * etc) automatically within instantiated BPF object. + * + * Setting bpf_token_path option to empty string disables libbpf's + * automatic attempt to create BPF token from default BPF FS mount + * point (/sys/fs/bpf), in case this default behavior is undesirable. + */ + const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field kernel_log_level +#define bpf_object_open_opts__last_field bpf_token_path /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 28fabed1cd8f..930cc9616527 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -384,7 +384,9 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len, int token_fd); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -548,6 +550,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. + * Original FD is not closed or altered in any other way. + * Preserves original FD value, if it's invalid (negative). + */ +static inline int dup_good_fd(int fd) +{ + if (fd < 0) + return fd; + return fcntl(fd, F_DUPFD_CLOEXEC, 3); +} + /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -559,7 +572,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + fd = dup_good_fd(fd); saved_errno = errno; close(old_fd); errno = saved_errno; -- cgit v1.2.3 From d5baf0cac627fb3a00d9235955a388e5930b6d0e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:23 -0800 Subject: selftests/bpf: Add BPF object loading tests with explicit token passing Add a few tests that attempt to load BPF object containing privileged map, program, and the one requiring mandatory BTF uploading into the kernel (to validate token FD propagation to BPF_BTF_LOAD command). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-27-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/token.c | 140 +++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/priv_map.c | 13 +++ tools/testing/selftests/bpf/progs/priv_prog.c | 13 +++ 3 files changed, 166 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/priv_map.c create mode 100644 tools/testing/selftests/bpf/progs/priv_prog.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 185ed2f79315..1594d9b94b13 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -14,6 +14,9 @@ #include #include #include +#include "priv_map.skel.h" +#include "priv_prog.skel.h" +#include "dummy_st_ops_success.skel.h" static inline int sys_mount(const char *dev_name, const char *dir_name, const char *type, unsigned long flags, @@ -666,6 +669,104 @@ cleanup: return err; } +static int userns_obj_priv_map(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_map *skel; + int err; + + skel = priv_map__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_map__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + return 0; +} + +static int userns_obj_priv_prog(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_prog *skel; + int err; + + skel = priv_prog__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_prog__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + return 0; +} + +/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, + * which should cause struct_ops application to fail, as BTF won't be uploaded + * into the kernel, even if STRUCT_OPS programs themselves are allowed + */ +static int validate_struct_ops_load(int mnt_fd, bool expect_success) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct dummy_st_ops_success *skel; + int err; + + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (expect_success) { + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + } else /* expect failure */ { + if (!ASSERT_ERR(err, "obj_token_path_load")) + return -EINVAL; + } + + return 0; +} + +static int userns_obj_priv_btf_fail(int mnt_fd) +{ + return validate_struct_ops_load(mnt_fd, false /* should fail */); +} + +static int userns_obj_priv_btf_success(int mnt_fd) +{ + return validate_struct_ops_load(mnt_fd, true /* should succeed */); +} + +#define bit(n) (1ULL << (n)) + void test_token(void) { if (test__start_subtest("map_token")) { @@ -692,4 +793,43 @@ void test_token(void) subtest_userns(&opts, userns_prog_load); } + if (test__start_subtest("obj_priv_map")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .maps = bit(BPF_MAP_TYPE_QUEUE), + }; + + subtest_userns(&opts, userns_obj_priv_map); + } + if (test__start_subtest("obj_priv_prog")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_PROG_LOAD), + .progs = bit(BPF_PROG_TYPE_KPROBE), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_prog); + } + if (test__start_subtest("obj_priv_btf_fail")) { + struct bpffs_opts opts = { + /* disallow BTF loading */ + .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_fail); + } + if (test__start_subtest("obj_priv_btf_success")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_success); + } } diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c new file mode 100644 index 000000000000..9085be50f03b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_map.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, 1); + __type(value, __u32); +} priv_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c new file mode 100644 index 000000000000..3c7b2b618c8a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_prog.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe") +int kprobe_prog(void *ctx) +{ + return 1; +} -- cgit v1.2.3 From b73d08d1318a2dde5bacbab77d0e2fd2aa47c933 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:24 -0800 Subject: selftests/bpf: Add tests for BPF object load with implicit token Add a test to validate libbpf's implicit BPF token creation from default BPF FS location (/sys/fs/bpf). Also validate that disabling this implicit BPF token creation works. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240124022127.2379740-28-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/token.c | 64 ++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 1594d9b94b13..003f7c208f4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "priv_map.skel.h" @@ -45,6 +46,13 @@ static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) return syscall(__NR_fsmount, fs_fd, flags, ms_flags); } +static inline int sys_move_mount(int from_dfd, const char *from_path, + int to_dfd, const char *to_path, + unsigned flags) +{ + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); +} + static int drop_priv_caps(__u64 *old_caps) { return cap_disable_effective((1ULL << CAP_BPF) | @@ -765,6 +773,51 @@ static int userns_obj_priv_btf_success(int mnt_fd) return validate_struct_ops_load(mnt_fd, true /* should succeed */); } +static int userns_obj_priv_implicit_token(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF + * token automatically and implicitly + */ + err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + return -EINVAL; + + /* now the same struct_ops skeleton should succeed thanks to libppf + * creating BPF token from /sys/fs/bpf mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + return -EINVAL; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, should fail */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + return -EINVAL; + + return 0; +} + #define bit(n) (1ULL << (n)) void test_token(void) @@ -832,4 +885,15 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_btf_success); } + if (test__start_subtest("obj_priv_implicit_token")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token); + } } -- cgit v1.2.3 From cac270ad79afe212ed7986e8d271c72521cd8212 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:25 -0800 Subject: libbpf: Support BPF token path setting through LIBBPF_BPF_TOKEN_PATH envvar To allow external admin authority to override default BPF FS location (/sys/fs/bpf) for implicit BPF token creation, teach libbpf to recognize LIBBPF_BPF_TOKEN_PATH envvar. If it is specified and user application didn't explicitly specify bpf_token_path option, it will be treated exactly like bpf_token_path option, overriding default /sys/fs/bpf location and making BPF token mandatory. Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-29-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 6 ++++++ tools/lib/bpf/libbpf.h | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cefa607be335..fa7094ff3e66 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7604,6 +7604,12 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, return ERR_PTR(-EINVAL); token_path = OPTS_GET(opts, bpf_token_path, NULL); + /* if user didn't specify bpf_token_path explicitly, check if + * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path + * option + */ + if (!token_path) + token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); if (token_path && strlen(token_path) >= PATH_MAX) return ERR_PTR(-ENAMETOOLONG); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 535ae15ed493..5723cbbfcc41 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -183,6 +183,14 @@ struct bpf_object_open_opts { * that accept BPF token (e.g., map creation, BTF and program loads, * etc) automatically within instantiated BPF object. * + * If bpf_token_path is not specified, libbpf will consult + * LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will be + * taken as a value of bpf_token_path option and will force libbpf to + * either create BPF token from provided custom BPF FS path, or will + * disable implicit BPF token creation, if envvar value is an empty + * string. bpf_token_path overrides LIBBPF_BPF_TOKEN_PATH, if both are + * set at the same time. + * * Setting bpf_token_path option to empty string disables libbpf's * automatic attempt to create BPF token from default BPF FS mount * point (/sys/fs/bpf), in case this default behavior is undesirable. -- cgit v1.2.3 From fadf54935e859c4d512aed6ad54f639b87a3b4d3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:26 -0800 Subject: selftests/bpf: Add tests for LIBBPF_BPF_TOKEN_PATH envvar Add new subtest validating LIBBPF_BPF_TOKEN_PATH envvar semantics. Extend existing test to validate that LIBBPF_BPF_TOKEN_PATH allows to disable implicit BPF token creation by setting envvar to empty string. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-30-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/token.c | 98 ++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 003f7c208f4c..1f6aa685e6f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -773,6 +773,9 @@ static int userns_obj_priv_btf_success(int mnt_fd) return validate_struct_ops_load(mnt_fd, true /* should succeed */); } +#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" +#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" + static int userns_obj_priv_implicit_token(int mnt_fd) { LIBBPF_OPTS(bpf_object_open_opts, opts); @@ -795,6 +798,20 @@ static int userns_obj_priv_implicit_token(int mnt_fd) if (!ASSERT_OK(err, "move_mount_bpffs")) return -EINVAL; + /* disable implicit BPF token creation by setting + * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail + */ + err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + return -EINVAL; + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) { + unsetenv(TOKEN_ENVVAR); + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + unsetenv(TOKEN_ENVVAR); + /* now the same struct_ops skeleton should succeed thanks to libppf * creating BPF token from /sys/fs/bpf mount point */ @@ -818,6 +835,76 @@ static int userns_obj_priv_implicit_token(int mnt_fd) return 0; } +static int userns_obj_priv_implicit_token_envvar(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over custom location, so libbpf can't create + * BPF token implicitly, unless pointed to it through + * LIBBPF_BPF_TOKEN_PATH envvar + */ + rmdir(TOKEN_BPFFS_CUSTOM); + if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) + goto err_out; + err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + goto err_out; + + /* even though we have BPF FS with delegation, it's not at default + * /sys/fs/bpf location, so we still fail to load until envvar is set up + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { + dummy_st_ops_success__destroy(skel); + goto err_out; + } + + err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + goto err_out; + + /* now the same struct_ops skeleton should succeed thanks to libppf + * creating BPF token from custom mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + goto err_out; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + goto err_out; + + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return 0; +err_out: + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return -EINVAL; +} + #define bit(n) (1ULL << (n)) void test_token(void) @@ -896,4 +983,15 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_implicit_token); } + if (test__start_subtest("obj_priv_implicit_token_envvar")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); + } } -- cgit v1.2.3 From 906ee42cb1be1152ef24465704cc89edc3f571c1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:27 -0800 Subject: selftests/bpf: Incorporate LSM policy to token-based tests Add tests for LSM interactions (both bpf_token_capable and bpf_token_cmd LSM hooks) with BPF token in bpf() subsystem. Now child process passes back token FD for parent to be able to do tests with token originating in "wrong" userns. But we also create token in initns and check that token LSMs don't accidentally reject BPF operations when capable() checks pass without BPF token. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-31-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/token.c | 89 +++++++++++++++++++++----- tools/testing/selftests/bpf/progs/token_lsm.c | 32 +++++++++ 2 files changed, 104 insertions(+), 17 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/token_lsm.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 1f6aa685e6f7..fc4a175d8d76 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -18,6 +18,7 @@ #include "priv_map.skel.h" #include "priv_prog.skel.h" #include "dummy_st_ops_success.skel.h" +#include "token_lsm.skel.h" static inline int sys_mount(const char *dev_name, const char *dir_name, const char *type, unsigned long flags, @@ -279,12 +280,23 @@ static int create_and_enter_userns(void) return 0; } -typedef int (*child_callback_fn)(int); +typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel); static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) { - LIBBPF_OPTS(bpf_map_create_opts, map_opts); - int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1; + int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1; + struct token_lsm *lsm_skel = NULL; + + /* load and attach LSM "policy" before we go into unpriv userns */ + lsm_skel = token_lsm__open_and_load(); + if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) { + err = -EINVAL; + goto cleanup; + } + lsm_skel->bss->my_pid = getpid(); + err = token_lsm__attach(lsm_skel); + if (!ASSERT_OK(err, "lsm_skel_attach")) + goto cleanup; /* setup userns with root mappings */ err = create_and_enter_userns(); @@ -365,8 +377,19 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba goto cleanup; } + /* create BPF token FD and pass it to parent for some extra checks */ + token_fd = bpf_token_create(bpffs_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "child_token_create")) { + err = -EINVAL; + goto cleanup; + } + err = sendfd(sock_fd, token_fd); + if (!ASSERT_OK(err, "send_token_fd")) + goto cleanup; + zclose(token_fd); + /* do custom test logic with customly set up BPF FS instance */ - err = callback(bpffs_fd); + err = callback(bpffs_fd, lsm_skel); if (!ASSERT_OK(err, "test_callback")) goto cleanup; @@ -376,6 +399,10 @@ cleanup: zclose(mnt_fd); zclose(fs_fd); zclose(bpffs_fd); + zclose(token_fd); + + lsm_skel->bss->my_pid = 0; + token_lsm__destroy(lsm_skel); exit(-err); } @@ -401,7 +428,7 @@ again: static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) { - int fs_fd = -1, mnt_fd = -1, err; + int fs_fd = -1, mnt_fd = -1, token_fd = -1, err; err = recvfd(sock_fd, &fs_fd); if (!ASSERT_OK(err, "recv_bpffs_fd")) @@ -420,6 +447,11 @@ static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) goto cleanup; zclose(mnt_fd); + /* receive BPF token FD back from child for some extra tests */ + err = recvfd(sock_fd, &token_fd); + if (!ASSERT_OK(err, "recv_token_fd")) + goto cleanup; + err = wait_for_pid(child_pid); ASSERT_OK(err, "waitpid_child"); @@ -427,12 +459,14 @@ cleanup: zclose(sock_fd); zclose(fs_fd); zclose(mnt_fd); + zclose(token_fd); if (child_pid > 0) (void)kill(child_pid, SIGKILL); } -static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb) +static void subtest_userns(struct bpffs_opts *bpffs_opts, + child_callback_fn child_cb) { int sock_fds[2] = { -1, -1 }; int child_pid = 0, err; @@ -447,7 +481,7 @@ static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb) if (child_pid == 0) { zclose(sock_fds[0]); - return child(sock_fds[1], bpffs_opts, cb); + return child(sock_fds[1], bpffs_opts, child_cb); } else { zclose(sock_fds[1]); @@ -461,7 +495,7 @@ cleanup: (void)kill(child_pid, SIGKILL); } -static int userns_map_create(int mnt_fd) +static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel) { LIBBPF_OPTS(bpf_map_create_opts, map_opts); int err, token_fd = -1, map_fd = -1; @@ -529,7 +563,7 @@ cleanup: return err; } -static int userns_btf_load(int mnt_fd) +static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel) { LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); int err, token_fd = -1, btf_fd = -1; @@ -598,7 +632,7 @@ cleanup: return err; } -static int userns_prog_load(int mnt_fd) +static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel) { LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); int err, token_fd = -1, prog_fd = -1; @@ -677,7 +711,7 @@ cleanup: return err; } -static int userns_obj_priv_map(int mnt_fd) +static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel) { LIBBPF_OPTS(bpf_object_open_opts, opts); char buf[256]; @@ -705,7 +739,7 @@ static int userns_obj_priv_map(int mnt_fd) return 0; } -static int userns_obj_priv_prog(int mnt_fd) +static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel) { LIBBPF_OPTS(bpf_object_open_opts, opts); char buf[256]; @@ -724,12 +758,33 @@ static int userns_obj_priv_prog(int mnt_fd) skel = priv_prog__open_opts(&opts); if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) return -EINVAL; - err = priv_prog__load(skel); priv_prog__destroy(skel); if (!ASSERT_OK(err, "obj_token_path_load")) return -EINVAL; + /* provide BPF token, but reject bpf_token_capable() with LSM */ + lsm_skel->bss->reject_capable = true; + lsm_skel->bss->reject_cmd = false; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open")) + return -EINVAL; + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load")) + return -EINVAL; + + /* provide BPF token, but reject bpf_token_cmd() with LSM */ + lsm_skel->bss->reject_capable = false; + lsm_skel->bss->reject_cmd = true; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open")) + return -EINVAL; + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load")) + return -EINVAL; + return 0; } @@ -763,12 +818,12 @@ static int validate_struct_ops_load(int mnt_fd, bool expect_success) return 0; } -static int userns_obj_priv_btf_fail(int mnt_fd) +static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel) { return validate_struct_ops_load(mnt_fd, false /* should fail */); } -static int userns_obj_priv_btf_success(int mnt_fd) +static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel) { return validate_struct_ops_load(mnt_fd, true /* should succeed */); } @@ -776,7 +831,7 @@ static int userns_obj_priv_btf_success(int mnt_fd) #define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" #define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" -static int userns_obj_priv_implicit_token(int mnt_fd) +static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel) { LIBBPF_OPTS(bpf_object_open_opts, opts); struct dummy_st_ops_success *skel; @@ -835,7 +890,7 @@ static int userns_obj_priv_implicit_token(int mnt_fd) return 0; } -static int userns_obj_priv_implicit_token_envvar(int mnt_fd) +static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel) { LIBBPF_OPTS(bpf_object_open_opts, opts); struct dummy_st_ops_success *skel; diff --git a/tools/testing/selftests/bpf/progs/token_lsm.c b/tools/testing/selftests/bpf/progs/token_lsm.c new file mode 100644 index 000000000000..e4d59b6ba743 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/token_lsm.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +int my_pid; +bool reject_capable; +bool reject_cmd; + +SEC("lsm/bpf_token_capable") +int BPF_PROG(token_capable, struct bpf_token *token, int cap) +{ + if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + if (reject_capable) + return -1; + return 0; +} + +SEC("lsm/bpf_token_cmd") +int BPF_PROG(token_cmd, struct bpf_token *token, enum bpf_cmd cmd) +{ + if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + if (reject_cmd) + return -1; + return 0; +} -- cgit v1.2.3 From e18c709230cb05886fdcf6b90ef21a0a733079d7 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 23 Jan 2024 10:27:50 +0300 Subject: vsock/test: add '--peer-port' input argument Implement port for given CID as input argument instead of using hardcoded value '1234'. This allows to run different test instances on a single CID. Port argument is not required parameter and if it is not set, then default value will be '1234' - thus we preserve previous behaviour. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20240123072750.4084181-1-avkrasnov@salutedevices.com Signed-off-by: Jakub Kicinski --- tools/testing/vsock/util.c | 17 ++++- tools/testing/vsock/util.h | 4 ++ tools/testing/vsock/vsock_diag_test.c | 21 ++++-- tools/testing/vsock/vsock_test.c | 102 ++++++++++++++++++------------ tools/testing/vsock/vsock_test_zerocopy.c | 12 ++-- tools/testing/vsock/vsock_uring_test.c | 17 ++++- 6 files changed, 115 insertions(+), 58 deletions(-) (limited to 'tools') diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index ae2b33c21c45..554b290fefdc 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -33,8 +33,7 @@ void init_signals(void) signal(SIGPIPE, SIG_IGN); } -/* Parse a CID in string representation */ -unsigned int parse_cid(const char *str) +static unsigned int parse_uint(const char *str, const char *err_str) { char *endptr = NULL; unsigned long n; @@ -42,12 +41,24 @@ unsigned int parse_cid(const char *str) errno = 0; n = strtoul(str, &endptr, 10); if (errno || *endptr != '\0') { - fprintf(stderr, "malformed CID \"%s\"\n", str); + fprintf(stderr, "malformed %s \"%s\"\n", err_str, str); exit(EXIT_FAILURE); } return n; } +/* Parse a CID in string representation */ +unsigned int parse_cid(const char *str) +{ + return parse_uint(str, "CID"); +} + +/* Parse a port in string representation */ +unsigned int parse_port(const char *str) +{ + return parse_uint(str, "port"); +} + /* Wait for the remote to close the connection */ void vsock_wait_remote_close(int fd) { diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index 03c88d0cb861..e95e62485959 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -12,10 +12,13 @@ enum test_mode { TEST_MODE_SERVER }; +#define DEFAULT_PEER_PORT 1234 + /* Test runner options */ struct test_opts { enum test_mode mode; unsigned int peer_cid; + unsigned int peer_port; }; /* A test case definition. Test functions must print failures to stderr and @@ -35,6 +38,7 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); +unsigned int parse_port(const char *str); int vsock_stream_connect(unsigned int cid, unsigned int port); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c index fa927ad16f8a..9d61b1f1c4c3 100644 --- a/tools/testing/vsock/vsock_diag_test.c +++ b/tools/testing/vsock/vsock_diag_test.c @@ -342,7 +342,7 @@ static void test_listen_socket_server(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = VMADDR_CID_ANY, }, }; @@ -378,7 +378,7 @@ static void test_connect_client(const struct test_opts *opts) LIST_HEAD(sockets); struct vsock_stat *st; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -403,7 +403,7 @@ static void test_connect_server(const struct test_opts *opts) LIST_HEAD(sockets); int client_fd; - client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + client_fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (client_fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -461,6 +461,11 @@ static const struct option longopts[] = { .has_arg = required_argument, .val = 'p', }, + { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, { .name = "list", .has_arg = no_argument, @@ -481,7 +486,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=] --control-port= --mode=client|server --peer-cid= [--list] [--skip=]\n" + fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=] --control-port= --mode=client|server --peer-cid= [--peer-port=] [--list] [--skip=]\n" "\n" " Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n" " Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" @@ -503,9 +508,11 @@ static void usage(void) " --control-port Server port to listen on/connect to\n" " --mode client|server Server or client mode\n" " --peer-cid CID of the other side\n" + " --peer-port AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" " --skip Test ID to skip;\n" - " use multiple --skip options to skip more tests\n" + " use multiple --skip options to skip more tests\n", + DEFAULT_PEER_PORT ); exit(EXIT_FAILURE); } @@ -517,6 +524,7 @@ int main(int argc, char **argv) struct test_opts opts = { .mode = TEST_MODE_UNSET, .peer_cid = VMADDR_CID_ANY, + .peer_port = DEFAULT_PEER_PORT, }; init_signals(); @@ -544,6 +552,9 @@ int main(int argc, char **argv) case 'p': opts.peer_cid = parse_cid(optarg); break; + case 'q': + opts.peer_port = parse_port(optarg); + break; case 'P': control_port = optarg; break; diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 66246d81d654..f851f8961247 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -34,7 +34,7 @@ static void test_stream_connection_reset(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = opts->peer_cid, }, }; @@ -70,7 +70,7 @@ static void test_stream_bind_only_client(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = opts->peer_cid, }, }; @@ -112,7 +112,7 @@ static void test_stream_bind_only_server(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = VMADDR_CID_ANY, }, }; @@ -138,7 +138,7 @@ static void test_stream_client_close_client(const struct test_opts *opts) { int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -152,7 +152,7 @@ static void test_stream_client_close_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -173,7 +173,7 @@ static void test_stream_server_close_client(const struct test_opts *opts) { int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -194,7 +194,7 @@ static void test_stream_server_close_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -215,7 +215,7 @@ static void test_stream_multiconn_client(const struct test_opts *opts) int i; for (i = 0; i < MULTICONN_NFDS; i++) { - fds[i] = vsock_stream_connect(opts->peer_cid, 1234); + fds[i] = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fds[i] < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -239,7 +239,7 @@ static void test_stream_multiconn_server(const struct test_opts *opts) int i; for (i = 0; i < MULTICONN_NFDS; i++) { - fds[i] = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fds[i] = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fds[i] < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -267,9 +267,9 @@ static void test_msg_peek_client(const struct test_opts *opts, int i; if (seqpacket) - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); else - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); @@ -295,9 +295,9 @@ static void test_msg_peek_server(const struct test_opts *opts, int fd; if (seqpacket) - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); else - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); @@ -363,7 +363,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) int msg_count; int fd; - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -434,7 +434,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) struct msghdr msg = {0}; struct iovec iov = {0}; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -505,7 +505,7 @@ static void test_seqpacket_msg_trunc_client(const struct test_opts *opts) int fd; char buf[MESSAGE_TRUNC_SZ]; - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -524,7 +524,7 @@ static void test_seqpacket_msg_trunc_server(const struct test_opts *opts) struct msghdr msg = {0}; struct iovec iov = {0}; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -575,7 +575,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) time_t read_enter_ns; time_t read_overhead_ns; - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -620,7 +620,7 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) { int fd; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -639,7 +639,7 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts) len = sizeof(sock_buf_size); - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -671,7 +671,7 @@ static void test_seqpacket_bigmsg_server(const struct test_opts *opts) { int fd; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -692,7 +692,7 @@ static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts *opt unsigned char *buf2; int buf_size = getpagesize() * 3; - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -732,7 +732,7 @@ static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opt int flags = MAP_PRIVATE | MAP_ANONYMOUS; int i; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -808,7 +808,7 @@ static void test_stream_poll_rcvlowat_server(const struct test_opts *opts) int fd; int i; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -839,7 +839,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) short poll_flags; int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -906,9 +906,9 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream) int fd; if (stream) - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); else - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); @@ -941,9 +941,9 @@ static void test_inv_buf_server(const struct test_opts *opts, bool stream) int fd; if (stream) - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); else - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); @@ -986,7 +986,7 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts) { int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -1015,7 +1015,7 @@ static void test_stream_virtio_skb_merge_server(const struct test_opts *opts) unsigned char buf[64]; int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -1108,7 +1108,7 @@ static void test_stream_shutwr_client(const struct test_opts *opts) sigaction(SIGPIPE, &act, NULL); - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -1130,7 +1130,7 @@ static void test_stream_shutwr_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -1151,7 +1151,7 @@ static void test_stream_shutrd_client(const struct test_opts *opts) sigaction(SIGPIPE, &act, NULL); - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -1170,7 +1170,7 @@ static void test_stream_shutrd_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -1193,7 +1193,7 @@ static void test_double_bind_connect_server(const struct test_opts *opts) struct sockaddr_vm sa_client; socklen_t socklen_client = sizeof(sa_client); - listen_fd = vsock_stream_listen(VMADDR_CID_ANY, 1234); + listen_fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); for (i = 0; i < 2; i++) { control_writeln("LISTENING"); @@ -1226,7 +1226,13 @@ static void test_double_bind_connect_client(const struct test_opts *opts) /* Wait until server is ready to accept a new connection */ control_expectln("LISTENING"); - client_fd = vsock_bind_connect(opts->peer_cid, 1234, 4321, SOCK_STREAM); + /* We use 'peer_port + 1' as "some" port for the 'bind()' + * call. It is safe for overflow, but must be considered, + * when running multiple test applications simultaneously + * where 'peer-port' argument differs by 1. + */ + client_fd = vsock_bind_connect(opts->peer_cid, opts->peer_port, + opts->peer_port + 1, SOCK_STREAM); close(client_fd); } @@ -1246,7 +1252,7 @@ static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opt void *buf; int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -1282,7 +1288,7 @@ static void test_stream_credit_update_test(const struct test_opts *opts, void *buf; int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -1542,6 +1548,11 @@ static const struct option longopts[] = { .has_arg = required_argument, .val = 'p', }, + { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, { .name = "list", .has_arg = no_argument, @@ -1562,7 +1573,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_test [--help] [--control-host=] --control-port= --mode=client|server --peer-cid= [--list] [--skip=]\n" + fprintf(stderr, "Usage: vsock_test [--help] [--control-host=] --control-port= --mode=client|server --peer-cid= [--peer-port=] [--list] [--skip=]\n" "\n" " Server: vsock_test --control-port=1234 --mode=server --peer-cid=3\n" " Client: vsock_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" @@ -1577,6 +1588,9 @@ static void usage(void) "connect to.\n" "\n" "The CID of the other side must be given with --peer-cid=.\n" + "During the test, two AF_VSOCK ports will be used: the port\n" + "specified with --peer-port= (or the default port)\n" + "and the next one.\n" "\n" "Options:\n" " --help This help message\n" @@ -1584,9 +1598,11 @@ static void usage(void) " --control-port Server port to listen on/connect to\n" " --mode client|server Server or client mode\n" " --peer-cid CID of the other side\n" + " --peer-port AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" " --skip Test ID to skip;\n" - " use multiple --skip options to skip more tests\n" + " use multiple --skip options to skip more tests\n", + DEFAULT_PEER_PORT ); exit(EXIT_FAILURE); } @@ -1598,6 +1614,7 @@ int main(int argc, char **argv) struct test_opts opts = { .mode = TEST_MODE_UNSET, .peer_cid = VMADDR_CID_ANY, + .peer_port = DEFAULT_PEER_PORT, }; srand(time(NULL)); @@ -1626,6 +1643,9 @@ int main(int argc, char **argv) case 'p': opts.peer_cid = parse_cid(optarg); break; + case 'q': + opts.peer_port = parse_port(optarg); + break; case 'P': control_port = optarg; break; diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c index a16ff76484e6..04c376b6937f 100644 --- a/tools/testing/vsock/vsock_test_zerocopy.c +++ b/tools/testing/vsock/vsock_test_zerocopy.c @@ -152,9 +152,9 @@ static void test_client(const struct test_opts *opts, int fd; if (sock_seqpacket) - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); else - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); @@ -248,9 +248,9 @@ static void test_server(const struct test_opts *opts, int fd; if (sock_seqpacket) - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); else - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); @@ -323,7 +323,7 @@ void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts) ssize_t res; int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -347,7 +347,7 @@ void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c index d976d35f0ba9..6c3e6f70c457 100644 --- a/tools/testing/vsock/vsock_uring_test.c +++ b/tools/testing/vsock/vsock_uring_test.c @@ -66,7 +66,7 @@ static void vsock_io_uring_client(const struct test_opts *opts, struct msghdr msg; int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -120,7 +120,7 @@ static void vsock_io_uring_server(const struct test_opts *opts, void *data; int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -247,6 +247,11 @@ static const struct option longopts[] = { .has_arg = required_argument, .val = 'p', }, + { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, { .name = "help", .has_arg = no_argument, @@ -257,7 +262,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=] --control-port= --mode=client|server --peer-cid=\n" + fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=] --control-port= --mode=client|server --peer-cid= [--peer-port=]\n" "\n" " Server: vsock_uring_test --control-port=1234 --mode=server --peer-cid=3\n" " Client: vsock_uring_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" @@ -271,6 +276,8 @@ static void usage(void) " --control-port Server port to listen on/connect to\n" " --mode client|server Server or client mode\n" " --peer-cid CID of the other side\n" + " --peer-port AF_VSOCK port used for the test [default: %d]\n", + DEFAULT_PEER_PORT ); exit(EXIT_FAILURE); } @@ -282,6 +289,7 @@ int main(int argc, char **argv) struct test_opts opts = { .mode = TEST_MODE_UNSET, .peer_cid = VMADDR_CID_ANY, + .peer_port = DEFAULT_PEER_PORT, }; init_signals(); @@ -309,6 +317,9 @@ int main(int argc, char **argv) case 'p': opts.peer_cid = parse_cid(optarg); break; + case 'q': + opts.peer_port = parse_port(optarg); + break; case 'P': control_port = optarg; break; -- cgit v1.2.3 From 14a12e6c0b7f03b08573f972789cd1a499d473f0 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 24 Jan 2024 15:19:29 -0300 Subject: selftests: tc-testing: add missing netfilter config On a default config + tc-testing config build, tdc will miss all the netfilter related tests because it's missing: CONFIG_NETFILTER=y Signed-off-by: Pedro Tammela Reviewed-by: Davide Caratti Acked-by: Jamal Hadi Salim Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/20240124181933.75724-2-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index c60acba951c2..db176fe7d0c3 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -8,6 +8,7 @@ CONFIG_VETH=y # # Core Netfilter Configuration # +CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_MARK=y -- cgit v1.2.3 From 4f4d3841214056b3a35c030ceaa52092c3ed7fb5 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 24 Jan 2024 15:19:30 -0300 Subject: selftests: tc-testing: check if 'jq' is available in taprio tests If 'jq' is not available the taprio tests might enter an infinite loop, use the "dependsOn" feature from tdc to check if jq is present. If it's not the test is skipped. Suggested-by: Davide Caratti Signed-off-by: Pedro Tammela Reviewed-by: Davide Caratti Acked-by: Jamal Hadi Salim Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/20240124181933.75724-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index 2d603ef2e375..12da0a939e3e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -167,6 +167,7 @@ "plugins": { "requires": "nsPlugin" }, + "dependsOn": "echo '' | jq", "setup": [ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI", @@ -192,6 +193,7 @@ "plugins": { "requires": "nsPlugin" }, + "dependsOn": "echo '' | jq", "setup": [ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2", -- cgit v1.2.3 From 3007d8712c9b0c8f5f72c2e8b72bf71496f52196 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 24 Jan 2024 15:19:31 -0300 Subject: selftests: tc-testing: adjust fq test to latest iproute2 Adjust the fq verify regex to the latest iproute2 Signed-off-by: Pedro Tammela Reviewed-by: Davide Caratti Acked-by: Jamal Hadi Salim Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/20240124181933.75724-4-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json index be293e7c6d18..3a537b2ec4c9 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json @@ -77,7 +77,7 @@ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq quantum 9000", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p buckets.*orphan_mask 1023 quantum 9000b", + "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*quantum 9000b", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" -- cgit v1.2.3 From d17d0e333707a8437eb37b35945d233ddc431de3 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 24 Jan 2024 15:19:32 -0300 Subject: selftests: tc-testing: enable all tdc tests For the longest time tdc ran only actions and qdiscs tests. It's time to enable all the remaining tests so every user visible piece of TC is tested by the downstream CIs. Signed-off-by: Pedro Tammela Reviewed-by: Davide Caratti Acked-by: Jamal Hadi Salim Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/20240124181933.75724-5-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index c53ede8b730d..cddff1772e10 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -63,5 +63,4 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql -./tdc.py -J`nproc` -c actions -./tdc.py -J`nproc` -c qdisc +./tdc.py -J`nproc` -- cgit v1.2.3 From 8981a85e1ba792f1de035cf98ea3fd8592d7ffbd Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 24 Jan 2024 15:19:33 -0300 Subject: selftests: tc-testing: return fail if a test fails in setup/teardown As of today tests throwing exceptions in setup/teardown phase are treated as skipped but they should really be failures. Signed-off-by: Pedro Tammela Reviewed-by: Davide Caratti Acked-by: Jamal Hadi Salim Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/20240124181933.75724-6-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index caeacc691587..ee349187636f 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -541,7 +541,7 @@ def test_runner(pm, args, filtered_tests): message = pmtf.message output = pmtf.output res = TestResult(tidx['id'], tidx['name']) - res.set_result(ResultState.skip) + res.set_result(ResultState.fail) res.set_errormsg(pmtf.message) res.set_failmsg(pmtf.output) tsr.add_resultdata(res) -- cgit v1.2.3 From 767ec326f98546a1c7ab556cd379cd625332b5e1 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Wed, 24 Jan 2024 22:32:55 +0300 Subject: vsock/test: print type for SOCK_SEQPACKET SOCK_SEQPACKET is supported for virtio transport, so do not interpret such type of socket as unknown. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20240124193255.3417803-1-avkrasnov@salutedevices.com Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_diag_test.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c index 9d61b1f1c4c3..081e045f4696 100644 --- a/tools/testing/vsock/vsock_diag_test.c +++ b/tools/testing/vsock/vsock_diag_test.c @@ -39,6 +39,8 @@ static const char *sock_type_str(int type) return "DGRAM"; case SOCK_STREAM: return "STREAM"; + case SOCK_SEQPACKET: + return "SEQPACKET"; default: return "INVALID TYPE"; } -- cgit v1.2.3 From ad9b701aed48b0a682f56b4bee03731d9d1834bd Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Jan 2024 14:41:15 -0700 Subject: selftest: Update PATH for nettest in fcnal-test Allow fcnal-test.sh to be run from top level directory in the kernel repo as well as from tools/testing/selftests/net by setting the PATH to find the in-tree nettest. Signed-off-by: David Ahern Link: https://lore.kernel.org/r/20240124214117.24687-2-dsahern@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 0d4f252427e2..3d69fac6bcc0 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -38,6 +38,9 @@ # server / client nomenclature relative to ns-A source lib.sh + +PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH + VERBOSE=0 NSA_DEV=eth1 -- cgit v1.2.3 From 79bf0d4a07d4af8fd646d07b70b11abd47c41083 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Jan 2024 14:41:16 -0700 Subject: selftest: Fix set of ping_group_range in fcnal-test ping_group_range sysctl has a compound value which does not go through the various function layers in tact. Create a helper function to bypass the layers and correctly set the value. Signed-off-by: David Ahern Link: https://lore.kernel.org/r/20240124214117.24687-3-dsahern@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 3d69fac6bcc0..f590b0fb740e 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -190,6 +190,15 @@ kill_procs() sleep 1 } +set_ping_group() +{ + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'" + fi + + ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647' +} + do_run_cmd() { local cmd="$*" @@ -838,14 +847,14 @@ ipv4_ping() set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null ipv4_ping_novrf setup - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv4_ping_novrf log_subsection "With VRF" setup "yes" ipv4_ping_vrf setup "yes" - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv4_ping_vrf } @@ -2056,12 +2065,12 @@ ipv4_addr_bind() log_subsection "No VRF" setup - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv4_addr_bind_novrf log_subsection "With VRF" setup "yes" - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv4_addr_bind_vrf } @@ -2524,14 +2533,14 @@ ipv6_ping() setup ipv6_ping_novrf setup - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv6_ping_novrf log_subsection "With VRF" setup "yes" ipv6_ping_vrf setup "yes" - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv6_ping_vrf } -- cgit v1.2.3 From 70863c902d76f8837652539698bc261c763a2869 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Jan 2024 14:41:17 -0700 Subject: selftest: Show expected and actual return codes for test failures in fcnal-test Capture expected and actual return codes for a test that fails in the fcnal-test suite. Signed-off-by: David Ahern Link: https://lore.kernel.org/r/20240124214117.24687-4-dsahern@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index f590b0fb740e..d7cfb7c2b427 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -109,6 +109,7 @@ log_test() else nfail=$((nfail+1)) printf "TEST: %-70s [FAIL]\n" "${msg}" + echo " expected rc $expected; actual rc $rc" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" -- cgit v1.2.3 From 28b3df1fe6ba2cb439ba109f095aa841fef3a54f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Jan 2024 10:34:17 +0200 Subject: kunit: add wireless unit tests Add cfg80211, mac80211 and iwlwifi to the all_tests config so that the unit tests (enabled by KUNIT_ALL_TESTS) will be run by default. Signed-off-by: Johannes Berg --- tools/testing/kunit/configs/all_tests.config | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index 3bf506d4a63c..12da4c493867 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -27,6 +27,11 @@ CONFIG_MCTP=y CONFIG_INET=y CONFIG_MPTCP=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_WLAN_VENDOR_INTEL=y +CONFIG_IWLWIFI=y + CONFIG_DAMON=y CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y -- cgit v1.2.3 From fa7178b0f12e55a4f2d4906df3f25d6d4f88d962 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 26 Jan 2024 09:57:36 +0800 Subject: selftests/bpf: Add missing line break in test_verifier There are no break lines in the test log for test_verifier #106 ~ #111 if jit is disabled, add the missing line break at the end of printf() to fix it. Without this patch: [root@linux bpf]# echo 0 > /proc/sys/net/core/bpf_jit_enable [root@linux bpf]# ./test_verifier 106 #106/p inline simple bpf_loop call SKIP (requires BPF JIT)Summary: 0 PASSED, 1 SKIPPED, 0 FAILED With this patch: [root@linux bpf]# echo 0 > /proc/sys/net/core/bpf_jit_enable [root@linux bpf]# ./test_verifier 106 #106/p inline simple bpf_loop call SKIP (requires BPF JIT) Summary: 0 PASSED, 1 SKIPPED, 0 FAILED Fixes: 0b50478fd877 ("selftests/bpf: Skip callback tests if jit is disabled in test_verifier") Signed-off-by: Tiezhu Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240126015736.655-1-yangtiezhu@loongson.cn --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e1a1dfe8d7fa..df04bda1c927 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1527,7 +1527,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int i, err; if ((test->flags & F_NEEDS_JIT_ENABLED) && jit_disabled) { - printf("SKIP (requires BPF JIT)"); + printf("SKIP (requires BPF JIT)\n"); skips++; sched_yield(); return; -- cgit v1.2.3 From 29788f39a4171dd48a6d19eb78cf2ab168c4349a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 29 Jan 2024 11:33:26 -0300 Subject: bpftool: Be more portable by using POSIX's basename() musl libc had the basename() prototype in string.h, but this is a glibc-ism, now they removed the _GNU_SOURCE bits in their devel distro, Alpine Linux edge: https://git.musl-libc.org/cgit/musl/commit/?id=725e17ed6dff4d0cd22487bb64470881e86a92e7 So lets use the POSIX version, the whole rationale is spelled out at: https://gitlab.alpinelinux.org/alpine/aports/-/issues/15643 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Acked-by: Quentin Monnet Link: https://lore.kernel.org/lkml/ZZhsPs00TI75RdAr@kernel.org Link: https://lore.kernel.org/bpf/Zbe3NuOgaupvUcpF@kernel.org --- tools/bpf/bpftool/gen.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index ee3ce2b8000d..a9334c57e859 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -56,9 +57,11 @@ static bool str_has_suffix(const char *str, const char *suffix) static void get_obj_name(char *name, const char *file) { - /* Using basename() GNU version which doesn't modify arg. */ - strncpy(name, basename(file), MAX_OBJ_NAME_LEN - 1); - name[MAX_OBJ_NAME_LEN - 1] = '\0'; + char file_copy[PATH_MAX]; + + /* Using basename() POSIX version to be more portable. */ + strncpy(file_copy, file, PATH_MAX - 1)[PATH_MAX - 1] = '\0'; + strncpy(name, basename(file_copy), MAX_OBJ_NAME_LEN - 1)[MAX_OBJ_NAME_LEN - 1] = '\0'; if (str_has_suffix(name, ".o")) name[strlen(name) - 2] = '\0'; sanitize_identifier(name); -- cgit v1.2.3 From ad57654053805bf9a62602aaec74cc78edb6f235 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 26 Jan 2024 14:09:44 -0800 Subject: libbpf: Fix faccessat() usage on Android Android implementation of libc errors out with -EINVAL in faccessat() if passed AT_EACCESS ([0]), this leads to ridiculous issue with libbpf refusing to load /sys/kernel/btf/vmlinux on Androids ([1]). Fix by detecting Android and redefining AT_EACCESS to 0, it's equivalent on Android. [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50 [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250 Fixes: 6a4ab8869d0b ("libbpf: Fix the case of running as non-root with capabilities") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240126220944.2497665-1-andrii@kernel.org --- tools/lib/bpf/libbpf_internal.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 930cc9616527..5b30f3b67a02 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -19,6 +19,20 @@ #include #include "relo_core.h" +/* Android's libc doesn't support AT_EACCESS in faccessat() implementation + * ([0]), and just returns -EINVAL even if file exists and is accessible. + * See [1] for issues caused by this. + * + * So just redefine it to 0 on Android. + * + * [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50 + * [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250 + */ +#ifdef __ANDROID__ +#undef AT_EACCESS +#define AT_EACCESS 0 +#endif + /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -- cgit v1.2.3 From f149d03f450b4afab11f5e1ebd8fdfaf7eb24a28 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 28 Jan 2024 19:43:57 +0800 Subject: selftests/bpf: Drop return in bpf_testmod_exit bpf_testmod_exit() does not need to have a return value (given the void), so this patch drops this useless 'return' in it. Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/5765b287ea088f0c820f2a834faf9b20fb2f8215.1706442113.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 8befaf17d454..6f163a0f1c94 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -619,7 +619,7 @@ static void bpf_testmod_exit(void) while (refcount_read(&prog_test_struct.cnt) > 1) msleep(20); - return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } module_init(bpf_testmod_init); -- cgit v1.2.3 From 0e6d0a9d2348b64df74239e859fa9d6e86cdcdef Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 25 Jan 2024 12:55:04 -0800 Subject: libbpf: integrate __arg_ctx feature detector into kernel_supports() Now that feature detection code is in bpf-next tree, integrate __arg_ctx kernel-side support into kernel_supports() framework. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240125205510.3642094-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/features.c | 58 ++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.c | 65 +---------------------------------------- tools/lib/bpf/libbpf_internal.h | 2 ++ 3 files changed, 61 insertions(+), 64 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 5a5c766bf615..6b0738ad7063 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -407,6 +407,61 @@ static int probe_kern_btf_enum64(int token_fd) strs, sizeof(strs), token_fd)); } +static int probe_kern_arg_ctx_tag(int token_fd) +{ + static const char strs[] = "\0a\0b\0arg:ctx\0"; + const __u32 types[] = { + /* [1] INT */ + BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4), + /* [2] PTR -> VOID */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + /* [3] FUNC_PROTO `int(void *a)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(1 /* "a" */, 2), + /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */ + BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3), + /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(3 /* "b" */, 2), + /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */ + BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5), + /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */ + BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0), + }; + const struct bpf_insn insns[] = { + /* main prog */ + BPF_CALL_REL(+1), + BPF_EXIT_INSN(), + /* global subprog */ + BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */ + BPF_EXIT_INSN(), + }; + const struct bpf_func_info_min func_infos[] = { + { 0, 4 }, /* main prog -> FUNC 'a' */ + { 2, 6 }, /* subprog -> FUNC 'b' */ + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns); + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); + if (btf_fd < 0) + return 0; + + opts.prog_btf_fd = btf_fd; + opts.func_info = &func_infos; + opts.func_info_cnt = ARRAY_SIZE(func_infos); + opts.func_info_rec_size = sizeof(func_infos[0]); + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx", + "GPL", insns, insn_cnt, &opts); + close(btf_fd); + + return probe_fd(prog_fd); +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -476,6 +531,9 @@ static struct kern_feature_desc { [FEAT_UPROBE_MULTI_LINK] = { "BPF multi-uprobe link support", probe_uprobe_multi_link, }, + [FEAT_ARG_CTX_TAG] = { + "kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fa7094ff3e66..41ab7a21f868 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6462,69 +6462,6 @@ static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_progr return fn_id; } -static int probe_kern_arg_ctx_tag(void) -{ - /* To minimize merge conflicts with BPF token series that refactors - * feature detection code a lot, we don't integrate - * probe_kern_arg_ctx_tag() into kernel_supports() feature-detection - * framework yet, doing our own caching internally. - * This will be cleaned up a bit later when bpf/bpf-next trees settle. - */ - static int cached_result = -1; - static const char strs[] = "\0a\0b\0arg:ctx\0"; - const __u32 types[] = { - /* [1] INT */ - BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4), - /* [2] PTR -> VOID */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), - /* [3] FUNC_PROTO `int(void *a)` */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), - BTF_PARAM_ENC(1 /* "a" */, 2), - /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */ - BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3), - /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), - BTF_PARAM_ENC(3 /* "b" */, 2), - /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */ - BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5), - /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */ - BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0), - }; - const struct bpf_insn insns[] = { - /* main prog */ - BPF_CALL_REL(+1), - BPF_EXIT_INSN(), - /* global subprog */ - BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */ - BPF_EXIT_INSN(), - }; - const struct bpf_func_info_min func_infos[] = { - { 0, 4 }, /* main prog -> FUNC 'a' */ - { 2, 6 }, /* subprog -> FUNC 'b' */ - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns); - - if (cached_result >= 0) - return cached_result; - - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), 0); - if (btf_fd < 0) - return 0; - - opts.prog_btf_fd = btf_fd; - opts.func_info = &func_infos; - opts.func_info_cnt = ARRAY_SIZE(func_infos); - opts.func_info_rec_size = sizeof(func_infos[0]); - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx", - "GPL", insns, insn_cnt, &opts); - close(btf_fd); - - cached_result = probe_fd(prog_fd); - return cached_result; -} - /* Check if main program or global subprog's function prototype has `arg:ctx` * argument tags, and, if necessary, substitute correct type to match what BPF * verifier would expect, taking into account specific program type. This @@ -6549,7 +6486,7 @@ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_progra return 0; /* don't do any fix ups if kernel natively supports __arg_ctx */ - if (probe_kern_arg_ctx_tag() > 0) + if (kernel_supports(obj, FEAT_ARG_CTX_TAG)) return 0; /* some BPF program types just don't have named context structs, so diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 5b30f3b67a02..ad936ac5e639 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -372,6 +372,8 @@ enum kern_feature_id { FEAT_SYSCALL_WRAPPER, /* BPF multi-uprobe link support */ FEAT_UPROBE_MULTI_LINK, + /* Kernel supports arg:ctx tag (__arg_ctx) for global subprogs natively */ + FEAT_ARG_CTX_TAG, __FEAT_CNT, }; -- cgit v1.2.3 From 9eea8fafe33eb70868f6ace2fc1e17c4ff5539c3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 25 Jan 2024 12:55:05 -0800 Subject: libbpf: fix __arg_ctx type enforcement for perf_event programs Adjust PERF_EVENT type enforcement around __arg_ctx to match exactly what kernel is doing. Fixes: 76ec90a996e3 ("libbpf: warn on unexpected __arg_ctx type when rewriting BTF") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240125205510.3642094-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 41ab7a21f868..db65ea59a05a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -6339,6 +6340,14 @@ static struct { /* all other program types don't have "named" context structs */ }; +/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef, + * for below __builtin_types_compatible_p() checks; + * with this approach we don't need any extra arch-specific #ifdef guards + */ +struct pt_regs; +struct user_pt_regs; +struct user_regs_struct; + static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, const char *subprog_name, int arg_idx, int arg_type_id, const char *ctx_name) @@ -6379,11 +6388,21 @@ static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_pro /* special cases */ switch (prog->type) { case BPF_PROG_TYPE_KPROBE: - case BPF_PROG_TYPE_PERF_EVENT: /* `struct pt_regs *` is expected, but we need to fix up */ if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) return true; break; + case BPF_PROG_TYPE_PERF_EVENT: + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && + btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return 0; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && + btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) + return 0; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && + btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) + return 0; + break; case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: /* allow u64* as ctx */ -- cgit v1.2.3 From fbaf59a9f513416c05f4b4e87d26898d3dccd1cc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 26 Jan 2024 18:50:17 -0800 Subject: selftests/bpf: Remove "&>" usage in the selftests In s390, CI reported that the sock_iter_batch selftest hits this error very often: 2024-01-26T16:56:49.3091804Z Bind /proc/self/ns/net -> /run/netns/sock_iter_batch_netns failed: No such file or directory 2024-01-26T16:56:49.3149524Z Cannot remove namespace file "/run/netns/sock_iter_batch_netns": No such file or directory 2024-01-26T16:56:49.3772213Z test_sock_iter_batch:FAIL:ip netns add sock_iter_batch_netns unexpected error: 256 (errno 0) It happens very often in s390 but Manu also noticed it happens very sparsely in other arch also. It turns out the default dash shell does not recognize "&>" as a redirection operator, so the command went to the background. In the sock_iter_batch selftest, the "ip netns delete" went into background and then race with the following "ip netns add" command. This patch replaces the "&> /dev/null" usage with ">/dev/null 2>&1" and does this redirection in the SYS_NOFAIL macro instead of doing it individually by its caller. The SYS_NOFAIL callers do not care about failure, so it is no harm to do this redirection even if some of the existing callers do not redirect to /dev/null now. It touches different test files, so I skipped the Fixes tags in this patch. Some of the changed tests do not use "&>" but they use the SYS_NOFAIL, so these tests are also changed to avoid doing its own redirection because SYS_NOFAIL does it internally now. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240127025017.950825-1-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/decap_sanity.c | 2 +- tools/testing/selftests/bpf/prog_tests/fib_lookup.c | 2 +- .../testing/selftests/bpf/prog_tests/ip_check_defrag.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/lwt_redirect.c | 2 +- tools/testing/selftests/bpf/prog_tests/lwt_reroute.c | 2 +- tools/testing/selftests/bpf/prog_tests/mptcp.c | 2 +- tools/testing/selftests/bpf/prog_tests/sock_destroy.c | 2 +- .../testing/selftests/bpf/prog_tests/sock_iter_batch.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/test_tunnel.c | 18 +++++++++--------- tools/testing/selftests/bpf/test_progs.h | 7 ++++++- 10 files changed, 25 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c index 5c0ebe6ba866..dcb9e5070cc3 100644 --- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c @@ -72,6 +72,6 @@ fail: bpf_tc_hook_destroy(&qdisc_hook); close_netns(nstoken); } - SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST); decap_sanity__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 4ad4cd69152e..3379df2d4cf2 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -298,6 +298,6 @@ void test_fib_lookup(void) fail: if (nstoken) close_netns(nstoken); - SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST); fib_lookup__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c index 57c814f5f6a7..8dd2af9081f4 100644 --- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c @@ -59,9 +59,9 @@ static int setup_topology(bool ipv6) /* Wait for up to 5s for links to come up */ for (i = 0; i < 5; ++i) { if (ipv6) - up = !system("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6 " &>/dev/null"); + up = !SYS_NOFAIL("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6); else - up = !system("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR " &>/dev/null"); + up = !SYS_NOFAIL("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR); if (up) break; diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c index 59b38569f310..beeb3ac1c361 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c @@ -85,7 +85,7 @@ static void ping_dev(const char *dev, bool is_ingress) snprintf(ip, sizeof(ip), "20.0.0.%d", link_index); /* We won't get a reply. Don't fail here */ - SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", + SYS_NOFAIL("ping %s -c1 -W1 -s %d", ip, ICMP_PAYLOAD_SIZE); } diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c index f4bb2d5fcae0..5610bc76928d 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c @@ -63,7 +63,7 @@ static void ping_once(const char *ip) { /* We won't get a reply. Don't fail here */ - SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", + SYS_NOFAIL("ping %s -c1 -W1 -s %d", ip, ICMP_PAYLOAD_SIZE); } diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 7c0be7cf550b..8f8d792307c1 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -79,7 +79,7 @@ static void cleanup_netns(struct nstoken *nstoken) if (nstoken) close_netns(nstoken); - SYS_NOFAIL("ip netns del %s &> /dev/null", NS_TEST); + SYS_NOFAIL("ip netns del %s", NS_TEST); } static int verify_tsk(int map_fd, int client_fd) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c index b0583309a94e..9c11938fe597 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c @@ -214,7 +214,7 @@ void test_sock_destroy(void) cleanup: if (nstoken) close_netns(nstoken); - SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + SYS_NOFAIL("ip netns del " TEST_NS); if (cgroup_fd >= 0) close(cgroup_fd); sock_destroy_prog__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index 0c365f36c73b..d56e18b25528 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -112,7 +112,7 @@ void test_sock_iter_batch(void) { struct nstoken *nstoken = NULL; - SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + SYS_NOFAIL("ip netns del " TEST_NS); SYS(done, "ip netns add %s", TEST_NS); SYS(done, "ip -net %s link set dev lo up", TEST_NS); @@ -131,5 +131,5 @@ void test_sock_iter_batch(void) close_netns(nstoken); done: - SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + SYS_NOFAIL("ip netns del " TEST_NS); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 2b3c6dd66259..5f1fb0a2ea56 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -118,9 +118,9 @@ fail: static void cleanup(void) { SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0"); - SYS_NOFAIL("ip link del veth1 2> /dev/null"); - SYS_NOFAIL("ip link del %s 2> /dev/null", VXLAN_TUNL_DEV1); - SYS_NOFAIL("ip link del %s 2> /dev/null", IP6VXLAN_TUNL_DEV1); + SYS_NOFAIL("ip link del veth1"); + SYS_NOFAIL("ip link del %s", VXLAN_TUNL_DEV1); + SYS_NOFAIL("ip link del %s", IP6VXLAN_TUNL_DEV1); } static int add_vxlan_tunnel(void) @@ -265,9 +265,9 @@ fail: static void delete_ipip_tunnel(void) { SYS_NOFAIL("ip -n at_ns0 link delete dev %s", IPIP_TUNL_DEV0); - SYS_NOFAIL("ip -n at_ns0 fou del port 5555 2> /dev/null"); + SYS_NOFAIL("ip -n at_ns0 fou del port 5555"); SYS_NOFAIL("ip link delete dev %s", IPIP_TUNL_DEV1); - SYS_NOFAIL("ip fou del port 5555 2> /dev/null"); + SYS_NOFAIL("ip fou del port 5555"); } static int add_xfrm_tunnel(void) @@ -346,13 +346,13 @@ fail: static void delete_xfrm_tunnel(void) { - SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null", + SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32", IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0); - SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null", + SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32", IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1); - SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d", IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT); - SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d", IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 2f9f6f250f17..80df51244886 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -385,10 +385,15 @@ int test__join_cgroup(const char *path); goto goto_label; \ }) +#define ALL_TO_DEV_NULL " >/dev/null 2>&1" + #define SYS_NOFAIL(fmt, ...) \ ({ \ char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + int n; \ + n = snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (n < sizeof(cmd) && sizeof(cmd) - n >= sizeof(ALL_TO_DEV_NULL)) \ + strcat(cmd, ALL_TO_DEV_NULL); \ system(cmd); \ }) -- cgit v1.2.3 From 646751d523587cfd7ebcf1733298ecd470879eda Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Sat, 27 Jan 2024 11:07:02 +0100 Subject: bpf: Use -Wno-error in certain tests when building with GCC Certain BPF selftests contain code that, albeit being legal C, trigger warnings in GCC that cannot be disabled. This is the case for example for the tests progs/btf_dump_test_case_bitfields.c progs/btf_dump_test_case_namespacing.c progs/btf_dump_test_case_packing.c progs/btf_dump_test_case_padding.c progs/btf_dump_test_case_syntax.c which contain struct type declarations inside function parameter lists. This is problematic, because: - The BPF selftests are built with -Werror. - The Clang and GCC compilers sometimes differ when it comes to handle warnings. in the handling of warnings. One compiler may emit warnings for code that the other compiles compiles silently, and one compiler may offer the possibility to disable certain warnings, while the other doesn't. In order to overcome this problem, this patch modifies the tools/testing/selftests/bpf/Makefile in order to: 1. Enable the possibility of specifing per-source-file extra CFLAGS. This is done by defining a make variable like: -CFLAGS := And then modifying the proper Make rule in order to use these flags when compiling . 2. Use the mechanism above to add -Wno-error to CFLAGS for the following selftests: progs/btf_dump_test_case_bitfields.c progs/btf_dump_test_case_namespacing.c progs/btf_dump_test_case_packing.c progs/btf_dump_test_case_padding.c progs/btf_dump_test_case_syntax.c Note the corresponding -CFLAGS variables for these files are defined only if the selftests are being built with GCC. Note that, while compiler pragmas can generally be used to disable particular warnings per file, this 1) is only possible for warning that actually can be disabled in the command line, i.e. that have -Wno-FOO options, and 2) doesn't apply to -Wno-error. Tested in bpf-next master branch. No regressions. Signed-off-by: Jose E. Marchesi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240127100702.21549-1-jose.marchesi@oracle.com --- tools/testing/selftests/bpf/Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fd15017ed3b1..1a3654bcb5dd 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -64,6 +64,15 @@ TEST_INST_SUBDIRS := no_alu32 ifneq ($(BPF_GCC),) TEST_GEN_PROGS += test_progs-bpf_gcc TEST_INST_SUBDIRS += bpf_gcc + +# The following tests contain C code that, although technically legal, +# triggers GCC warnings that cannot be disabled: declaration of +# anonymous struct types in function parameter lists. +progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error endif ifneq ($(CLANG_CPUV4),) @@ -504,7 +513,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \ $(wildcard $(BPFDIR)/*.bpf.h) \ | $(TRUNNER_OUTPUT) $$(BPFOBJ) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ - $(TRUNNER_BPF_CFLAGS)) + $(TRUNNER_BPF_CFLAGS) \ + $$($$<-CFLAGS)) $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) -- cgit v1.2.3 From f2e4040c82d3fddd11fa7c64e8f810e6f9cb7460 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 25 Jan 2024 15:18:40 -0800 Subject: libbpf: Add some details for BTF parsing failures As CONFIG_DEBUG_INFO_BTF is default off the existing "failed to find valid kernel BTF" message makes diagnosing the kernel build issue somewhat cryptic. Add a little more detail with the hope of helping users. Before: ``` libbpf: failed to find valid kernel BTF libbpf: Error loading vmlinux BTF: -3 ``` After not accessible: ``` libbpf: kernel BTF is missing at '/sys/kernel/btf/vmlinux', was CONFIG_DEBUG_INFO_BTF enabled? libbpf: failed to find valid kernel BTF libbpf: Error loading vmlinux BTF: -3 ``` After not readable: ``` libbpf: failed to read kernel BTF from (/sys/kernel/btf/vmlinux): -1 ``` Closes: https://lore.kernel.org/bpf/CAP-5=fU+DN_+Y=Y4gtELUsJxKNDDCOvJzPHvjUVaUoeFAzNnig@mail.gmail.com/ Signed-off-by: Ian Rogers Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240125231840.1647951-1-irogers@google.com --- tools/lib/bpf/btf.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ec92b87cae01..95db88b36cf3 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -4932,10 +4932,9 @@ static int btf_dedup_remap_types(struct btf_dedup *d) */ struct btf *btf__load_vmlinux_btf(void) { + const char *sysfs_btf_path = "/sys/kernel/btf/vmlinux"; + /* fall back locations, trying to find vmlinux on disk */ const char *locations[] = { - /* try canonical vmlinux BTF through sysfs first */ - "/sys/kernel/btf/vmlinux", - /* fall back to trying to find vmlinux on disk otherwise */ "/boot/vmlinux-%1$s", "/lib/modules/%1$s/vmlinux-%1$s", "/lib/modules/%1$s/build/vmlinux", @@ -4949,8 +4948,23 @@ struct btf *btf__load_vmlinux_btf(void) struct btf *btf; int i, err; - uname(&buf); + /* is canonical sysfs location accessible? */ + if (faccessat(AT_FDCWD, sysfs_btf_path, F_OK, AT_EACCESS) < 0) { + pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n", + sysfs_btf_path); + } else { + btf = btf__parse(sysfs_btf_path, NULL); + if (!btf) { + err = -errno; + pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err); + return libbpf_err_ptr(err); + } + pr_debug("loaded kernel BTF from '%s'\n", path); + return btf; + } + /* try fallback locations */ + uname(&buf); for (i = 0; i < ARRAY_SIZE(locations); i++) { snprintf(path, PATH_MAX, locations[i], buf.release); -- cgit v1.2.3 From 4acf4e62cd572b0c806035046b3698f5585ab821 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 26 Jan 2024 17:36:16 +0100 Subject: selftests: forwarding: Add missing config entries The config file contains a partial kernel configuration to be used by `virtme-configkernel --custom'. The presumption is that the config file contains all Kconfig options needed by the selftests from the directory. In net/forwarding/config, many are missing, which manifests as spurious failures when running the selftests, with messages about unknown device types, qdisc kinds or classifier actions. Add the missing configurations. Tested the resulting configuration using virtme-ng as follows: # vng -b -f tools/testing/selftests/net/forwarding/config # vng --user root (within the VM:) # make -C tools/testing/selftests TARGETS=net/forwarding run_tests Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/025abded7ff9cea5874a7fe35dcd3fd41bf5e6ac.1706286755.git.petrm@nvidia.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/forwarding/config | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 697994a9278b..ba2343514582 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,14 +6,42 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y +CONFIG_DUMMY=m +CONFIG_IPV6=y +CONFIG_IPV6_GRE=m +CONFIG_MACVLAN=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_SAMPLE=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_META=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPIP=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_LOADBALANCE=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_TABLES=m CONFIG_VETH=m CONFIG_NAMESPACES=y CONFIG_NET_NS=y +CONFIG_VXLAN=m +CONFIG_XFRM_USER=m -- cgit v1.2.3 From 27a90b14b93d3b2e1efd10764e456af7e2a42991 Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Tue, 30 Jan 2024 12:03:43 +0100 Subject: bpf: Build type-punning BPF selftests with -fno-strict-aliasing A few BPF selftests perform type punning and they may break strict aliasing rules, which are exploited by both GCC and clang by default while optimizing. This can lead to broken compiled programs. This patch disables strict aliasing for these particular tests, by mean of the -fno-strict-aliasing command line option. This will make sure these tests are optimized properly even if some strict aliasing rule gets violated. After this patch, GCC is able to build all the selftests without warning about potential strict aliasing issue. bpf@vger discussion on strict aliasing and BPF selftests: https://lore.kernel.org/bpf/bae1205a-b6e5-4e46-8e20-520d7c327f7a@linux.dev/T/#t Tested in bpf-next master. No regressions. Signed-off-by: Jose E. Marchesi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/bae1205a-b6e5-4e46-8e20-520d7c327f7a@linux.dev Link: https://lore.kernel.org/bpf/20240130110343.11217-1-jose.marchesi@oracle.com --- tools/testing/selftests/bpf/Makefile | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 1a3654bcb5dd..79253a376fc8 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -41,6 +41,19 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic \ LDFLAGS += $(SAN_LDFLAGS) LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread +# The following tests perform type punning and they may break strict +# aliasing rules, which are exploited by both GCC and clang by default +# while optimizing. This can lead to broken programs. +progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing +progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing +progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing +progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing +progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing +progs/syscall.c-CFLAGS := -fno-strict-aliasing +progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing +progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing +progs/timer_crash.c-CFLAGS := -fno-strict-aliasing + ifneq ($(LLVM),) # Silence some warnings when compiled with clang CFLAGS += -Wno-unused-command-line-argument -- cgit v1.2.3 From 24219056805f3988bf93e494499b2329453fc706 Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Tue, 30 Jan 2024 12:36:24 +0100 Subject: bpf: Move -Wno-compare-distinct-pointer-types to BPF_CFLAGS Clang supports enabling/disabling certain conversion diagnostics via the -W[no-]compare-distinct-pointer-types command line options. Disabling this warning is required by some BPF selftests due to -Werror. Until very recently GCC would emit these warnings unconditionally, which was a problem for gcc-bpf, but we added support for the command-line options to GCC upstream [1]. This patch moves the -Wno-cmopare-distinct-pointer-types from CLANG_CFLAGS to BPF_CFLAGS in selftests/bpf/Makefile so the option is also used in gcc-bpf builds, not just in clang builds. Tested in bpf-next master. No regressions. [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627769.html Signed-off-by: Jose E. Marchesi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240130113624.24940-1-jose.marchesi@oracle.com --- tools/testing/selftests/bpf/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 79253a376fc8..a38a3001527c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -404,11 +404,11 @@ endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ - -I$(abspath $(OUTPUT)/../usr/include) + -I$(abspath $(OUTPUT)/../usr/include) \ + -Wno-compare-distinct-pointer-types # TODO: enable me -Wsign-compare -CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types +CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline -- cgit v1.2.3 From d28bb1a86e68a3d523e0acee8281bb904dd7f451 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 29 Jan 2024 16:06:47 -0800 Subject: libbpf: add __arg_trusted and __arg_nullable tag macros Add __arg_trusted to annotate global func args that accept trusted PTR_TO_BTF_ID arguments. Also add __arg_nullable to combine with __arg_trusted (and maybe other tags in the future) to force global subprog itself (i.e., callee) to do NULL checks, as opposed to default non-NULL semantics (and thus caller's responsibility to ensure non-NULL values). Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240130000648.2144827-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf_helpers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 2324cc42b017..79eaa581be98 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -190,6 +190,8 @@ enum libbpf_tristate { #define __arg_ctx __attribute__((btf_decl_tag("arg:ctx"))) #define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) +#define __arg_nullable __attribute((btf_decl_tag("arg:nullable"))) +#define __arg_trusted __attribute((btf_decl_tag("arg:trusted"))) #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b -- cgit v1.2.3 From c381203eadb76d5601fc04b814317e7608af5f5c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 29 Jan 2024 16:06:48 -0800 Subject: selftests/bpf: add trusted global subprog arg tests Add a bunch of test cases validating behavior of __arg_trusted and its combination with __arg_nullable tag. We also validate CO-RE flavor support by kernel for __arg_trusted args. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240130000648.2144827-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_global_ptr_args.c | 156 +++++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index d62c5bf00e71..9c6072a19745 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -28,6 +28,7 @@ #include "verifier_div0.skel.h" #include "verifier_div_overflow.skel.h" #include "verifier_global_subprogs.skel.h" +#include "verifier_global_ptr_args.skel.h" #include "verifier_gotol.skel.h" #include "verifier_helper_access_var_len.skel.h" #include "verifier_helper_packet_access.skel.h" @@ -140,6 +141,7 @@ void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_st void test_verifier_div0(void) { RUN(verifier_div0); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); } +void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); } void test_verifier_gotol(void) { RUN(verifier_gotol); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c new file mode 100644 index 000000000000..484d6262363f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_misc.h" +#include "xdp_metadata.h" +#include "bpf_kfuncs.h" + +extern struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak; +extern void bpf_task_release(struct task_struct *p) __ksym __weak; + +__weak int subprog_trusted_task_nullable(struct task_struct *task __arg_trusted __arg_nullable) +{ + if (!task) + return 0; + return task->pid + task->tgid; +} + +SEC("?kprobe") +__success __log_level(2) +__msg("Validating subprog_trusted_task_nullable() func#1...") +__msg(": R1=trusted_ptr_or_null_task_struct(") +int trusted_task_arg_nullable(void *ctx) +{ + struct task_struct *t = bpf_get_current_task_btf(); + + return subprog_trusted_task_nullable(t) + subprog_trusted_task_nullable(NULL); +} + +__weak int subprog_trusted_task_nonnull(struct task_struct *task __arg_trusted) +{ + return task->pid + task->tgid; +} + +SEC("?kprobe") +__failure __log_level(2) +__msg("R1 type=scalar expected=ptr_, trusted_ptr_, rcu_ptr_") +__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')") +int trusted_task_arg_nonnull_fail1(void *ctx) +{ + return subprog_trusted_task_nonnull(NULL); +} + +SEC("?tp_btf/task_newtask") +__failure __log_level(2) +__msg("R1 type=ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_") +__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')") +int trusted_task_arg_nonnull_fail2(void *ctx) +{ + struct task_struct *t = bpf_get_current_task_btf(); + struct task_struct *nullable; + int res; + + nullable = bpf_task_acquire(t); + + /* should fail, PTR_TO_BTF_ID_OR_NULL */ + res = subprog_trusted_task_nonnull(nullable); + + if (nullable) + bpf_task_release(nullable); + + return res; +} + +SEC("?kprobe") +__success __log_level(2) +__msg("Validating subprog_trusted_task_nonnull() func#1...") +__msg(": R1=trusted_ptr_task_struct(") +int trusted_task_arg_nonnull(void *ctx) +{ + struct task_struct *t = bpf_get_current_task_btf(); + + return subprog_trusted_task_nonnull(t); +} + +struct task_struct___local {} __attribute__((preserve_access_index)); + +__weak int subprog_nullable_task_flavor( + struct task_struct___local *task __arg_trusted __arg_nullable) +{ + char buf[16]; + + if (!task) + return 0; + + return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0); +} + +SEC("?uprobe.s") +__success __log_level(2) +__msg("Validating subprog_nullable_task_flavor() func#1...") +__msg(": R1=trusted_ptr_or_null_task_struct(") +int flavor_ptr_nullable(void *ctx) +{ + struct task_struct___local *t = (void *)bpf_get_current_task_btf(); + + return subprog_nullable_task_flavor(t); +} + +__weak int subprog_nonnull_task_flavor(struct task_struct___local *task __arg_trusted) +{ + char buf[16]; + + return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0); +} + +SEC("?uprobe.s") +__success __log_level(2) +__msg("Validating subprog_nonnull_task_flavor() func#1...") +__msg(": R1=trusted_ptr_task_struct(") +int flavor_ptr_nonnull(void *ctx) +{ + struct task_struct *t = bpf_get_current_task_btf(); + + return subprog_nonnull_task_flavor((void *)t); +} + +__weak int subprog_trusted_destroy(struct task_struct *task __arg_trusted) +{ + bpf_task_release(task); /* should be rejected */ + + return 0; +} + +SEC("?tp_btf/task_newtask") +__failure __log_level(2) +__msg("release kernel function bpf_task_release expects refcounted PTR_TO_BTF_ID") +int BPF_PROG(trusted_destroy_fail, struct task_struct *task, u64 clone_flags) +{ + return subprog_trusted_destroy(task); +} + +__weak int subprog_trusted_acq_rel(struct task_struct *task __arg_trusted) +{ + struct task_struct *owned; + + owned = bpf_task_acquire(task); + if (!owned) + return 0; + + bpf_task_release(owned); /* this one is OK, we acquired it locally */ + + return 0; +} + +SEC("?tp_btf/task_newtask") +__success __log_level(2) +int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags) +{ + return subprog_trusted_acq_rel(task); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 20d59ee55172fdf6072abf871fa62b2070d6383f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 30 Jan 2024 13:20:22 -0800 Subject: libbpf: add bpf_core_cast() macro Add bpf_core_cast() macro that wraps bpf_rdonly_cast() kfunc. It's more ergonomic than kfunc, as it automatically extracts btf_id with bpf_core_type_id_kernel(), and works with type names. It also casts result to (T *) pointer. See the definition of the macro, it's self-explanatory. libbpf declares bpf_rdonly_cast() extern as __weak __ksym and should be safe to not conflict with other possible declarations in user code. But we do have a conflict with current BPF selftests that declare their externs with first argument as `void *obj`, while libbpf opts into more permissive `const void *obj`. This causes conflict, so we fix up BPF selftests uses in the same patch. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240130212023.183765-2-andrii@kernel.org Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/bpf_core_read.h | 13 +++++++++++++ tools/testing/selftests/bpf/bpf_kfuncs.h | 2 +- .../testing/selftests/bpf/progs/sk_storage_omem_uncharge.c | 2 -- tools/testing/selftests/bpf/progs/type_cast.c | 4 +--- 4 files changed, 15 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 5aec301e9585..0d3e88bd7d5f 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -2,6 +2,8 @@ #ifndef __BPF_CORE_READ_H__ #define __BPF_CORE_READ_H__ +#include + /* * enum bpf_field_info_kind is passed as a second argument into * __builtin_preserve_field_info() built-in to get a specific aspect of @@ -292,6 +294,17 @@ enum bpf_enum_value_kind { #define bpf_core_read_user_str(dst, sz, src) \ bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) +extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; + +/* + * Cast provided pointer *ptr* into a pointer to a specified *type* in such + * a way that BPF verifier will become aware of associated kernel-side BTF + * type. This allows to access members of kernel types directly without the + * need to use BPF_CORE_READ() macros. + */ +#define bpf_core_cast(ptr, type) \ + ((typeof(type) *)bpf_rdonly_cast((ptr), bpf_core_type_id_kernel(type))) + #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) #define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 23c24f852f4f..0cd4111f954c 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -63,7 +63,7 @@ extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk, void *bpf_cast_to_kern_ctx(void *) __ksym; -void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; +extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; extern int bpf_get_file_xattr(struct file *file, const char *name, struct bpf_dynptr *value_ptr) __ksym; diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c index 3e745793b27a..934c4e5ada5b 100644 --- a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c +++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c @@ -12,8 +12,6 @@ int cookie_found = 0; __u64 cookie = 0; __u32 omem = 0; -void *bpf_rdonly_cast(void *, __u32) __ksym; - struct { __uint(type, BPF_MAP_TYPE_SK_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC); diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c index a9629ac230fd..98ab35893f51 100644 --- a/tools/testing/selftests/bpf/progs/type_cast.c +++ b/tools/testing/selftests/bpf/progs/type_cast.c @@ -4,6 +4,7 @@ #include #include #include +#include "bpf_kfuncs.h" struct { __uint(type, BPF_MAP_TYPE_TASK_STORAGE); @@ -19,9 +20,6 @@ char name[IFNAMSIZ]; unsigned int inum; unsigned int meta_len, frag0_len, kskb_len, kskb2_len; -void *bpf_cast_to_kern_ctx(void *) __ksym; -void *bpf_rdonly_cast(void *, __u32) __ksym; - SEC("?xdp") int md_xdp(struct xdp_md *ctx) { -- cgit v1.2.3 From ea9d561686fbd0e1ddf05d861d8f2c1ae8291870 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 30 Jan 2024 13:20:23 -0800 Subject: selftests/bpf: convert bpf_rdonly_cast() uses to bpf_core_cast() macro Use more ergonomic bpf_core_cast() macro instead of bpf_rdonly_cast() in selftests code. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240130212023.183765-3-andrii@kernel.org Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/progs/connect_unix_prog.c | 3 +-- tools/testing/selftests/bpf/progs/getpeername_unix_prog.c | 3 +-- tools/testing/selftests/bpf/progs/getsockname_unix_prog.c | 3 +-- tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c | 3 +-- tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c | 3 +-- tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c | 2 +- tools/testing/selftests/bpf/progs/sock_iter_batch.c | 4 ++-- tools/testing/selftests/bpf/progs/type_cast.c | 9 ++++----- 8 files changed, 12 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c index ca8aa2f116b3..2ef0e0c46d17 100644 --- a/tools/testing/selftests/bpf/progs/connect_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c @@ -28,8 +28,7 @@ int connect_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 0; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) return 0; diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c index 9c078f34bbb2..5a76754f846b 100644 --- a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c @@ -27,8 +27,7 @@ int getpeername_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 1; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) return 1; diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c index ac7145111497..7867113c696f 100644 --- a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c @@ -27,8 +27,7 @@ int getsockname_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 1; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) return 1; diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c index 4dfbc8552558..1c7ab44bccfa 100644 --- a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c @@ -27,8 +27,7 @@ int recvmsg_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 1; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS, sizeof(SERVUN_ADDRESS) - 1) != 0) return 1; diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c index 1f67e832666e..d8869b03dda9 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c @@ -28,8 +28,7 @@ int sendmsg_unix_prog(struct bpf_sock_addr *ctx) if (sa_kern->uaddrlen != unaddrlen) return 0; - sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, - bpf_core_type_id_kernel(struct sockaddr_un)); + sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un); if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) return 0; diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c index 934c4e5ada5b..46d6eb2a3b17 100644 --- a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c +++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c @@ -27,7 +27,7 @@ int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage) if (local_storage_ptr != local_storage) return 0; - sk = bpf_rdonly_cast(sk_ptr, bpf_core_type_id_kernel(struct sock)); + sk = bpf_core_cast(sk_ptr, struct sock); if (sk->sk_cookie.counter != cookie) return 0; diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c index ffbbfe1fa1c1..96531b0d9d55 100644 --- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -32,7 +32,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) if (!sk) return 0; - sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != AF_INET6 || sk->sk_state != TCP_LISTEN || !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) @@ -68,7 +68,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) if (!sk) return 0; - sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != AF_INET6 || !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) return 0; diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c index 98ab35893f51..9d808b8f4ab0 100644 --- a/tools/testing/selftests/bpf/progs/type_cast.c +++ b/tools/testing/selftests/bpf/progs/type_cast.c @@ -46,13 +46,12 @@ int md_skb(struct __sk_buff *skb) /* Simulate the following kernel macro: * #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) */ - shared_info = bpf_rdonly_cast(kskb->head + kskb->end, - bpf_core_type_id_kernel(struct skb_shared_info)); + shared_info = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info); meta_len = shared_info->meta_len; frag0_len = shared_info->frag_list->len; /* kskb2 should be equal to kskb */ - kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff)); + kskb2 = bpf_core_cast(kskb, typeof(*kskb2)); kskb2_len = kskb2->len; return 0; } @@ -63,7 +62,7 @@ int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) struct task_struct *task, *task_dup; task = bpf_get_current_task_btf(); - task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct)); + task_dup = bpf_core_cast(task, struct task_struct); (void)bpf_task_storage_get(&enter_id, task_dup, 0, 0); return 0; } @@ -71,7 +70,7 @@ int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) SEC("?tracepoint/syscalls/sys_enter_nanosleep") int kctx_u64(void *ctx) { - u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64)); + u64 *kctx = bpf_core_cast(ctx, u64); (void)kctx; return 0; -- cgit v1.2.3 From 2a0683be5b4c9829e8335e494a21d1148e832822 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 26 Jan 2024 18:21:18 -0500 Subject: selftests: Introduce Makefile variable to list shared bash scripts Some tests written in bash source other files in a parent directory. For example, drivers/net/bonding/dev_addr_lists.sh sources net/forwarding/lib.sh. If a subset of tests is exported and run outside the source tree (for example by using `make -C tools/testing/selftests gen_tar TARGETS="drivers/net/bonding"`), these other files must be made available as well. Commit ae108c48b5d2 ("selftests: net: Fix cross-tree inclusion of scripts") addressed this problem by symlinking and copying the sourced files but this only works for direct dependencies. Commit 25ae948b4478 ("selftests/net: add lib.sh") changed net/forwarding/lib.sh to source net/lib.sh. As a result, that latter file must be included as well when the former is exported. This was not handled and was reverted in commit 2114e83381d3 ("selftests: forwarding: Avoid failures to source net/lib.sh"). In order to allow reinstating the inclusion of net/lib.sh from net/forwarding/lib.sh, add a mechanism to list dependent files in a new Makefile variable and export them. This allows sourcing those files using the same expression whether tests are run in-tree or exported. Dependencies are not resolved recursively so transitive dependencies must be listed in TEST_INCLUDES. For example, if net/forwarding/lib.sh sources net/lib.sh; the Makefile related to a test that sources net/forwarding/lib.sh from a parent directory must list: TEST_INCLUDES := \ ../../../net/forwarding/lib.sh \ ../../../net/lib.sh v2: Fix rst syntax in Documentation/dev-tools/kselftest.rst (Jakub Kicinski) v1 (from RFC): * changed TEST_INCLUDES to take relative paths, like other TEST_* variables (Vladimir Oltean) * preserved common "$(MAKE) OUTPUT=... -C ... target" ordering in Makefile (Petr Machata) Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- Documentation/dev-tools/kselftest.rst | 12 ++++++++++++ tools/testing/selftests/Makefile | 7 ++++++- tools/testing/selftests/lib.mk | 19 +++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst index ab376b316c36..522214c7b43b 100644 --- a/Documentation/dev-tools/kselftest.rst +++ b/Documentation/dev-tools/kselftest.rst @@ -255,9 +255,21 @@ Contributing new tests (details) TEST_PROGS_EXTENDED, TEST_GEN_PROGS_EXTENDED mean it is the executable which is not tested by default. + TEST_FILES, TEST_GEN_FILES mean it is the file which is used by test. + TEST_INCLUDES is similar to TEST_FILES, it lists files which should be + included when exporting or installing the tests, with the following + differences: + + * symlinks to files in other directories are preserved + * the part of paths below tools/testing/selftests/ is preserved when + copying the files to the output directory + + TEST_INCLUDES is meant to list dependencies located in other directories of + the selftests hierarchy. + * First use the headers inside the kernel source and/or git repo, and then the system headers. Headers for the kernel release as opposed to headers installed by the distro on the system should be the primary focus to be able diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 15b6a111c3be..082db6b68060 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -191,6 +191,8 @@ run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests \ + SRC_PATH=$(shell readlink -e $$(pwd)) \ + OBJ_PATH=$(BUILD) \ O=$(abs_objtree); \ done; @@ -241,7 +243,10 @@ ifdef INSTALL_PATH @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install \ + INSTALL_PATH=$(INSTALL_PATH)/$$TARGET \ + SRC_PATH=$(shell readlink -e $$(pwd)) \ + OBJ_PATH=$(INSTALL_PATH) \ O=$(abs_objtree) \ $(if $(FORCE_TARGETS),|| exit); \ ret=$$((ret * $$?)); \ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index aa646e0661f3..087fee22dd53 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -69,11 +69,29 @@ define RUN_TESTS run_many $(1) endef +define INSTALL_INCLUDES + $(if $(TEST_INCLUDES), \ + relative_files=""; \ + for entry in $(TEST_INCLUDES); do \ + entry_dir=$$(readlink -e "$$(dirname "$$entry")"); \ + entry_name=$$(basename "$$entry"); \ + relative_dir=$${entry_dir#"$$SRC_PATH"/}; \ + if [ "$$relative_dir" = "$$entry_dir" ]; then \ + echo "Error: TEST_INCLUDES entry \"$$entry\" not located inside selftests directory ($$SRC_PATH)" >&2; \ + exit 1; \ + fi; \ + relative_files="$$relative_files $$relative_dir/$$entry_name"; \ + done; \ + cd $(SRC_PATH) && rsync -aR $$relative_files $(OBJ_PATH)/ \ + ) +endef + run_tests: all ifdef building_out_of_srctree @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ rsync -aq --copy-unsafe-links $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \ fi + @$(INSTALL_INCLUDES) @if [ "X$(TEST_PROGS)" != "X" ]; then \ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \ $(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \ @@ -103,6 +121,7 @@ endef install: all ifdef INSTALL_PATH $(INSTALL_RULE) + $(INSTALL_INCLUDES) else $(error Error: set INSTALL_PATH to use install) endif -- cgit v1.2.3 From 6500780cffa7f221431fa4a2ec1c2f6bc51dcb6b Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 26 Jan 2024 18:21:19 -0500 Subject: selftests: bonding: Add net/forwarding/lib.sh to TEST_INCLUDES In order to avoid duplicated files when both the bonding and forwarding tests are exported together, add net/forwarding/lib.sh to TEST_INCLUDES and include it via its relative path. Reviewed-by: Petr Machata Tested-by: Petr Machata Reviewed-by: Hangbin Liu Signed-off-by: Benjamin Poirier Reviewed-by: Jay Vosburgh Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/bonding/Makefile | 6 ++++-- tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh | 2 +- tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh | 2 +- tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh | 2 +- .../selftests/drivers/net/bonding/mode-1-recovery-updelay.sh | 2 +- .../selftests/drivers/net/bonding/mode-2-recovery-updelay.sh | 2 +- tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh | 1 - 7 files changed, 9 insertions(+), 8 deletions(-) delete mode 120000 tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 8a72bb7de70f..1e10a1f06faf 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -15,7 +15,9 @@ TEST_PROGS := \ TEST_FILES := \ lag_lib.sh \ bond_topo_2d1c.sh \ - bond_topo_3d1c.sh \ - net_forwarding_lib.sh + bond_topo_3d1c.sh + +TEST_INCLUDES := \ + ../../../net/forwarding/lib.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh index 862e947e17c7..8293dbc7c18f 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh @@ -11,7 +11,7 @@ ALL_TESTS=" REQUIRE_MZ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh bond_check_flags() { diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh index a509ef949dcf..0eb7edfb584c 100644 --- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh @@ -28,7 +28,7 @@ REQUIRE_MZ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source ${lib_dir}/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh s_ns="s-$(mktemp -u XXXXXX)" c_ns="c-$(mktemp -u XXXXXX)" diff --git a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh index 5cfe7d8ebc25..e6fa24eded5b 100755 --- a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh +++ b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh @@ -14,7 +14,7 @@ ALL_TESTS=" REQUIRE_MZ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh source "$lib_dir"/lag_lib.sh diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh index b76bf5030952..9d26ab4cad0b 100755 --- a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh +++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh @@ -23,7 +23,7 @@ REQUIRE_MZ=no REQUIRE_JQ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh source "$lib_dir"/lag_lib.sh cleanup() diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh index 8c2619002147..2d275b3e47dd 100755 --- a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh +++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh @@ -23,7 +23,7 @@ REQUIRE_MZ=no REQUIRE_JQ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh source "$lib_dir"/lag_lib.sh cleanup() diff --git a/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh b/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh deleted file mode 120000 index 39c96828c5ef..000000000000 --- a/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh +++ /dev/null @@ -1 +0,0 @@ -../../../net/forwarding/lib.sh \ No newline at end of file -- cgit v1.2.3 From 975b4a8b68ffbf3583b406758ed1b5d35b764942 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 26 Jan 2024 18:21:20 -0500 Subject: selftests: team: Add shared library scripts to TEST_INCLUDES In order to avoid duplicated files when both the team and bonding tests are exported together, add lag_lib.sh to TEST_INCLUDES. Do likewise for net/forwarding/lib.sh regarding team and forwarding tests. Reviewed-by: Petr Machata Tested-by: Petr Machata Reviewed-by: Hangbin Liu Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/team/Makefile | 6 +++--- tools/testing/selftests/drivers/net/team/dev_addr_lists.sh | 4 ++-- tools/testing/selftests/drivers/net/team/lag_lib.sh | 1 - tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh | 1 - 4 files changed, 5 insertions(+), 7 deletions(-) delete mode 120000 tools/testing/selftests/drivers/net/team/lag_lib.sh delete mode 120000 tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 6a86e61e8bfe..708bdfce37f5 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -3,8 +3,8 @@ TEST_PROGS := dev_addr_lists.sh -TEST_FILES := \ - lag_lib.sh \ - net_forwarding_lib.sh +TEST_INCLUDES := \ + ../bonding/lag_lib.sh \ + ../../../net/forwarding/lib.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh index 33913112d5ca..b1ec7755b783 100755 --- a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh +++ b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh @@ -11,9 +11,9 @@ ALL_TESTS=" REQUIRE_MZ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh -source "$lib_dir"/lag_lib.sh +source "$lib_dir"/../bonding/lag_lib.sh destroy() diff --git a/tools/testing/selftests/drivers/net/team/lag_lib.sh b/tools/testing/selftests/drivers/net/team/lag_lib.sh deleted file mode 120000 index e1347a10afde..000000000000 --- a/tools/testing/selftests/drivers/net/team/lag_lib.sh +++ /dev/null @@ -1 +0,0 @@ -../bonding/lag_lib.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh b/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh deleted file mode 120000 index 39c96828c5ef..000000000000 --- a/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh +++ /dev/null @@ -1 +0,0 @@ -../../../net/forwarding/lib.sh \ No newline at end of file -- cgit v1.2.3 From 4a24560ad72f8febfa4e11cad63fbf24ac94c008 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 26 Jan 2024 18:21:21 -0500 Subject: selftests: dsa: Replace test symlinks by wrapper script The dsa tests which are symlinks of tests from net/forwarding/ (like tc_actions.sh) become regular files after export (because `rsync --copy-unsafe-links` is used) and expect to source lib.sh (net/forwarding/lib.sh) from the same directory. In the last patch of this series, net/forwarding/lib.sh will source lib.sh from its parent directory (ie. net/lib.sh). This would not work for dsa tests because net/lib.sh is not present under drivers/net/. Since the tests in net/forwarding/ are not meant to be copied and run from another directory, as a preparation for that last patch, replace the test symlinks by a wrapper script which runs the original tests under net/forwarding/. Following from that, the links to shared library scripts in dsa/ are no longer used so remove them and add all the original files needed from parent directories to TEST_INCLUDES. Suggested-by: Hangbin Liu Reviewed-by: Petr Machata Tested-by: Petr Machata Reviewed-by: Hangbin Liu Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/dsa/Makefile | 17 +++++++++++++++-- .../selftests/drivers/net/dsa/bridge_locked_port.sh | 2 +- tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh | 2 +- tools/testing/selftests/drivers/net/dsa/bridge_mld.sh | 2 +- .../selftests/drivers/net/dsa/bridge_vlan_aware.sh | 2 +- .../selftests/drivers/net/dsa/bridge_vlan_mcast.sh | 2 +- .../selftests/drivers/net/dsa/bridge_vlan_unaware.sh | 2 +- tools/testing/selftests/drivers/net/dsa/lib.sh | 1 - .../selftests/drivers/net/dsa/local_termination.sh | 2 +- .../testing/selftests/drivers/net/dsa/no_forwarding.sh | 2 +- .../drivers/net/dsa/run_net_forwarding_test.sh | 9 +++++++++ tools/testing/selftests/drivers/net/dsa/tc_actions.sh | 2 +- tools/testing/selftests/drivers/net/dsa/tc_common.sh | 1 - .../selftests/drivers/net/dsa/test_bridge_fdb_stress.sh | 2 +- 14 files changed, 34 insertions(+), 14 deletions(-) delete mode 120000 tools/testing/selftests/drivers/net/dsa/lib.sh create mode 100755 tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh delete mode 120000 tools/testing/selftests/drivers/net/dsa/tc_common.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile index c393e7b73805..83da1d721017 100644 --- a/tools/testing/selftests/drivers/net/dsa/Makefile +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -11,8 +11,21 @@ TEST_PROGS = bridge_locked_port.sh \ tc_actions.sh \ test_bridge_fdb_stress.sh -TEST_PROGS_EXTENDED := lib.sh tc_common.sh +TEST_FILES := \ + run_net_forwarding_test.sh \ + forwarding.config -TEST_FILES := forwarding.config +TEST_INCLUDES := \ + ../../../net/forwarding/bridge_locked_port.sh \ + ../../../net/forwarding/bridge_mdb.sh \ + ../../../net/forwarding/bridge_mld.sh \ + ../../../net/forwarding/bridge_vlan_aware.sh \ + ../../../net/forwarding/bridge_vlan_mcast.sh \ + ../../../net/forwarding/bridge_vlan_unaware.sh \ + ../../../net/forwarding/lib.sh \ + ../../../net/forwarding/local_termination.sh \ + ../../../net/forwarding/no_forwarding.sh \ + ../../../net/forwarding/tc_actions.sh \ + ../../../net/forwarding/tc_common.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh index f5eb940c4c7c..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_locked_port.sh \ No newline at end of file +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh index 76492da525f7..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_mdb.sh \ No newline at end of file +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh index 81a7e0df0474..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_mld.sh \ No newline at end of file +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh index 9831ed74376a..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_vlan_aware.sh \ No newline at end of file +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh index 7f3c3f0bf719..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_vlan_mcast.sh \ No newline at end of file +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh index bf1a57e6bde1..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_vlan_unaware.sh \ No newline at end of file +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/lib.sh b/tools/testing/selftests/drivers/net/dsa/lib.sh deleted file mode 120000 index 39c96828c5ef..000000000000 --- a/tools/testing/selftests/drivers/net/dsa/lib.sh +++ /dev/null @@ -1 +0,0 @@ -../../../net/forwarding/lib.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh index c08166f84501..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/local_termination.sh +++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh @@ -1 +1 @@ -../../../net/forwarding/local_termination.sh \ No newline at end of file +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh index b9757466bc97..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh +++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh @@ -1 +1 @@ -../../../net/forwarding/no_forwarding.sh \ No newline at end of file +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh new file mode 100755 index 000000000000..4106c0a102ea --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +libdir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +testname=$(basename "${BASH_SOURCE[0]}") + +source "$libdir"/forwarding.config +cd "$libdir"/../../../net/forwarding/ || exit 1 +source "./$testname" "$@" diff --git a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh index 306213d9430e..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh +++ b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh @@ -1 +1 @@ -../../../net/forwarding/tc_actions.sh \ No newline at end of file +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/tc_common.sh b/tools/testing/selftests/drivers/net/dsa/tc_common.sh deleted file mode 120000 index bc3465bdc36b..000000000000 --- a/tools/testing/selftests/drivers/net/dsa/tc_common.sh +++ /dev/null @@ -1 +0,0 @@ -../../../net/forwarding/tc_common.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh index 92acab83fbe2..74682151d04d 100755 --- a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh +++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh @@ -19,7 +19,7 @@ REQUIRE_JQ="no" REQUIRE_MZ="no" NETIF_CREATE="no" lib_dir=$(dirname "$0") -source "$lib_dir"/lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh cleanup() { echo "Cleaning up" -- cgit v1.2.3 From 9f2af915916b49f2f3a0285cae805a33c2a1dde3 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 26 Jan 2024 18:21:22 -0500 Subject: selftests: forwarding: Redefine relative_path variable The following code which is part of lib.sh: relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then relative_path="." fi reimplements functionality that is part of `dirname`: $ dirname "" . To avoid this duplication, replace "relative_path" by "net_forwarding_dir", a new variable defined using dirname. Furthermore, to avoid the potential confusion about what "relative_path" is about (cwd, test script directory or test library directory), define "net_forwarding_dir" as the absolute path to net/forwarding/. Tested-by: Petr Machata Reviewed-by: Hangbin Liu Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 9 +++------ tools/testing/selftests/net/forwarding/mirror_gre_lib.sh | 2 +- tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8a61464ab6eb..cf0ba4bfe50d 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -29,13 +29,10 @@ STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no} TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=} TROUTE6=${TROUTE6:=traceroute6} -relative_path="${BASH_SOURCE%/*}" -if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then - relative_path="." -fi +net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -if [[ -f $relative_path/forwarding.config ]]; then - source "$relative_path/forwarding.config" +if [[ -f $net_forwarding_dir/forwarding.config ]]; then + source "$net_forwarding_dir/forwarding.config" fi # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh index fac486178ef7..0c36546e131e 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -source "$relative_path/mirror_lib.sh" +source "$net_forwarding_dir/mirror_lib.sh" quick_test_span_gre_dir_ips() { diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh index 39c03e2867f4..6e615fffa4ef 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh @@ -33,7 +33,7 @@ # | | # +-------------------------------------------------------------------------+ -source "$relative_path/mirror_topo_lib.sh" +source "$net_forwarding_dir/mirror_topo_lib.sh" mirror_gre_topo_h3_create() { -- cgit v1.2.3 From 521ed1ce94bb35842357fb71918d8ebed552f941 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 26 Jan 2024 18:21:23 -0500 Subject: selftests: forwarding: Remove duplicated lib.sh content commit 25ae948b4478 ("selftests/net: add lib.sh") added net/lib.sh to contain code shared by tests under net/ and net/forwarding/. However, this caused issues with selftests from directories other than net/forwarding/, in particular those under drivers/net/. Those issues were avoided in a simple way by duplicating some content in commit 2114e83381d3 ("selftests: forwarding: Avoid failures to source net/lib.sh"). In order to remove the duplicated content, restore the inclusion of net/lib.sh from net/forwarding/lib.sh but with the following changes: * net/lib.sh is imported through the net_forwarding_dir path The original expression "source ../lib.sh" would look for lib.sh in the directory above the script file's one, which did not work for tests under drivers/net/. * net/lib.sh is added to TEST_INCLUDES Since net/forwarding/lib.sh now sources net/lib.sh, both of those files must be exported along with tests which source net/forwarding/lib.sh. Suggested-by: Hangbin Liu Reviewed-by: Hangbin Liu Signed-off-by: Petr Machata Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/bonding/Makefile | 3 ++- tools/testing/selftests/drivers/net/dsa/Makefile | 3 ++- tools/testing/selftests/drivers/net/team/Makefile | 3 ++- tools/testing/selftests/net/forwarding/Makefile | 3 +++ tools/testing/selftests/net/forwarding/lib.sh | 26 +--------------------- 5 files changed, 10 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 1e10a1f06faf..03a089165d3f 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -18,6 +18,7 @@ TEST_FILES := \ bond_topo_3d1c.sh TEST_INCLUDES := \ - ../../../net/forwarding/lib.sh + ../../../net/forwarding/lib.sh \ + ../../../net/lib.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile index 83da1d721017..cd6817fe5be6 100644 --- a/tools/testing/selftests/drivers/net/dsa/Makefile +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -26,6 +26,7 @@ TEST_INCLUDES := \ ../../../net/forwarding/local_termination.sh \ ../../../net/forwarding/no_forwarding.sh \ ../../../net/forwarding/tc_actions.sh \ - ../../../net/forwarding/tc_common.sh + ../../../net/forwarding/tc_common.sh \ + ../../../net/lib.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 708bdfce37f5..2d5a76d99181 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -5,6 +5,7 @@ TEST_PROGS := dev_addr_lists.sh TEST_INCLUDES := \ ../bonding/lag_lib.sh \ - ../../../net/forwarding/lib.sh + ../../../net/forwarding/lib.sh \ + ../../../net/lib.sh include ../../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 452693514be4..1fba2717738d 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -129,4 +129,7 @@ TEST_PROGS_EXTENDED := devlink_lib.sh \ sch_tbf_etsprio.sh \ tc_common.sh +TEST_INCLUDES := \ + ../lib.sh + include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index cf0ba4bfe50d..a7ecfc8cae98 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -35,31 +35,7 @@ if [[ -f $net_forwarding_dir/forwarding.config ]]; then source "$net_forwarding_dir/forwarding.config" fi -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -busywait() -{ - local timeout=$1; shift - - local start_time="$(date -u +%s%3N)" - while true - do - local out - out=$("$@") - local ret=$? - if ((!ret)); then - echo -n "$out" - return 0 - fi - - local current_time="$(date -u +%s%3N)" - if ((current_time - start_time > timeout)); then - echo -n "$out" - return 1 - fi - done -} +source "$net_forwarding_dir/../lib.sh" ############################################################################## # Sanity checks -- cgit v1.2.3 From 57bf3dd2fe91aa144d2c8f53b02dc4e65c9b1135 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 26 Jan 2024 21:33:03 -0500 Subject: selftests/net: calibrate fq_band_pktlimit This test validates per-band packet limits in FQ. Packets are dropped rather than enqueued if the limit for their band is reached. This test is timing sensitive. It queues packets in FQ with a future delivery time to fill the qdisc. The test failed in a virtual environment (vng). Increase the delays to make it more tolerant to environments with timing variance. Signed-off-by: Willem de Bruijn Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/net/fq_band_pktlimit.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh index 24b77bdf41ff..977070ed42b3 100755 --- a/tools/testing/selftests/net/fq_band_pktlimit.sh +++ b/tools/testing/selftests/net/fq_band_pktlimit.sh @@ -8,7 +8,7 @@ # 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped # 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped # -# Send packets with a 100ms delay to ensure that previously sent +# Send packets with a delay to ensure that previously sent # packets are still queued when later ones are sent. # Use SO_TXTIME for this. @@ -29,19 +29,21 @@ ip -6 addr add fdaa::1/128 dev dummy0 ip -6 route add fdaa::/64 dev dummy0 tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10 -./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +DELAY=400000 + +./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" -./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" -./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000 +./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000 OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" # Initial stats will report zero sent, as all packets are still -# queued in FQ. Sleep for the delay period (100ms) and see that +# queued in FQ. Sleep for at least the delay period and see that # twenty are now sent. -sleep 0.1 +sleep 0.6 OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" # Log the output after the test -- cgit v1.2.3 From 2ef61296d2844c6a4211e07ab70ef2fb412b2c30 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Tue, 30 Jan 2024 21:32:12 -0800 Subject: selftests/bpf: Disable IPv6 for lwt_redirect test After a recent change in the vmtest runner, this test started failing sporadically. Investigation showed that this test was subject to race condition which got exacerbated after the vm runner change. The symptoms being that the logic that waited for an ICMPv4 packet is naive and will break if 5 or more non-ICMPv4 packets make it to tap0. When ICMPv6 is enabled, the kernel will generate traffic such as ICMPv6 router solicitation... On a system with good performance, the expected ICMPv4 packet would very likely make it to the network interface promptly, but on a system with poor performance, those "guarantees" do not hold true anymore. Given that the test is IPv4 only, this change disable IPv6 in the test netns by setting `net.ipv6.conf.all.disable_ipv6` to 1. This essentially leaves "ping" as the sole generator of traffic in the network namespace. If this test was to be made IPv6 compatible, the logic in `wait_for_packet` would need to be modified. In more details... At a high level, the test does: - create a new namespace - in `setup_redirect_target` set up lo, tap0, and link_err interfaces as well as add 2 routes that attaches ingress/egress sections of `test_lwt_redirect.bpf.o` to the xmit path. - in `send_and_capture_test_packets` send an ICMP packet and read off the tap interface (using `wait_for_packet`) to check that a ICMP packet with the right size is read. `wait_for_packet` will try to read `max_retry` (5) times from the tap0 fd looking for an ICMPv4 packet matching some criteria. The problem is that when we set up the `tap0` interface, because IPv6 is enabled by default, traffic such as Router solicitation is sent through tap0, as in: # tcpdump -r /tmp/lwt_redirect.pc reading from file /tmp/lwt_redirect.pcap, link-type EN10MB (Ethernet) 04:46:23.578352 IP6 :: > ff02::1:ffc0:4427: ICMP6, neighbor solicitation, who has fe80::fcba:dff:fec0:4427, length 32 04:46:23.659522 IP6 :: > ff02::16: HBH ICMP6, multicast listener report v2, 1 group record(s), length 28 04:46:24.389169 IP 10.0.0.1 > 20.0.0.9: ICMP echo request, id 122, seq 1, length 108 04:46:24.618599 IP6 fe80::fcba:dff:fec0:4427 > ff02::16: HBH ICMP6, multicast listener report v2, 1 group record(s), length 28 04:46:24.619985 IP6 fe80::fcba:dff:fec0:4427 > ff02::2: ICMP6, router solicitation, length 16 04:46:24.767326 IP6 fe80::fcba:dff:fec0:4427 > ff02::16: HBH ICMP6, multicast listener report v2, 1 group record(s), length 28 04:46:28.936402 IP6 fe80::fcba:dff:fec0:4427 > ff02::2: ICMP6, router solicitation, length 16 If `wait_for_packet` sees 5 non-ICMPv4 packets, it will return 0, which is what we see in: 2024-01-31T03:51:25.0336992Z test_lwt_redirect_run:PASS:netns_create 0 nsec 2024-01-31T03:51:25.0341309Z open_netns:PASS:malloc token 0 nsec 2024-01-31T03:51:25.0344844Z open_netns:PASS:open /proc/self/ns/net 0 nsec 2024-01-31T03:51:25.0350071Z open_netns:PASS:open netns fd 0 nsec 2024-01-31T03:51:25.0353516Z open_netns:PASS:setns 0 nsec 2024-01-31T03:51:25.0356560Z test_lwt_redirect_run:PASS:setns 0 nsec 2024-01-31T03:51:25.0360140Z open_tuntap:PASS:open(/dev/net/tun) 0 nsec 2024-01-31T03:51:25.0363822Z open_tuntap:PASS:ioctl(TUNSETIFF) 0 nsec 2024-01-31T03:51:25.0367402Z open_tuntap:PASS:fcntl(O_NONBLOCK) 0 nsec 2024-01-31T03:51:25.0371167Z setup_redirect_target:PASS:open_tuntap 0 nsec 2024-01-31T03:51:25.0375180Z setup_redirect_target:PASS:if_nametoindex 0 nsec 2024-01-31T03:51:25.0379929Z setup_redirect_target:PASS:ip link add link_err type dummy 0 nsec 2024-01-31T03:51:25.0384874Z setup_redirect_target:PASS:ip link set lo up 0 nsec 2024-01-31T03:51:25.0389678Z setup_redirect_target:PASS:ip addr add dev lo 10.0.0.1/32 0 nsec 2024-01-31T03:51:25.0394814Z setup_redirect_target:PASS:ip link set link_err up 0 nsec 2024-01-31T03:51:25.0399874Z setup_redirect_target:PASS:ip link set tap0 up 0 nsec 2024-01-31T03:51:25.0407731Z setup_redirect_target:PASS:ip route add 10.0.0.0/24 dev link_err encap bpf xmit obj test_lwt_redirect.bpf.o sec redir_ingress 0 nsec 2024-01-31T03:51:25.0419105Z setup_redirect_target:PASS:ip route add 20.0.0.0/24 dev link_err encap bpf xmit obj test_lwt_redirect.bpf.o sec redir_egress 0 nsec 2024-01-31T03:51:25.0427209Z test_lwt_redirect_normal:PASS:setup_redirect_target 0 nsec 2024-01-31T03:51:25.0431424Z ping_dev:PASS:if_nametoindex 0 nsec 2024-01-31T03:51:25.0437222Z send_and_capture_test_packets:FAIL:wait_for_epacket unexpected wait_for_epacket: actual 0 != expected 1 2024-01-31T03:51:25.0448298Z (/tmp/work/bpf/bpf/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c:175: errno: Success) test_lwt_redirect_normal egress test fails 2024-01-31T03:51:25.0457124Z close_netns:PASS:setns 0 nsec When running in a VM which potential resource contrains, the odds that calling `ping` is not scheduled very soon after bringing `tap0` up increases, and with this the chances to get our ICMP packet pushed to position 6+ in the network trace. To confirm this indeed solves the issue, I ran the test 100 times in a row with: errors=0 successes=0 for i in `seq 1 100` do ./test_progs -t lwt_redirect/lwt_redirect_normal if [ $? -eq 0 ]; then successes=$((successes+1)) else errors=$((errors+1)) fi done echo "successes: $successes/errors: $errors" While this test would at least fail a couple of time every 10 runs, here it ran 100 times with no error. Fixes: 43a7c3ef8a15 ("selftests/bpf: Add lwt_xmit tests for BPF_REDIRECT") Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240131053212.2247527-1-chantr4@gmail.com --- tools/testing/selftests/bpf/prog_tests/lwt_redirect.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c index beeb3ac1c361..b5b9e74b1044 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c @@ -203,6 +203,7 @@ static int setup_redirect_target(const char *target_dev, bool need_mac) if (!ASSERT_GE(target_index, 0, "if_nametoindex")) goto fail; + SYS(fail, "sysctl -w net.ipv6.conf.all.disable_ipv6=1"); SYS(fail, "ip link add link_err type dummy"); SYS(fail, "ip link set lo up"); SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); -- cgit v1.2.3 From 5264ab612e28058536de8069bcf83eb20fd65c29 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 26 Jan 2024 21:31:51 -0500 Subject: selftests/net: calibrate txtimestamp The test sends packets and compares enqueue, transmit and Ack timestamps with expected values. It installs netem delays to increase latency between these points. The test proves flaky in virtual environment (vng). Increase the delays to reduce variance. Scale measurement tolerance accordingly. Time sensitive tests are difficult to calibrate. Increasing delays 10x also increases runtime 10x, for one. And it may still prove flaky at some rate. Signed-off-by: Willem de Bruijn Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240127023212.3746239-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/txtimestamp.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh index 31637769f59f..25baca4b148e 100755 --- a/tools/testing/selftests/net/txtimestamp.sh +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -8,13 +8,13 @@ set -e setup() { # set 1ms delay on lo egress - tc qdisc add dev lo root netem delay 1ms + tc qdisc add dev lo root netem delay 10ms # set 2ms delay on ifb0 egress modprobe ifb ip link add ifb_netem0 type ifb ip link set dev ifb_netem0 up - tc qdisc add dev ifb_netem0 root netem delay 2ms + tc qdisc add dev ifb_netem0 root netem delay 20ms # redirect lo ingress through ifb0 egress tc qdisc add dev lo handle ffff: ingress @@ -24,9 +24,11 @@ setup() { } run_test_v4v6() { - # SND will be delayed 1000us - # ACK will be delayed 6000us: 1 + 2 ms round-trip - local -r args="$@ -v 1000 -V 6000" + # SND will be delayed 10ms + # ACK will be delayed 60ms: 10 + 20 ms round-trip + # allow +/- tolerance of 8ms + # wait for ACK to be queued + local -r args="$@ -v 10000 -V 60000 -t 8000 -S 80000" ./txtimestamp ${args} -4 -L 127.0.0.1 ./txtimestamp ${args} -6 -L ::1 -- cgit v1.2.3 From 6f3189f38a3e995232e028a4c341164c4aca1b20 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Sun, 28 Jan 2024 18:24:08 -0700 Subject: bpf: treewide: Annotate BPF kfuncs in BTF This commit marks kfuncs as such inside the .BTF_ids section. The upshot of these annotations is that we'll be able to automatically generate kfunc prototypes for downstream users. The process is as follows: 1. In source, use BTF_KFUNCS_START/END macro pair to mark kfuncs 2. During build, pahole injects into BTF a "bpf_kfunc" BTF_DECL_TAG for each function inside BTF_KFUNCS sets 3. At runtime, vmlinux or module BTF is made available in sysfs 4. At runtime, bpftool (or similar) can look at provided BTF and generate appropriate prototypes for functions with "bpf_kfunc" tag To ensure future kfunc are similarly tagged, we now also return error inside kfunc registration for untagged kfuncs. For vmlinux kfuncs, we also WARN(), as initcall machinery does not handle errors. Signed-off-by: Daniel Xu Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/e55150ceecbf0a5d961e608941165c0bee7bc943.1706491398.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 8 ++++---- drivers/hid/bpf/hid_bpf_dispatch.c | 8 ++++---- fs/verity/measure.c | 4 ++-- kernel/bpf/btf.c | 8 ++++++++ kernel/bpf/cpumask.c | 4 ++-- kernel/bpf/helpers.c | 8 ++++---- kernel/bpf/map_iter.c | 4 ++-- kernel/cgroup/rstat.c | 4 ++-- kernel/trace/bpf_trace.c | 8 ++++---- net/bpf/test_run.c | 8 ++++---- net/core/filter.c | 20 ++++++++++---------- net/core/xdp.c | 4 ++-- net/ipv4/bpf_tcp_ca.c | 4 ++-- net/ipv4/fou_bpf.c | 4 ++-- net/ipv4/tcp_bbr.c | 4 ++-- net/ipv4/tcp_cubic.c | 4 ++-- net/ipv4/tcp_dctcp.c | 4 ++-- net/netfilter/nf_conntrack_bpf.c | 4 ++-- net/netfilter/nf_nat_bpf.c | 4 ++-- net/xfrm/xfrm_interface_bpf.c | 4 ++-- net/xfrm/xfrm_state_bpf.c | 4 ++-- .../testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 8 ++++---- 22 files changed, 70 insertions(+), 62 deletions(-) (limited to 'tools') diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 7985c6615f3c..a8f5782bd833 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -177,10 +177,10 @@ In addition to kfuncs' arguments, verifier may need more information about the type of kfunc(s) being registered with the BPF subsystem. To do so, we define flags on a set of kfuncs as follows:: - BTF_SET8_START(bpf_task_set) + BTF_KFUNCS_START(bpf_task_set) BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) - BTF_SET8_END(bpf_task_set) + BTF_KFUNCS_END(bpf_task_set) This set encodes the BTF ID of each kfunc listed above, and encodes the flags along with it. Ofcourse, it is also allowed to specify no flags. @@ -347,10 +347,10 @@ Once the kfunc is prepared for use, the final step to making it visible is registering it with the BPF subsystem. Registration is done per BPF program type. An example is shown below:: - BTF_SET8_START(bpf_task_set) + BTF_KFUNCS_START(bpf_task_set) BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) - BTF_SET8_END(bpf_task_set) + BTF_KFUNCS_END(bpf_task_set) static const struct btf_kfunc_id_set bpf_task_kfunc_set = { .owner = THIS_MODULE, diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index d9ef45fcaeab..02c441aaa217 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -172,9 +172,9 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr * The following set contains all functions we agree BPF programs * can use. */ -BTF_SET8_START(hid_bpf_kfunc_ids) +BTF_KFUNCS_START(hid_bpf_kfunc_ids) BTF_ID_FLAGS(func, hid_bpf_get_data, KF_RET_NULL) -BTF_SET8_END(hid_bpf_kfunc_ids) +BTF_KFUNCS_END(hid_bpf_kfunc_ids) static const struct btf_kfunc_id_set hid_bpf_kfunc_set = { .owner = THIS_MODULE, @@ -440,12 +440,12 @@ static const struct btf_kfunc_id_set hid_bpf_fmodret_set = { }; /* for syscall HID-BPF */ -BTF_SET8_START(hid_bpf_syscall_kfunc_ids) +BTF_KFUNCS_START(hid_bpf_syscall_kfunc_ids) BTF_ID_FLAGS(func, hid_bpf_attach_prog) BTF_ID_FLAGS(func, hid_bpf_allocate_context, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, hid_bpf_release_context, KF_RELEASE) BTF_ID_FLAGS(func, hid_bpf_hw_request) -BTF_SET8_END(hid_bpf_syscall_kfunc_ids) +BTF_KFUNCS_END(hid_bpf_syscall_kfunc_ids) static const struct btf_kfunc_id_set hid_bpf_syscall_kfunc_set = { .owner = THIS_MODULE, diff --git a/fs/verity/measure.c b/fs/verity/measure.c index bf7a5f4cccaf..3969d54158d1 100644 --- a/fs/verity/measure.c +++ b/fs/verity/measure.c @@ -159,9 +159,9 @@ __bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_ker __bpf_kfunc_end_defs(); -BTF_SET8_START(fsverity_set_ids) +BTF_KFUNCS_START(fsverity_set_ids) BTF_ID_FLAGS(func, bpf_get_fsverity_digest, KF_TRUSTED_ARGS) -BTF_SET8_END(fsverity_set_ids) +BTF_KFUNCS_END(fsverity_set_ids) static int bpf_get_fsverity_digest_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c8c6e6cf18e7..ef380e546952 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -8124,6 +8124,14 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, { enum btf_kfunc_hook hook; + /* All kfuncs need to be tagged as such in BTF. + * WARN() for initcall registrations that do not check errors. + */ + if (!(kset->set->flags & BTF_SET8_KFUNCS)) { + WARN_ON(!kset->owner); + return -EINVAL; + } + hook = bpf_prog_type_to_kfunc_hook(prog_type); return __register_btf_kfunc_id_set(hook, kset); } diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 2e73533a3811..dad0fb1c8e87 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -424,7 +424,7 @@ __bpf_kfunc u32 bpf_cpumask_weight(const struct cpumask *cpumask) __bpf_kfunc_end_defs(); -BTF_SET8_START(cpumask_kfunc_btf_ids) +BTF_KFUNCS_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) @@ -450,7 +450,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_weight, KF_RCU) -BTF_SET8_END(cpumask_kfunc_btf_ids) +BTF_KFUNCS_END(cpumask_kfunc_btf_ids) static const struct btf_kfunc_id_set cpumask_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bcb951a2ecf4..4db1c658254c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2544,7 +2544,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) __bpf_kfunc_end_defs(); -BTF_SET8_START(generic_btf_ids) +BTF_KFUNCS_START(generic_btf_ids) #ifdef CONFIG_KEXEC_CORE BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif @@ -2573,7 +2573,7 @@ BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) -BTF_SET8_END(generic_btf_ids) +BTF_KFUNCS_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { .owner = THIS_MODULE, @@ -2589,7 +2589,7 @@ BTF_ID(struct, cgroup) BTF_ID(func, bpf_cgroup_release_dtor) #endif -BTF_SET8_START(common_btf_ids) +BTF_KFUNCS_START(common_btf_ids) BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) BTF_ID_FLAGS(func, bpf_rdonly_cast) BTF_ID_FLAGS(func, bpf_rcu_read_lock) @@ -2618,7 +2618,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) BTF_ID_FLAGS(func, bpf_dynptr_size) BTF_ID_FLAGS(func, bpf_dynptr_clone) -BTF_SET8_END(common_btf_ids) +BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index 6abd7c5df4b3..9575314f40a6 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -213,9 +213,9 @@ __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_map_iter_kfunc_ids) +BTF_KFUNCS_START(bpf_map_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_map_iter_kfunc_ids) +BTF_KFUNCS_END(bpf_map_iter_kfunc_ids) static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index a8350d2d63e6..07e2284bb499 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -562,10 +562,10 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq) } /* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */ -BTF_SET8_START(bpf_rstat_kfunc_ids) +BTF_KFUNCS_START(bpf_rstat_kfunc_ids) BTF_ID_FLAGS(func, cgroup_rstat_updated) BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE) -BTF_SET8_END(bpf_rstat_kfunc_ids) +BTF_KFUNCS_END(bpf_rstat_kfunc_ids) static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 64fdaf79d113..241ddf5e3895 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1412,14 +1412,14 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, __bpf_kfunc_end_defs(); -BTF_SET8_START(key_sig_kfunc_set) +BTF_KFUNCS_START(key_sig_kfunc_set) BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE) #endif -BTF_SET8_END(key_sig_kfunc_set) +BTF_KFUNCS_END(key_sig_kfunc_set) static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = { .owner = THIS_MODULE, @@ -1475,9 +1475,9 @@ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, __bpf_kfunc_end_defs(); -BTF_SET8_START(fs_kfunc_set_ids) +BTF_KFUNCS_START(fs_kfunc_set_ids) BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) -BTF_SET8_END(fs_kfunc_set_ids) +BTF_KFUNCS_END(fs_kfunc_set_ids) static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dfd919374017..5535f9adc658 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -617,21 +617,21 @@ CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor); __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_test_modify_return_ids) +BTF_KFUNCS_START(bpf_test_modify_return_ids) BTF_ID_FLAGS(func, bpf_modify_return_test) BTF_ID_FLAGS(func, bpf_modify_return_test2) BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE) -BTF_SET8_END(bpf_test_modify_return_ids) +BTF_KFUNCS_END(bpf_test_modify_return_ids) static const struct btf_kfunc_id_set bpf_test_modify_return_set = { .owner = THIS_MODULE, .set = &bpf_test_modify_return_ids, }; -BTF_SET8_START(test_sk_check_kfunc_ids) +BTF_KFUNCS_START(test_sk_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) -BTF_SET8_END(test_sk_check_kfunc_ids) +BTF_KFUNCS_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) diff --git a/net/core/filter.c b/net/core/filter.c index 358870408a51..524adf1fa6d0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11982,21 +11982,21 @@ int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, return 0; } -BTF_SET8_START(bpf_kfunc_check_set_skb) +BTF_KFUNCS_START(bpf_kfunc_check_set_skb) BTF_ID_FLAGS(func, bpf_dynptr_from_skb) -BTF_SET8_END(bpf_kfunc_check_set_skb) +BTF_KFUNCS_END(bpf_kfunc_check_set_skb) -BTF_SET8_START(bpf_kfunc_check_set_xdp) +BTF_KFUNCS_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) -BTF_SET8_END(bpf_kfunc_check_set_xdp) +BTF_KFUNCS_END(bpf_kfunc_check_set_xdp) -BTF_SET8_START(bpf_kfunc_check_set_sock_addr) +BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr) BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) -BTF_SET8_END(bpf_kfunc_check_set_sock_addr) +BTF_KFUNCS_END(bpf_kfunc_check_set_sock_addr) -BTF_SET8_START(bpf_kfunc_check_set_tcp_reqsk) +BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk) BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_kfunc_check_set_tcp_reqsk) +BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk) static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, @@ -12075,9 +12075,9 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_sk_iter_kfunc_ids) +BTF_KFUNCS_START(bpf_sk_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_sk_iter_kfunc_ids) +BTF_KFUNCS_END(bpf_sk_iter_kfunc_ids) static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/net/core/xdp.c b/net/core/xdp.c index 4869c1c2d8f3..034fb80f3fbe 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -771,11 +771,11 @@ __bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, __bpf_kfunc_end_defs(); -BTF_SET8_START(xdp_metadata_kfunc_ids) +BTF_KFUNCS_START(xdp_metadata_kfunc_ids) #define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC -BTF_SET8_END(xdp_metadata_kfunc_ids) +BTF_KFUNCS_END(xdp_metadata_kfunc_ids) static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 834edc18463a..7f518ea5f4ac 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -201,13 +201,13 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids) +BTF_KFUNCS_START(bpf_tcp_ca_check_kfunc_ids) BTF_ID_FLAGS(func, tcp_reno_ssthresh) BTF_ID_FLAGS(func, tcp_reno_cong_avoid) BTF_ID_FLAGS(func, tcp_reno_undo_cwnd) BTF_ID_FLAGS(func, tcp_slow_start) BTF_ID_FLAGS(func, tcp_cong_avoid_ai) -BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids) +BTF_KFUNCS_END(bpf_tcp_ca_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 4da03bf45c9b..06e5572f296f 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -100,10 +100,10 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, __bpf_kfunc_end_defs(); -BTF_SET8_START(fou_kfunc_set) +BTF_KFUNCS_START(fou_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) BTF_ID_FLAGS(func, bpf_skb_get_fou_encap) -BTF_SET8_END(fou_kfunc_set) +BTF_KFUNCS_END(fou_kfunc_set) static const struct btf_kfunc_id_set fou_bpf_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 22358032dd48..05dc2d05bc7c 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1155,7 +1155,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET8_START(tcp_bbr_check_kfunc_ids) +BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, bbr_init) @@ -1168,7 +1168,7 @@ BTF_ID_FLAGS(func, bbr_min_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) #endif #endif -BTF_SET8_END(tcp_bbr_check_kfunc_ids) +BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 0fd78ecb67e7..44869ea089e3 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET8_START(tcp_cubic_check_kfunc_ids) +BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, cubictcp_init) @@ -496,7 +496,7 @@ BTF_ID_FLAGS(func, cubictcp_cwnd_event) BTF_ID_FLAGS(func, cubictcp_acked) #endif #endif -BTF_SET8_END(tcp_cubic_check_kfunc_ids) +BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index bb23bb5b387a..e33fbe4933e4 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -260,7 +260,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET8_START(tcp_dctcp_check_kfunc_ids) +BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, dctcp_init) @@ -271,7 +271,7 @@ BTF_ID_FLAGS(func, dctcp_cwnd_undo) BTF_ID_FLAGS(func, dctcp_state) #endif #endif -BTF_SET8_END(tcp_dctcp_check_kfunc_ids) +BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 475358ec8212..d2492d050fe6 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -467,7 +467,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) __bpf_kfunc_end_defs(); -BTF_SET8_START(nf_ct_kfunc_set) +BTF_KFUNCS_START(nf_ct_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL) @@ -478,7 +478,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) -BTF_SET8_END(nf_ct_kfunc_set) +BTF_KFUNCS_END(nf_ct_kfunc_set) static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c index 6e3b2f58855f..481be15609b1 100644 --- a/net/netfilter/nf_nat_bpf.c +++ b/net/netfilter/nf_nat_bpf.c @@ -54,9 +54,9 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct, __bpf_kfunc_end_defs(); -BTF_SET8_START(nf_nat_kfunc_set) +BTF_KFUNCS_START(nf_nat_kfunc_set) BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) -BTF_SET8_END(nf_nat_kfunc_set) +BTF_KFUNCS_END(nf_nat_kfunc_set) static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c index 7d5e920141e9..5ea15037ebd1 100644 --- a/net/xfrm/xfrm_interface_bpf.c +++ b/net/xfrm/xfrm_interface_bpf.c @@ -93,10 +93,10 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp __bpf_kfunc_end_defs(); -BTF_SET8_START(xfrm_ifc_kfunc_set) +BTF_KFUNCS_START(xfrm_ifc_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info) -BTF_SET8_END(xfrm_ifc_kfunc_set) +BTF_KFUNCS_END(xfrm_ifc_kfunc_set) static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c index 9e20d4a377f7..2248eda741f8 100644 --- a/net/xfrm/xfrm_state_bpf.c +++ b/net/xfrm/xfrm_state_bpf.c @@ -117,10 +117,10 @@ __bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __bpf_kfunc_end_defs(); -BTF_SET8_START(xfrm_state_kfunc_set) +BTF_KFUNCS_START(xfrm_state_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE) BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE) -BTF_SET8_END(xfrm_state_kfunc_set) +BTF_KFUNCS_END(xfrm_state_kfunc_set) static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = { .owner = THIS_MODULE, diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 6f163a0f1c94..4754c662b39f 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -343,12 +343,12 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET8_START(bpf_testmod_common_kfunc_ids) +BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_kfunc_common_test) -BTF_SET8_END(bpf_testmod_common_kfunc_ids) +BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { .owner = THIS_MODULE, @@ -494,7 +494,7 @@ __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused return arg; } -BTF_SET8_START(bpf_testmod_check_kfunc_ids) +BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) BTF_ID_FLAGS(func, bpf_kfunc_call_test2) @@ -520,7 +520,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) -BTF_SET8_END(bpf_testmod_check_kfunc_ids) +BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) static int bpf_testmod_ops_init(struct btf *btf) { -- cgit v1.2.3 From e79027c08302e299571555a9606b751820afa409 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 30 Jan 2024 08:43:27 -0700 Subject: selftests: Declare local variable for pause in fcnal-test.sh Running fcnal-test.sh script with -P argument is causing test failures: $ ./fcnal-test.sh -t ping -P TEST: ping out - ns-B IP [ OK ] hit enter to continue, 'q' to quit fcnal-test.sh: line 106: [: ping: integer expression expected TEST: out, [FAIL] expected rc ping; actual rc 0 hit enter to continue, 'q' to quit The test functions use local variable 'a' for addresses and then log_test is also using 'a' without a local declaration. Fix by declaring a local variable and using 'ans' (for answer) in the read. Signed-off-by: David Ahern Link: https://lore.kernel.org/r/20240130154327.33848-1-dsahern@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index d7cfb7c2b427..386ebd829df5 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -100,6 +100,7 @@ log_test() local rc=$1 local expected=$2 local msg="$3" + local ans [ "${VERBOSE}" = "1" ] && echo @@ -113,16 +114,16 @@ log_test() if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 + read ans + [ "$ans" = "q" ] && exit 1 fi fi if [ "${PAUSE}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 + read ans + [ "$ans" = "q" ] && exit 1 fi kill_procs -- cgit v1.2.3 From e2ece0bc5ab1f7e0bb00f3b81fd4132b774d880d Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:46 +0000 Subject: tools/net/ynl: Add --output-json arg to ynl cli The ynl cli currently emits python pretty printed structures which is hard to consume. Add a new --output-json argument to emit JSON. Signed-off-by: Donald Hunter Reviewed-by: Breno Leitao Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-2-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/cli.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index 2ad9ec0f5545..0f8239979670 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -9,6 +9,15 @@ import time from lib import YnlFamily, Netlink +class YnlEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, bytes): + return bytes.hex(obj) + if isinstance(obj, set): + return list(obj) + return json.JSONEncoder.default(self, obj) + + def main(): parser = argparse.ArgumentParser(description='YNL CLI sample') parser.add_argument('--spec', dest='spec', type=str, required=True) @@ -28,8 +37,15 @@ def main(): parser.add_argument('--append', dest='flags', action='append_const', const=Netlink.NLM_F_APPEND) parser.add_argument('--process-unknown', action=argparse.BooleanOptionalAction) + parser.add_argument('--output-json', action='store_true') args = parser.parse_args() + def output(msg): + if args.output_json: + print(json.dumps(msg, cls=YnlEncoder)) + else: + pprint.PrettyPrinter().pprint(msg) + if args.no_schema: args.schema = '' @@ -47,14 +63,14 @@ def main(): if args.do: reply = ynl.do(args.do, attrs, args.flags) - pprint.PrettyPrinter().pprint(reply) + output(reply) if args.dump: reply = ynl.dump(args.dump, attrs) - pprint.PrettyPrinter().pprint(reply) + output(reply) if args.ntf: ynl.check_ntf() - pprint.PrettyPrinter().pprint(ynl.async_msg_queue) + output(ynl.async_msg_queue) if __name__ == "__main__": -- cgit v1.2.3 From bf8b832374fb85931acda478920e3a270f53bf17 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:47 +0000 Subject: tools/net/ynl: Support sub-messages in nested attribute spaces Sub-message selectors could only be resolved using values from the current nest level. Enable value lookup in outer scopes by using collections.ChainMap to implement an ordered lookup from nested to outer scopes. Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-3-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 1e10512b2117..f24581759acf 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -405,6 +405,26 @@ class GenlProtocol(NetlinkProtocol): return self.genl_family['mcast'][mcast_name] + +class SpaceAttrs: + SpecValuesPair = namedtuple('SpecValuesPair', ['spec', 'values']) + + def __init__(self, attr_space, attrs, outer = None): + outer_scopes = outer.scopes if outer else [] + inner_scope = self.SpecValuesPair(attr_space, attrs) + self.scopes = [inner_scope] + outer_scopes + + def lookup(self, name): + for scope in self.scopes: + if name in scope.spec: + if name in scope.values: + return scope.values[name] + spec_name = scope.spec.yaml['name'] + raise Exception( + f"No value for '{name}' in attribute space '{spec_name}'") + raise Exception(f"Attribute '{name}' not defined in any attribute-set") + + # # YNL implementation details. # @@ -548,24 +568,22 @@ class YnlFamily(SpecFamily): else: rsp[name] = [decoded] - def _resolve_selector(self, attr_spec, vals): + def _resolve_selector(self, attr_spec, search_attrs): sub_msg = attr_spec.sub_message if sub_msg not in self.sub_msgs: raise Exception(f"No sub-message spec named {sub_msg} for {attr_spec.name}") sub_msg_spec = self.sub_msgs[sub_msg] selector = attr_spec.selector - if selector not in vals: - raise Exception(f"There is no value for {selector} to resolve '{attr_spec.name}'") - value = vals[selector] + value = search_attrs.lookup(selector) if value not in sub_msg_spec.formats: raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'") spec = sub_msg_spec.formats[value] return spec - def _decode_sub_msg(self, attr, attr_spec, rsp): - msg_format = self._resolve_selector(attr_spec, rsp) + def _decode_sub_msg(self, attr, attr_spec, search_attrs): + msg_format = self._resolve_selector(attr_spec, search_attrs) decoded = {} offset = 0 if msg_format.fixed_header: @@ -579,10 +597,12 @@ class YnlFamily(SpecFamily): raise Exception(f"Unknown attribute-set '{attr_space}' when decoding '{attr_spec.name}'") return decoded - def _decode(self, attrs, space): + def _decode(self, attrs, space, outer_attrs = None): if space: attr_space = self.attr_sets[space] rsp = dict() + search_attrs = SpaceAttrs(attr_space, rsp, outer_attrs) + for attr in attrs: try: attr_spec = attr_space.attrs_by_val[attr.type] @@ -594,7 +614,7 @@ class YnlFamily(SpecFamily): continue if attr_spec["type"] == 'nest': - subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes']) + subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs) decoded = subdict elif attr_spec["type"] == 'string': decoded = attr.as_strz() @@ -617,7 +637,7 @@ class YnlFamily(SpecFamily): selector = self._decode_enum(selector, attr_spec) decoded = {"value": value, "selector": selector} elif attr_spec["type"] == 'sub-message': - decoded = self._decode_sub_msg(attr, attr_spec, rsp) + decoded = self._decode_sub_msg(attr, attr_spec, vals) else: if not self.process_unknown: raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') -- cgit v1.2.3 From 5f2823c48ad6f63fc033e8ec5e995fcd5cbab409 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:49 +0000 Subject: tools/net/ynl: Refactor fixed header encoding into separate method Refactor the fixed header encoding into a separate _encode_struct method so that it can be reused for fixed headers in sub-messages and for encoding structs. Signed-off-by: Donald Hunter Reviewed-by: Breno Leitao Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-5-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index f24581759acf..b22ddedb801b 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -720,6 +720,20 @@ class YnlFamily(SpecFamily): fixed_header_attrs[m.name] = value return fixed_header_attrs + def _encode_struct(self, name, vals): + members = self.consts[name].members + attr_payload = b'' + for m in members: + value = vals.pop(m.name) if m.name in vals else 0 + if m.type == 'pad': + attr_payload += bytearray(m.len) + elif m.type == 'binary': + attr_payload += bytes.fromhex(value) + else: + format = NlAttr.get_format(m.type, m.byte_order) + attr_payload += format.pack(value) + return attr_payload + def handle_ntf(self, decoded): msg = dict() if self.include_raw: @@ -779,18 +793,8 @@ class YnlFamily(SpecFamily): req_seq = random.randint(1024, 65535) msg = self.nlproto.message(nl_flags, op.req_value, 1, req_seq) - fixed_header_members = [] if op.fixed_header: - fixed_header_members = self.consts[op.fixed_header].members - for m in fixed_header_members: - value = vals.pop(m.name) if m.name in vals else 0 - if m.type == 'pad': - msg += bytearray(m.len) - elif m.type == 'binary': - msg += bytes.fromhex(value) - else: - format = NlAttr.get_format(m.type, m.byte_order) - msg += format.pack(value) + msg += self._encode_struct(op.fixed_header, vals) for name, value in vals.items(): msg += self._add_attr(op.attr_set.name, name, value) msg = _genl_msg_finalize(msg) -- cgit v1.2.3 From ab463c4342d1d14f25357c0f9f1a9744d7417edd Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:50 +0000 Subject: tools/net/ynl: Add support for encoding sub-messages Add sub-message encoding to ynl. This makes it possible to create tc qdiscs and other polymorphic netlink objects. Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-6-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index b22ddedb801b..b063094e8a4b 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -469,7 +469,7 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP, mcast_id) - def _add_attr(self, space, name, value): + def _add_attr(self, space, name, value, search_attrs): try: attr = self.attr_sets[space][name] except KeyError: @@ -478,8 +478,10 @@ class YnlFamily(SpecFamily): if attr["type"] == 'nest': nl_type |= Netlink.NLA_F_NESTED attr_payload = b'' + sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs) for subname, subvalue in value.items(): - attr_payload += self._add_attr(attr['nested-attributes'], subname, subvalue) + attr_payload += self._add_attr(attr['nested-attributes'], + subname, subvalue, sub_attrs) elif attr["type"] == 'flag': attr_payload = b'' elif attr["type"] == 'string': @@ -489,6 +491,8 @@ class YnlFamily(SpecFamily): attr_payload = value elif isinstance(value, str): attr_payload = bytes.fromhex(value) + elif isinstance(value, dict) and attr.struct_name: + attr_payload = self._encode_struct(attr.struct_name, value) else: raise Exception(f'Unknown type for binary attribute, value: {value}') elif attr.is_auto_scalar: @@ -501,6 +505,20 @@ class YnlFamily(SpecFamily): attr_payload = format.pack(int(value)) elif attr['type'] in "bitfield32": attr_payload = struct.pack("II", int(value["value"]), int(value["selector"])) + elif attr['type'] == 'sub-message': + msg_format = self._resolve_selector(attr, search_attrs) + attr_payload = b'' + if msg_format.fixed_header: + attr_payload += self._encode_struct(msg_format.fixed_header, value) + if msg_format.attr_set: + if msg_format.attr_set in self.attr_sets: + nl_type |= Netlink.NLA_F_NESTED + sub_attrs = SpaceAttrs(msg_format.attr_set, value, search_attrs) + for subname, subvalue in value.items(): + attr_payload += self._add_attr(msg_format.attr_set, + subname, subvalue, sub_attrs) + else: + raise Exception(f"Unknown attribute-set '{msg_format.attr_set}'") else: raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}') @@ -637,7 +655,7 @@ class YnlFamily(SpecFamily): selector = self._decode_enum(selector, attr_spec) decoded = {"value": value, "selector": selector} elif attr_spec["type"] == 'sub-message': - decoded = self._decode_sub_msg(attr, attr_spec, vals) + decoded = self._decode_sub_msg(attr, attr_spec, search_attrs) else: if not self.process_unknown: raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') @@ -795,8 +813,9 @@ class YnlFamily(SpecFamily): msg = self.nlproto.message(nl_flags, op.req_value, 1, req_seq) if op.fixed_header: msg += self._encode_struct(op.fixed_header, vals) + search_attrs = SpaceAttrs(op.attr_set, vals) for name, value in vals.items(): - msg += self._add_attr(op.attr_set.name, name, value) + msg += self._add_attr(op.attr_set.name, name, value, search_attrs) msg = _genl_msg_finalize(msg) self.sock.send(msg, 0) -- cgit v1.2.3 From a387a921139e07587b0fb93c9c0fec8b6800776d Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:51 +0000 Subject: tools/net/ynl: Encode default values for binary blobs Add support for defaulting binary byte arrays to all zeros as well as defaulting scalar values to 0 when encoding input parameters. Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-7-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index b063094e8a4b..d04435f26f89 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -742,12 +742,17 @@ class YnlFamily(SpecFamily): members = self.consts[name].members attr_payload = b'' for m in members: - value = vals.pop(m.name) if m.name in vals else 0 + value = vals.pop(m.name) if m.name in vals else None if m.type == 'pad': attr_payload += bytearray(m.len) elif m.type == 'binary': - attr_payload += bytes.fromhex(value) + if value is None: + attr_payload += bytearray(m.len) + else: + attr_payload += bytes.fromhex(value) else: + if value is None: + value = 0 format = NlAttr.get_format(m.type, m.byte_order) attr_payload += format.pack(value) return attr_payload -- cgit v1.2.3 From e45fee0f49fcbafe9f0f883ef22afb890504a5e6 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:52 +0000 Subject: tools/net/ynl: Combine struct decoding logic in ynl _decode_fixed_header() and NlAttr.as_struct() both implemented struct decoding logic. Deduplicate the code into newly named _decode_struct() method. Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-8-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 47 ++++++++++++++--------------------------------- 1 file changed, 14 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index d04435f26f89..0e7e9f60ab7e 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -148,23 +148,6 @@ class NlAttr: format = self.get_format(type) return [ x[0] for x in format.iter_unpack(self.raw) ] - def as_struct(self, members): - value = dict() - offset = 0 - for m in members: - # TODO: handle non-scalar members - if m.type == 'binary': - decoded = self.raw[offset : offset + m['len']] - offset += m['len'] - elif m.type in NlAttr.type_formats: - format = self.get_format(m.type, m.byte_order) - [ decoded ] = format.unpack_from(self.raw, offset) - offset += format.size - if m.display_hint: - decoded = self.formatted_string(decoded, m.display_hint) - value[m.name] = decoded - return value - def __repr__(self): return f"[type:{self.type} len:{self._len}] {self.raw}" @@ -541,11 +524,7 @@ class YnlFamily(SpecFamily): def _decode_binary(self, attr, attr_spec): if attr_spec.struct_name: - members = self.consts[attr_spec.struct_name] - decoded = attr.as_struct(members) - for m in members: - if m.enum: - decoded[m.name] = self._decode_enum(decoded[m.name], m) + decoded = self._decode_struct(attr.raw, attr_spec.struct_name) elif attr_spec.sub_type: decoded = attr.as_c_array(attr_spec.sub_type) else: @@ -605,7 +584,7 @@ class YnlFamily(SpecFamily): decoded = {} offset = 0 if msg_format.fixed_header: - decoded.update(self._decode_fixed_header(attr, msg_format.fixed_header)); + decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header)); offset = self._fixed_header_size(msg_format.fixed_header) if msg_format.attr_set: if msg_format.attr_set in self.attr_sets: @@ -717,26 +696,28 @@ class YnlFamily(SpecFamily): else: return 0 - def _decode_fixed_header(self, msg, name): - fixed_header_members = self.consts[name].members - fixed_header_attrs = dict() + def _decode_struct(self, data, name): + members = self.consts[name].members + attrs = dict() offset = 0 - for m in fixed_header_members: + for m in members: value = None if m.type == 'pad': offset += m.len elif m.type == 'binary': - value = msg.raw[offset : offset + m.len] + value = data[offset : offset + m.len] offset += m.len else: format = NlAttr.get_format(m.type, m.byte_order) - [ value ] = format.unpack_from(msg.raw, offset) + [ value ] = format.unpack_from(data, offset) offset += format.size if value is not None: if m.enum: value = self._decode_enum(value, m) - fixed_header_attrs[m.name] = value - return fixed_header_attrs + elif m.display_hint: + value = NlAttr.formatted_string(value, m.display_hint) + attrs[m.name] = value + return attrs def _encode_struct(self, name, vals): members = self.consts[name].members @@ -764,7 +745,7 @@ class YnlFamily(SpecFamily): op = self.rsp_by_value[decoded.cmd()] attrs = self._decode(decoded.raw_attrs, op.attr_set.name) if op.fixed_header: - attrs.update(self._decode_fixed_header(decoded, op.fixed_header)) + attrs.update(self._decode_struct(decoded.raw, op.fixed_header)) msg['name'] = op['name'] msg['msg'] = attrs @@ -856,7 +837,7 @@ class YnlFamily(SpecFamily): rsp_msg = self._decode(decoded.raw_attrs, op.attr_set.name) if op.fixed_header: - rsp_msg.update(self._decode_fixed_header(decoded, op.fixed_header)) + rsp_msg.update(self._decode_struct(decoded.raw, op.fixed_header)) rsp.append(rsp_msg) if not rsp: -- cgit v1.2.3 From 886365cf40b2b53864f1ef0655fe7999c2477418 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:53 +0000 Subject: tools/net/ynl: Rename _fixed_header_size() to _struct_size() Refactor the _fixed_header_size() method to be _struct_size() so that naming is consistent with _encode_struct() and _decode_struct(). Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-9-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 0e7e9f60ab7e..173ef4489e38 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -353,7 +353,7 @@ class NetlinkProtocol: fixed_header_size = 0 if ynl: op = ynl.rsp_by_value[msg.cmd()] - fixed_header_size = ynl._fixed_header_size(op.fixed_header) + fixed_header_size = ynl._struct_size(op.fixed_header) msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size) return msg @@ -585,7 +585,7 @@ class YnlFamily(SpecFamily): offset = 0 if msg_format.fixed_header: decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header)); - offset = self._fixed_header_size(msg_format.fixed_header) + offset = self._struct_size(msg_format.fixed_header) if msg_format.attr_set: if msg_format.attr_set in self.attr_sets: subdict = self._decode(NlAttrs(attr.raw, offset), msg_format.attr_set) @@ -675,18 +675,18 @@ class YnlFamily(SpecFamily): return msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set)) - offset = 20 + self._fixed_header_size(op.fixed_header) + offset = 20 + self._struct_size(op.fixed_header) path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset, extack['bad-attr-offs']) if path: del extack['bad-attr-offs'] extack['bad-attr'] = path - def _fixed_header_size(self, name): + def _struct_size(self, name): if name: - fixed_header_members = self.consts[name].members + members = self.consts[name].members size = 0 - for m in fixed_header_members: + for m in members: if m.type in ['pad', 'binary']: size += m.len else: -- cgit v1.2.3 From 971c3eeaf668ed312d032d12becb106982d2b2bd Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:54 +0000 Subject: tools/net/ynl: Move formatted_string method out of NlAttr The formatted_string() class method was in NlAttr so that it could be accessed by NlAttr.as_struct(). Now that as_struct() has been removed, move formatted_string() to YnlFamily as an internal helper method. Signed-off-by: Donald Hunter Reviewed-by: Breno Leitao Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-10-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 173ef4489e38..2b0ca61deaf8 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -113,20 +113,6 @@ class NlAttr: else format.little return format.native - @classmethod - def formatted_string(cls, raw, display_hint): - if display_hint == 'mac': - formatted = ':'.join('%02x' % b for b in raw) - elif display_hint == 'hex': - formatted = bytes.hex(raw, ' ') - elif display_hint in [ 'ipv4', 'ipv6' ]: - formatted = format(ipaddress.ip_address(raw)) - elif display_hint == 'uuid': - formatted = str(uuid.UUID(bytes=raw)) - else: - formatted = raw - return formatted - def as_scalar(self, attr_type, byte_order=None): format = self.get_format(attr_type, byte_order) return format.unpack(self.raw)[0] @@ -530,7 +516,7 @@ class YnlFamily(SpecFamily): else: decoded = attr.as_bin() if attr_spec.display_hint: - decoded = NlAttr.formatted_string(decoded, attr_spec.display_hint) + decoded = self._formatted_string(decoded, attr_spec.display_hint) return decoded def _decode_array_nest(self, attr, attr_spec): @@ -715,7 +701,7 @@ class YnlFamily(SpecFamily): if m.enum: value = self._decode_enum(value, m) elif m.display_hint: - value = NlAttr.formatted_string(value, m.display_hint) + value = self._formatted_string(value, m.display_hint) attrs[m.name] = value return attrs @@ -738,6 +724,19 @@ class YnlFamily(SpecFamily): attr_payload += format.pack(value) return attr_payload + def _formatted_string(self, raw, display_hint): + if display_hint == 'mac': + formatted = ':'.join('%02x' % b for b in raw) + elif display_hint == 'hex': + formatted = bytes.hex(raw, ' ') + elif display_hint in [ 'ipv4', 'ipv6' ]: + formatted = format(ipaddress.ip_address(raw)) + elif display_hint == 'uuid': + formatted = str(uuid.UUID(bytes=raw)) + else: + formatted = raw + return formatted + def handle_ntf(self, decoded): msg = dict() if self.include_raw: -- cgit v1.2.3 From bf08f32c8cedb12a23efcdc2c9584601d7030e16 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:55 +0000 Subject: tools/net/ynl: Add support for nested structs Make it possible for struct definitions to reference other struct definitions ofr binary members. For example, the tbf qdisc uses this struct definition for its parms attribute: - name: tc-tbf-qopt type: struct members: - name: rate type: binary struct: tc-ratespec - name: peakrate type: binary struct: tc-ratespec - name: limit type: u32 - name: buffer type: u32 - name: mtu type: u32 This adds the necessary schema changes and adds nested struct encoding and decoding to ynl. Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-11-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/netlink-raw.yaml | 15 ++++++++++++--- tools/net/ynl/lib/nlspec.py | 2 ++ tools/net/ynl/lib/ynl.py | 26 ++++++++++++++++++++------ 3 files changed, 34 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index 04b92f1a5cd6..ac4e05415f2f 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -152,14 +152,23 @@ properties: the right formatting mechanism when displaying values of this type. enum: [ hex, mac, fddi, ipv4, ipv6, uuid ] + struct: + description: Name of the nested struct type. + type: string if: properties: type: - oneOf: - - const: binary - - const: pad + const: pad then: required: [ len ] + if: + properties: + type: + const: binary + then: + oneOf: + - required: [ len ] + - required: [ struct ] # End genetlink-legacy attribute-sets: diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 44f13e383e8a..5d197a12ab8d 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -248,6 +248,7 @@ class SpecStructMember(SpecElement): len integer, optional byte length of binary types display_hint string, hint to help choose format specifier when displaying the value + struct string, name of nested struct type """ def __init__(self, family, yaml): super().__init__(family, yaml) @@ -256,6 +257,7 @@ class SpecStructMember(SpecElement): self.enum = yaml.get('enum') self.len = yaml.get('len') self.display_hint = yaml.get('display-hint') + self.struct = yaml.get('struct') class SpecStruct(SpecElement): diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 2b0ca61deaf8..0f4193cc2e3b 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -674,7 +674,10 @@ class YnlFamily(SpecFamily): size = 0 for m in members: if m.type in ['pad', 'binary']: - size += m.len + if m.struct: + size += self._struct_size(m.struct) + else: + size += m.len else: format = NlAttr.get_format(m.type, m.byte_order) size += format.size @@ -691,8 +694,14 @@ class YnlFamily(SpecFamily): if m.type == 'pad': offset += m.len elif m.type == 'binary': - value = data[offset : offset + m.len] - offset += m.len + if m.struct: + len = self._struct_size(m.struct) + value = self._decode_struct(data[offset : offset + len], + m.struct) + offset += len + else: + value = data[offset : offset + m.len] + offset += m.len else: format = NlAttr.get_format(m.type, m.byte_order) [ value ] = format.unpack_from(data, offset) @@ -713,10 +722,15 @@ class YnlFamily(SpecFamily): if m.type == 'pad': attr_payload += bytearray(m.len) elif m.type == 'binary': - if value is None: - attr_payload += bytearray(m.len) + if m.struct: + if value is None: + value = dict() + attr_payload += self._encode_struct(m.struct, value) else: - attr_payload += bytes.fromhex(value) + if value is None: + attr_payload += bytearray(m.len) + else: + attr_payload += bytes.fromhex(value) else: if value is None: value = 0 -- cgit v1.2.3 From fe09ae5fb93be371a72d1b2ac62fb3eead51a728 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 29 Jan 2024 22:34:57 +0000 Subject: tools/net/ynl: Add type info to struct members in generated docs Extend the ynl doc generator to include type information for struct members, ignoring the pad type. Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129223458.52046-13-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 262d88f88696..927407b3efb3 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -189,12 +189,19 @@ def parse_operations(operations: List[Dict[str, Any]]) -> str: def parse_entries(entries: List[Dict[str, Any]], level: int) -> str: """Parse a list of entries""" + ignored = ["pad"] lines = [] for entry in entries: if isinstance(entry, dict): # entries could be a list or a dictionary + field_name = entry.get("name", "") + if field_name in ignored: + continue + type_ = entry.get("type") + if type_: + field_name += f" ({inline(type_)})" lines.append( - rst_fields(entry.get("name", ""), sanitize(entry.get("doc", "")), level) + rst_fields(field_name, sanitize(entry.get("doc", "")), level) ) elif isinstance(entry, list): lines.append(rst_list_inline(entry, level)) -- cgit v1.2.3 From 8263b3382d8c1af0fffa27095a9f1db6f2dad899 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 31 Jan 2024 23:26:15 +0200 Subject: libbpf: Remove unnecessary null check in kernel_supports() After recent changes, Coverity complained about inconsistent null checks in kernel_supports() function: kernel_supports(const struct bpf_object *obj, ...) [...] // var_compare_op: Comparing obj to null implies that obj might be null if (obj && obj->gen_loader) return true; // var_deref_op: Dereferencing null pointer obj if (obj->token_fd) return feat_supported(obj->feat_cache, feat_id); [...] - The original null check was introduced by commit [0], which introduced a call `kernel_supports(NULL, ...)` in function bump_rlimit_memlock(); - This call was refactored to use `feat_supported(NULL, ...)` in commit [1]. Looking at all places where kernel_supports() is called: - There is either `obj->...` access before the call; - Or `obj` comes from `prog->obj` expression, where `prog` comes from enumeration of programs in `obj`; - Or `obj` comes from `prog->obj`, where `prog` is a parameter to one of the API functions: - bpf_program__attach_kprobe_opts; - bpf_program__attach_kprobe; - bpf_program__attach_ksyscall. Assuming correct API usage, it appears that `obj` can never be null when passed to kernel_supports(). Silence the Coverity warning by removing redundant null check. [0] e542f2c4cd16 ("libbpf: Auto-bump RLIMIT_MEMLOCK if kernel needs it for BPF") [1] d6dd1d49367a ("libbpf: Further decouple feature checking logic from bpf_object") Signed-off-by: Eduard Zingerman Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240131212615.20112-1-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index db65ea59a05a..f6953d7faff1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4661,7 +4661,7 @@ bpf_object__probe_loading(struct bpf_object *obj) bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { - if (obj && obj->gen_loader) + if (obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ -- cgit v1.2.3 From 994ff2f7973982af286608da10c295383650fc28 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 30 Jan 2024 12:46:59 +0000 Subject: selftests/bpf: Enable inline bpf_kptr_xchg() test for RV64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable inline bpf_kptr_xchg() test for RV64, and the test have passed as show below: Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Pu Lehui Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20240130124659.670321-3-pulehui@huaweicloud.com --- tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c index 15144943e88b..7def158da9eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c +++ b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c @@ -13,7 +13,8 @@ void test_kptr_xchg_inline(void) unsigned int cnt; int err; -#if !(defined(__x86_64__) || defined(__aarch64__)) +#if !(defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__riscv) && __riscv_xlen == 64)) test__skip(); return; #endif -- cgit v1.2.3 From 9fa5e1a180aa639fb156a16e453ab820b7e7860b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Feb 2024 09:20:23 -0800 Subject: libbpf: Call memfd_create() syscall directly Some versions of Android do not implement memfd_create() wrapper in their libc implementation, leading to build failures ([0]). On the other hand, memfd_create() is available as a syscall on quite old kernels (3.17+, while bpf() syscall itself is available since 3.18+), so it is ok to assume that syscall availability and call into it with syscall() helper to avoid Android-specific workarounds. Validated in libbpf-bootstrap's CI ([1]). [0] https://github.com/libbpf/libbpf-bootstrap/actions/runs/7701003207/job/20986080319#step:5:83 [1] https://github.com/libbpf/libbpf-bootstrap/actions/runs/7715988887/job/21031767212?pr=253 Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240201172027.604869-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f6953d7faff1..6932f2c4ddfd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1525,11 +1525,20 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static int sys_memfd_create(const char *name, unsigned flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + static int create_placeholder_fd(void) { int fd; - fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); + fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); if (fd < 0) return -errno; return fd; -- cgit v1.2.3 From 93ee1eb85e28d1e35bb059c1f5965d65d5fc83c2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Feb 2024 09:20:24 -0800 Subject: libbpf: Add missing LIBBPF_API annotation to libbpf_set_memlock_rlim API LIBBPF_API annotation seems missing on libbpf_set_memlock_rlim API, so add it to make this API callable from libbpf's shared library version. Fixes: e542f2c4cd16 ("libbpf: Auto-bump RLIMIT_MEMLOCK if kernel needs it for BPF") Fixes: ab9a5a05dc48 ("libbpf: fix up few libbpf.map problems") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240201172027.604869-3-andrii@kernel.org --- tools/lib/bpf/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 1441f642c563..f866e98b2436 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -35,7 +35,7 @@ extern "C" { #endif -int libbpf_set_memlock_rlim(size_t memlock_bytes); +LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes); struct bpf_map_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ -- cgit v1.2.3 From c81a8ab196b5083d5109a51585fcc24fa2055a77 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Feb 2024 09:20:25 -0800 Subject: libbpf: Add btf__new_split() API that was declared but not implemented Seems like original commit adding split BTF support intended to add btf__new_split() API, and even declared it in libbpf.map, but never added (trivial) implementation. Fix this. Fixes: ba451366bf44 ("libbpf: Implement basic split BTF support") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240201172027.604869-4-andrii@kernel.org --- tools/lib/bpf/btf.c | 5 +++++ tools/lib/bpf/libbpf.map | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 95db88b36cf3..845034d15420 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1079,6 +1079,11 @@ struct btf *btf__new(const void *data, __u32 size) return libbpf_ptr(btf_new(data, size, NULL)); } +struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf) +{ + return libbpf_ptr(btf_new(data, size, base_btf)); +} + static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) { diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index d9e1f57534fa..386964f572a8 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -245,7 +245,6 @@ LIBBPF_0.3.0 { btf__parse_raw_split; btf__parse_split; btf__new_empty_split; - btf__new_split; ring_buffer__epoll_fd; } LIBBPF_0.2.0; @@ -411,5 +410,7 @@ LIBBPF_1.3.0 { } LIBBPF_1.2.0; LIBBPF_1.4.0 { + global: bpf_token_create; + btf__new_split; } LIBBPF_1.3.0; -- cgit v1.2.3 From b9551da8cf3ade01a50316df8a618fd945723ee0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Feb 2024 09:20:26 -0800 Subject: libbpf: Add missed btf_ext__raw_data() API Another API that was declared in libbpf.map but actual implementation was missing. btf_ext__get_raw_data() was intended as a discouraged alias to consistently-named btf_ext__raw_data(), so make this an actuality. Fixes: 20eccf29e297 ("libbpf: hide and discourage inconsistently named getters") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240201172027.604869-5-andrii@kernel.org --- tools/lib/bpf/btf.c | 6 +++++- tools/lib/bpf/libbpf.map | 2 +- tools/lib/bpf/linker.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 845034d15420..a17b4c9c4213 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -3050,12 +3050,16 @@ done: return btf_ext; } -const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) +const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size) { *size = btf_ext->data_size; return btf_ext->data; } +__attribute__((alias("btf_ext__raw_data"))) +const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); + + struct btf_dedup; static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 386964f572a8..86804fd90dd1 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -325,7 +325,6 @@ LIBBPF_0.7.0 { bpf_xdp_detach; bpf_xdp_query; bpf_xdp_query_id; - btf_ext__raw_data; libbpf_probe_bpf_helper; libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; @@ -413,4 +412,5 @@ LIBBPF_1.4.0 { global: bpf_token_create; btf__new_split; + btf_ext__raw_data; } LIBBPF_1.3.0; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 16bca56002ab..0d4be829551b 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -2732,7 +2732,7 @@ static int finalize_btf(struct bpf_linker *linker) /* Emit .BTF.ext section */ if (linker->btf_ext) { - raw_data = btf_ext__get_raw_data(linker->btf_ext, &raw_sz); + raw_data = btf_ext__raw_data(linker->btf_ext, &raw_sz); if (!raw_data) return -ENOMEM; -- cgit v1.2.3 From 943b043aeecce9accb6d367af47791c633e95e4d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Feb 2024 09:20:27 -0800 Subject: selftests/bpf: Fix bench runner SIGSEGV Some benchmarks don't have either "consumer" or "producer" sides. For example, trig-tp and other BPF triggering benchmarks don't have consumers, as they only do "producing" by calling into syscall or predefined uproes. As such it's valid for some benchmarks to have zero consumers or producers. So allows to specify `-c0` explicitly. This triggers another problem. If benchmark doesn't support either consumer or producer side, consumer_thread/producer_thread callback will be NULL, but benchmark runner will attempt to use those NULL callback to create threads anyways. So instead of crashing with SIGSEGV in case of misconfigured benchmark, detect the condition and report error. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240201172027.604869-6-andrii@kernel.org --- tools/testing/selftests/bpf/bench.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 73ce11b0547d..1724d50ba942 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -323,14 +323,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) break; case 'p': env.producer_cnt = strtol(arg, NULL, 10); - if (env.producer_cnt <= 0) { + if (env.producer_cnt < 0) { fprintf(stderr, "Invalid producer count: %s\n", arg); argp_usage(state); } break; case 'c': env.consumer_cnt = strtol(arg, NULL, 10); - if (env.consumer_cnt <= 0) { + if (env.consumer_cnt < 0) { fprintf(stderr, "Invalid consumer count: %s\n", arg); argp_usage(state); } @@ -607,6 +607,10 @@ static void setup_benchmark(void) bench->setup(); for (i = 0; i < env.consumer_cnt; i++) { + if (!bench->consumer_thread) { + fprintf(stderr, "benchmark doesn't support consumers!\n"); + exit(1); + } err = pthread_create(&state.consumers[i], NULL, bench->consumer_thread, (void *)(long)i); if (err) { @@ -626,6 +630,10 @@ static void setup_benchmark(void) env.prod_cpus.next_cpu = env.cons_cpus.next_cpu; for (i = 0; i < env.producer_cnt; i++) { + if (!bench->producer_thread) { + fprintf(stderr, "benchmark doesn't support producers!\n"); + exit(1); + } err = pthread_create(&state.producers[i], NULL, bench->producer_thread, (void *)(long)i); if (err) { -- cgit v1.2.3 From 094bdd48afb8dde4ac71a6a672b096108ded9f49 Mon Sep 17 00:00:00 2001 From: Brad Cowie Date: Wed, 31 Jan 2024 17:08:22 +1300 Subject: selftests: openvswitch: Test ICMP related matches work with SNAT Add a test case for regression in openvswitch nat that was fixed by commit e6345d2824a3 ("netfilter: nf_nat: fix action not being set for all ct states"). Link: https://lore.kernel.org/netdev/20231221224311.130319-1-brad@faucet.nz/ Link: https://mail.openvswitch.org/pipermail/ovs-dev/2024-January/410476.html Suggested-by: Aaron Conole Signed-off-by: Brad Cowie Tested-by: Aaron Conole Acked-by: Aaron Conole Signed-off-by: David S. Miller --- .../selftests/net/openvswitch/openvswitch.sh | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index f8499d4c87f3..87b80bee6df4 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -17,6 +17,7 @@ tests=" ct_connect_v4 ip4-ct-xon: Basic ipv4 tcp connection using ct connect_v4 ip4-xon: Basic ipv4 ping between two NS nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT + nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces drop_reason drop: test drop reasons are emitted" @@ -473,6 +474,67 @@ test_nat_connect_v4 () { return 0 } +# nat_related_v4 test +# - client->server ip packets go via SNAT +# - client solicits ICMP destination unreachable packet from server +# - undo NAT for ICMP reply and test dst ip has been updated +test_nat_related_v4 () { + which nc >/dev/null 2>/dev/null || return $ksft_skip + + sbx_add "test_nat_related_v4" || return $? + + ovs_add_dp "test_nat_related_v4" natrelated4 || return 1 + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_nat_related_v4" "natrelated4" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + ip netns exec server ip route add 192.168.0.20/32 via 172.31.110.10 + + # Allow ARP + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "in_port(1),eth(),eth_type(0x0806),arp()" "2" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "in_port(2),eth(),eth_type(0x0806),arp()" "1" || return 1 + + # Allow IP traffic from client->server, rewrite source IP with SNAT to 192.168.0.20 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=172.31.110.20)" \ + "ct(commit,nat(src=192.168.0.20)),recirc(0x1)" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" \ + "2" || return 1 + + # Allow related ICMP responses back from server and undo NAT to restore original IP + # Drop any ICMP related packets where dst ip hasn't been restored back to original IP + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \ + "ct(commit,nat),recirc(0x2)" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,dst=172.31.110.10,proto=1),icmp()" \ + "1" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20,proto=1),icmp()" \ + "drop" || return 1 + + # Solicit destination unreachable response from server + ovs_sbx "test_nat_related_v4" ip netns exec client \ + bash -c "echo a | nc -u -w 1 172.31.110.20 10000" + + # Check to make sure no packets matched the drop rule with incorrect dst ip + python3 "$ovs_base/ovs-dpctl.py" dump-flows natrelated4 \ + | grep "drop" | grep "packets:0" >/dev/null || return 1 + + info "done..." + return 0 +} + # netlink_validation # - Create a dp # - check no warning with "old version" simulation -- cgit v1.2.3 From e67ddd9b1cff7872d43ead73a1403c4e532003d9 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 27 Jan 2024 19:52:32 +0200 Subject: bpf: Track spilled unbounded scalars Support the pattern where an unbounded scalar is spilled to the stack, then boundary checks are performed on the src register, after which the stack frame slot is refilled into a register. Before this commit, the verifier didn't treat the src register and the stack slot as related if the src register was an unbounded scalar. The register state wasn't copied, the id wasn't preserved, and the stack slot was marked as STACK_MISC. Subsequent boundary checks on the src register wouldn't result in updating the boundaries of the spilled variable on the stack. After this commit, the verifier will preserve the bond between src and dst even if src is unbounded, which permits to do boundary checks on src and refill dst later, still remembering its boundaries. Such a pattern is sometimes generated by clang when compiling complex long functions. One test is adjusted to reflect that now unbounded scalars are tracked. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240127175237.526726-2-maxtram95@gmail.com --- kernel/bpf/verifier.c | 16 +--------------- tools/testing/selftests/bpf/progs/verifier_spill_fill.c | 6 +++--- 2 files changed, 4 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cd4d780e5400..28f62f24da7e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4380,20 +4380,6 @@ static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; } -static bool __is_scalar_unbounded(struct bpf_reg_state *reg) -{ - return tnum_is_unknown(reg->var_off) && - reg->smin_value == S64_MIN && reg->smax_value == S64_MAX && - reg->umin_value == 0 && reg->umax_value == U64_MAX && - reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX && - reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX; -} - -static bool register_is_bounded(struct bpf_reg_state *reg) -{ - return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg); -} - static bool __is_pointer_value(bool allow_ptr_leaks, const struct bpf_reg_state *reg) { @@ -4504,7 +4490,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, return err; mark_stack_slot_scratched(env, spi); - if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && env->bpf_capable) { + if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) { bool reg_value_fits; reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size; diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 7013a9694163..317806451762 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -452,9 +452,9 @@ l0_%=: r1 >>= 16; \ SEC("raw_tp") __log_level(2) __success -__msg("fp-8=0m??mmmm") -__msg("fp-16=00mm??mm") -__msg("fp-24=00mm???m") +__msg("fp-8=0m??scalar()") +__msg("fp-16=00mm??scalar()") +__msg("fp-24=00mm???scalar()") __naked void spill_subregs_preserve_stack_zero(void) { asm volatile ( -- cgit v1.2.3 From 6be503cec6c9bccd64f72c03697011d2e2b96fc3 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 27 Jan 2024 19:52:33 +0200 Subject: selftests/bpf: Test tracking spilled unbounded scalars The previous commit added tracking for unbounded scalars on spill. Add the test case to check the new functionality. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240127175237.526726-3-maxtram95@gmail.com --- .../selftests/bpf/progs/verifier_spill_fill.c | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 317806451762..f9803005e1c0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -940,4 +940,31 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("xdp") +__description("spill unbounded reg, then range check src") +__success __retval(0) +__naked void spill_unbounded(void) +{ + asm volatile (" \ + /* Produce an unbounded scalar. */ \ + call %[bpf_get_prandom_u32]; \ + /* Spill r0 to stack. */ \ + *(u64*)(r10 - 8) = r0; \ + /* Boundary check on r0. */ \ + if r0 > 16 goto l0_%=; \ + /* Fill r0 from stack. */ \ + r0 = *(u64*)(r10 - 8); \ + /* Boundary check on r0 with predetermined result. */\ + if r0 <= 16 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From c1e6148cb4f83cec841db1f066e8db4a86c1f118 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 27 Jan 2024 19:52:34 +0200 Subject: bpf: Preserve boundaries and track scalars on narrowing fill When the width of a fill is smaller than the width of the preceding spill, the information about scalar boundaries can still be preserved, as long as it's coerced to the right width (done by coerce_reg_to_size). Even further, if the actual value fits into the fill width, the ID can be preserved as well for further tracking of equal scalars. Implement the above improvements, which makes narrowing fills behave the same as narrowing spills and MOVs between registers. Two tests are adjusted to accommodate for endianness differences and to take into account that it's now allowed to do a narrowing fill from the least significant bits. reg_bounds_sync is added to coerce_reg_to_size to correctly adjust umin/umax boundaries after the var_off truncation, for example, a 64-bit value 0xXXXXXXXX00000000, when read as a 32-bit, gets umin = 0, umax = 0xFFFFFFFF, var_off = (0x0; 0xffffffff00000000), which needs to be synced down to umax = 0, otherwise reg_bounds_sanity_check doesn't pass. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240127175237.526726-4-maxtram95@gmail.com --- include/linux/bpf_verifier.h | 9 ++++++++ kernel/bpf/verifier.c | 15 +++++++++---- .../selftests/bpf/progs/verifier_spill_fill.c | 26 ++++++++++++++++------ 3 files changed, 39 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0dcde339dc7e..84365e6dd85d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -919,6 +919,15 @@ static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env) env->scratched_stack_slots = ~0ULL; } +static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size) +{ +#ifdef __BIG_ENDIAN + off -= spill_size - fill_size; +#endif + + return !(off % BPF_REG_SIZE); +} + const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 28f62f24da7e..82af971926ac 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4778,7 +4778,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, if (dst_regno < 0) return 0; - if (!(off % BPF_REG_SIZE) && size == spill_size) { + if (size <= spill_size && + bpf_stack_narrow_access_ok(off, size, spill_size)) { /* The earlier check_reg_arg() has decided the * subreg_def for this insn. Save it first. */ @@ -4786,6 +4787,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, copy_register_state(&state->regs[dst_regno], reg); state->regs[dst_regno].subreg_def = subreg_def; + + /* Break the relation on a narrowing fill. + * coerce_reg_to_size will adjust the boundaries. + */ + if (get_reg_width(reg) > size * BITS_PER_BYTE) + state->regs[dst_regno].id = 0; } else { int spill_cnt = 0, zero_cnt = 0; @@ -6061,10 +6068,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) * values are also truncated so we push 64-bit bounds into * 32-bit bounds. Above were truncated < 32-bits already. */ - if (size < 4) { + if (size < 4) __mark_reg32_unbounded(reg); - reg_bounds_sync(reg); - } + + reg_bounds_sync(reg); } static void set_sext64_default_val(struct bpf_reg_state *reg, int size) diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index f9803005e1c0..3e5d063ea7e8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -217,7 +217,7 @@ __naked void uninit_u32_from_the_stack(void) SEC("tc") __description("Spill a u32 const scalar. Refill as u16. Offset to skb->data") -__failure __msg("invalid access to packet") +__success __retval(0) __naked void u16_offset_to_skb_data(void) { asm volatile (" \ @@ -225,13 +225,19 @@ __naked void u16_offset_to_skb_data(void) r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ w4 = 20; \ *(u32*)(r10 - 8) = r4; \ - r4 = *(u16*)(r10 - 8); \ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r4 = *(u16*)(r10 - 8);" +#else + "r4 = *(u16*)(r10 - 6);" +#endif + " \ r0 = r2; \ - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\ + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\ r0 += r4; \ - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\ + /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\ if r0 > r3 goto l0_%=; \ - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\ + /* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\ r0 = *(u32*)(r2 + 0); \ l0_%=: r0 = 0; \ exit; \ @@ -268,7 +274,7 @@ l0_%=: r0 = 0; \ } SEC("tc") -__description("Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data") +__description("Spill a u32 const scalar. Refill as u16 from MSB. Offset to skb->data") __failure __msg("invalid access to packet") __naked void _6_offset_to_skb_data(void) { @@ -277,7 +283,13 @@ __naked void _6_offset_to_skb_data(void) r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \ w4 = 20; \ *(u32*)(r10 - 8) = r4; \ - r4 = *(u16*)(r10 - 6); \ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r4 = *(u16*)(r10 - 6);" +#else + "r4 = *(u16*)(r10 - 8);" +#endif + " \ r0 = r2; \ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\ r0 += r4; \ -- cgit v1.2.3 From 067313a85c6f213932518f12f628810f0092492b Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sat, 27 Jan 2024 19:52:35 +0200 Subject: selftests/bpf: Add test cases for narrowing fill The previous commit allowed to preserve boundaries and track IDs of scalars on narrowing fills. Add test cases for that pattern. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240127175237.526726-5-maxtram95@gmail.com --- .../selftests/bpf/progs/verifier_spill_fill.c | 111 +++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 3e5d063ea7e8..7f3b1319bd99 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -979,4 +979,115 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("xdp") +__description("32-bit fill after 64-bit spill") +__success __retval(0) +__naked void fill_32bit_after_spill_64bit(void) +{ + asm volatile(" \ + /* Randomize the upper 32 bits. */ \ + call %[bpf_get_prandom_u32]; \ + r0 <<= 32; \ + /* 64-bit spill r0 to stack. */ \ + *(u64*)(r10 - 8) = r0; \ + /* 32-bit fill r0 from stack. */ \ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(u32*)(r10 - 8);" +#else + "r0 = *(u32*)(r10 - 4);" +#endif + " \ + /* Boundary check on r0 with predetermined result. */\ + if r0 == 0 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ +l0_%=: exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("32-bit fill after 64-bit spill of 32-bit value should preserve ID") +__success __retval(0) +__naked void fill_32bit_after_spill_64bit_preserve_id(void) +{ + asm volatile (" \ + /* Randomize the lower 32 bits. */ \ + call %[bpf_get_prandom_u32]; \ + w0 &= 0xffffffff; \ + /* 64-bit spill r0 to stack - should assign an ID. */\ + *(u64*)(r10 - 8) = r0; \ + /* 32-bit fill r1 from stack - should preserve the ID. */\ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(u32*)(r10 - 8);" +#else + "r1 = *(u32*)(r10 - 4);" +#endif + " \ + /* Compare r1 with another register to trigger find_equal_scalars. */\ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("32-bit fill after 64-bit spill should clear ID") +__failure __msg("math between ctx pointer and 4294967295 is not allowed") +__naked void fill_32bit_after_spill_64bit_clear_id(void) +{ + asm volatile (" \ + r6 = r1; \ + /* Roll one bit to force the verifier to track both branches. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x8; \ + /* Put a large number into r1. */ \ + r1 = 0xffffffff; \ + r1 <<= 32; \ + r1 += r0; \ + /* 64-bit spill r1 to stack - should assign an ID. */\ + *(u64*)(r10 - 8) = r1; \ + /* 32-bit fill r2 from stack - should clear the ID. */\ + " +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r2 = *(u32*)(r10 - 8);" +#else + "r2 = *(u32*)(r10 - 4);" +#endif + " \ + /* Compare r2 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. If the ID was mistakenly preserved on fill, this would\ + * cause the verifier to think that r1 is also equal to zero in one of\ + * the branches, and equal to eight on the other branch.\ + */ \ + r3 = 0; \ + if r2 != r3 goto l0_%=; \ +l0_%=: r1 >>= 32; \ + /* The verifier shouldn't propagate r2's range to r1, so it should\ + * still remember r1 = 0xffffffff and reject the below.\ + */ \ + r6 += r1; \ + r0 = *(u32*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 73a28d9d000e8d20b4b3c516b74ee92afe3ae4be Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 27 Jan 2024 19:52:37 +0200 Subject: selftests/bpf: States pruning checks for scalar vs STACK_MISC Check that stacksafe() compares spilled scalars with STACK_MISC. The following combinations are explored: - old spill of imprecise scalar is equivalent to cur STACK_{MISC,INVALID} (plus error in unpriv mode); - old spill of precise scalar is not equivalent to cur STACK_MISC; - old STACK_MISC is equivalent to cur scalar; - old STACK_MISC is not equivalent to cur non-scalar. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240127175237.526726-7-maxtram95@gmail.com --- .../selftests/bpf/progs/verifier_spill_fill.c | 154 +++++++++++++++++++++ 1 file changed, 154 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 7f3b1319bd99..85e48069c9e6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -1090,4 +1090,158 @@ l0_%=: r1 >>= 32; \ : __clobber_all); } +/* stacksafe(): check if stack spill of an imprecise scalar in old state + * is considered equivalent to STACK_{MISC,INVALID} in cur state. + */ +SEC("socket") +__success __log_level(2) +__msg("8: (79) r1 = *(u64 *)(r10 -8)") +__msg("8: safe") +__msg("processed 11 insns") +/* STACK_INVALID should prevent verifier in unpriv mode from + * considering states equivalent and force an error on second + * verification path (entry - label 1 - label 2). + */ +__failure_unpriv +__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)") +__msg_unpriv("9: (95) exit") +__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)") +__msg_unpriv("invalid read from stack off -8+2 size 8") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void old_imprecise_scalar_vs_cur_stack_misc(void) +{ + asm volatile( + /* get a random value for branching */ + "call %[bpf_ktime_get_ns];" + "if r0 == 0 goto 1f;" + /* conjure scalar at fp-8 */ + "r0 = 42;" + "*(u64*)(r10 - 8) = r0;" + "goto 2f;" +"1:" + /* conjure STACK_{MISC,INVALID} at fp-8 */ + "call %[bpf_ktime_get_ns];" + "*(u16*)(r10 - 8) = r0;" + "*(u16*)(r10 - 4) = r0;" +"2:" + /* read fp-8, should be considered safe on second visit */ + "r1 = *(u64*)(r10 - 8);" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* stacksafe(): check that stack spill of a precise scalar in old state + * is not considered equivalent to STACK_MISC in cur state. + */ +SEC("socket") +__success __log_level(2) +/* verifier should visit 'if r1 == 0x2a ...' two times: + * - once for path entry - label 2; + * - once for path entry - label 1 - label 2. + */ +__msg("if r1 == 0x2a goto pc+0") +__msg("if r1 == 0x2a goto pc+0") +__msg("processed 15 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void old_precise_scalar_vs_cur_stack_misc(void) +{ + asm volatile( + /* get a random value for branching */ + "call %[bpf_ktime_get_ns];" + "if r0 == 0 goto 1f;" + /* conjure scalar at fp-8 */ + "r0 = 42;" + "*(u64*)(r10 - 8) = r0;" + "goto 2f;" +"1:" + /* conjure STACK_MISC at fp-8 */ + "call %[bpf_ktime_get_ns];" + "*(u64*)(r10 - 8) = r0;" + "*(u32*)(r10 - 4) = r0;" +"2:" + /* read fp-8, should not be considered safe on second visit */ + "r1 = *(u64*)(r10 - 8);" + /* use r1 in precise context */ + "if r1 == 42 goto +0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* stacksafe(): check if STACK_MISC in old state is considered + * equivalent to stack spill of a scalar in cur state. + */ +SEC("socket") +__success __log_level(2) +__msg("8: (79) r0 = *(u64 *)(r10 -8)") +__msg("8: safe") +__msg("processed 11 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void old_stack_misc_vs_cur_scalar(void) +{ + asm volatile( + /* get a random value for branching */ + "call %[bpf_ktime_get_ns];" + "if r0 == 0 goto 1f;" + /* conjure STACK_{MISC,INVALID} at fp-8 */ + "call %[bpf_ktime_get_ns];" + "*(u16*)(r10 - 8) = r0;" + "*(u16*)(r10 - 4) = r0;" + "goto 2f;" +"1:" + /* conjure scalar at fp-8 */ + "r0 = 42;" + "*(u64*)(r10 - 8) = r0;" +"2:" + /* read fp-8, should be considered safe on second visit */ + "r0 = *(u64*)(r10 - 8);" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* stacksafe(): check that STACK_MISC in old state is not considered + * equivalent to stack spill of a non-scalar in cur state. + */ +SEC("socket") +__success __log_level(2) +/* verifier should process exit instructions twice: + * - once for path entry - label 2; + * - once for path entry - label 1 - label 2. + */ +__msg("r1 = *(u64 *)(r10 -8)") +__msg("exit") +__msg("r1 = *(u64 *)(r10 -8)") +__msg("exit") +__msg("processed 11 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void old_stack_misc_vs_cur_ctx_ptr(void) +{ + asm volatile( + /* remember context pointer in r9 */ + "r9 = r1;" + /* get a random value for branching */ + "call %[bpf_ktime_get_ns];" + "if r0 == 0 goto 1f;" + /* conjure STACK_MISC at fp-8 */ + "call %[bpf_ktime_get_ns];" + "*(u64*)(r10 - 8) = r0;" + "*(u32*)(r10 - 4) = r0;" + "goto 2f;" +"1:" + /* conjure context pointer in fp-8 */ + "*(u64*)(r10 - 8) = r9;" +"2:" + /* read fp-8, should not be considered safe on second visit */ + "r1 = *(u64*)(r10 - 8);" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From a68b50f47bec8bd6a33b07b7e1562db2553981a7 Mon Sep 17 00:00:00 2001 From: Shung-Hsi Yu Date: Fri, 2 Feb 2024 17:55:58 +0800 Subject: selftests/bpf: trace_helpers.c: do not use poisoned type After commit c698eaebdf47 ("selftests/bpf: trace_helpers.c: Optimize kallsyms cache") trace_helpers.c now includes libbpf_internal.h, and thus can no longer use the u32 type (among others) since they are poison in libbpf_internal.h. Replace u32 with __u32 to fix the following error when building trace_helpers.c on powerpc: error: attempt to use poisoned "u32" Fixes: c698eaebdf47 ("selftests/bpf: trace_helpers.c: Optimize kallsyms cache") Signed-off-by: Shung-Hsi Yu Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20240202095559.12900-1-shung-hsi.yu@suse.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/trace_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 4faa898ff7fc..27fd7ed3e4b0 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -271,7 +271,7 @@ ssize_t get_uprobe_offset(const void *addr) * addi r2,r2,XXXX */ { - const u32 *insn = (const u32 *)(uintptr_t)addr; + const __u32 *insn = (const __u32 *)(uintptr_t)addr; if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || ((*insn & OP_RT_RA_MASK) == LIS_R2)) && -- cgit v1.2.3 From e2e70535dd76c6f17bdc9009ffca3d26cfd35ea4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 2 Feb 2024 11:05:28 -0800 Subject: selftests/bpf: add more cases for __arg_trusted __arg_nullable args Add extra layer of global functions to ensure that passing around (trusted) PTR_TO_BTF_ID_OR_NULL registers works as expected. We also extend trusted_task_arg_nullable subtest to check three possible valid argumements: known NULL, known non-NULL, and maybe NULL cases. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240202190529.2374377-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_global_ptr_args.c | 32 ++++++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 484d6262363f..4ab0ef18d7eb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -19,15 +19,41 @@ __weak int subprog_trusted_task_nullable(struct task_struct *task __arg_trusted return task->pid + task->tgid; } -SEC("?kprobe") +__weak int subprog_trusted_task_nullable_extra_layer(struct task_struct *task __arg_trusted __arg_nullable) +{ + return subprog_trusted_task_nullable(task) + subprog_trusted_task_nullable(NULL); +} + +SEC("?tp_btf/task_newtask") __success __log_level(2) __msg("Validating subprog_trusted_task_nullable() func#1...") __msg(": R1=trusted_ptr_or_null_task_struct(") int trusted_task_arg_nullable(void *ctx) { - struct task_struct *t = bpf_get_current_task_btf(); + struct task_struct *t1 = bpf_get_current_task_btf(); + struct task_struct *t2 = bpf_task_acquire(t1); + int res = 0; - return subprog_trusted_task_nullable(t) + subprog_trusted_task_nullable(NULL); + /* known NULL */ + res += subprog_trusted_task_nullable(NULL); + + /* known non-NULL */ + res += subprog_trusted_task_nullable(t1); + res += subprog_trusted_task_nullable_extra_layer(t1); + + /* unknown if NULL or not */ + res += subprog_trusted_task_nullable(t2); + res += subprog_trusted_task_nullable_extra_layer(t2); + + if (t2) { + /* known non-NULL after explicit NULL check, just in case */ + res += subprog_trusted_task_nullable(t2); + res += subprog_trusted_task_nullable_extra_layer(t2); + + bpf_task_release(t2); + } + + return res; } __weak int subprog_trusted_task_nonnull(struct task_struct *task __arg_trusted) -- cgit v1.2.3 From 8f109e91b852f159b917f5c565bcf43c26d974e2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 1 Feb 2024 16:49:24 -0800 Subject: tools: ynl: include dpll and mptcp_pm in C codegen The DPLL and mptcp_pm families are pretty clean, and YNL C codegen supports them fully with no changes. Add them to user space codegen so that C samples can be written, and we know immediately if changes to these families require YNL codegen work. Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240202004926.447803-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile.deps | 2 ++ tools/net/ynl/generated/Makefile | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 3110f84dd029..e6154a1255f8 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -15,7 +15,9 @@ UAPI_PATH:=../../../../include/uapi/ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) +CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H_,dpll.h) CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) +CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H_,mptcp_pm.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 84cbabdd02a8..0d826fd008ed 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -14,7 +14,7 @@ YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \ TOOL:=../ynl-gen-c.py -GENS:=ethtool devlink handshake fou netdev nfsd +GENS:=ethtool devlink dpll handshake fou mptcp_pm netdev nfsd SRCS=$(patsubst %,%-user.c,${GENS}) HDRS=$(patsubst %,%-user.h,${GENS}) OBJS=$(patsubst %,%-user.o,${GENS}) -- cgit v1.2.3 From 7c59c9c8f2025358cacf08b8e5a626a08112cffc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 1 Feb 2024 16:49:25 -0800 Subject: tools: ynl: generate code for ovs families Add ovs_flow, ovs_vport and ovs_datapath to the families supported in C. ovs-flow has some circular nesting which is fun to deal with, but the necessary support has been added already in the previous release cycle. Add a sample that proves that dealing with fixed headers does actually work correctly. Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240202004926.447803-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/generated/Makefile | 2 +- tools/net/ynl/samples/.gitignore | 1 + tools/net/ynl/samples/ovs.c | 60 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 tools/net/ynl/samples/ovs.c (limited to 'tools') diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 0d826fd008ed..3b9f738c61b8 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -14,7 +14,7 @@ YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \ TOOL:=../ynl-gen-c.py -GENS:=ethtool devlink dpll handshake fou mptcp_pm netdev nfsd +GENS:=ethtool devlink dpll handshake fou mptcp_pm netdev nfsd ovs_datapath ovs_vport ovs_flow SRCS=$(patsubst %,%-user.c,${GENS}) HDRS=$(patsubst %,%-user.h,${GENS}) OBJS=$(patsubst %,%-user.o,${GENS}) diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore index 49637b26c482..dda6686257a7 100644 --- a/tools/net/ynl/samples/.gitignore +++ b/tools/net/ynl/samples/.gitignore @@ -1,4 +1,5 @@ ethtool devlink netdev +ovs page-pool \ No newline at end of file diff --git a/tools/net/ynl/samples/ovs.c b/tools/net/ynl/samples/ovs.c new file mode 100644 index 000000000000..3e975c003d77 --- /dev/null +++ b/tools/net/ynl/samples/ovs.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include + +#include "ovs_datapath-user.h" + +int main(int argc, char **argv) +{ + struct ynl_sock *ys; + int err; + + ys = ynl_sock_create(&ynl_ovs_datapath_family, NULL); + if (!ys) + return 1; + + if (argc > 1) { + struct ovs_datapath_new_req *req; + + req = ovs_datapath_new_req_alloc(); + if (!req) + goto err_close; + + ovs_datapath_new_req_set_upcall_pid(req, 1); + ovs_datapath_new_req_set_name(req, argv[1]); + + err = ovs_datapath_new(ys, req); + ovs_datapath_new_req_free(req); + if (err) + goto err_close; + } else { + struct ovs_datapath_get_req_dump *req; + struct ovs_datapath_get_list *dps; + + printf("Dump:\n"); + req = ovs_datapath_get_req_dump_alloc(); + + dps = ovs_datapath_get_dump(ys, req); + ovs_datapath_get_req_dump_free(req); + if (!dps) + goto err_close; + + ynl_dump_foreach(dps, dp) { + printf(" %s(%d): pid:%u cache:%u\n", + dp->name, dp->_hdr.dp_ifindex, + dp->upcall_pid, dp->masks_cache_size); + } + ovs_datapath_get_list_free(dps); + } + + ynl_sock_destroy(ys); + + return 0; + +err_close: + fprintf(stderr, "YNL (%d): %s\n", ys->err.code, ys->err.msg); + ynl_sock_destroy(ys); + return 2; +} -- cgit v1.2.3 From d2866539df7b7fd491c7f2bfea79e5eaa7f46310 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 1 Feb 2024 16:49:26 -0800 Subject: tools: ynl: auto-gen for all genetlink families Instead of listing the genetlink families that we want to codegen for, always codegen for everyone. We can add an opt-out later but it seems like most families are not causing any issues, and yet folks forget to add them to the Makefile. Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240202004926.447803-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/generated/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 3b9f738c61b8..7135028cb449 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -14,7 +14,10 @@ YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \ TOOL:=../ynl-gen-c.py -GENS:=ethtool devlink dpll handshake fou mptcp_pm netdev nfsd ovs_datapath ovs_vport ovs_flow +GENS_PATHS=$(shell grep -nrI --files-without-match \ + 'protocol: netlink' \ + ../../../../Documentation/netlink/specs/) +GENS=$(patsubst ../../../../Documentation/netlink/specs/%.yaml,%,${GENS_PATHS}) SRCS=$(patsubst %,%-user.c,${GENS}) HDRS=$(patsubst %,%-user.h,${GENS}) OBJS=$(patsubst %,%-user.o,${GENS}) -- cgit v1.2.3 From e35ba5811714b7e5a73f98100ab8112a8176f84c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 1 Feb 2024 16:11:54 -0800 Subject: selftests: netdevsim: stop using ifconfig Paolo points out that ifconfig is legacy and we should not use it. Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- .../drivers/net/netdevsim/udp_tunnel_nic.sh | 40 +++++++++++----------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index f98435c502f6..384cfa3d38a6 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -270,7 +270,7 @@ for port in 0 1; do echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up msg="new NIC device created" exp0=( 0 0 0 0 ) @@ -284,8 +284,8 @@ for port in 0 1; do msg="VxLAN v4 devices go down" exp0=( 0 0 0 0 ) - ifconfig vxlan1 down - ifconfig vxlan0 down + ip link set dev vxlan1 down + ip link set dev vxlan0 down check_tables msg="VxLAN v6 devices" @@ -293,7 +293,7 @@ for port in 0 1; do new_vxlan vxlanA 4789 $NSIM_NETDEV 6 for ifc in vxlan0 vxlan1; do - ifconfig $ifc up + ip link set dev $ifc up done new_vxlan vxlanB 4789 $NSIM_NETDEV 6 @@ -307,14 +307,14 @@ for port in 0 1; do new_geneve gnv0 6081 msg="NIC device goes down" - ifconfig $NSIM_NETDEV down + ip link set dev $NSIM_NETDEV down if [ $port -eq 1 ]; then exp0=( 0 0 0 0 ) exp1=( 0 0 0 0 ) fi check_tables msg="NIC device goes up again" - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) exp1=( `mke 6081 2` 0 0 0 ) check_tables @@ -433,7 +433,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up overflow_table0 "overflow NIC table" overflow_table1 "overflow NIC table" @@ -491,7 +491,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up overflow_table0 "overflow NIC table" overflow_table1 "overflow NIC table" @@ -548,7 +548,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up overflow_table0 "destroy NIC" overflow_table1 "destroy NIC" @@ -578,7 +578,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up msg="create VxLANs v6" new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 @@ -639,7 +639,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error @@ -695,7 +695,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up msg="create VxLANs v6" exp0=( `mke 10000 1` 0 0 0 ) @@ -755,7 +755,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up msg="create VxLANs v6" exp0=( `mke 10000 1` 0 0 0 ) @@ -768,7 +768,7 @@ for port in 0 1; do check_tables msg="NIC device goes down" - ifconfig $NSIM_NETDEV down + ip link set dev $NSIM_NETDEV down if [ $port -eq 1 ]; then exp0=( 0 0 0 0 ) exp1=( 0 0 0 0 ) @@ -779,7 +779,7 @@ for port in 0 1; do check_tables msg="NIC device goes up again" - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up exp0=( `mke 10000 1` 0 0 0 ) check_tables @@ -827,12 +827,12 @@ new_vxlan vxlan1 4789 $NSIM_NETDEV2 msg="VxLAN v4 devices go down" exp0=( 0 0 0 0 ) -ifconfig vxlan1 down -ifconfig vxlan0 down +ip link set dev vxlan1 down +ip link set dev vxlan0 down check_tables for ifc in vxlan0 vxlan1; do - ifconfig $ifc up + ip link set dev $ifc up done msg="VxLAN v6 device" @@ -844,11 +844,11 @@ exp1=( `mke 6081 2` 0 0 0 ) new_geneve gnv0 6081 msg="NIC device goes down" -ifconfig $NSIM_NETDEV down +ip link set dev $NSIM_NETDEV down check_tables msg="NIC device goes up again" -ifconfig $NSIM_NETDEV up +ip link set dev $NSIM_NETDEV up check_tables for i in `seq 2`; do -- cgit v1.2.3 From 8ff25dac88f616ebebb30830e3a20f079d7a30c9 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 30 Jan 2024 13:46:20 -0800 Subject: netdevsim: add Makefile for selftests Add a Makefile for netdevsim selftests and add selftests path to MAINTAINERS Signed-off-by: David Wei Link: https://lore.kernel.org/r/20240130214620.3722189-5-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + tools/testing/selftests/drivers/net/netdevsim/Makefile | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/netdevsim/Makefile (limited to 'tools') diff --git a/MAINTAINERS b/MAINTAINERS index f70c15292707..3f465fd778b1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15087,6 +15087,7 @@ NETDEVSIM M: Jakub Kicinski S: Maintained F: drivers/net/netdevsim/* +F: tools/testing/selftests/drivers/net/netdevsim/* NETEM NETWORK EMULATOR M: Stephen Hemminger diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile new file mode 100644 index 000000000000..7a29a05bea8b --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = devlink.sh \ + devlink_in_netns.sh \ + devlink_trap.sh \ + ethtool-coalesce.sh \ + ethtool-fec.sh \ + ethtool-pause.sh \ + ethtool-ring.sh \ + fib.sh \ + hw_stats_l3.sh \ + nexthop.sh \ + psample.sh \ + tc-mq-visibility.sh \ + udp_tunnel_nic.sh \ + +include ../../../lib.mk -- cgit v1.2.3 From 7e428638bd784fd9e8944bfbf11513520e141b91 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 4 Feb 2024 11:44:52 -0800 Subject: selftests/bpf: Fix flaky test ptr_untrusted Somehow recently I frequently hit the following test failure with either ./test_progs or ./test_progs-cpuv4: serial_test_ptr_untrusted:PASS:skel_open 0 nsec serial_test_ptr_untrusted:PASS:lsm_attach 0 nsec serial_test_ptr_untrusted:PASS:raw_tp_attach 0 nsec serial_test_ptr_untrusted:FAIL:cmp_tp_name unexpected cmp_tp_name: actual -115 != expected 0 #182 ptr_untrusted:FAIL Further investigation found the failure is due to bpf_probe_read_user_str() where reading user-level string attr->raw_tracepoint.name is not successfully, most likely due to the string itself still in disk and not populated into memory yet. One solution is do a printf() call of the string before doing bpf syscall which will force the raw_tracepoint.name into memory. But I think a more robust solution is to use bpf_copy_from_user() which is used in sleepable program and can tolerate page fault, and the fix here used the latter approach. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240204194452.2785936-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/progs/test_ptr_untrusted.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c index 4bdd65b5aa2d..2fdc44e76624 100644 --- a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c +++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c @@ -6,13 +6,13 @@ char tp_name[128]; -SEC("lsm/bpf") +SEC("lsm.s/bpf") int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) { switch (cmd) { case BPF_RAW_TRACEPOINT_OPEN: - bpf_probe_read_user_str(tp_name, sizeof(tp_name) - 1, - (void *)attr->raw_tracepoint.name); + bpf_copy_from_user(tp_name, sizeof(tp_name) - 1, + (void *)attr->raw_tracepoint.name); break; default: break; -- cgit v1.2.3 From 169e650069647325496e5a65408ff301874c8e01 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Sat, 3 Feb 2024 22:12:04 -0800 Subject: selftests/bpf: Suppress warning message of an unused variable. "r" is used to receive the return value of test_2 in bpf_testmod.c, but it is not actually used. So, we remove "r" and change the return type to "void". Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401300557.z5vzn8FM-lkp@intel.com/ Signed-off-by: Kui-Feng Lee Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240204061204.1864529-1-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 ++---- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h | 2 +- tools/testing/selftests/bpf/progs/struct_ops_module.c | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 4754c662b39f..a06daebc75c9 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -554,9 +554,8 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { static int bpf_dummy_reg(void *kdata) { struct bpf_testmod_ops *ops = kdata; - int r; - r = ops->test_2(4, 3); + ops->test_2(4, 3); return 0; } @@ -570,9 +569,8 @@ static int bpf_testmod_test_1(void) return 0; } -static int bpf_testmod_test_2(int a, int b) +static void bpf_testmod_test_2(int a, int b) { - return 0; } static struct bpf_testmod_ops __bpf_testmod_ops = { diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index ca5435751c79..537beca42896 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -30,7 +30,7 @@ struct bpf_iter_testmod_seq { struct bpf_testmod_ops { int (*test_1)(void); - int (*test_2)(int a, int b); + void (*test_2)(int a, int b); }; #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c index e44ac55195ca..b78746b3cef3 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_module.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c @@ -16,10 +16,9 @@ int BPF_PROG(test_1) } SEC("struct_ops/test_2") -int BPF_PROG(test_2, int a, int b) +void BPF_PROG(test_2, int a, int b) { test_2_result = a + b; - return a + b; } SEC(".struct_ops.link") -- cgit v1.2.3 From e7f31873176a345d72ca77c7b4da48493ccd9efd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 4 Feb 2024 21:29:14 -0800 Subject: selftests/bpf: Fix flaky selftest lwt_redirect/lwt_reroute Recently, when running './test_progs -j', I occasionally hit the following errors: test_lwt_redirect:PASS:pthread_create 0 nsec test_lwt_redirect_run:FAIL:netns_create unexpected error: 256 (errno 0) #142/2 lwt_redirect/lwt_redirect_normal_nomac:FAIL #142 lwt_redirect:FAIL test_lwt_reroute:PASS:pthread_create 0 nsec test_lwt_reroute_run:FAIL:netns_create unexpected error: 256 (errno 0) test_lwt_reroute:PASS:pthread_join 0 nsec #143/2 lwt_reroute/lwt_reroute_qdisc_dropped:FAIL #143 lwt_reroute:FAIL The netns_create() definition looks like below: #define NETNS "ns_lwt" static inline int netns_create(void) { return system("ip netns add " NETNS); } One possibility is that both lwt_redirect and lwt_reroute create netns with the same name "ns_lwt" which may cause conflict. I tried the following example: $ sudo ip netns add abc $ echo $? 0 $ sudo ip netns add abc Cannot create namespace file "/var/run/netns/abc": File exists $ echo $? 1 $ The return code for above netns_create() is 256. The internet search suggests that the return value for 'ip netns add ns_lwt' is 1, which matches the above 'sudo ip netns add abc' example. This patch tried to use different netns names for two tests to avoid 'ip netns add ' failure. I ran './test_progs -j' 10 times and all succeeded with lwt_redirect/lwt_reroute tests. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Tested-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240205052914.1742687-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/prog_tests/lwt_helpers.h | 2 -- tools/testing/selftests/bpf/prog_tests/lwt_redirect.c | 1 + tools/testing/selftests/bpf/prog_tests/lwt_reroute.c | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h index e9190574e79f..fb1eb8c67361 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -27,8 +27,6 @@ } \ }) -#define NETNS "ns_lwt" - static inline int netns_create(void) { return system("ip netns add " NETNS); diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c index b5b9e74b1044..835a1d756c16 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c @@ -54,6 +54,7 @@ #include #include +#define NETNS "ns_lwt_redirect" #include "lwt_helpers.h" #include "test_progs.h" #include "network_helpers.h" diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c index 5610bc76928d..03825d2b45a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c @@ -48,6 +48,7 @@ * For case 2, force UDP packets to overflow fq limit. As long as kernel * is not crashed, it is considered successful. */ +#define NETNS "ns_lwt_reroute" #include "lwt_helpers.h" #include "network_helpers.h" #include -- cgit v1.2.3 From a44b1334aadd82203f661adb9adb41e53ad0e8d1 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sun, 4 Feb 2024 22:23:48 +0000 Subject: bpf: Allow calling static subprogs while holding a bpf_spin_lock Currently, calling any helpers, kfuncs, or subprogs except the graph data structure (lists, rbtrees) API kfuncs while holding a bpf_spin_lock is not allowed. One of the original motivations of this decision was to force the BPF programmer's hand into keeping the bpf_spin_lock critical section small, and to ensure the execution time of the program does not increase due to lock waiting times. In addition to this, some of the helpers and kfuncs may be unsafe to call while holding a bpf_spin_lock. However, when it comes to subprog calls, atleast for static subprogs, the verifier is able to explore their instructions during verification. Therefore, it is similar in effect to having the same code inlined into the critical section. Hence, not allowing static subprog calls in the bpf_spin_lock critical section is mostly an annoyance that needs to be worked around, without providing any tangible benefit. Unlike static subprog calls, global subprog calls are not safe to permit within the critical section, as the verifier does not explore them during verification, therefore whether the same lock will be taken again, or unlocked, cannot be ascertained. Therefore, allow calling static subprogs within a bpf_spin_lock critical section, and only reject it in case the subprog linkage is global. Acked-by: Yonghong Song Acked-by: David Vernet Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20240204222349.938118-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 11 ++++++++--- tools/testing/selftests/bpf/progs/verifier_spin_lock.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 64fa188d00ad..7d38b2343ad4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9493,6 +9493,13 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (subprog_is_global(env, subprog)) { const char *sub_name = subprog_name(env, subprog); + /* Only global subprogs cannot be called with a lock held. */ + if (env->cur_state->active_lock.ptr) { + verbose(env, "global function calls are not allowed while holding a lock,\n" + "use static function instead\n"); + return -EINVAL; + } + if (err) { verbose(env, "Caller passes invalid args into func#%d ('%s')\n", subprog, sub_name); @@ -17644,7 +17651,6 @@ static int do_check(struct bpf_verifier_env *env) if (env->cur_state->active_lock.ptr) { if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || - (insn->src_reg == BPF_PSEUDO_CALL) || (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) { verbose(env, "function calls are not allowed while holding a lock\n"); @@ -17692,8 +17698,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } process_bpf_exit_full: - if (env->cur_state->active_lock.ptr && - !in_rbtree_lock_required_cb(env)) { + if (env->cur_state->active_lock.ptr && !env->cur_state->curframe) { verbose(env, "bpf_spin_unlock is missing\n"); return -EINVAL; } diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c index 9c1aa69650f8..fb316c080c84 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c @@ -330,7 +330,7 @@ l1_%=: r7 = r0; \ SEC("cgroup/skb") __description("spin_lock: test10 lock in subprog without unlock") -__failure __msg("unlock is missing") +__success __failure_unpriv __msg_unpriv("") __naked void lock_in_subprog_without_unlock(void) { -- cgit v1.2.3 From e8699c4ff85baedcf40f33db816cc487cee39397 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sun, 4 Feb 2024 22:23:49 +0000 Subject: selftests/bpf: Add test for static subprog call in lock cs Add selftests for static subprog calls within bpf_spin_lock critical section, and ensure we still reject global subprog calls. Also test the case where a subprog call will unlock the caller's held lock, or the caller will unlock a lock taken by a subprog call, ensuring correct transfer of lock state across frames on exit. Acked-by: Yonghong Song Acked-by: David Vernet Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20240204222349.938118-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/spin_lock.c | 2 + tools/testing/selftests/bpf/progs/test_spin_lock.c | 65 ++++++++++++++++++++++ .../selftests/bpf/progs/test_spin_lock_fail.c | 44 +++++++++++++++ 3 files changed, 111 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index 18d451be57c8..2b0068742ef9 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -48,6 +48,8 @@ static struct { { "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" }, { "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" }, { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, + { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" }, + { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" }, }; static int match_regex(const char *pattern, const char *string) diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index b2440a0ff422..d8d77bdffd3d 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -101,4 +101,69 @@ int bpf_spin_lock_test(struct __sk_buff *skb) err: return err; } + +struct bpf_spin_lock lockA __hidden SEC(".data.A"); + +__noinline +static int static_subprog(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + if (ctx->protocol) + return ret; + return ret + ctx->len; +} + +__noinline +static int static_subprog_lock(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + ret = static_subprog(ctx); + bpf_spin_lock(&lockA); + return ret + ctx->len; +} + +__noinline +static int static_subprog_unlock(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + ret = static_subprog(ctx); + bpf_spin_unlock(&lockA); + return ret + ctx->len; +} + +SEC("tc") +int lock_static_subprog_call(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = static_subprog(ctx); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("tc") +int lock_static_subprog_lock(struct __sk_buff *ctx) +{ + int ret = 0; + + ret = static_subprog_lock(ctx); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("tc") +int lock_static_subprog_unlock(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + ret = static_subprog_unlock(ctx); + return ret; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c index 86cd183ef6dc..43f40c4fe241 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -201,4 +201,48 @@ CHECK(innermapval_mapval, &iv->lock, &v->lock); #undef CHECK +__noinline +int global_subprog(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + if (ctx->protocol) + ret += ctx->protocol; + return ret + ctx->mark; +} + +__noinline +static int static_subprog_call_global(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + if (ctx->protocol) + return ret; + return ret + ctx->len + global_subprog(ctx); +} + +SEC("?tc") +int lock_global_subprog_call1(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = global_subprog(ctx); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("?tc") +int lock_global_subprog_call2(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = static_subprog_call_global(ctx); + bpf_spin_unlock(&lockA); + return ret; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 8be6a0147af314fd60db9da2158cd737dc6394a7 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 5 Feb 2024 05:56:46 +0000 Subject: selftests/bpf: Add tests for RCU lock transfer between subprogs Add selftests covering the following cases: - A static or global subprog called from within a RCU read section works - A static subprog taking an RCU read lock which is released in caller works - A static subprog releasing the caller's RCU read lock works Global subprogs that leave the lock in an imbalanced state will not work, as they are verified separately, so ensure those cases fail as well. Acked-by: Yonghong Song Signed-off-by: Kumar Kartikeya Dwivedi Acked-by: David Vernet Link: https://lore.kernel.org/r/20240205055646.1112186-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/rcu_read_lock.c | 6 ++ tools/testing/selftests/bpf/progs/rcu_read_lock.c | 120 +++++++++++++++++++++ 2 files changed, 126 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index 3f1f58d3a729..a1f7e7378a64 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -29,6 +29,10 @@ static void test_success(void) bpf_program__set_autoload(skel->progs.non_sleepable_1, true); bpf_program__set_autoload(skel->progs.non_sleepable_2, true); bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true); + bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true); + bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true); + bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_lock, true); + bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_unlock, true); err = rcu_read_lock__load(skel); if (!ASSERT_OK(err, "skel_load")) goto out; @@ -75,6 +79,8 @@ static const char * const inproper_region_tests[] = { "inproper_sleepable_helper", "inproper_sleepable_kfunc", "nested_rcu_region", + "rcu_read_lock_global_subprog_lock", + "rcu_read_lock_global_subprog_unlock", }; static void test_inproper_region(void) diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 14fb01437fb8..ab3a532b7dd6 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -319,3 +319,123 @@ int cross_rcu_region(void *ctx) bpf_rcu_read_unlock(); return 0; } + +__noinline +static int static_subprog(void *ctx) +{ + volatile int ret = 0; + + if (bpf_get_prandom_u32()) + return ret + 42; + return ret + bpf_get_prandom_u32(); +} + +__noinline +int global_subprog(u64 a) +{ + volatile int ret = a; + + return ret + static_subprog(NULL); +} + +__noinline +static int static_subprog_lock(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + if (bpf_get_prandom_u32()) + return ret + 42; + return ret + bpf_get_prandom_u32(); +} + +__noinline +int global_subprog_lock(u64 a) +{ + volatile int ret = a; + + return ret + static_subprog_lock(NULL); +} + +__noinline +static int static_subprog_unlock(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_unlock(); + if (bpf_get_prandom_u32()) + return ret + 42; + return ret + bpf_get_prandom_u32(); +} + +__noinline +int global_subprog_unlock(u64 a) +{ + volatile int ret = a; + + return ret + static_subprog_unlock(NULL); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_subprog(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + if (bpf_get_prandom_u32()) + ret += static_subprog(ctx); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_global_subprog(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + if (bpf_get_prandom_u32()) + ret += global_subprog(ret); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_subprog_lock(void *ctx) +{ + volatile int ret = 0; + + ret += static_subprog_lock(ctx); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_global_subprog_lock(void *ctx) +{ + volatile int ret = 0; + + ret += global_subprog_lock(ret); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_subprog_unlock(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += static_subprog_unlock(ctx); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_global_subprog_unlock(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += global_subprog_unlock(ret); + return 0; +} -- cgit v1.2.3 From d7bc416aa5cc183691287e8f0b1d5b182a7ce9c3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 5 Feb 2024 16:22:43 -0800 Subject: libbpf: fix return value for PERF_EVENT __arg_ctx type fix up check If PERF_EVENT program has __arg_ctx argument with matching architecture-specific pt_regs/user_pt_regs/user_regs_struct pointer type, libbpf should still perform type rewrite for old kernels, but not emit the warning. Fix copy/paste from kernel code where 0 is meant to signify "no error" condition. For libbpf we need to return "true" to proceed with type rewrite (which for PERF_EVENT program will be a canonical `struct bpf_perf_event_data *` type). Fixes: 9eea8fafe33e ("libbpf: fix __arg_ctx type enforcement for perf_event programs") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240206002243.1439450-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6932f2c4ddfd..01f407591a92 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6404,13 +6404,13 @@ static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_pro case BPF_PROG_TYPE_PERF_EVENT: if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) - return 0; + return true; if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) - return 0; + return true; if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) - return 0; + return true; break; case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: -- cgit v1.2.3 From c7dcb6c9aa85fa310251dad7e233eb955a5235ed Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 5 Feb 2024 16:40:08 -0800 Subject: selftests/bpf: mark dynptr kfuncs __weak to make them optional on old kernels Mark dynptr kfuncs as __weak to allow verifier_global_subprogs/arg_ctx_{perf,kprobe,raw_tp} subtests to be loadable on old kernels. Because bpf_dynptr_from_xdp() kfunc is used from arg_tag_dynptr BPF program in progs/verifier_global_subprogs.c *and* is not marked as __weak, loading any subtest from verifier_global_subprogs fails on old kernels that don't have bpf_dynptr_from_xdp() kfunc defined. Even if arg_tag_dynptr program itself is not loaded, libbpf bails out on non-weak reference to bpf_dynptr_from_xdp (that can't be resolved), which shared across all programs in progs/verifier_global_subprogs.c. So mark all dynptr-related kfuncs as __weak to unblock libbpf CI ([0]). In the upcoming "kfunc in vmlinux.h" work we should make sure that kfuncs are always declared __weak as well. [0] https://github.com/libbpf/libbpf/actions/runs/7792673215/job/21251250831?pr=776#step:4:7961 Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240206004008.1541513-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_kfuncs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 0cd4111f954c..14ebe7d9e1a3 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -9,7 +9,7 @@ struct bpf_sock_addr_kern; * Error code */ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; + struct bpf_dynptr *ptr__uninit) __ksym __weak; /* Description * Initializes an xdp-type dynptr @@ -17,7 +17,7 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, * Error code */ extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; + struct bpf_dynptr *ptr__uninit) __ksym __weak; /* Description * Obtain a read-only pointer to the dynptr's data @@ -26,7 +26,7 @@ extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, * buffer if unable to obtain a direct pointer */ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym; + void *buffer, __u32 buffer__szk) __ksym __weak; /* Description * Obtain a read-write pointer to the dynptr's data @@ -35,13 +35,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, * buffer if unable to obtain a direct pointer */ extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym; + void *buffer, __u32 buffer__szk) __ksym __weak; -extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym; -extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; -extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; -extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; -extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; +extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak; +extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak; +extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak; +extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; +extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak; /* Description * Modify the address of a AF_UNIX sockaddr. -- cgit v1.2.3 From c41dfb0dfbece824143ff51829d42cba4cb3c277 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 1 Feb 2024 11:21:19 -0500 Subject: selftests/net: ignore timing errors in so_txtime if KSFT_MACHINE_SLOW This test is time sensitive. It may fail on virtual machines and for debug builds. Continue to run in these environments to get code coverage. But optionally suppress failure for timing errors (only). This is controlled with environment variable KSFT_MACHINE_SLOW. The test continues to return 0 (KSFT_PASS), rather than KSFT_XFAIL as previously discussed. Because making so_txtime.c return that and then making so_txtime.sh capture runs that pass that vs KSFT_FAIL and pass it on added a bunch of (fragile bash) boilerplate, while the result is interpreted the same as KSFT_PASS anyway. Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240201162130.2278240-1-willemdebruijn.kernel@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/so_txtime.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 2672ac0b6d1f..8457b7ccbc09 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -134,8 +134,11 @@ static void do_recv_one(int fdr, struct timed_send *ts) if (rbuf[0] != ts->data) error(1, 0, "payload mismatch. expected %c", ts->data); - if (llabs(tstop - texpect) > cfg_variance_us) - error(1, 0, "exceeds variance (%d us)", cfg_variance_us); + if (llabs(tstop - texpect) > cfg_variance_us) { + fprintf(stderr, "exceeds variance (%d us)\n", cfg_variance_us); + if (!getenv("KSFT_MACHINE_SLOW")) + exit(1); + } } static void do_recv_verify_empty(int fdr) -- cgit v1.2.3 From 240fd405528bbf7fafa0559202ca7aa524c9cd96 Mon Sep 17 00:00:00 2001 From: Aahil Awatramani Date: Fri, 2 Feb 2024 17:58:58 +0000 Subject: bonding: Add independent control state machine Add support for the independent control state machine per IEEE 802.1AX-2008 5.4.15 in addition to the existing implementation of the coupled control state machine. Introduces two new states, AD_MUX_COLLECTING and AD_MUX_DISTRIBUTING in the LACP MUX state machine for separated handling of an initial Collecting state before the Collecting and Distributing state. This enables a port to be in a state where it can receive incoming packets while not still distributing. This is useful for reducing packet loss when a port begins distributing before its partner is able to collect. Added new functions such as bond_set_slave_tx_disabled_flags and bond_set_slave_rx_enabled_flags to precisely manage the port's collecting and distributing states. Previously, there was no dedicated method to disable TX while keeping RX enabled, which this patch addresses. Note that the regular flow process in the kernel's bonding driver remains unaffected by this patch. The extension requires explicit opt-in by the user (in order to ensure no disruptions for existing setups) via netlink support using the new bonding parameter coupled_control. The default value for coupled_control is set to 1 so as to preserve existing behaviour. Signed-off-by: Aahil Awatramani Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20240202175858.1573852-1-aahila@google.com Signed-off-by: Paolo Abeni --- Documentation/networking/bonding.rst | 12 +++ drivers/net/bonding/bond_3ad.c | 157 +++++++++++++++++++++++++++++++++-- drivers/net/bonding/bond_main.c | 1 + drivers/net/bonding/bond_netlink.c | 16 ++++ drivers/net/bonding/bond_options.c | 28 ++++++- include/net/bond_3ad.h | 2 + include/net/bond_options.h | 1 + include/net/bonding.h | 23 +++++ include/uapi/linux/if_link.h | 1 + tools/include/uapi/linux/if_link.h | 1 + 10 files changed, 234 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index f7a73421eb76..e774b48de9f5 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -444,6 +444,18 @@ arp_missed_max The default value is 2, and the allowable range is 1 - 255. +coupled_control + + Specifies whether the LACP state machine's MUX in the 802.3ad mode + should have separate Collecting and Distributing states. + + This is by implementing the independent control state machine per + IEEE 802.1AX-2008 5.4.15 in addition to the existing coupled control + state machine. + + The default value is 1. This setting does not separate the Collecting + and Distributing states, maintaining the bond in coupled control. + downdelay Specifies the time, in milliseconds, to wait before disabling diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index c99ffe6c683a..f2942e8c6c91 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -106,6 +106,9 @@ static void ad_agg_selection_logic(struct aggregator *aggregator, static void ad_clear_agg(struct aggregator *aggregator); static void ad_initialize_agg(struct aggregator *aggregator); static void ad_initialize_port(struct port *port, int lacp_fast); +static void ad_enable_collecting(struct port *port); +static void ad_disable_distributing(struct port *port, + bool *update_slave_arr); static void ad_enable_collecting_distributing(struct port *port, bool *update_slave_arr); static void ad_disable_collecting_distributing(struct port *port, @@ -171,9 +174,38 @@ static inline int __agg_has_partner(struct aggregator *agg) return !is_zero_ether_addr(agg->partner_system.mac_addr_value); } +/** + * __disable_distributing_port - disable the port's slave for distributing. + * Port will still be able to collect. + * @port: the port we're looking at + * + * This will disable only distributing on the port's slave. + */ +static void __disable_distributing_port(struct port *port) +{ + bond_set_slave_tx_disabled_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); +} + +/** + * __enable_collecting_port - enable the port's slave for collecting, + * if it's up + * @port: the port we're looking at + * + * This will enable only collecting on the port's slave. + */ +static void __enable_collecting_port(struct port *port) +{ + struct slave *slave = port->slave; + + if (slave->link == BOND_LINK_UP && bond_slave_is_up(slave)) + bond_set_slave_rx_enabled_flags(slave, BOND_SLAVE_NOTIFY_LATER); +} + /** * __disable_port - disable the port's slave * @port: the port we're looking at + * + * This will disable both collecting and distributing on the port's slave. */ static inline void __disable_port(struct port *port) { @@ -183,6 +215,8 @@ static inline void __disable_port(struct port *port) /** * __enable_port - enable the port's slave, if it's up * @port: the port we're looking at + * + * This will enable both collecting and distributing on the port's slave. */ static inline void __enable_port(struct port *port) { @@ -193,10 +227,27 @@ static inline void __enable_port(struct port *port) } /** - * __port_is_enabled - check if the port's slave is in active state + * __port_move_to_attached_state - check if port should transition back to attached + * state. + * @port: the port we're looking at + */ +static bool __port_move_to_attached_state(struct port *port) +{ + if (!(port->sm_vars & AD_PORT_SELECTED) || + (port->sm_vars & AD_PORT_STANDBY) || + !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || + !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) + port->sm_mux_state = AD_MUX_ATTACHED; + + return port->sm_mux_state == AD_MUX_ATTACHED; +} + +/** + * __port_is_collecting_distributing - check if the port's slave is in the + * combined collecting/distributing state * @port: the port we're looking at */ -static inline int __port_is_enabled(struct port *port) +static int __port_is_collecting_distributing(struct port *port) { return bond_is_active_slave(port->slave); } @@ -942,6 +993,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker) */ static void ad_mux_machine(struct port *port, bool *update_slave_arr) { + struct bonding *bond = __get_bond_by_port(port); mux_states_t last_state; /* keep current State Machine state to compare later if it was @@ -999,9 +1051,13 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { - if (port->aggregator->is_active) - port->sm_mux_state = - AD_MUX_COLLECTING_DISTRIBUTING; + if (port->aggregator->is_active) { + int state = AD_MUX_COLLECTING_DISTRIBUTING; + + if (!bond->params.coupled_control) + state = AD_MUX_COLLECTING; + port->sm_mux_state = state; + } } else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { /* if UNSELECTED or STANDBY */ @@ -1019,11 +1075,45 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) } break; case AD_MUX_COLLECTING_DISTRIBUTING: + if (!__port_move_to_attached_state(port)) { + /* if port state hasn't changed make + * sure that a collecting distributing + * port in an active aggregator is enabled + */ + if (port->aggregator->is_active && + !__port_is_collecting_distributing(port)) { + __enable_port(port); + *update_slave_arr = true; + } + } + break; + case AD_MUX_COLLECTING: + if (!__port_move_to_attached_state(port)) { + if ((port->sm_vars & AD_PORT_SELECTED) && + (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && + (port->partner_oper.port_state & LACP_STATE_COLLECTING)) { + port->sm_mux_state = AD_MUX_DISTRIBUTING; + } else { + /* If port state hasn't changed, make sure that a collecting + * port is enabled for an active aggregator. + */ + struct slave *slave = port->slave; + + if (port->aggregator->is_active && + bond_is_slave_rx_disabled(slave)) { + ad_enable_collecting(port); + *update_slave_arr = true; + } + } + } + break; + case AD_MUX_DISTRIBUTING: if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || + !(port->partner_oper.port_state & LACP_STATE_COLLECTING) || !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) { - port->sm_mux_state = AD_MUX_ATTACHED; + port->sm_mux_state = AD_MUX_COLLECTING; } else { /* if port state hasn't changed make * sure that a collecting distributing @@ -1031,7 +1121,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) */ if (port->aggregator && port->aggregator->is_active && - !__port_is_enabled(port)) { + !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; } @@ -1082,6 +1172,20 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) update_slave_arr); port->ntt = true; break; + case AD_MUX_COLLECTING: + port->actor_oper_port_state |= LACP_STATE_COLLECTING; + port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; + port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; + ad_enable_collecting(port); + ad_disable_distributing(port, update_slave_arr); + port->ntt = true; + break; + case AD_MUX_DISTRIBUTING: + port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING; + port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; + ad_enable_collecting_distributing(port, + update_slave_arr); + break; default: break; } @@ -1906,6 +2010,45 @@ static void ad_initialize_port(struct port *port, int lacp_fast) } } +/** + * ad_enable_collecting - enable a port's receive + * @port: the port we're looking at + * + * Enable @port if it's in an active aggregator + */ +static void ad_enable_collecting(struct port *port) +{ + if (port->aggregator->is_active) { + struct slave *slave = port->slave; + + slave_dbg(slave->bond->dev, slave->dev, + "Enabling collecting on port %d (LAG %d)\n", + port->actor_port_number, + port->aggregator->aggregator_identifier); + __enable_collecting_port(port); + } +} + +/** + * ad_disable_distributing - disable a port's transmit + * @port: the port we're looking at + * @update_slave_arr: Does slave array need update? + */ +static void ad_disable_distributing(struct port *port, bool *update_slave_arr) +{ + if (port->aggregator && + !MAC_ADDRESS_EQUAL(&port->aggregator->partner_system, + &(null_mac_addr))) { + slave_dbg(port->slave->bond->dev, port->slave->dev, + "Disabling distributing on port %d (LAG %d)\n", + port->actor_port_number, + port->aggregator->aggregator_identifier); + __disable_distributing_port(port); + /* Slave array needs an update */ + *update_slave_arr = true; + } +} + /** * ad_enable_collecting_distributing - enable a port's transmit/receive * @port: the port we're looking at diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4e0600c7b050..ae9d32c0faf4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -6306,6 +6306,7 @@ static int __init bond_check_params(struct bond_params *params) params->ad_actor_sys_prio = ad_actor_sys_prio; eth_zero_addr(params->ad_actor_system); params->ad_user_port_key = ad_user_port_key; + params->coupled_control = 1; if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index cfa74cf8bb1a..29b4c3d1b9b6 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -122,6 +122,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range), [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, [IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED }, + [IFLA_BOND_COUPLED_CONTROL] = { .type = NLA_U8 }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { @@ -549,6 +550,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], return err; } + if (data[IFLA_BOND_COUPLED_CONTROL]) { + int coupled_control = nla_get_u8(data[IFLA_BOND_COUPLED_CONTROL]); + + bond_opt_initval(&newval, coupled_control); + err = __bond_opt_set(bond, BOND_OPT_COUPLED_CONTROL, &newval, + data[IFLA_BOND_COUPLED_CONTROL], extack); + if (err) + return err; + } + return 0; } @@ -615,6 +626,7 @@ static size_t bond_get_size(const struct net_device *bond_dev) /* IFLA_BOND_NS_IP6_TARGET */ nla_total_size(sizeof(struct nlattr)) + nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS + + nla_total_size(sizeof(u8)) + /* IFLA_BOND_COUPLED_CONTROL */ 0; } @@ -774,6 +786,10 @@ static int bond_fill_info(struct sk_buff *skb, bond->params.missed_max)) goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_COUPLED_CONTROL, + bond->params.coupled_control)) + goto nla_put_failure; + if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index f3f27f0bd2a6..4cdbc7e084f4 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -84,7 +84,8 @@ static int bond_option_ad_user_port_key_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_missed_max_set(struct bonding *bond, const struct bond_opt_value *newval); - +static int bond_option_coupled_control_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { { "balance-rr", BOND_MODE_ROUNDROBIN, BOND_VALFLAG_DEFAULT}, @@ -232,6 +233,12 @@ static const struct bond_opt_value bond_missed_max_tbl[] = { { NULL, -1, 0}, }; +static const struct bond_opt_value bond_coupled_control_tbl[] = { + { "on", 1, BOND_VALFLAG_DEFAULT}, + { "off", 0, 0}, + { NULL, -1, 0}, +}; + static const struct bond_option bond_opts[BOND_OPT_LAST] = { [BOND_OPT_MODE] = { .id = BOND_OPT_MODE, @@ -496,6 +503,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .desc = "Delay between each peer notification on failover event, in milliseconds", .values = bond_peer_notif_delay_tbl, .set = bond_option_peer_notif_delay_set + }, + [BOND_OPT_COUPLED_CONTROL] = { + .id = BOND_OPT_COUPLED_CONTROL, + .name = "coupled_control", + .desc = "Opt into using coupled control MUX for LACP states", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_IFDOWN, + .values = bond_coupled_control_tbl, + .set = bond_option_coupled_control_set, } }; @@ -1692,3 +1708,13 @@ static int bond_option_ad_user_port_key_set(struct bonding *bond, bond->params.ad_user_port_key = newval->value; return 0; } + +static int bond_option_coupled_control_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_info(bond->dev, "Setting coupled_control to %s (%llu)\n", + newval->string, newval->value); + + bond->params.coupled_control = newval->value; + return 0; +} diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c5e57c6bd873..9ce5ac2bfbad 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -54,6 +54,8 @@ typedef enum { AD_MUX_DETACHED, /* mux machine */ AD_MUX_WAITING, /* mux machine */ AD_MUX_ATTACHED, /* mux machine */ + AD_MUX_COLLECTING, /* mux machine */ + AD_MUX_DISTRIBUTING, /* mux machine */ AD_MUX_COLLECTING_DISTRIBUTING /* mux machine */ } mux_states_t; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 69292ecc0325..473a0147769e 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -76,6 +76,7 @@ enum { BOND_OPT_MISSED_MAX, BOND_OPT_NS_TARGETS, BOND_OPT_PRIO, + BOND_OPT_COUPLED_CONTROL, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 5b8b1b644a2d..b61fb1aa3a56 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -148,6 +148,7 @@ struct bond_params { #if IS_ENABLED(CONFIG_IPV6) struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; #endif + int coupled_control; /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; @@ -167,6 +168,7 @@ struct slave { u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ + rx_disabled:1, /* indicates whether slave's Rx is disabled */ should_notify:1, /* indicates whether the state changed */ should_notify_link:1; /* indicates whether the link changed */ u8 duplex; @@ -568,6 +570,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave, bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); if (!slave->bond->params.all_slaves_active) slave->inactive = 1; + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) + slave->rx_disabled = 1; +} + +static inline void bond_set_slave_tx_disabled_flags(struct slave *slave, + bool notify) +{ + bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); } static inline void bond_set_slave_active_flags(struct slave *slave, @@ -575,6 +585,14 @@ static inline void bond_set_slave_active_flags(struct slave *slave, { bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify); slave->inactive = 0; + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) + slave->rx_disabled = 0; +} + +static inline void bond_set_slave_rx_enabled_flags(struct slave *slave, + bool notify) +{ + slave->rx_disabled = 0; } static inline bool bond_is_slave_inactive(struct slave *slave) @@ -582,6 +600,11 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } +static inline bool bond_is_slave_rx_disabled(struct slave *slave) +{ + return slave->rx_disabled; +} + static inline void bond_propose_link_state(struct slave *slave, int state) { slave->link_new_state = state; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ab9bcff96e4d..ffa637b38c93 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1505,6 +1505,7 @@ enum { IFLA_BOND_AD_LACP_ACTIVE, IFLA_BOND_MISSED_MAX, IFLA_BOND_NS_IP6_TARGET, + IFLA_BOND_COUPLED_CONTROL, __IFLA_BOND_MAX, }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index a0aa05a28cf2..f0d71b2a3f1e 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -974,6 +974,7 @@ enum { IFLA_BOND_AD_LACP_ACTIVE, IFLA_BOND_MISSED_MAX, IFLA_BOND_NS_IP6_TARGET, + IFLA_BOND_COUPLED_CONTROL, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From c27aa462aa78ff157fdda222af242e4571803d4a Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Tue, 6 Feb 2024 11:23:30 +0100 Subject: bpf: Use -Wno-address-of-packed-member in some selftests [Differences from V2: - Remove conditionals in the source files pragmas, as the pragma is supported by both GCC and clang.] Both GCC and clang implement the -Wno-address-of-packed-member warning, which is enabled by -Wall, that warns about taking the address of a packed struct field when it can lead to an "unaligned" address. This triggers the following errors (-Werror) when building three particular BPF selftests with GCC: progs/test_cls_redirect.c 986 | if (ipv4_is_fragment((void *)&encap->ip)) { progs/test_cls_redirect_dynptr.c 410 | pkt_ipv4_checksum((void *)&encap_gre->ip); progs/test_cls_redirect.c 521 | pkt_ipv4_checksum((void *)&encap_gre->ip); progs/test_tc_tunnel.c 232 | set_ipv4_csum((void *)&h_outer.ip); These warnings do not signal any real problem in the tests as far as I can see. This patch adds pragmas to these test files that inhibit the -Waddress-of-packed-member warning. Tested in bpf-next master. No regressions. Signed-off-by: Jose E. Marchesi Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20240206102330.7113-1-jose.marchesi@oracle.com --- tools/testing/selftests/bpf/progs/test_cls_redirect.c | 2 ++ tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c | 2 ++ tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index 66b304982245..bfc9179259d5 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -22,6 +22,8 @@ #include "test_cls_redirect.h" +#pragma GCC diagnostic ignored "-Waddress-of-packed-member" + #ifdef SUBPROGS #define INLINING __noinline #else diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c index f41c81212ee9..da54c09e9a15 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -23,6 +23,8 @@ #include "test_cls_redirect.h" #include "bpf_kfuncs.h" +#pragma GCC diagnostic ignored "-Waddress-of-packed-member" + #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index e6e678aa9874..d8d7ab5e8e30 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -20,6 +20,8 @@ #include #include +#pragma GCC diagnostic ignored "-Waddress-of-packed-member" + static const int cfg_port = 8000; static const int cfg_udp_src = 20000; -- cgit v1.2.3 From 92a871ab9fa59a74d013bc04f321026a057618e7 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 6 Feb 2024 13:59:22 +0100 Subject: libbpf: Use OPTS_SET() macro in bpf_xdp_query() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the feature_flags and xdp_zc_max_segs fields were added to the libbpf bpf_xdp_query_opts, the code writing them did not use the OPTS_SET() macro. This causes libbpf to write to those fields unconditionally, which means that programs compiled against an older version of libbpf (with a smaller size of the bpf_xdp_query_opts struct) will have its stack corrupted by libbpf writing out of bounds. The patch adding the feature_flags field has an early bail out if the feature_flags field is not part of the opts struct (via the OPTS_HAS) macro, but the patch adding xdp_zc_max_segs does not. For consistency, this fix just changes the assignments to both fields to use the OPTS_SET() macro. Fixes: 13ce2daa259a ("xsk: add new netlink attribute dedicated for ZC max frags") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240206125922.1992815-1-toke@redhat.com --- tools/lib/bpf/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 090bcf6e3b3d..68a2def17175 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -496,8 +496,8 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) if (err) return libbpf_err(err); - opts->feature_flags = md.flags; - opts->xdp_zc_max_segs = md.xdp_zc_max_segs; + OPTS_SET(opts, feature_flags, md.flags); + OPTS_SET(opts, xdp_zc_max_segs, md.xdp_zc_max_segs); skip_feature_flags: return 0; -- cgit v1.2.3 From 7b4434a8face07aa1a9737637e58de028a5972d8 Mon Sep 17 00:00:00 2001 From: Alessandro Marcolini Date: Sat, 3 Feb 2024 14:16:51 +0100 Subject: tools: ynl: correct typo and docstring Correct typo in SpecAttr docstring. Changed SpecSubMessageFormat docstring. Signed-off-by: Alessandro Marcolini Reviewed-by: Donald Hunter Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/6ab1dea7fb1f635c0d8b237f03a49eaa448c2bf4.1706962013.git.alessandromarcolini99@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/nlspec.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 5d197a12ab8d..fbce52395b3b 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -144,7 +144,7 @@ class SpecEnumSet(SpecElement): class SpecAttr(SpecElement): - """ Single Netlink atttribute type + """ Single Netlink attribute type Represents a single attribute type within an attr space. @@ -308,10 +308,9 @@ class SpecSubMessage(SpecElement): class SpecSubMessageFormat(SpecElement): - """ Netlink sub-message definition + """ Netlink sub-message format definition - Represents a set of sub-message formats for polymorphic nlattrs - that contain type-specific sub messages. + Represents a single format for a sub-message. Attributes: value attribute value to match against type selector -- cgit v1.2.3 From b9bcfc3bc978f7b81104d2c01d4fe29a6f45e17a Mon Sep 17 00:00:00 2001 From: Alessandro Marcolini Date: Sat, 3 Feb 2024 14:16:53 +0100 Subject: tools: ynl: add support for encoding multi-attr Multi-attr elements could not be encoded because of missing logic in the ynl code. Enable encoding of these attributes by checking if the attribute is a multi-attr and if the value to be processed is a list. This has been tested both with the taprio and ets qdisc which contain this kind of attributes. Signed-off-by: Alessandro Marcolini Reviewed-by: Donald Hunter Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/c5bc9f5797168dbf7a4379c42f38d5de8ac7f38a.1706962013.git.alessandromarcolini99@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 0f4193cc2e3b..03c7ca6aaae9 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -444,6 +444,13 @@ class YnlFamily(SpecFamily): except KeyError: raise Exception(f"Space '{space}' has no attribute '{name}'") nl_type = attr.value + + if attr.is_multi and isinstance(value, list): + attr_payload = b'' + for subvalue in value: + attr_payload += self._add_attr(space, name, subvalue, search_attrs) + return attr_payload + if attr["type"] == 'nest': nl_type |= Netlink.NLA_F_NESTED attr_payload = b'' -- cgit v1.2.3 From 9707ac4fe2f5bac6406d2403f8b8a64d7b3d8e43 Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Tue, 6 Feb 2024 13:46:09 +0100 Subject: tools/resolve_btfids: Refactor set sorting with types from btf_ids.h Instead of using magic offsets to access BTF ID set data, leverage types from btf_ids.h (btf_id_set and btf_id_set8) which define the actual layout of the data. Thanks to this change, set sorting should also continue working if the layout changes. This requires to sync the definition of 'struct btf_id_set8' from include/linux/btf_ids.h to tools/include/linux/btf_ids.h. We don't sync the rest of the file at the moment, b/c that would require to also sync multiple dependent headers and we don't need any other defs from btf_ids.h. Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Acked-by: Daniel Xu Link: https://lore.kernel.org/bpf/ff7f062ddf6a00815fda3087957c4ce667f50532.1707223196.git.vmalik@redhat.com --- tools/bpf/resolve_btfids/main.c | 35 +++++++++++++++++++++-------------- tools/include/linux/btf_ids.h | 9 +++++++++ 2 files changed, 30 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 27a23196d58e..32634f00abba 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ #include #define BTF_IDS_SECTION ".BTF_ids" -#define BTF_ID "__BTF_ID__" +#define BTF_ID_PREFIX "__BTF_ID__" #define BTF_STRUCT "struct" #define BTF_UNION "union" @@ -161,7 +162,7 @@ static int eprintf(int level, int var, const char *fmt, ...) static bool is_btf_id(const char *name) { - return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1); + return name && !strncmp(name, BTF_ID_PREFIX, sizeof(BTF_ID_PREFIX) - 1); } static struct btf_id *btf_id__find(struct rb_root *root, const char *name) @@ -441,7 +442,7 @@ static int symbols_collect(struct object *obj) * __BTF_ID__TYPE__vfs_truncate__0 * prefix = ^ */ - prefix = name + sizeof(BTF_ID) - 1; + prefix = name + sizeof(BTF_ID_PREFIX) - 1; /* struct */ if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) { @@ -649,19 +650,18 @@ static int cmp_id(const void *pa, const void *pb) static int sets_patch(struct object *obj) { Elf_Data *data = obj->efile.idlist; - int *ptr = data->d_buf; struct rb_node *next; next = rb_first(&obj->sets); while (next) { - unsigned long addr, idx; + struct btf_id_set8 *set8; + struct btf_id_set *set; + unsigned long addr, off; struct btf_id *id; - int *base; - int cnt; id = rb_entry(next, struct btf_id, rb_node); addr = id->addr[0]; - idx = addr - obj->efile.idlist_addr; + off = addr - obj->efile.idlist_addr; /* sets are unique */ if (id->addr_cnt != 1) { @@ -670,14 +670,21 @@ static int sets_patch(struct object *obj) return -1; } - idx = idx / sizeof(int); - base = &ptr[idx] + (id->is_set8 ? 2 : 1); - cnt = ptr[idx]; + if (id->is_set) { + set = data->d_buf + off; + qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id); + } else { + set8 = data->d_buf + off; + /* + * Make sure id is at the beginning of the pairs + * struct, otherwise the below qsort would not work. + */ + BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); + qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); + } pr_debug("sorting addr %5lu: cnt %6d [%s]\n", - (idx + 1) * sizeof(int), cnt, id->name); - - qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id); + off, id->is_set ? set->cnt : set8->cnt, id->name); next = rb_next(next); } diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 2f882d5cb30f..72535f00572f 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -8,6 +8,15 @@ struct btf_id_set { u32 ids[]; }; +struct btf_id_set8 { + u32 cnt; + u32 flags; + struct { + u32 id; + u32 flags; + } pairs[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ -- cgit v1.2.3 From 903fad4394666bc23975c93fb58f137ce64b5192 Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Tue, 6 Feb 2024 13:46:10 +0100 Subject: tools/resolve_btfids: Fix cross-compilation to non-host endianness The .BTF_ids section is pre-filled with zeroed BTF ID entries during the build and afterwards patched by resolve_btfids with correct values. Since resolve_btfids always writes in host-native endianness, it relies on libelf to do the translation when the target ELF is cross-compiled to a different endianness (this was introduced in commit 61e8aeda9398 ("bpf: Fix libelf endian handling in resolv_btfids")). Unfortunately, the translation will corrupt the flags fields of SET8 entries because these were written during vmlinux compilation and are in the correct endianness already. This will lead to numerous selftests failures such as: $ sudo ./test_verifier 502 502 #502/p sleepable fentry accept FAIL Failed to load prog 'Invalid argument'! bpf_fentry_test1 is not sleepable verification time 34 usec stack depth 0 processed 0 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 Summary: 0 PASSED, 0 SKIPPED, 1 FAILED Since it's not possible to instruct libelf to translate just certain values, let's manually bswap the flags (both global and entry flags) in resolve_btfids when needed, so that libelf then translates everything correctly. Fixes: ef2c6f370a63 ("tools/resolve_btfids: Add support for 8-byte BTF sets") Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/7b6bff690919555574ce0f13d2a5996cacf7bf69.1707223196.git.vmalik@redhat.com --- tools/bpf/resolve_btfids/main.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 32634f00abba..d9520cb826b3 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -90,6 +90,14 @@ #define ADDR_CNT 100 +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ELFDATANATIVE ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ELFDATANATIVE ELFDATA2MSB +#else +# error "Unknown machine endianness!" +#endif + struct btf_id { struct rb_node rb_node; char *name; @@ -117,6 +125,7 @@ struct object { int idlist_shndx; size_t strtabidx; unsigned long idlist_addr; + int encoding; } efile; struct rb_root sets; @@ -320,6 +329,7 @@ static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; size_t shdrstrndx; + GElf_Ehdr ehdr; int idx = 0; Elf *elf; int fd; @@ -351,6 +361,13 @@ static int elf_collect(struct object *obj) return -1; } + if (gelf_getehdr(obj->efile.elf, &ehdr) == NULL) { + pr_err("FAILED cannot get ELF header: %s\n", + elf_errmsg(-1)); + return -1; + } + obj->efile.encoding = ehdr.e_ident[EI_DATA]; + /* * Scan all the elf sections and look for save data * from .BTF_ids section and symbols. @@ -681,6 +698,24 @@ static int sets_patch(struct object *obj) */ BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); + + /* + * When ELF endianness does not match endianness of the + * host, libelf will do the translation when updating + * the ELF. This, however, corrupts SET8 flags which are + * already in the target endianness. So, let's bswap + * them to the host endianness and libelf will then + * correctly translate everything. + */ + if (obj->efile.encoding != ELFDATANATIVE) { + int i; + + set8->flags = bswap_32(set8->flags); + for (i = 0; i < set8->cnt; i++) { + set8->pairs[i].flags = + bswap_32(set8->pairs[i].flags); + } + } } pr_debug("sorting addr %5lu: cnt %6d [%s]\n", -- cgit v1.2.3 From a2bff65cfca93f0fe4c5996f55ce8f413e85e4fe Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 6 Feb 2024 16:14:14 +0800 Subject: selftests/bpf: Fix error checking for cpumask_success__load() We should verify the return value of cpumask_success__load(). Signed-off-by: Yafang Shao Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240206081416.26242-4-laoar.shao@gmail.com --- tools/testing/selftests/bpf/prog_tests/cpumask.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index c2e886399e3c..ecf89df78109 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -27,7 +27,7 @@ static void verify_success(const char *prog_name) struct bpf_program *prog; struct bpf_link *link = NULL; pid_t child_pid; - int status; + int status, err; skel = cpumask_success__open(); if (!ASSERT_OK_PTR(skel, "cpumask_success__open")) @@ -36,8 +36,8 @@ static void verify_success(const char *prog_name) skel->bss->pid = getpid(); skel->bss->nr_cpus = libbpf_num_possible_cpus(); - cpumask_success__load(skel); - if (!ASSERT_OK_PTR(skel, "cpumask_success__load")) + err = cpumask_success__load(skel); + if (!ASSERT_OK(err, "cpumask_success__load")) goto cleanup; prog = bpf_object__find_program_by_name(skel->obj, prog_name); -- cgit v1.2.3 From ba6a6abb3bfa8377bcf386a11077c0533909f9e8 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 6 Feb 2024 16:14:15 +0800 Subject: selftests/bpf: Mark cpumask kfunc declarations as __weak After the series "Annotate kfuncs in .BTF_ids section"[0], kfuncs can be generated from bpftool. Let's mark the existing cpumask kfunc declarations __weak so they don't conflict with definitions that will eventually come from vmlinux.h. [0]. https://lore.kernel.org/all/cover.1706491398.git.dxu@dxuuu.xyz Suggested-by: Andrii Nakryiko Signed-off-by: Yafang Shao Signed-off-by: Andrii Nakryiko Acked-by: Daniel Xu Link: https://lore.kernel.org/bpf/20240206081416.26242-5-laoar.shao@gmail.com --- tools/testing/selftests/bpf/progs/cpumask_common.h | 57 +++++++++++----------- 1 file changed, 29 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 0cd4aebb97cf..c705d8112a35 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -23,41 +23,42 @@ struct array_map { __uint(max_entries, 1); } __cpumask_map SEC(".maps"); -struct bpf_cpumask *bpf_cpumask_create(void) __ksym; -void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym; -struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym; -u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym; -u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; +struct bpf_cpumask *bpf_cpumask_create(void) __ksym __weak; +void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym __weak; +struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym __weak; +u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym __weak; +u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym __weak; u32 bpf_cpumask_first_and(const struct cpumask *src1, - const struct cpumask *src2) __ksym; -void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; -void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; -bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym; -bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; -bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; -void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym; -void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym; + const struct cpumask *src2) __ksym __weak; +void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak; +void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak; +bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym __weak; +bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak; +bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak; +void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym __weak; +void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym __weak; bool bpf_cpumask_and(struct bpf_cpumask *cpumask, const struct cpumask *src1, - const struct cpumask *src2) __ksym; + const struct cpumask *src2) __ksym __weak; void bpf_cpumask_or(struct bpf_cpumask *cpumask, const struct cpumask *src1, - const struct cpumask *src2) __ksym; + const struct cpumask *src2) __ksym __weak; void bpf_cpumask_xor(struct bpf_cpumask *cpumask, const struct cpumask *src1, - const struct cpumask *src2) __ksym; -bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym; -bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym; -bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym; -bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym; -bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; -void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; -u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; -u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; -u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; - -void bpf_rcu_read_lock(void) __ksym; -void bpf_rcu_read_unlock(void) __ksym; + const struct cpumask *src2) __ksym __weak; +bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak; +bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak; +bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak; +bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym __weak; +bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym __weak; +void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym __weak; +u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak; +u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, + const struct cpumask *src2) __ksym __weak; +u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak; + +void bpf_rcu_read_lock(void) __ksym __weak; +void bpf_rcu_read_unlock(void) __ksym __weak; static inline const struct cpumask *cast(struct bpf_cpumask *cpumask) { -- cgit v1.2.3 From c8f4b19d64b93091ca30fe6800f41d1532bd7507 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 5 Feb 2024 21:00:45 +0800 Subject: selftests/net/forwarding: add slowwait functions Add slowwait functions to wait for some operations that may need a long time to finish. The busywait executes the cmd too fast, which is kind of wasting cpu in this scenario. At the same time, if shell debugging is enabled with `set -x`. the busywait will output too much logs. Reviewed-by: Przemek Kitszel Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20240205130048.282087-2-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 35 +++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index a7ecfc8cae98..db3688f52888 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -37,6 +37,32 @@ fi source "$net_forwarding_dir/../lib.sh" +# timeout in seconds +slowwait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s)" + while true + do + local out + out=$("$@") + local ret=$? + if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + + sleep 0.1 + done +} + ############################################################################## # Sanity checks @@ -478,6 +504,15 @@ busywait_for_counter() busywait "$timeout" until_counter_is ">= $((base + delta))" "$@" } +slowwait_for_counter() +{ + local timeout=$1; shift + local delta=$1; shift + + local base=$("$@") + slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" +} + setup_wait_dev() { local dev=$1; shift -- cgit v1.2.3 From 9150820c88304a9f52452bc18725c88f137688d8 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 5 Feb 2024 21:00:46 +0800 Subject: selftests: bonding: use tc filter to check if LACP was sent Use tc filter to check if LACP was sent, which is accurate and save more time. No need to remove bonding module as some test env may buildin bonding. And the bond link has been deleted. Reviewed-by: Przemek Kitszel Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20240205130048.282087-3-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../drivers/net/bonding/bond-break-lacpdu-tx.sh | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh index 6358df5752f9..1ec7f59db7f4 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh @@ -20,21 +20,21 @@ # +------+ +------+ # # We use veths instead of physical interfaces +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh set -e -tmp=$(mktemp -q dump.XXXXXX) cleanup() { ip link del fab-br0 >/dev/null 2>&1 || : ip link del fbond >/dev/null 2>&1 || : ip link del veth1-bond >/dev/null 2>&1 || : ip link del veth2-bond >/dev/null 2>&1 || : - modprobe -r bonding >/dev/null 2>&1 || : - rm -f -- ${tmp} } trap cleanup 0 1 2 cleanup -sleep 1 # create the bridge ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \ @@ -67,13 +67,12 @@ ip link set fab-br0 up ip link set fbond up ip addr add dev fab-br0 10.0.0.3 -tcpdump -n -i veth1-end -e ether proto 0x8809 >${tmp} 2>&1 & -sleep 15 -pkill tcpdump >/dev/null 2>&1 rc=0 -num=$(grep "packets captured" ${tmp} | awk '{print $1}') -if test "$num" -gt 0; then - echo "PASS, captured ${num}" +tc qdisc add dev veth1-end clsact +tc filter add dev veth1-end ingress protocol 0x8809 pref 1 handle 101 flower skip_hw action pass +if slowwait_for_counter 15 2 \ + tc_rule_handle_stats_get "dev veth1-end ingress" 101 ".packets" "" &> /dev/null; then + echo "PASS, captured 2" else echo "FAIL" rc=1 -- cgit v1.2.3 From 45bf79bc56c4158afb6f57cca67ac0ef0a7073ef Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 5 Feb 2024 21:00:47 +0800 Subject: selftests: bonding: reduce garp_test/arp_validate test time The purpose of grat_arp is testing commit 9949e2efb54e ("bonding: fix send_peer_notif overflow"). As the send_peer_notif was defined to u8, to overflow it, we need to send_peer_notif = num_peer_notif * peer_notif_delay = num_grat_arp * peer_notify_delay / miimon > 255 (kernel) (kernel parameter) (user parameter) e.g. 30 (num_grat_arp) * 1000 (peer_notify_delay) / 100 (miimon) > 255. Which need 30s to complete sending garp messages. To save the testing time, the only way is reduce the miimon number. Something like 30 (num_grat_arp) * 100 (peer_notify_delay) / 10 (miimon) > 255. To save more time, the 50 num_grat_arp testing could be removed. The arp_validate_test also need to check the mii_status, which sleep too long. Use slowwait to save some time. For other connection checkings, make sure active slave changed first. Reviewed-by: Przemek Kitszel Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20240205130048.282087-4-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/bonding/bond_options.sh | 38 +++++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index d508486cc0bd..6fd0cff3e1e9 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -45,15 +45,23 @@ skip_ns() } active_slave="" +active_slave_changed() +{ + local old_active_slave=$1 + local new_active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" \ + ".[].linkinfo.info_data.active_slave") + test "$old_active_slave" != "$new_active_slave" +} + check_active_slave() { local target_active_slave=$1 + slowwait 2 active_slave_changed $active_slave active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") test "$active_slave" = "$target_active_slave" check_err $? "Current active slave is $active_slave but not $target_active_slave" } - # Test bonding prio option prio_test() { @@ -84,13 +92,13 @@ prio_test() # active slave should be the higher prio slave ip -n ${s_ns} link set $active_slave down - bond_check_connection "fail over" check_active_slave eth2 + bond_check_connection "fail over" # when only 1 slave is up ip -n ${s_ns} link set $active_slave down - bond_check_connection "only 1 slave up" check_active_slave eth0 + bond_check_connection "only 1 slave up" # when a higher prio slave change to up ip -n ${s_ns} link set eth2 up @@ -140,8 +148,8 @@ prio_test() check_active_slave "eth1" ip -n ${s_ns} link set $active_slave down - bond_check_connection "change slave prio" check_active_slave "eth0" + bond_check_connection "change slave prio" fi } @@ -199,6 +207,15 @@ prio() prio_ns "active-backup" } +wait_mii_up() +{ + for i in $(seq 0 2); do + mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status") + [ ${mii_status} != "UP" ] && return 1 + done + return 0 +} + arp_validate_test() { local param="$1" @@ -211,7 +228,7 @@ arp_validate_test() [ $RET -ne 0 ] && log_test "arp_validate" "$retmsg" # wait for a while to make sure the mii status stable - sleep 5 + slowwait 5 wait_mii_up for i in $(seq 0 2); do mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status") if [ ${mii_status} != "UP" ]; then @@ -276,10 +293,13 @@ garp_test() active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") ip -n ${s_ns} link set ${active_slave} down - exp_num=$(echo "${param}" | cut -f6 -d ' ') - sleep $((exp_num + 2)) + # wait for active link change + slowwait 2 active_slave_changed $active_slave + exp_num=$(echo "${param}" | cut -f6 -d ' ') active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + slowwait_for_counter $((exp_num + 5)) $exp_num \ + tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" # check result real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}") @@ -296,8 +316,8 @@ garp_test() num_grat_arp() { local val - for val in 10 20 30 50; do - garp_test "mode active-backup miimon 100 num_grat_arp $val peer_notify_delay 1000" + for val in 10 20 30; do + garp_test "mode active-backup miimon 10 num_grat_arp $val peer_notify_delay 100" log_test "num_grat_arp" "active-backup miimon num_grat_arp $val" done } -- cgit v1.2.3 From e1f0da9b90fbe0886f57f8cbe0b84ce82fc20e75 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 5 Feb 2024 21:00:48 +0800 Subject: selftests: bonding: use slowwait instead of hard code sleep Use slowwait instead of hard code sleep for bonding tests. In function setup_prepare(), the client_create() will be called after server_create(). So I think there is no need to sleep in server_create() and remove it. For lab_lib.sh, remove bonding module may affect other running bonding tests. And some test env may buildin bond which can't be removed. The bonding link should be removed by lag_reset_network() or netns delete. Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20240205130048.282087-5-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../drivers/net/bonding/bond-lladdr-target.sh | 21 ++++++++++++++++++--- .../selftests/drivers/net/bonding/bond_topo_2d1c.sh | 6 +++--- .../selftests/drivers/net/bonding/lag_lib.sh | 7 +++---- 3 files changed, 24 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh index 89af402fabbe..78d3e0fe6604 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh @@ -17,6 +17,11 @@ # +----------------+ # # We use veths instead of physical interfaces +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + sw="sw-$(mktemp -u XXXXXX)" host="ns-$(mktemp -u XXXXXX)" @@ -26,6 +31,16 @@ cleanup() ip netns del $host } +wait_lladdr_dad() +{ + $@ | grep fe80 | grep -qv tentative +} + +wait_bond_up() +{ + $@ | grep -q 'state UP' +} + trap cleanup 0 1 2 ip netns add $sw @@ -37,8 +52,8 @@ ip -n $host link add veth1 type veth peer name veth1 netns $sw ip -n $sw link add br0 type bridge ip -n $sw link set br0 up sw_lladdr=$(ip -n $sw addr show br0 | awk '/fe80/{print $2}' | cut -d'/' -f1) -# sleep some time to make sure bridge lladdr pass DAD -sleep 2 +# wait some time to make sure bridge lladdr pass DAD +slowwait 2 wait_lladdr_dad ip -n $sw addr show br0 ip -n $host link add bond0 type bond mode 1 ns_ip6_target ${sw_lladdr} \ arp_validate 3 arp_interval 1000 @@ -53,7 +68,7 @@ ip -n $sw link set veth1 master br0 ip -n $sw link set veth0 up ip -n $sw link set veth1 up -sleep 5 +slowwait 5 wait_bond_up ip -n $host link show bond0 rc=0 if ip -n $host link show bond0 | grep -q LOWER_UP; then diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh index 0eb7edfb584c..195ef83cfbf1 100644 --- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh @@ -73,7 +73,6 @@ server_create() ip -n ${s_ns} link set bond0 up ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 - sleep 2 } # Reset bond with new mode and options @@ -96,7 +95,8 @@ bond_reset() ip -n ${s_ns} link set bond0 up ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 - sleep 2 + # Wait for IPv6 address ready as it needs DAD + slowwait 2 ip netns exec ${s_ns} ping6 ${c_ip6} -c 1 -W 0.1 &> /dev/null } server_destroy() @@ -150,7 +150,7 @@ bond_check_connection() { local msg=${1:-"check connection"} - sleep 2 + slowwait 2 ip netns exec ${s_ns} ping ${c_ip4} -c 1 -W 0.1 &> /dev/null ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null check_err $? "${msg}: ping failed" ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index dbdd736a41d3..bf9bcd1b5ec0 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -107,13 +107,12 @@ lag_setup2x2() NAMESPACES="${namespaces}" } -# cleanup all lag related namespaces and remove the bonding module +# cleanup all lag related namespaces lag_cleanup() { for n in ${NAMESPACES}; do ip netns delete ${n} >/dev/null 2>&1 || true done - modprobe -r bonding } SWITCH="lag_node1" @@ -159,7 +158,7 @@ test_bond_recovery() create_bond $@ # verify connectivity - ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null check_err $? "No connectivity" # force the links of the bond down @@ -169,7 +168,7 @@ test_bond_recovery() ip netns exec ${SWITCH} ip link set eth1 down # re-verify connectivity - ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null local rc=$? check_err $rc "Bond failed to recover" -- cgit v1.2.3 From f0ddf15f0a74c27eb4b2271a90e69948acc3fa2c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Feb 2024 18:55:38 +0200 Subject: selftests: forwarding: Add missing multicast routing config entries The two tests that make use of multicast routig (router.sh and router_multicast.sh) are currently failing in the netdev CI because the kernel is missing multicast routing support. Fix by adding the required config entries. Fixes: 6d4efada3b82 ("selftests: forwarding: Add multicast routing test") Signed-off-by: Ido Schimmel Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208165538.1303021-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/config | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index ba2343514582..8d7a1a004b7c 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -9,6 +9,13 @@ CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_IPV6=y CONFIG_IPV6_GRE=m +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y CONFIG_MACVLAN=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m -- cgit v1.2.3 From f51470c5c4a08d3e69f7c7891633b960aa6fe678 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 1 Feb 2024 23:07:26 -0300 Subject: selftests: tc-testing: add mirred to block tdc tests Add 8 new mirred tdc tests that target mirred to block: - Add mirred mirror to egress block action - Add mirred mirror to ingress block action - Add mirred redirect to egress block action - Add mirred redirect to ingress block action - Try to add mirred action with both dev and block - Try to add mirred action without specifying neither dev nor block - Replace mirred redirect to dev action with redirect to block - Replace mirred redirect to block action with mirror to dev Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240202020726.529170-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/actions/mirred.json | 396 +++++++++++++++++++++ 1 file changed, 396 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index b53d12909962..795cf1ce8af0 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -649,5 +649,401 @@ "teardown": [ "$TC actions flush action mirred" ] + }, + { + "id": "456d", + "name": "Add mirred mirror to egress block action", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 egress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "mirror", + "direction": "egress", + "to_blockid": 21, + "control_action": { + "type": "pipe" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 egress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "2358", + "name": "Add mirred mirror to ingress block action", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "mirror", + "direction": "ingress", + "to_blockid": 21, + "control_action": { + "type": "pipe" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "fdb1", + "name": "Add mirred redirect to egress block action", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress redirect index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "to_blockid": 21, + "control_action": { + "type": "stolen" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "20cc", + "name": "Add mirred redirect to ingress block action", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "to_blockid": 21, + "control_action": { + "type": "stolen" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "e739", + "name": "Try to add mirred action with both dev and block", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21 dev $DEV1", + "expExitCode": "255", + "verifyCmd": "$TC -j actions list action mirred", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "2f47", + "name": "Try to add mirred action without specifying neither dev nor block", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1", + "expExitCode": "255", + "verifyCmd": "$TC -j actions list action mirred", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "3188", + "name": "Replace mirred redirect to dev action with redirect to block", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ], + [ + "$TC actions add action mirred ingress redirect index 1 dev $DEV1", + 0 + ] + ], + "cmdUnderTest": "$TC actions replace action mirred egress redirect index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "to_blockid": 21, + "control_action": { + "type": "stolen" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "83cc", + "name": "Replace mirred redirect to block action with mirror to dev", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ], + [ + "$TC actions add action mirred egress redirect index 1 blockid 21", + 0 + ] + ], + "cmdUnderTest": "$TC actions replace action mirred ingress mirror index 1 dev lo", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "mirror", + "direction": "ingress", + "to_dev": "lo", + "control_action": { + "type": "pipe" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] } ] -- cgit v1.2.3 From 876e32473d1dd61cb58ece9965c3b3ea3ab307ab Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Feb 2024 10:42:45 +0100 Subject: selftests: net: include forwarding lib The altnames test uses the forwarding/lib.sh and that dependency currently causes failures when running the test after install: make -C tools/testing/selftests/ TARGETS=net install ./tools/testing/selftests/kselftest_install/run_kselftest.sh \ -t net:altnames.sh # ... # ./altnames.sh: line 8: ./forwarding/lib.sh: No such file or directory # RTNETLINK answers: Operation not permitted # ./altnames.sh: line 73: tests_run: command not found # ./altnames.sh: line 65: pre_cleanup: command not found Address the issue leveraging the TEST_INCLUDES infrastructure provided by commit 2a0683be5b4c ("selftests: Introduce Makefile variable to list shared bash scripts") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/f7b1e9d468224cbc136d304362315499fe39848f.1707298927.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 211753756bde..7b6918d5f4af 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -97,6 +97,8 @@ TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh +TEST_INCLUDES := forwarding/lib.sh + include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma -- cgit v1.2.3 From d45f5fa8b4aeaf24d2d5911cb459aeb74bada52c Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 7 Feb 2024 21:35:22 +0100 Subject: selftests: udpgso: Pull up network setup into shell script udpgso regression test configures routing and device MTU directly through uAPI (Netlink, ioctl) to do its job. While there is nothing wrong with it, it takes more effort than doing it from shell. Looking forward, we would like to extend the udpgso regression tests to cover the EIO corner case [1], once it gets addressed. That will require a dummy device and device feature manipulation to set it up. Which means more Netlink code. So, in preparation, pull out network configuration into the shell script part of the test, so it is easily extendable in the future. Also, because it now easy to setup routing, add a second local IPv6 address. Because the second address is not managed by the kernel, we can "replace" the corresponding local route with a reduced-MTU one. This unblocks the disabled "ipv6 connected" test case. Add a similar setup for IPv4 for symmetry. [1] https://lore.kernel.org/netdev/87jzqsld6q.fsf@cloudflare.com/ Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20240207-jakub-krn-635-v3-1-3dfa3da8a7d3@cloudflare.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso.c | 134 ++-------------------------------- tools/testing/selftests/net/udpgso.sh | 49 ++++++++++--- 2 files changed, 47 insertions(+), 136 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 7badaf215de2..1d975bf52af3 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -56,7 +56,6 @@ static bool cfg_do_msgmore; static bool cfg_do_setsockopt; static int cfg_specific_test_id = -1; -static const char cfg_ifname[] = "lo"; static unsigned short cfg_port = 9000; static char buf[ETH_MAX_MTU]; @@ -69,8 +68,13 @@ struct testcase { int r_len_last; /* recv(): size of last non-mss dgram, if any */ }; -const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; -const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; +const struct in6_addr addr6 = { + { { 0xfd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, /* fd00::1 */ +}; + +const struct in_addr addr4 = { + __constant_htonl(0x0a000001), /* 10.0.0.1 */ +}; struct testcase testcases_v4[] = { { @@ -274,48 +278,6 @@ struct testcase testcases_v6[] = { } }; -static unsigned int get_device_mtu(int fd, const char *ifname) -{ - struct ifreq ifr; - - memset(&ifr, 0, sizeof(ifr)); - - strcpy(ifr.ifr_name, ifname); - - if (ioctl(fd, SIOCGIFMTU, &ifr)) - error(1, errno, "ioctl get mtu"); - - return ifr.ifr_mtu; -} - -static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu) -{ - struct ifreq ifr; - - memset(&ifr, 0, sizeof(ifr)); - - ifr.ifr_mtu = mtu; - strcpy(ifr.ifr_name, ifname); - - if (ioctl(fd, SIOCSIFMTU, &ifr)) - error(1, errno, "ioctl set mtu"); -} - -static void set_device_mtu(int fd, int mtu) -{ - int val; - - val = get_device_mtu(fd, cfg_ifname); - fprintf(stderr, "device mtu (orig): %u\n", val); - - __set_device_mtu(fd, cfg_ifname, mtu); - val = get_device_mtu(fd, cfg_ifname); - if (val != mtu) - error(1, 0, "unable to set device mtu to %u\n", val); - - fprintf(stderr, "device mtu (test): %u\n", val); -} - static void set_pmtu_discover(int fd, bool is_ipv4) { int level, name, val; @@ -354,81 +316,6 @@ static unsigned int get_path_mtu(int fd, bool is_ipv4) return mtu; } -/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */ -static void set_route_mtu(int mtu, bool is_ipv4) -{ - struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; - struct nlmsghdr *nh; - struct rtattr *rta; - struct rtmsg *rt; - char data[NLMSG_ALIGN(sizeof(*nh)) + - NLMSG_ALIGN(sizeof(*rt)) + - NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) + - NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) + - NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))]; - int fd, ret, alen, off = 0; - - alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6); - - fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (fd == -1) - error(1, errno, "socket netlink"); - - memset(data, 0, sizeof(data)); - - nh = (void *)data; - nh->nlmsg_type = RTM_NEWROUTE; - nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; - off += NLMSG_ALIGN(sizeof(*nh)); - - rt = (void *)(data + off); - rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6; - rt->rtm_table = RT_TABLE_MAIN; - rt->rtm_dst_len = alen << 3; - rt->rtm_protocol = RTPROT_BOOT; - rt->rtm_scope = RT_SCOPE_UNIVERSE; - rt->rtm_type = RTN_UNICAST; - off += NLMSG_ALIGN(sizeof(*rt)); - - rta = (void *)(data + off); - rta->rta_type = RTA_DST; - rta->rta_len = RTA_LENGTH(alen); - if (is_ipv4) - memcpy(RTA_DATA(rta), &addr4, alen); - else - memcpy(RTA_DATA(rta), &addr6, alen); - off += NLMSG_ALIGN(rta->rta_len); - - rta = (void *)(data + off); - rta->rta_type = RTA_OIF; - rta->rta_len = RTA_LENGTH(sizeof(int)); - *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo"); - off += NLMSG_ALIGN(rta->rta_len); - - /* MTU is a subtype in a metrics type */ - rta = (void *)(data + off); - rta->rta_type = RTA_METRICS; - rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)); - off += NLMSG_ALIGN(rta->rta_len); - - /* now fill MTU subtype. Note that it fits within above rta_len */ - rta = (void *)(((char *) rta) + RTA_LENGTH(0)); - rta->rta_type = RTAX_MTU; - rta->rta_len = RTA_LENGTH(sizeof(int)); - *((int *)(RTA_DATA(rta))) = mtu; - - nh->nlmsg_len = off; - - ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr)); - if (ret != off) - error(1, errno, "send netlink: %uB != %uB\n", ret, off); - - if (close(fd)) - error(1, errno, "close netlink"); - - fprintf(stderr, "route mtu (test): %u\n", mtu); -} - static bool __send_one(int fd, struct msghdr *msg, int flags) { int ret; @@ -591,15 +478,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen) /* Do not fragment these datagrams: only succeed if GSO works */ set_pmtu_discover(fdt, addr->sa_family == AF_INET); - if (cfg_do_connectionless) { - set_device_mtu(fdt, CONST_MTU_TEST); + if (cfg_do_connectionless) run_all(fdt, fdr, addr, alen); - } if (cfg_do_connected) { - set_device_mtu(fdt, CONST_MTU_TEST + 100); - set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET); - if (connect(fdt, addr, alen)) error(1, errno, "connect"); diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh index fec24f584fe9..6c63178086b0 100755 --- a/tools/testing/selftests/net/udpgso.sh +++ b/tools/testing/selftests/net/udpgso.sh @@ -3,27 +3,56 @@ # # Run a series of udpgso regression tests +set -o errexit +set -o nounset + +setup_loopback() { + ip addr add dev lo 10.0.0.1/32 + ip addr add dev lo fd00::1/128 nodad noprefixroute +} + +test_dev_mtu() { + setup_loopback + # Reduce loopback MTU + ip link set dev lo mtu 1500 +} + +test_route_mtu() { + setup_loopback + # Remove default local routes + ip route del local 10.0.0.1/32 table local dev lo + ip route del local fd00::1/128 table local dev lo + # Install local routes with reduced MTU + ip route add local 10.0.0.1/32 table local dev lo mtu 1500 + ip route add local fd00::1/128 table local dev lo mtu 1500 +} + +if [ "$#" -gt 0 ]; then + "$1" + shift 2 # pop "test_*" arg and "--" delimiter + exec "$@" +fi + echo "ipv4 cmsg" -./in_netns.sh ./udpgso -4 -C +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C echo "ipv4 setsockopt" -./in_netns.sh ./udpgso -4 -C -s +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -s echo "ipv6 cmsg" -./in_netns.sh ./udpgso -6 -C +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C echo "ipv6 setsockopt" -./in_netns.sh ./udpgso -6 -C -s +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -s echo "ipv4 connected" -./in_netns.sh ./udpgso -4 -c +./in_netns.sh "$0" test_route_mtu -- ./udpgso -4 -c -# blocked on 2nd loopback address -# echo "ipv6 connected" -# ./in_netns.sh ./udpgso -6 -c +echo "ipv6 connected" +./in_netns.sh "$0" test_route_mtu -- ./udpgso -6 -c echo "ipv4 msg_more" -./in_netns.sh ./udpgso -4 -C -m +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -m echo "ipv6 msg_more" -./in_netns.sh ./udpgso -6 -C -m +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m -- cgit v1.2.3 From 68bc61c26cacf152baf905786b5949769700f40d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 7 Feb 2024 13:26:17 +0100 Subject: bpf: Allow compiler to inline most of bpf_local_storage_lookup() In various performance profiles of kernels with BPF programs attached, bpf_local_storage_lookup() appears as a significant portion of CPU cycles spent. To enable the compiler generate more optimal code, turn bpf_local_storage_lookup() into a static inline function, where only the cache insertion code path is outlined Notably, outlining cache insertion helps avoid bloating callers by duplicating setting up calls to raw_spin_{lock,unlock}_irqsave() (on architectures which do not inline spin_lock/unlock, such as x86), which would cause the compiler produce worse code by deciding to outline otherwise inlinable functions. The call overhead is neutral, because we make 2 calls either way: either calling raw_spin_lock_irqsave() and raw_spin_unlock_irqsave(); or call __bpf_local_storage_insert_cache(), which calls raw_spin_lock_irqsave(), followed by a tail-call to raw_spin_unlock_irqsave() where the compiler can perform TCO and (in optimized uninstrumented builds) turns it into a plain jump. The call to __bpf_local_storage_insert_cache() can be elided entirely if cacheit_lockit is a false constant expression. Based on results from './benchs/run_bench_local_storage.sh' (21 trials, reboot between each trial; x86 defconfig + BPF, clang 16) this produces improvements in throughput and latency in the majority of cases, with an average (geomean) improvement of 8%: +---- Hashmap Control -------------------- | | + num keys: 10 | : | | +-+ hashmap (control) sequential get +----------------------+---------------------- | +- hits throughput | 14.789 M ops/s | 14.745 M ops/s ( ~ ) | +- hits latency | 67.679 ns/op | 67.879 ns/op ( ~ ) | +- important_hits throughput | 14.789 M ops/s | 14.745 M ops/s ( ~ ) | | + num keys: 1000 | : | | +-+ hashmap (control) sequential get +----------------------+---------------------- | +- hits throughput | 12.233 M ops/s | 12.170 M ops/s ( ~ ) | +- hits latency | 81.754 ns/op | 82.185 ns/op ( ~ ) | +- important_hits throughput | 12.233 M ops/s | 12.170 M ops/s ( ~ ) | | + num keys: 10000 | : | | +-+ hashmap (control) sequential get +----------------------+---------------------- | +- hits throughput | 7.220 M ops/s | 7.204 M ops/s ( ~ ) | +- hits latency | 138.522 ns/op | 138.842 ns/op ( ~ ) | +- important_hits throughput | 7.220 M ops/s | 7.204 M ops/s ( ~ ) | | + num keys: 100000 | : | | +-+ hashmap (control) sequential get +----------------------+---------------------- | +- hits throughput | 5.061 M ops/s | 5.165 M ops/s (+2.1%) | +- hits latency | 198.483 ns/op | 194.270 ns/op (-2.1%) | +- important_hits throughput | 5.061 M ops/s | 5.165 M ops/s (+2.1%) | | + num keys: 4194304 | : | | +-+ hashmap (control) sequential get +----------------------+---------------------- | +- hits throughput | 2.864 M ops/s | 2.882 M ops/s ( ~ ) | +- hits latency | 365.220 ns/op | 361.418 ns/op (-1.0%) | +- important_hits throughput | 2.864 M ops/s | 2.882 M ops/s ( ~ ) | +---- Local Storage ---------------------- | | + num_maps: 1 | : | | +-+ local_storage cache sequential get +----------------------+---------------------- | +- hits throughput | 33.005 M ops/s | 39.068 M ops/s (+18.4%) | +- hits latency | 30.300 ns/op | 25.598 ns/op (-15.5%) | +- important_hits throughput | 33.005 M ops/s | 39.068 M ops/s (+18.4%) | : | : | | +-+ local_storage cache interleaved get +----------------------+---------------------- | +- hits throughput | 37.151 M ops/s | 44.926 M ops/s (+20.9%) | +- hits latency | 26.919 ns/op | 22.259 ns/op (-17.3%) | +- important_hits throughput | 37.151 M ops/s | 44.926 M ops/s (+20.9%) | | + num_maps: 10 | : | | +-+ local_storage cache sequential get +----------------------+---------------------- | +- hits throughput | 32.288 M ops/s | 38.099 M ops/s (+18.0%) | +- hits latency | 30.972 ns/op | 26.248 ns/op (-15.3%) | +- important_hits throughput | 3.229 M ops/s | 3.810 M ops/s (+18.0%) | : | : | | +-+ local_storage cache interleaved get +----------------------+---------------------- | +- hits throughput | 34.473 M ops/s | 41.145 M ops/s (+19.4%) | +- hits latency | 29.010 ns/op | 24.307 ns/op (-16.2%) | +- important_hits throughput | 12.312 M ops/s | 14.695 M ops/s (+19.4%) | | + num_maps: 16 | : | | +-+ local_storage cache sequential get +----------------------+---------------------- | +- hits throughput | 32.524 M ops/s | 38.341 M ops/s (+17.9%) | +- hits latency | 30.748 ns/op | 26.083 ns/op (-15.2%) | +- important_hits throughput | 2.033 M ops/s | 2.396 M ops/s (+17.9%) | : | : | | +-+ local_storage cache interleaved get +----------------------+---------------------- | +- hits throughput | 34.575 M ops/s | 41.338 M ops/s (+19.6%) | +- hits latency | 28.925 ns/op | 24.193 ns/op (-16.4%) | +- important_hits throughput | 11.001 M ops/s | 13.153 M ops/s (+19.6%) | | + num_maps: 17 | : | | +-+ local_storage cache sequential get +----------------------+---------------------- | +- hits throughput | 28.861 M ops/s | 32.756 M ops/s (+13.5%) | +- hits latency | 34.649 ns/op | 30.530 ns/op (-11.9%) | +- important_hits throughput | 1.700 M ops/s | 1.929 M ops/s (+13.5%) | : | : | | +-+ local_storage cache interleaved get +----------------------+---------------------- | +- hits throughput | 31.529 M ops/s | 36.110 M ops/s (+14.5%) | +- hits latency | 31.719 ns/op | 27.697 ns/op (-12.7%) | +- important_hits throughput | 9.598 M ops/s | 10.993 M ops/s (+14.5%) | | + num_maps: 24 | : | | +-+ local_storage cache sequential get +----------------------+---------------------- | +- hits throughput | 18.602 M ops/s | 19.937 M ops/s (+7.2%) | +- hits latency | 53.767 ns/op | 50.166 ns/op (-6.7%) | +- important_hits throughput | 0.776 M ops/s | 0.831 M ops/s (+7.2%) | : | : | | +-+ local_storage cache interleaved get +----------------------+---------------------- | +- hits throughput | 21.718 M ops/s | 23.332 M ops/s (+7.4%) | +- hits latency | 46.047 ns/op | 42.865 ns/op (-6.9%) | +- important_hits throughput | 6.110 M ops/s | 6.564 M ops/s (+7.4%) | | + num_maps: 32 | : | | +-+ local_storage cache sequential get +----------------------+---------------------- | +- hits throughput | 14.118 M ops/s | 14.626 M ops/s (+3.6%) | +- hits latency | 70.856 ns/op | 68.381 ns/op (-3.5%) | +- important_hits throughput | 0.442 M ops/s | 0.458 M ops/s (+3.6%) | : | : | | +-+ local_storage cache interleaved get +----------------------+---------------------- | +- hits throughput | 17.111 M ops/s | 17.906 M ops/s (+4.6%) | +- hits latency | 58.451 ns/op | 55.865 ns/op (-4.4%) | +- important_hits throughput | 4.776 M ops/s | 4.998 M ops/s (+4.6%) | | + num_maps: 100 | : | | +-+ local_storage cache sequential get +----------------------+---------------------- | +- hits throughput | 5.281 M ops/s | 5.528 M ops/s (+4.7%) | +- hits latency | 192.398 ns/op | 183.059 ns/op (-4.9%) | +- important_hits throughput | 0.053 M ops/s | 0.055 M ops/s (+4.9%) | : | : | | +-+ local_storage cache interleaved get +----------------------+---------------------- | +- hits throughput | 6.265 M ops/s | 6.498 M ops/s (+3.7%) | +- hits latency | 161.436 ns/op | 152.877 ns/op (-5.3%) | +- important_hits throughput | 1.636 M ops/s | 1.697 M ops/s (+3.7%) | | + num_maps: 1000 | : | | +-+ local_storage cache sequential get +----------------------+---------------------- | +- hits throughput | 0.355 M ops/s | 0.354 M ops/s ( ~ ) | +- hits latency | 2826.538 ns/op | 2827.139 ns/op ( ~ ) | +- important_hits throughput | 0.000 M ops/s | 0.000 M ops/s ( ~ ) | : | : | | +-+ local_storage cache interleaved get +----------------------+---------------------- | +- hits throughput | 0.404 M ops/s | 0.403 M ops/s ( ~ ) | +- hits latency | 2481.190 ns/op | 2487.555 ns/op ( ~ ) | +- important_hits throughput | 0.102 M ops/s | 0.101 M ops/s ( ~ ) The on_lookup test in {cgrp,task}_ls_recursion.c is removed because the bpf_local_storage_lookup is no longer traceable and adding tracepoint will make the compiler generate worse code: https://lore.kernel.org/bpf/ZcJmok64Xqv6l4ZS@elver.google.com/ Signed-off-by: Marco Elver Cc: Martin KaFai Lau Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240207122626.3508658-1-elver@google.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf_local_storage.h | 30 ++++++++++++- kernel/bpf/bpf_local_storage.c | 52 ++++++---------------- .../selftests/bpf/prog_tests/task_local_storage.c | 6 --- .../selftests/bpf/progs/cgrp_ls_recursion.c | 26 ----------- .../selftests/bpf/progs/task_ls_recursion.c | 17 ------- 5 files changed, 41 insertions(+), 90 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 173ec7f43ed1..dcddb0aef7d8 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -129,10 +129,36 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache, bool bpf_ma); -struct bpf_local_storage_data * +void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem); +/* If cacheit_lockit is false, this lookup function is lockless */ +static inline struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, - bool cacheit_lockit); + bool cacheit_lockit) +{ + struct bpf_local_storage_data *sdata; + struct bpf_local_storage_elem *selem; + + /* Fast path (cache hit) */ + sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], + bpf_rcu_lock_held()); + if (sdata && rcu_access_pointer(sdata->smap) == smap) + return sdata; + + /* Slow path (cache miss) */ + hlist_for_each_entry_rcu(selem, &local_storage->list, snode, + rcu_read_lock_trace_held()) + if (rcu_access_pointer(SDATA(selem)->smap) == smap) + break; + + if (!selem) + return NULL; + if (cacheit_lockit) + __bpf_local_storage_insert_cache(local_storage, smap, selem); + return SDATA(selem); +} void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 146824cc9689..bdea1a459153 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -414,47 +414,21 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) bpf_selem_unlink_storage(selem, reuse_now); } -/* If cacheit_lockit is false, this lookup function is lockless */ -struct bpf_local_storage_data * -bpf_local_storage_lookup(struct bpf_local_storage *local_storage, - struct bpf_local_storage_map *smap, - bool cacheit_lockit) +void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem) { - struct bpf_local_storage_data *sdata; - struct bpf_local_storage_elem *selem; - - /* Fast path (cache hit) */ - sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], - bpf_rcu_lock_held()); - if (sdata && rcu_access_pointer(sdata->smap) == smap) - return sdata; - - /* Slow path (cache miss) */ - hlist_for_each_entry_rcu(selem, &local_storage->list, snode, - rcu_read_lock_trace_held()) - if (rcu_access_pointer(SDATA(selem)->smap) == smap) - break; - - if (!selem) - return NULL; - - sdata = SDATA(selem); - if (cacheit_lockit) { - unsigned long flags; - - /* spinlock is needed to avoid racing with the - * parallel delete. Otherwise, publishing an already - * deleted sdata to the cache will become a use-after-free - * problem in the next bpf_local_storage_lookup(). - */ - raw_spin_lock_irqsave(&local_storage->lock, flags); - if (selem_linked_to_storage(selem)) - rcu_assign_pointer(local_storage->cache[smap->cache_idx], - sdata); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); - } + unsigned long flags; - return sdata; + /* spinlock is needed to avoid racing with the + * parallel delete. Otherwise, publishing an already + * deleted sdata to the cache will become a use-after-free + * problem in the next bpf_local_storage_lookup(). + */ + raw_spin_lock_irqsave(&local_storage->lock, flags); + if (selem_linked_to_storage(selem)) + rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem)); + raw_spin_unlock_irqrestore(&local_storage->lock, flags); } static int check_flags(const struct bpf_local_storage_data *old_sdata, diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index ea8537c54413..c33c05161a9e 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -117,12 +117,6 @@ static void test_recursion(void) ASSERT_OK(err, "lookup map_b"); ASSERT_EQ(value, 100, "map_b value"); - prog_fd = bpf_program__fd(skel->progs.on_lookup); - memset(&info, 0, sizeof(info)); - err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); - ASSERT_OK(err, "get prog info"); - ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion"); - prog_fd = bpf_program__fd(skel->progs.on_update); memset(&info, 0, sizeof(info)); err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c index 610c2427fd93..3500e4b69ebe 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -27,32 +27,6 @@ bool is_cgroup1 = 0; struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; void bpf_cgroup_release(struct cgroup *cgrp) __ksym; -static void __on_lookup(struct cgroup *cgrp) -{ - bpf_cgrp_storage_delete(&map_a, cgrp); - bpf_cgrp_storage_delete(&map_b, cgrp); -} - -SEC("fentry/bpf_local_storage_lookup") -int BPF_PROG(on_lookup) -{ - struct task_struct *task = bpf_get_current_task_btf(); - struct cgroup *cgrp; - - if (is_cgroup1) { - cgrp = bpf_task_get_cgroup1(task, target_hid); - if (!cgrp) - return 0; - - __on_lookup(cgrp); - bpf_cgroup_release(cgrp); - return 0; - } - - __on_lookup(task->cgroups->dfl_cgrp); - return 0; -} - static void __on_update(struct cgroup *cgrp) { long *ptr; diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c index 4542dc683b44..f1853c38aada 100644 --- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c @@ -27,23 +27,6 @@ struct { __type(value, long); } map_b SEC(".maps"); -SEC("fentry/bpf_local_storage_lookup") -int BPF_PROG(on_lookup) -{ - struct task_struct *task = bpf_get_current_task_btf(); - - if (!test_pid || task->pid != test_pid) - return 0; - - /* The bpf_task_storage_delete will call - * bpf_local_storage_lookup. The prog->active will - * stop the recursion. - */ - bpf_task_storage_delete(&map_a, task); - bpf_task_storage_delete(&map_b, task); - return 0; -} - SEC("fentry/bpf_local_storage_update") int BPF_PROG(on_update) { -- cgit v1.2.3 From 9c52994e32c5d8aaff13b1a13f7b35b988b02b26 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 18:57:49 +0100 Subject: selftests: net: ignore timing errors in txtimestamp if KSFT_MACHINE_SLOW This test is time sensitive. It may fail on virtual machines and for debug builds. Similar to commit c41dfb0dfbec ("selftests/net: ignore timing errors in so_txtime if KSFT_MACHINE_SLOW"), optionally suppress failure for timing errors (only). Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/txtimestamp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index 10f2fde3686b..ec60a16c9307 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -163,7 +163,8 @@ static void validate_timestamp(struct timespec *cur, int min_delay) if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { fprintf(stderr, "ERROR: %" PRId64 " us expected between %d and %d\n", cur64 - start64, min_delay, max_delay); - test_failed = true; + if (!getenv("KSFT_MACHINE_SLOW")) + test_failed = true; } } -- cgit v1.2.3 From 3407df8dc2de07d6b80f28d1a19a5c24a835f69b Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 14:06:53 -0800 Subject: selftests/net: Adding test cases of replacing routes and route advertisements. Add tests of changing permanent routes to temporary routes and the reversed case to make sure GC working correctly in these cases. Add tests for the temporary routes from RA. The existing device will be deleted between tests to remove all routes associated with it, so that the earlier tests don't mess up the later ones. Reviewed-by: Hangbin Liu Reviewed-by: David Ahern Tested-by: Hangbin Liu Signed-off-by: Kui-Feng Lee Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 151 ++++++++++++++++++++++++++----- 1 file changed, 129 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index b3ecccbbfcd2..3ec1050e47a2 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -743,6 +743,43 @@ fib_notify_test() cleanup &> /dev/null } +# Create a new dummy_10 to remove all associated routes. +reset_dummy_10() +{ + $IP link del dev dummy_10 + + $IP link add dummy_10 type dummy + $IP link set dev dummy_10 up + $IP -6 address add 2001:10::1/64 dev dummy_10 +} + +check_rt_num() +{ + local expected=$1 + local num=$2 + + if [ $num -ne $expected ]; then + echo "FAIL: Expected $expected routes, got $num" + ret=1 + else + ret=0 + fi +} + +check_rt_num_clean() +{ + local expected=$1 + local num=$2 + + if [ $num -ne $expected ]; then + log_test 1 0 "expected $expected routes, got $num" + set +e + cleanup &> /dev/null + return 1 + fi + return 0 +} + fib6_gc_test() { setup @@ -751,7 +788,7 @@ fib6_gc_test() echo "Fib6 garbage collection test" set -e - EXPIRE=3 + EXPIRE=5 # Check expiration of routes every $EXPIRE seconds (GC) $NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE @@ -763,44 +800,114 @@ fib6_gc_test() $NS_EXEC sysctl -wq net.ipv6.route.flush=1 # Temporary routes - for i in $(seq 1 1000); do + for i in $(seq 1 5); do # Expire route after $EXPIRE seconds $IP -6 route add 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - sleep $(($EXPIRE * 2)) - N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l) - if [ $N_EXP_SLEEP -ne 0 ]; then - echo "FAIL: expected 0 routes with expires, got $N_EXP_SLEEP" - ret=1 - else - ret=0 - fi + sleep $(($EXPIRE * 2 + 1)) + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection" + + reset_dummy_10 # Permanent routes - for i in $(seq 1 5000); do + for i in $(seq 1 5); do $IP -6 route add 2001:30::$i \ via 2001:10::2 dev dummy_10 done # Temporary routes - for i in $(seq 1 1000); do + for i in $(seq 1 5); do # Expire route after $EXPIRE seconds $IP -6 route add 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - sleep $(($EXPIRE * 2)) - N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l) - if [ $N_EXP_SLEEP -ne 0 ]; then - echo "FAIL: expected 0 routes with expires," \ - "got $N_EXP_SLEEP (5000 permanent routes)" - ret=1 - else - ret=0 + sleep $(($EXPIRE * 2 + 1)) + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection (with permanent routes)" + + reset_dummy_10 + + # Permanent routes + for i in $(seq 1 5); do + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 + done + # Replace with temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route replace 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + check_rt_num_clean 5 $($IP -6 route list |grep expires|wc -l) || return + # Wait for GC + sleep $(($EXPIRE * 2 + 1)) + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection (replace with expires)" + + reset_dummy_10 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + # Replace with permanent routes + for i in $(seq 1 5); do + $IP -6 route replace 2001:20::$i \ + via 2001:10::2 dev dummy_10 + done + check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return + + # Wait for GC + sleep $(($EXPIRE * 2 + 1)) + + check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) + log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" + + # ra6 is required for the next test. (ipv6toolkit) + if [ ! -x "$(command -v ra6)" ]; then + echo "SKIP: ra6 not found." + set +e + cleanup &> /dev/null + return fi - set +e + # Delete dummy_10 and remove all routes + $IP link del dev dummy_10 - log_test $ret 0 "ipv6 route garbage collection" + # Create a pair of veth devices to send a RA message from one + # device to another. + $IP link add veth1 type veth peer name veth2 + $IP link set dev veth1 up + $IP link set dev veth2 up + $IP -6 address add 2001:10::1/64 dev veth1 nodad + $IP -6 address add 2001:10::2/64 dev veth2 nodad + + # Make veth1 ready to receive RA messages. + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2 + + # Send a RA message with a route from veth2 to veth1. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -t $EXPIRE + + # Wait for the RA message. + sleep 1 + + # systemd may mess up the test. You syould make sure that + # systemd-networkd.service and systemd-networkd.socket are stopped. + check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return + + # Wait for GC + sleep $(($EXPIRE * 2 + 1)) + + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection (RA message)" + + set +e cleanup &> /dev/null } -- cgit v1.2.3 From fc1c9e40da37905f87c73711a1ecc57f52c1fe1c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 6 Feb 2024 23:01:07 -0800 Subject: selftests/bpf: Ensure fentry prog cannot attach to bpf_spin_{lock,unlcok}() Add two tests to ensure fentry programs cannot attach to bpf_spin_{lock,unlock}() helpers. The tracing_failure.c files can be used in the future for other tracing failure cases. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240207070107.335341-1-yonghong.song@linux.dev --- .../selftests/bpf/prog_tests/tracing_failure.c | 37 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/tracing_failure.c | 20 ++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/tracing_failure.c create mode 100644 tools/testing/selftests/bpf/progs/tracing_failure.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c new file mode 100644 index 000000000000..a222df765bc3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include "tracing_failure.skel.h" + +static void test_bpf_spin_lock(bool is_spin_lock) +{ + struct tracing_failure *skel; + int err; + + skel = tracing_failure__open(); + if (!ASSERT_OK_PTR(skel, "tracing_failure__open")) + return; + + if (is_spin_lock) + bpf_program__set_autoload(skel->progs.test_spin_lock, true); + else + bpf_program__set_autoload(skel->progs.test_spin_unlock, true); + + err = tracing_failure__load(skel); + if (!ASSERT_OK(err, "tracing_failure__load")) + goto out; + + err = tracing_failure__attach(skel); + ASSERT_ERR(err, "tracing_failure__attach"); + +out: + tracing_failure__destroy(skel); +} + +void test_tracing_failure(void) +{ + if (test__start_subtest("bpf_spin_lock")) + test_bpf_spin_lock(true); + if (test__start_subtest("bpf_spin_unlock")) + test_bpf_spin_lock(false); +} diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c new file mode 100644 index 000000000000..d41665d2ec8c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_failure.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +SEC("?fentry/bpf_spin_lock") +int BPF_PROG(test_spin_lock, struct bpf_spin_lock *lock) +{ + return 0; +} + +SEC("?fentry/bpf_spin_unlock") +int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock) +{ + return 0; +} -- cgit v1.2.3 From 52dbd67dff5d050e99301100e2cac578eef9b2e9 Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Thu, 8 Feb 2024 21:36:12 +0100 Subject: bpf: Abstract loop unrolling pragmas in BPF selftests [Changes from V1: - Avoid conflict by rebasing with latest master.] Some BPF tests use loop unrolling compiler pragmas that are clang specific and not supported by GCC. These pragmas, along with their GCC equivalences are: #pragma clang loop unroll_count(N) #pragma GCC unroll N #pragma clang loop unroll(full) #pragma GCC unroll 65534 #pragma clang loop unroll(disable) #pragma GCC unroll 1 #pragma unroll [aka #pragma clang loop unroll(enable)] There is no GCC equivalence to this pragma. It enables unrolling on loops that the compiler would not ordinarily unroll even with -O2|-funroll-loops, but it is not equivalent to full unrolling either. This patch adds a new header progs/bpf_compiler.h that defines the following macros, which correspond to each pair of compiler-specific pragmas above: __pragma_loop_unroll_count(N) __pragma_loop_unroll_full __pragma_loop_no_unroll __pragma_loop_unroll The selftests using loop unrolling pragmas are then changed to include the header and use these macros in place of the explicit pragmas. Tested in bpf-next master. No regressions. Signed-off-by: Jose E. Marchesi Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20240208203612.29611-1-jose.marchesi@oracle.com --- tools/testing/selftests/bpf/progs/bpf_compiler.h | 33 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/iters.c | 5 ++-- tools/testing/selftests/bpf/progs/loop4.c | 4 ++- tools/testing/selftests/bpf/progs/profiler.inc.h | 17 +++++------ tools/testing/selftests/bpf/progs/pyperf.h | 7 +++-- tools/testing/selftests/bpf/progs/strobemeta.h | 18 ++++++------ .../selftests/bpf/progs/test_cls_redirect.c | 5 ++-- .../selftests/bpf/progs/test_lwt_seg6local.c | 6 ++-- tools/testing/selftests/bpf/progs/test_seg6_loop.c | 4 ++- tools/testing/selftests/bpf/progs/test_skb_ctx.c | 4 ++- .../selftests/bpf/progs/test_sysctl_loop1.c | 6 ++-- .../selftests/bpf/progs/test_sysctl_loop2.c | 6 ++-- .../testing/selftests/bpf/progs/test_sysctl_prog.c | 6 ++-- tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 3 +- tools/testing/selftests/bpf/progs/test_xdp.c | 3 +- tools/testing/selftests/bpf/progs/test_xdp_loop.c | 3 +- .../selftests/bpf/progs/test_xdp_noinline.c | 5 ++-- .../selftests/bpf/progs/xdp_synproxy_kern.c | 6 ++-- tools/testing/selftests/bpf/progs/xdping_kern.c | 3 +- 19 files changed, 102 insertions(+), 42 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/bpf_compiler.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_compiler.h b/tools/testing/selftests/bpf/progs/bpf_compiler.h new file mode 100644 index 000000000000..a7c343dc82e6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_compiler.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_COMPILER_H__ +#define __BPF_COMPILER_H__ + +#define DO_PRAGMA_(X) _Pragma(#X) + +#if __clang__ +#define __pragma_loop_unroll DO_PRAGMA_(clang loop unroll(enable)) +#else +/* In GCC -funroll-loops, which is enabled with -O2, should have the + same impact than the loop-unroll-enable pragma above. */ +#define __pragma_loop_unroll +#endif + +#if __clang__ +#define __pragma_loop_unroll_count(N) DO_PRAGMA_(clang loop unroll_count(N)) +#else +#define __pragma_loop_unroll_count(N) DO_PRAGMA_(GCC unroll N) +#endif + +#if __clang__ +#define __pragma_loop_unroll_full DO_PRAGMA_(clang loop unroll(full)) +#else +#define __pragma_loop_unroll_full DO_PRAGMA_(GCC unroll 65534) +#endif + +#if __clang__ +#define __pragma_loop_no_unroll DO_PRAGMA_(clang loop unroll(disable)) +#else +#define __pragma_loop_no_unroll DO_PRAGMA_(GCC unroll 1) +#endif + +#endif diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 225f02dd66d0..3db416606f2f 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -5,6 +5,7 @@ #include #include #include "bpf_misc.h" +#include "bpf_compiler.h" #define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) @@ -183,7 +184,7 @@ int iter_pragma_unroll_loop(const void *ctx) MY_PID_GUARD(); bpf_iter_num_new(&it, 0, 2); -#pragma nounroll + __pragma_loop_no_unroll for (i = 0; i < 3; i++) { v = bpf_iter_num_next(&it); bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1); @@ -238,7 +239,7 @@ int iter_multiple_sequential_loops(const void *ctx) bpf_iter_num_destroy(&it); bpf_iter_num_new(&it, 0, 2); -#pragma nounroll + __pragma_loop_no_unroll for (i = 0; i < 3; i++) { v = bpf_iter_num_next(&it); bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1); diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c index b35337926d66..0de0357f57cc 100644 --- a/tools/testing/selftests/bpf/progs/loop4.c +++ b/tools/testing/selftests/bpf/progs/loop4.c @@ -3,6 +3,8 @@ #include #include +#include "bpf_compiler.h" + char _license[] SEC("license") = "GPL"; SEC("socket") @@ -10,7 +12,7 @@ int combinations(volatile struct __sk_buff* skb) { int ret = 0, i; -#pragma nounroll + __pragma_loop_no_unroll for (i = 0; i < 20; i++) if (skb->len) ret |= 1 << i; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index de3b6e4e4d0a..6957d9f2805e 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -8,6 +8,7 @@ #include "profiler.h" #include "err.h" #include "bpf_experimental.h" +#include "bpf_compiler.h" #ifndef NULL #define NULL 0 @@ -169,7 +170,7 @@ static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct, int spid) { #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) if (arr_struct->array[i].meta.pid == spid) @@ -185,7 +186,7 @@ static INLINE void populate_ancestors(struct task_struct* task, ancestors_data->num_ancestors = 0; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) { parent = BPF_CORE_READ(parent, real_parent); @@ -212,7 +213,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, size_t filepart_length; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { filepart_length = @@ -261,7 +262,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, pids_cgrp_id___local); #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys_state* subsys = @@ -402,7 +403,7 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) if (kill_data == NULL) return 0; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) if (arr_struct->array[i].meta.pid == 0) { @@ -482,7 +483,7 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) struct dentry* parent_dentry; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < MAX_PATH_DEPTH; i++) { filepart_length = @@ -508,7 +509,7 @@ is_ancestor_in_allowed_inodes(struct dentry* filp_dentry) { struct dentry* parent_dentry; #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < MAX_PATH_DEPTH; i++) { u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino); @@ -629,7 +630,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); #ifdef UNROLL -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 026d573ce179..86484f07e1d1 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -8,6 +8,7 @@ #include #include #include "bpf_misc.h" +#include "bpf_compiler.h" #define FUNCTION_NAME_LEN 64 #define FILE_NAME_LEN 128 @@ -298,11 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) #if defined(USE_ITER) /* no for loop, no unrolling */ #elif defined(NO_UNROLL) -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #elif defined(UNROLL_COUNT) -#pragma clang loop unroll_count(UNROLL_COUNT) + __pragma_loop_unroll_count(UNROLL_COUNT) #else -#pragma clang loop unroll(full) + __pragma_loop_unroll_full #endif /* NO_UNROLL */ /* Unwind python stack */ #ifdef USE_ITER diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 40df2cc26eaf..f74459eead26 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -10,6 +10,8 @@ #include #include +#include "bpf_compiler.h" + typedef uint32_t pid_t; struct task_struct {}; @@ -419,9 +421,9 @@ static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg, } #ifdef NO_UNROLL -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #else -#pragma unroll + __pragma_loop_unroll #endif for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) { if (i >= map.cnt) @@ -560,25 +562,25 @@ static void *read_strobe_meta(struct task_struct *task, payload_off = sizeof(data->payload); #else #ifdef NO_UNROLL -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #else -#pragma unroll + __pragma_loop_unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_INTS; ++i) { read_int_var(cfg, i, tls_base, &value, data); } #ifdef NO_UNROLL -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #else -#pragma unroll + __pragma_loop_unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off); } #ifdef NO_UNROLL -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll #else -#pragma unroll + __pragma_loop_unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off); diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index bfc9179259d5..683c8aaa63da 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -20,6 +20,7 @@ #include #include +#include "bpf_compiler.h" #include "test_cls_redirect.h" #pragma GCC diagnostic ignored "-Waddress-of-packed-member" @@ -269,7 +270,7 @@ static INLINING void pkt_ipv4_checksum(struct iphdr *iph) uint32_t acc = 0; uint16_t *ipw = (uint16_t *)iph; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) { acc += ipw[i]; } @@ -296,7 +297,7 @@ bool pkt_skip_ipv6_extension_headers(buf_t *pkt, }; *is_fragment = false; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < 6; i++) { switch (exthdr.next) { case IPPROTO_FRAGMENT: diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c index 48ff2b2ad5e7..fed66f36adb6 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c @@ -6,6 +6,8 @@ #include #include +#include "bpf_compiler.h" + /* Packet parsing state machine helpers. */ #define cursor_advance(_cursor, _len) \ ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) @@ -131,7 +133,7 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, *pad_off = 0; // we can only go as far as ~10 TLVs due to the BPF max stack size - #pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < 10; i++) { struct sr6_tlv_t tlv; @@ -302,7 +304,7 @@ int __encap_srh(struct __sk_buff *skb) seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh)); - #pragma clang loop unroll(full) + __pragma_loop_unroll_full for (unsigned long long lo = 0; lo < 4; lo++) { seg->lo = bpf_cpu_to_be64(4 - lo); seg->hi = bpf_cpu_to_be64(hi); diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c index a7278f064368..5059050f74f6 100644 --- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c +++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c @@ -6,6 +6,8 @@ #include #include +#include "bpf_compiler.h" + /* Packet parsing state machine helpers. */ #define cursor_advance(_cursor, _len) \ ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) @@ -134,7 +136,7 @@ static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb, // we can only go as far as ~10 TLVs due to the BPF max stack size // workaround: define induction variable "i" as "long" instead // of "int" to prevent alu32 sub-register spilling. - #pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (long i = 0; i < 100; i++) { struct sr6_tlv_t tlv; diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index c482110cfc95..a724a70c6700 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -3,12 +3,14 @@ #include #include +#include "bpf_compiler.h" + char _license[] SEC("license") = "GPL"; SEC("tc") int process(struct __sk_buff *skb) { - #pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < 5; i++) { if (skb->cb[i] != i + 1) return 1; diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c index 553a282d816a..7f74077d6622 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -9,6 +9,8 @@ #include +#include "bpf_compiler.h" + #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif @@ -30,7 +32,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret != sizeof(tcp_mem_name) - 1) return 0; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < sizeof(tcp_mem_name); ++i) if (name[i] != tcp_mem_name[i]) return 0; @@ -59,7 +61,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret >= MAX_VALUE_STR_LEN) return 0; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) { ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0, tcp_mem + i); diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c index 2b64bc563a12..68a75436e8af 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -9,6 +9,8 @@ #include +#include "bpf_compiler.h" + #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif @@ -30,7 +32,7 @@ static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret != sizeof(tcp_mem_name) - 1) return 0; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < sizeof(tcp_mem_name); ++i) if (name[i] != tcp_mem_name[i]) return 0; @@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret >= MAX_VALUE_STR_LEN) return 0; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) { ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0, tcp_mem + i); diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index 5489823c83fc..efc3c61f7852 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -9,6 +9,8 @@ #include +#include "bpf_compiler.h" + /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */ #define MAX_ULONG_STR_LEN 0xF @@ -31,7 +33,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret != sizeof(tcp_mem_name) - 1) return 0; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0; i < sizeof(tcp_mem_name); ++i) if (name[i] != tcp_mem_name[i]) return 0; @@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx) if (ret < 0 || ret >= MAX_VALUE_STR_LEN) return 0; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) { ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0, tcp_mem + i); diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index d8d7ab5e8e30..404124a93892 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -19,6 +19,7 @@ #include #include +#include "bpf_compiler.h" #pragma GCC diagnostic ignored "-Waddress-of-packed-member" @@ -83,7 +84,7 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph) iph->check = 0; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++) csum += *iph16++; diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index d7a9a74b7245..8caf58be5818 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -19,6 +19,7 @@ #include #include #include "test_iptunnel_common.h" +#include "bpf_compiler.h" struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); @@ -137,7 +138,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp) iph->ttl = 8; next_iph = (__u16 *)iph; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0; i < sizeof(*iph) >> 1; i++) csum += *next_iph++; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c index c98fb44156f0..93267a68825b 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -15,6 +15,7 @@ #include #include #include "test_iptunnel_common.h" +#include "bpf_compiler.h" struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); @@ -133,7 +134,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp) iph->ttl = 8; next_iph = (__u16 *)iph; -#pragma clang loop unroll(disable) + __pragma_loop_no_unroll for (i = 0; i < sizeof(*iph) >> 1; i++) csum += *next_iph++; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 42c8f6ded0e4..5c7e4758a0ca 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -15,6 +15,7 @@ #include #include #include +#include "bpf_compiler.h" static __always_inline __u32 rol32(__u32 word, unsigned int shift) { @@ -362,7 +363,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, iph->ttl = 4; next_iph_u16 = (__u16 *) iph; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) csum += *next_iph_u16++; iph->check = ~((csum & 0xffff) + (csum >> 16)); @@ -409,7 +410,7 @@ int send_icmp_reply(void *data, void *data_end) iph->saddr = tmp_addr; iph->check = 0; next_iph_u16 = (__u16 *) iph; -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) csum += *next_iph_u16++; iph->check = ~((csum & 0xffff) + (csum >> 16)); diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 518329c666e9..7ea9785738b5 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -7,6 +7,8 @@ #include #include +#include "bpf_compiler.h" + #define TC_ACT_OK 0 #define TC_ACT_SHOT 2 @@ -151,11 +153,11 @@ static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr, __u64 sum = csum; int i; -#pragma unroll + __pragma_loop_unroll for (i = 0; i < 4; i++) sum += (__u32)saddr->in6_u.u6_addr32[i]; -#pragma unroll + __pragma_loop_unroll for (i = 0; i < 4; i++) sum += (__u32)daddr->in6_u.u6_addr32[i]; diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c index 54cf1765118b..44e2b0ef23ae 100644 --- a/tools/testing/selftests/bpf/progs/xdping_kern.c +++ b/tools/testing/selftests/bpf/progs/xdping_kern.c @@ -15,6 +15,7 @@ #include #include +#include "bpf_compiler.h" #include "xdping.h" struct { @@ -116,7 +117,7 @@ int xdping_client(struct xdp_md *ctx) return XDP_PASS; if (pinginfo->start) { -#pragma clang loop unroll(full) + __pragma_loop_unroll_full for (i = 0; i < XDPING_MAX_COUNT; i++) { if (pinginfo->times[i] == 0) break; -- cgit v1.2.3 From 12bbcf8e840f40b82b02981e96e0a5fbb0703ea9 Mon Sep 17 00:00:00 2001 From: Cupertino Miranda Date: Tue, 13 Feb 2024 17:35:43 +0000 Subject: libbpf: Add support to GCC in CORE macro definitions Due to internal differences between LLVM and GCC the current implementation for the CO-RE macros does not fit GCC parser, as it will optimize those expressions even before those would be accessible by the BPF backend. As examples, the following would be optimized out with the original definitions: - As enums are converted to their integer representation during parsing, the IR would not know how to distinguish an integer constant from an actual enum value. - Types need to be kept as temporary variables, as the existing type casts of the 0 address (as expanded for LLVM), are optimized away by the GCC C parser, never really reaching GCCs IR. Although, the macros appear to add extra complexity, the expanded code is removed from the compilation flow very early in the compilation process, not really affecting the quality of the generated assembly. Signed-off-by: Cupertino Miranda Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240213173543.1397708-1-cupertino.miranda@oracle.com --- tools/lib/bpf/bpf_core_read.h | 45 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 0d3e88bd7d5f..1ce738d91685 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -46,7 +46,7 @@ enum bpf_enum_value_kind { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read_kernel( \ - (void *)dst, \ + (void *)dst, \ __CORE_RELO(src, fld, BYTE_SIZE), \ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #else @@ -145,8 +145,29 @@ enum bpf_enum_value_kind { } \ }) +/* Differentiator between compilers builtin implementations. This is a + * requirement due to the compiler parsing differences where GCC optimizes + * early in parsing those constructs of type pointers to the builtin specific + * type, resulting in not being possible to collect the required type + * information in the builtin expansion. + */ +#ifdef __clang__ +#define ___bpf_typeof(type) ((typeof(type) *) 0) +#else +#define ___bpf_typeof1(type, NR) ({ \ + extern typeof(type) *___concat(bpf_type_tmp_, NR); \ + ___concat(bpf_type_tmp_, NR); \ +}) +#define ___bpf_typeof(type) ___bpf_typeof1(type, __COUNTER__) +#endif + +#ifdef __clang__ #define ___bpf_field_ref1(field) (field) -#define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field) +#define ___bpf_field_ref2(type, field) (___bpf_typeof(type)->field) +#else +#define ___bpf_field_ref1(field) (&(field)) +#define ___bpf_field_ref2(type, field) (&(___bpf_typeof(type)->field)) +#endif #define ___bpf_field_ref(args...) \ ___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args) @@ -196,7 +217,7 @@ enum bpf_enum_value_kind { * BTF. Always succeeds. */ #define bpf_core_type_id_local(type) \ - __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL) + __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_LOCAL) /* * Convenience macro to get BTF type ID of a target kernel's type that matches @@ -206,7 +227,7 @@ enum bpf_enum_value_kind { * - 0, if no matching type was found in a target kernel BTF. */ #define bpf_core_type_id_kernel(type) \ - __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET) + __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_TARGET) /* * Convenience macro to check that provided named type @@ -216,7 +237,7 @@ enum bpf_enum_value_kind { * 0, if no matching type is found. */ #define bpf_core_type_exists(type) \ - __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_EXISTS) /* * Convenience macro to check that provided named type @@ -226,7 +247,7 @@ enum bpf_enum_value_kind { * 0, if the type does not match any in the target kernel */ #define bpf_core_type_matches(type) \ - __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES) + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_MATCHES) /* * Convenience macro to get the byte size of a provided named type @@ -236,7 +257,7 @@ enum bpf_enum_value_kind { * 0, if no matching type is found. */ #define bpf_core_type_size(type) \ - __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_SIZE) /* * Convenience macro to check that provided enumerator value is defined in @@ -246,8 +267,13 @@ enum bpf_enum_value_kind { * kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. */ +#ifdef __clang__ #define bpf_core_enum_value_exists(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS) +#else +#define bpf_core_enum_value_exists(enum_type, enum_value) \ + __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_EXISTS) +#endif /* * Convenience macro to get the integer value of an enumerator value in @@ -257,8 +283,13 @@ enum bpf_enum_value_kind { * present in target kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. */ +#ifdef __clang__ #define bpf_core_enum_value(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE) +#else +#define bpf_core_enum_value(enum_type, enum_value) \ + __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_VALUE) +#endif /* * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures -- cgit v1.2.3 From 00f239eccf461a6403b3c16e767d04f3954cae98 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 18:37:50 -0800 Subject: selftests/bpf: Test PTR_MAYBE_NULL arguments of struct_ops operators. Test if the verifier verifies nullable pointer arguments correctly for BPF struct_ops programs. "test_maybe_null" in struct bpf_testmod_ops is the operator defined for the test cases here. A BPF program should check a pointer for NULL beforehand to access the value pointed by the nullable pointer arguments, or the verifier should reject the programs. The test here includes two parts; the programs checking pointers properly and the programs not checking pointers beforehand. The test checks if the verifier accepts the programs checking properly and rejects the programs not checking at all. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240209023750.1153905-5-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 13 +++++- .../selftests/bpf/bpf_testmod/bpf_testmod.h | 4 ++ .../bpf/prog_tests/test_struct_ops_maybe_null.c | 46 ++++++++++++++++++++++ .../selftests/bpf/progs/struct_ops_maybe_null.c | 29 ++++++++++++++ .../bpf/progs/struct_ops_maybe_null_fail.c | 24 +++++++++++ 5 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index a06daebc75c9..66787e99ba1b 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -555,7 +555,11 @@ static int bpf_dummy_reg(void *kdata) { struct bpf_testmod_ops *ops = kdata; - ops->test_2(4, 3); + /* Some test cases (ex. struct_ops_maybe_null) may not have test_2 + * initialized, so we need to check for NULL. + */ + if (ops->test_2) + ops->test_2(4, 3); return 0; } @@ -573,9 +577,16 @@ static void bpf_testmod_test_2(int a, int b) { } +static int bpf_testmod_ops__test_maybe_null(int dummy, + struct task_struct *task__nullable) +{ + return 0; +} + static struct bpf_testmod_ops __bpf_testmod_ops = { .test_1 = bpf_testmod_test_1, .test_2 = bpf_testmod_test_2, + .test_maybe_null = bpf_testmod_ops__test_maybe_null, }; struct bpf_struct_ops bpf_bpf_testmod_ops = { diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index 537beca42896..c3b0cf788f9f 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -5,6 +5,8 @@ #include +struct task_struct; + struct bpf_testmod_test_read_ctx { char *buf; loff_t off; @@ -31,6 +33,8 @@ struct bpf_iter_testmod_seq { struct bpf_testmod_ops { int (*test_1)(void); void (*test_2)(int a, int b); + /* Used to test nullable arguments. */ + int (*test_maybe_null)(int dummy, struct task_struct *task); }; #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c new file mode 100644 index 000000000000..01dc2613c8a5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include + +#include "struct_ops_maybe_null.skel.h" +#include "struct_ops_maybe_null_fail.skel.h" + +/* Test that the verifier accepts a program that access a nullable pointer + * with a proper check. + */ +static void maybe_null(void) +{ + struct struct_ops_maybe_null *skel; + + skel = struct_ops_maybe_null__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_and_load")) + return; + + struct_ops_maybe_null__destroy(skel); +} + +/* Test that the verifier rejects a program that access a nullable pointer + * without a check beforehand. + */ +static void maybe_null_fail(void) +{ + struct struct_ops_maybe_null_fail *skel; + + skel = struct_ops_maybe_null_fail__open_and_load(); + if (ASSERT_ERR_PTR(skel, "struct_ops_module_fail__open_and_load")) + return; + + struct_ops_maybe_null_fail__destroy(skel); +} + +void test_struct_ops_maybe_null(void) +{ + /* The verifier verifies the programs at load time, so testing both + * programs in the same compile-unit is complicated. We run them in + * separate objects to simplify the testing. + */ + if (test__start_subtest("maybe_null")) + maybe_null(); + if (test__start_subtest("maybe_null_fail")) + maybe_null_fail(); +} diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c new file mode 100644 index 000000000000..b450f72e744a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +pid_t tgid = 0; + +/* This is a test BPF program that uses struct_ops to access an argument + * that may be NULL. This is a test for the verifier to ensure that it can + * rip PTR_MAYBE_NULL correctly. + */ +SEC("struct_ops/test_maybe_null") +int BPF_PROG(test_maybe_null, int dummy, + struct task_struct *task) +{ + if (task) + tgid = task->tgid; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_1 = { + .test_maybe_null = (void *)test_maybe_null, +}; + diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c new file mode 100644 index 000000000000..6283099ec383 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +pid_t tgid = 0; + +SEC("struct_ops/test_maybe_null_struct_ptr") +int BPF_PROG(test_maybe_null_struct_ptr, int dummy, + struct task_struct *task) +{ + tgid = task->tgid; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_struct_ptr = { + .test_maybe_null = (void *)test_maybe_null_struct_ptr, +}; + -- cgit v1.2.3 From 879bbe7aa4afa80acf72a1cad7f52416ea78c52d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 12 Feb 2024 15:32:20 -0800 Subject: bpf: don't infer PTR_TO_CTX for programs with unnamed context type For program types that don't have named context type name (e.g., BPF iterator programs or tracepoint programs), ctx_tname will be a non-NULL empty string. For such programs it shouldn't be possible to have PTR_TO_CTX argument for global subprogs based on type name alone. arg:ctx tag is the only way to have PTR_TO_CTX passed into global subprog for such program types. Fix this loophole, which currently would assume PTR_TO_CTX whenever user uses a pointer to anonymous struct as an argument to their global subprogs. This happens in practice with the following (quite common, in practice) approach: typedef struct { /* anonymous */ int x; } my_type_t; int my_subprog(my_type_t *arg) { ... } User's intent is to have PTR_TO_MEM argument for `arg`, but verifier will complain about expecting PTR_TO_CTX. This fix also closes unintended s390x-specific KPROBE handling of PTR_TO_CTX case. Selftest change is necessary to accommodate this. Fixes: 91cc1a99740e ("bpf: Annotate context types") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240212233221.2575350-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 3 +++ .../selftests/bpf/progs/test_global_func_ctx_args.c | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'tools') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 26dc0876e426..6ff0bd1a91d5 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5746,6 +5746,9 @@ again: bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n"); return false; } + /* program types without named context types work only with arg:ctx tag */ + if (ctx_tname[0] == '\0') + return false; /* only compare that prog's ctx type name is the same as * kernel expects. No need to compare field by field. * It's ok for bpf prog to do: diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c index 9a06e5eb1fbe..143c8a4852bf 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c +++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c @@ -26,6 +26,23 @@ int kprobe_typedef_ctx(void *ctx) return kprobe_typedef_ctx_subprog(ctx); } +/* s390x defines: + * + * typedef user_pt_regs bpf_user_pt_regs_t; + * typedef struct { ... } user_pt_regs; + * + * And so "canonical" underlying struct type is anonymous. + * So on s390x only valid ways to have PTR_TO_CTX argument in global subprogs + * are: + * - bpf_user_pt_regs_t *ctx (typedef); + * - struct bpf_user_pt_regs_t *ctx (backwards compatible struct hack); + * - void *ctx __arg_ctx (arg:ctx tag) + * + * Other architectures also allow using underlying struct types (e.g., + * `struct pt_regs *ctx` for x86-64) + */ +#ifndef bpf_target_s390 + #define pt_regs_struct_t typeof(*(__PT_REGS_CAST((struct pt_regs *)NULL))) __weak int kprobe_struct_ctx_subprog(pt_regs_struct_t *ctx) @@ -40,6 +57,8 @@ int kprobe_resolved_ctx(void *ctx) return kprobe_struct_ctx_subprog(ctx); } +#endif + /* this is current hack to make this work on old kernels */ struct bpf_user_pt_regs_t {}; -- cgit v1.2.3 From 63d5a33fb4ec2a4ed6907c8ac144b6f10f6dba47 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 12 Feb 2024 15:32:21 -0800 Subject: selftests/bpf: add anonymous user struct as global subprog arg test Add tests validating that kernel handles pointer to anonymous struct argument as PTR_TO_MEM case, not as PTR_TO_CTX case. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240212233221.2575350-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_global_subprogs.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index 67dddd941891..baff5ffe9405 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -115,6 +115,35 @@ int arg_tag_nullable_ptr_fail(void *ctx) return subprog_nullable_ptr_bad(&x); } +typedef struct { + int x; +} user_struct_t; + +__noinline __weak int subprog_user_anon_mem(user_struct_t *t) +{ + return t ? t->x : 0; +} + +SEC("?tracepoint") +__failure __log_level(2) +__msg("invalid bpf_context access") +__msg("Caller passes invalid args into func#1 ('subprog_user_anon_mem')") +int anon_user_mem_invalid(void *ctx) +{ + /* can't pass PTR_TO_CTX as user memory */ + return subprog_user_anon_mem(ctx); +} + +SEC("?tracepoint") +__success __log_level(2) +__msg("Func#1 ('subprog_user_anon_mem') is safe for any args that match its prototype") +int anon_user_mem_valid(void *ctx) +{ + user_struct_t t = { .x = 42 }; + + return subprog_user_anon_mem(&t); +} + __noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull) { return (*p1) * (*p2); /* good, no need for NULL checks */ -- cgit v1.2.3 From 7cc13adbd057f1905564ec2a254883d7fd407deb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 12 Feb 2024 15:59:44 -0800 Subject: bpf: emit source code file name and line number in verifier log As BPF applications grow in size and complexity and are separated into multiple .bpf.c files that are statically linked together, it becomes harder and harder to match verifier's BPF assembly level output to original C code. While often annotated C source code is unique enough to be able to identify the file it belongs to, quite often this is actually problematic as parts of source code can be quite generic. Long story short, it is very useful to see source code file name and line number information along with the original C code. Verifier already knows this information, we just need to output it. This patch extends verifier log with file name and line number information, emitted next to original (presumably C) source code, annotating BPF assembly output, like so: ; @ .bpf.c: If file name has directory names in it, they are stripped away. This should be fine in practice as file names tend to be pretty unique with C code anyways, and keeping log size smaller is always good. In practice this might look something like below, where some code is coming from application files, while others are from libbpf's usdt.bpf.h header file: ; if (STROBEMETA_READ( @ strobemeta_probe.bpf.c:534 5592: (79) r1 = *(u64 *)(r10 -56) ; R1_w=mem_or_null(id=1589,sz=7680) R10=fp0 5593: (7b) *(u64 *)(r10 -56) = r1 ; R1_w=mem_or_null(id=1589,sz=7680) R10=fp0 5594: (79) r3 = *(u64 *)(r10 -8) ; R3_w=scalar() R10=fp0 fp-8=mmmmmmmm ... 170: (71) r1 = *(u8 *)(r8 +15) ; frame1: R1_w=scalar(...) R8_w=map_value(map=__bpf_usdt_spec,ks=4,vs=208) 171: (67) r1 <<= 56 ; frame1: R1_w=scalar(...) 172: (c7) r1 s>>= 56 ; frame1: R1_w=scalar(smin=smin32=-128,smax=smax32=127) ; val <<= arg_spec->arg_bitshift; @ usdt.bpf.h:183 173: (67) r1 <<= 32 ; frame1: R1_w=scalar(...) 174: (77) r1 >>= 32 ; frame1: R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) 175: (79) r2 = *(u64 *)(r10 -8) ; frame1: R2_w=scalar() R10=fp0 fp-8=mmmmmmmm 176: (6f) r2 <<= r1 ; frame1: R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R2_w=scalar() 177: (7b) *(u64 *)(r10 -8) = r2 ; frame1: R2_w=scalar(id=61) R10=fp0 fp-8_w=scalar(id=61) ; if (arg_spec->arg_signed) @ usdt.bpf.h:184 178: (bf) r3 = r2 ; frame1: R2_w=scalar(id=61) R3_w=scalar(id=61) 179: (7f) r3 >>= r1 ; frame1: R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R3_w=scalar() ; if (arg_spec->arg_signed) @ usdt.bpf.h:184 180: (71) r4 = *(u8 *)(r8 +14) 181: safe log_fixup tests needed a minor adjustment as verifier log output increased a bit and that test is quite sensitive to such changes. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240212235944.2816107-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/log.c | 15 ++++++++++++--- tools/testing/selftests/bpf/prog_tests/log_fixup.c | 4 ++-- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 594a234f122b..cc789efc7f43 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -9,6 +9,7 @@ #include #include #include +#include #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args) @@ -362,6 +363,8 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, const char *prefix_fmt, ...) { const struct bpf_line_info *linfo; + const struct btf *btf; + const char *s, *fname; if (!bpf_verifier_log_needed(&env->log)) return; @@ -378,9 +381,15 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, va_end(args); } - verbose(env, "%s\n", - ltrim(btf_name_by_offset(env->prog->aux->btf, - linfo->line_off))); + btf = env->prog->aux->btf; + s = ltrim(btf_name_by_offset(btf, linfo->line_off)); + verbose(env, "%s", s); /* source code line */ + + s = btf_name_by_offset(btf, linfo->file_name_off); + /* leave only file name */ + fname = strrchr(s, '/'); + fname = fname ? fname + 1 : s; + verbose(env, " @ %s:%u\n", fname, BPF_LINE_INFO_LINE_NUM(linfo->line_col)); env->prev_linfo = linfo; } diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index 7a3fa2ff567b..90a98e23be61 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -169,9 +169,9 @@ void test_log_fixup(void) if (test__start_subtest("bad_core_relo_trunc_none")) bad_core_relo(0, TRUNC_NONE /* full buf */); if (test__start_subtest("bad_core_relo_trunc_partial")) - bad_core_relo(280, TRUNC_PARTIAL /* truncate original log a bit */); + bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); if (test__start_subtest("bad_core_relo_trunc_full")) - bad_core_relo(220, TRUNC_FULL /* truncate also libbpf's message patch */); + bad_core_relo(240, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); if (test__start_subtest("missing_map")) -- cgit v1.2.3 From 1159d27852207e8efb8d6ef2dae5aaa87ec4e225 Mon Sep 17 00:00:00 2001 From: Matt Bobrowski Date: Wed, 14 Feb 2024 09:14:23 +0000 Subject: libbpf: Make remark about zero-initializing bpf_*_info structs In some situations, if you fail to zero-initialize the bpf_{prog,map,btf,link}_info structs supplied to the set of LIBBPF helpers bpf_{prog,map,btf,link}_get_info_by_fd(), you can expect the helper to return an error. This can possibly leave people in a situation where they're scratching their heads for an unnnecessary amount of time. Make an explicit remark about the requirement of zero-initializing the supplied bpf_{prog,map,btf,link}_info structs for the respective LIBBPF helpers. Internally, LIBBPF helpers bpf_{prog,map,btf,link}_get_info_by_fd() call into bpf_obj_get_info_by_fd() where the bpf(2) BPF_OBJ_GET_INFO_BY_FD command is used. This specific command is effectively backed by restrictions enforced by the bpf_check_uarg_tail_zero() helper. This function ensures that if the size of the supplied bpf_{prog,map,btf,link}_info structs are larger than what the kernel can handle, trailing bits are zeroed. This can be a problem when compiling against UAPI headers that don't necessarily match the sizes of the same underlying types known to the kernel. Signed-off-by: Matt Bobrowski Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/ZcyEb8x4VbhieWsL@google.com --- tools/lib/bpf/bpf.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index f866e98b2436..ab2570d28aec 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -500,7 +500,10 @@ LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); * program corresponding to *prog_fd*. * * Populates up to *info_len* bytes of *info* and updates *info_len* with the - * actual number of bytes written to *info*. + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. * * @param prog_fd BPF program file descriptor * @param info pointer to **struct bpf_prog_info** that will be populated with @@ -517,7 +520,10 @@ LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, * map corresponding to *map_fd*. * * Populates up to *info_len* bytes of *info* and updates *info_len* with the - * actual number of bytes written to *info*. + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. * * @param map_fd BPF map file descriptor * @param info pointer to **struct bpf_map_info** that will be populated with @@ -530,11 +536,14 @@ LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len); /** - * @brief **bpf_btf_get_info_by_fd()** obtains information about the + * @brief **bpf_btf_get_info_by_fd()** obtains information about the * BTF object corresponding to *btf_fd*. * * Populates up to *info_len* bytes of *info* and updates *info_len* with the - * actual number of bytes written to *info*. + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. * * @param btf_fd BTF object file descriptor * @param info pointer to **struct bpf_btf_info** that will be populated with @@ -551,7 +560,10 @@ LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u * link corresponding to *link_fd*. * * Populates up to *info_len* bytes of *info* and updates *info_len* with the - * actual number of bytes written to *info*. + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. * * @param link_fd BPF link file descriptor * @param info pointer to **struct bpf_link_info** that will be populated with -- cgit v1.2.3 From 682158ab532a5bd24399fec25b65fec561f0f6e9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 14 Feb 2024 15:29:51 -0800 Subject: bpf: Fix test verif_scale_strobemeta_subprogs failure due to llvm19 With latest llvm19, I hit the following selftest failures with $ ./test_progs -j libbpf: prog 'on_event': BPF program load failed: Permission denied libbpf: prog 'on_event': -- BEGIN PROG LOAD LOG -- combined stack size of 4 calls is 544. Too large verification time 1344153 usec stack depth 24+440+0+32 processed 51008 insns (limit 1000000) max_states_per_insn 19 total_states 1467 peak_states 303 mark_read 146 -- END PROG LOAD LOG -- libbpf: prog 'on_event': failed to load: -13 libbpf: failed to load object 'strobemeta_subprogs.bpf.o' scale_test:FAIL:expect_success unexpected error: -13 (errno 13) #498 verif_scale_strobemeta_subprogs:FAIL The verifier complains too big of the combined stack size (544 bytes) which exceeds the maximum stack limit 512. This is a regression from llvm19 ([1]). In the above error log, the original stack depth is 24+440+0+32. To satisfy interpreter's need, in verifier the stack depth is adjusted to 32+448+32+32=544 which exceeds 512, hence the error. The same adjusted stack size is also used for jit case. But the jitted codes could use smaller stack size. $ egrep -r stack_depth | grep round_up arm64/net/bpf_jit_comp.c: ctx->stack_size = round_up(prog->aux->stack_depth, 16); loongarch/net/bpf_jit.c: bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); powerpc/net/bpf_jit_comp.c: cgctx.stack_size = round_up(fp->aux->stack_depth, 16); riscv/net/bpf_jit_comp32.c: round_up(ctx->prog->aux->stack_depth, STACK_ALIGN); riscv/net/bpf_jit_comp64.c: bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); s390/net/bpf_jit_comp.c: u32 stack_depth = round_up(fp->aux->stack_depth, 8); sparc/net/bpf_jit_comp_64.c: stack_needed += round_up(stack_depth, 16); x86/net/bpf_jit_comp.c: EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); x86/net/bpf_jit_comp.c: int tcc_off = -4 - round_up(stack_depth, 8); x86/net/bpf_jit_comp.c: round_up(stack_depth, 8)); x86/net/bpf_jit_comp.c: int tcc_off = -4 - round_up(stack_depth, 8); x86/net/bpf_jit_comp.c: EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); In the above, STACK_ALIGN in riscv/net/bpf_jit_comp32.c is defined as 16. So stack is aligned in either 8 or 16, x86/s390 having 8-byte stack alignment and the rest having 16-byte alignment. This patch calculates total stack depth based on 16-byte alignment if jit is requested. For the above failing case, the new stack size will be 32+448+0+32=512 and no verification failure. llvm19 regression will be discussed separately in llvm upstream. The verifier change caused three test failures as these tests compared messages with stack size. More specifically, - test_global_funcs/global_func1: fail with interpreter mode and success with jit mode. Adjusted stack sizes so both jit and interpreter modes will fail. - async_stack_depth/{pseudo_call_check, async_call_root_check}: since jit and interpreter will calculate different stack sizes, the failure msg is adjusted to omit those specific stack size numbers. [1] https://lore.kernel.org/bpf/32bde0f0-1881-46c9-931a-673be566c61d@linux.dev/ Suggested-by: Alexei Starovoitov Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240214232951.4113094-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 18 +++++++++++++----- tools/testing/selftests/bpf/progs/async_stack_depth.c | 4 ++-- tools/testing/selftests/bpf/progs/test_global_func1.c | 8 ++++++-- 3 files changed, 21 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index aa192dc735a9..011d54a1dc53 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5812,6 +5812,17 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, strict); } +static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) +{ + if (env->prog->jit_requested) + return round_up(stack_depth, 16); + + /* round up to 32-bytes, since this is granularity + * of interpreter stack size + */ + return round_up(max_t(u32, stack_depth, 1), 32); +} + /* starting from main bpf function walk all instructions of the function * and recursively walk all callees that given function can call. * Ignore jump and exit insns. @@ -5855,10 +5866,7 @@ process_func: depth); return -EACCES; } - /* round up to 32-bytes, since this is granularity - * of interpreter stack size - */ - depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); + depth += round_up_stack_depth(env, subprog[idx].stack_depth); if (depth > MAX_BPF_STACK) { verbose(env, "combined stack size of %d calls is %d. Too large\n", frame + 1, depth); @@ -5952,7 +5960,7 @@ continue_func: */ if (frame == 0) return 0; - depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); + depth -= round_up_stack_depth(env, subprog[idx].stack_depth); frame--; i = ret_insn[frame]; idx = ret_prog[frame]; diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c index 3517c0e01206..36734683acbd 100644 --- a/tools/testing/selftests/bpf/progs/async_stack_depth.c +++ b/tools/testing/selftests/bpf/progs/async_stack_depth.c @@ -30,7 +30,7 @@ static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer) } SEC("tc") -__failure __msg("combined stack size of 2 calls is 576. Too large") +__failure __msg("combined stack size of 2 calls is") int pseudo_call_check(struct __sk_buff *ctx) { struct hmap_elem *elem; @@ -45,7 +45,7 @@ int pseudo_call_check(struct __sk_buff *ctx) } SEC("tc") -__failure __msg("combined stack size of 2 calls is 608. Too large") +__failure __msg("combined stack size of 2 calls is") int async_call_root_check(struct __sk_buff *ctx) { struct hmap_elem *elem; diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c index 17a9f59bf5f3..fc69ff18880d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func1.c +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -5,7 +5,7 @@ #include #include "bpf_misc.h" -#define MAX_STACK (512 - 3 * 32 + 8) +#define MAX_STACK 260 static __attribute__ ((noinline)) int f0(int var, struct __sk_buff *skb) @@ -30,6 +30,10 @@ int f3(int, struct __sk_buff *skb, int); __attribute__ ((noinline)) int f2(int val, struct __sk_buff *skb) { + volatile char buf[MAX_STACK] = {}; + + __sink(buf[MAX_STACK - 1]); + return f1(skb) + f3(val, skb, 1); } @@ -44,7 +48,7 @@ int f3(int val, struct __sk_buff *skb, int var) } SEC("tc") -__failure __msg("combined stack size of 4 calls is 544") +__failure __msg("combined stack size of 3 calls is") int global_func1(struct __sk_buff *skb) { return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4); -- cgit v1.2.3 From 31f26e4fec1fd9fd51efab0aee6b6c370b5e27e7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 14 Feb 2024 17:21:28 +0800 Subject: selftests: bonding: make sure new active is not null One of Jakub's tests[1] shows that there may be period all ports are down and no active slave. This makes the new_active_slave null and the test fails. Add a check to make sure the new active is not null. [ 189.051966] br0: port 2(s1) entered disabled state [ 189.317881] bond0: (slave eth1): link status definitely down, disabling slave [ 189.318487] bond0: (slave eth2): making interface the new active one [ 190.435430] br0: port 4(s2) entered disabled state [ 190.773786] bond0: (slave eth0): link status definitely down, disabling slave [ 190.774204] bond0: (slave eth2): link status definitely down, disabling slave [ 190.774715] bond0: now running without any active interface! [ 190.877760] bond0: (slave eth0): link status definitely up [ 190.878098] bond0: (slave eth0): making interface the new active one [ 190.878495] bond0: active interface up! [ 191.802872] br0: port 4(s2) entered blocking state [ 191.803157] br0: port 4(s2) entered forwarding state [ 191.813756] bond0: (slave eth2): link status definitely up [ 192.847095] br0: port 2(s1) entered blocking state [ 192.847396] br0: port 2(s1) entered forwarding state [ 192.853740] bond0: (slave eth1): link status definitely up # TEST: prio (active-backup ns_ip6_target primary_reselect 1) [FAIL] # Current active slave is null but not eth0 [1] https://netdev-3.bots.linux.dev/vmksft-bonding/results/464481/1-bond-options-sh/stdout Fixes: 45bf79bc56c4 ("selftests: bonding: reduce garp_test/arp_validate test time") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/bonding/bond_options.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index 6fd0cff3e1e9..644ea5769e81 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -50,13 +50,13 @@ active_slave_changed() local old_active_slave=$1 local new_active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" \ ".[].linkinfo.info_data.active_slave") - test "$old_active_slave" != "$new_active_slave" + [ "$new_active_slave" != "$old_active_slave" -a "$new_active_slave" != "null" ] } check_active_slave() { local target_active_slave=$1 - slowwait 2 active_slave_changed $active_slave + slowwait 5 active_slave_changed $active_slave active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") test "$active_slave" = "$target_active_slave" check_err $? "Current active slave is $active_slave but not $target_active_slave" -- cgit v1.2.3 From 7648f0c91eaa3598add9e91991a5483b29da32ee Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Fri, 16 Feb 2024 09:42:45 -0300 Subject: selftests/bpf: Remove empty TEST_CUSTOM_PROGS Commit f04a32b2c5b5 ("selftests/bpf: Do not use sign-file as testcase") removed the TEST_CUSTOM_PROGS assignment, and removed it from being used on TEST_GEN_FILES. Remove two leftovers from that cleanup. Found by inspection. Signed-off-by: Marcos Paulo de Souza Signed-off-by: Daniel Borkmann Cc: Alexey Gladkov Link: https://lore.kernel.org/bpf/20240216-bpf-selftests-custom-progs-v1-1-f7cf281a1fda@suse.com --- tools/testing/selftests/bpf/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a38a3001527c..dbb8c5f94f34 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -197,8 +197,7 @@ endif # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. $(notdir $(TEST_GEN_PROGS) \ - $(TEST_GEN_PROGS_EXTENDED) \ - $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; + $(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ; # sort removes libbpf duplicates when not cross-building MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ @@ -752,7 +751,7 @@ $(OUTPUT)/uprobe_multi: uprobe_multi.c $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ +EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \ -- cgit v1.2.3 From d0bcc15cbae806cad7d1d90003c82ecb5833b533 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 15 Feb 2024 13:27:26 +0100 Subject: tools: ynl: don't access uninitialized attr_space variable If message contains unknown attribute and user passes "--process-unknown" command line option, _decode() gets called with space arg set to None. In that case, attr_space variable is not initialized used which leads to following trace: Traceback (most recent call last): File "./tools/net/ynl/cli.py", line 77, in main() File "./tools/net/ynl/cli.py", line 68, in main reply = ynl.dump(args.dump, attrs) ^^^^^^^^^^^^^^^^^^^^^^^^^^ File "tools/net/ynl/lib/ynl.py", line 909, in dump return self._op(method, vals, [], dump=True) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "tools/net/ynl/lib/ynl.py", line 894, in _op rsp_msg = self._decode(decoded.raw_attrs, op.attr_set.name) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "tools/net/ynl/lib/ynl.py", line 639, in _decode self._rsp_add(rsp, attr_name, None, self._decode_unknown(attr)) ^^^^^^^^^^^^^^^^^^^^^^^^^^ File "tools/net/ynl/lib/ynl.py", line 569, in _decode_unknown return self._decode(NlAttrs(attr.raw), None) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "tools/net/ynl/lib/ynl.py", line 630, in _decode search_attrs = SpaceAttrs(attr_space, rsp, outer_attrs) ^^^^^^^^^^ UnboundLocalError: cannot access local variable 'attr_space' where it is not associated with a value Fix this by moving search_attrs assignment under the if statement above it to make sure attr_space is initialized. Fixes: bf8b832374fb ("tools/net/ynl: Support sub-messages in nested attribute spaces") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/net/ynl/lib/ynl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 03c7ca6aaae9..f45ee5f29bed 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -588,10 +588,10 @@ class YnlFamily(SpecFamily): return decoded def _decode(self, attrs, space, outer_attrs = None): + rsp = dict() if space: attr_space = self.attr_sets[space] - rsp = dict() - search_attrs = SpaceAttrs(attr_space, rsp, outer_attrs) + search_attrs = SpaceAttrs(attr_space, rsp, outer_attrs) for attr in attrs: try: -- cgit v1.2.3 From 01dbd7d8720a0cc97dad5e70ec674dacdc66cf3c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 20 Feb 2024 15:11:02 -0800 Subject: selftests/bpf: Remove intermediate test files. The test of linking process creates several intermediate files. Remove them once the build is over. This reduces the number of files in selftests/bpf/ directory from ~4400 to ~2600. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240220231102.49090-1-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index dbb8c5f94f34..9be69ff701ba 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -536,6 +536,7 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$@ $(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$(@:.skel.h=.subskel.h) + $(Q)rm -f $$(<:.o=.linked1.o) $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) @@ -544,6 +545,7 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ + $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.bpf.o)) @@ -554,6 +556,7 @@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) $(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@ $(Q)$$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h) + $(Q)rm -f $$(@:.skel.h=.linked1.o) $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o) endif # ensure we set up tests.h header generation rule just once -- cgit v1.2.3 From 109a5331143da79eb2b63dee9c65188784edfbce Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:55 +0800 Subject: net: mctp: tests: Test that outgoing skbs have flow data populated When CONFIG_MCTP_FLOWS is enabled, outgoing skbs should have their SKB_EXT_MCTP extension set for drivers to consume. Add two tests for local-to-output routing that check for the flow extensions: one for the simple single-packet case, and one for fragmentation. We now make MCTP_TEST select MCTP_FLOWS, so we always get coverage of these flow tests. The tests are skippable if MCTP_FLOWS is (otherwise) disabled, but that would need manual config tweaking. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- net/mctp/Kconfig | 1 + net/mctp/test/route-test.c | 136 +++++++++++++++++++++++++++ tools/testing/kunit/configs/all_tests.config | 1 + 3 files changed, 138 insertions(+) (limited to 'tools') diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig index 3a5c0e70da77..d8d3413a37f7 100644 --- a/net/mctp/Kconfig +++ b/net/mctp/Kconfig @@ -14,6 +14,7 @@ menuconfig MCTP config MCTP_TEST bool "MCTP core tests" if !KUNIT_ALL_TESTS + select MCTP_FLOWS depends on MCTP=y && KUNIT=y default KUNIT_ALL_TESTS diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index bad084525f17..eb7e9ac95612 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -837,6 +837,140 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test) mctp_test_route_input_multiple_nets_key_fini(test, &t2); } +#if IS_ENABLED(CONFIG_MCTP_FLOWS) + +static void mctp_test_flow_init(struct kunit *test, + struct mctp_test_dev **devp, + struct mctp_test_route **rtp, + struct socket **sock, + struct sk_buff **skbp, + unsigned int len) +{ + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skb; + + /* we have a slightly odd routing setup here; the test route + * is for EID 8, which is our local EID. We don't do a routing + * lookup, so that's fine - all we require is a path through + * mctp_local_output, which will call rt->output on whatever + * route we provide + */ + __mctp_route_test_init(test, &dev, &rt, sock, MCTP_NET_ANY); + + /* Assign a single EID. ->addrs is freed on mctp netdev release */ + dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL); + dev->mdev->num_addrs = 1; + dev->mdev->addrs[0] = 8; + + skb = alloc_skb(len + sizeof(struct mctp_hdr) + 1, GFP_KERNEL); + KUNIT_ASSERT_TRUE(test, skb); + __mctp_cb(skb); + skb_reserve(skb, sizeof(struct mctp_hdr) + 1); + memset(skb_put(skb, len), 0, len); + + /* take a ref for the route, we'll decrement in local output */ + refcount_inc(&rt->rt.refs); + + *devp = dev; + *rtp = rt; + *skbp = skb; +} + +static void mctp_test_flow_fini(struct kunit *test, + struct mctp_test_dev *dev, + struct mctp_test_route *rt, + struct socket *sock) +{ + __mctp_route_test_fini(test, dev, rt, sock); +} + +/* test that an outgoing skb has the correct MCTP extension data set */ +static void mctp_test_packet_flow(struct kunit *test) +{ + struct sk_buff *skb, *skb2; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct mctp_flow *flow; + struct socket *sock; + u8 dst = 8; + int n, rc; + + mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 30); + + rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + KUNIT_ASSERT_EQ(test, rc, 0); + + n = rt->pkts.qlen; + KUNIT_ASSERT_EQ(test, n, 1); + + skb2 = skb_dequeue(&rt->pkts); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); + + flow = skb_ext_find(skb2, SKB_EXT_MCTP); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flow); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flow->key); + KUNIT_ASSERT_PTR_EQ(test, flow->key->sk, sock->sk); + + kfree_skb(skb2); + mctp_test_flow_fini(test, dev, rt, sock); +} + +/* test that outgoing skbs, after fragmentation, all have the correct MCTP + * extension data set. + */ +static void mctp_test_fragment_flow(struct kunit *test) +{ + struct mctp_flow *flows[2]; + struct sk_buff *tx_skbs[2]; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skb; + struct socket *sock; + u8 dst = 8; + int n, rc; + + mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 100); + + rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + KUNIT_ASSERT_EQ(test, rc, 0); + + n = rt->pkts.qlen; + KUNIT_ASSERT_EQ(test, n, 2); + + /* both resulting packets should have the same flow data */ + tx_skbs[0] = skb_dequeue(&rt->pkts); + tx_skbs[1] = skb_dequeue(&rt->pkts); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[0]); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[1]); + + flows[0] = skb_ext_find(tx_skbs[0], SKB_EXT_MCTP); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[0]); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[0]->key); + KUNIT_ASSERT_PTR_EQ(test, flows[0]->key->sk, sock->sk); + + flows[1] = skb_ext_find(tx_skbs[1], SKB_EXT_MCTP); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[1]); + KUNIT_ASSERT_PTR_EQ(test, flows[1]->key, flows[0]->key); + + kfree_skb(tx_skbs[0]); + kfree_skb(tx_skbs[1]); + mctp_test_flow_fini(test, dev, rt, sock); +} + +#else +static void mctp_test_packet_flow(struct kunit *test) +{ + kunit_skip(test, "Requires CONFIG_MCTP_FLOWS=y"); +} + +static void mctp_test_fragment_flow(struct kunit *test) +{ + kunit_skip(test, "Requires CONFIG_MCTP_FLOWS=y"); +} +#endif + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), @@ -847,6 +981,8 @@ static struct kunit_case mctp_test_cases[] = { mctp_route_input_sk_keys_gen_params), KUNIT_CASE(mctp_test_route_input_multiple_nets_bind), KUNIT_CASE(mctp_test_route_input_multiple_nets_key), + KUNIT_CASE(mctp_test_packet_flow), + KUNIT_CASE(mctp_test_fragment_flow), {} }; diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index 12da4c493867..a6cf69a665e8 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -23,6 +23,7 @@ CONFIG_USB4=y CONFIG_NET=y CONFIG_MCTP=y +CONFIG_MCTP_FLOWS=y CONFIG_INET=y CONFIG_MPTCP=y -- cgit v1.2.3 From b546b57526953be2981113171ed586c4c50b1b0a Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 22 Feb 2024 17:03:00 +0200 Subject: selftests/bpf: update tcp_custom_syncookie to use scalar packet offset This commit updates tcp_custom_syncookie.c:tcp_parse_option() to use explicit packet offset (ctx->off) for packet access instead of ever moving pointer (ctx->ptr), this reduces verification complexity: - the tcp_parse_option() is passed as a callback to bpf_loop(); - suppose a checkpoint is created each time at function entry; - the ctx->ptr is tracked by verifier as PTR_TO_PACKET; - the ctx->ptr is incremented in tcp_parse_option(), thus umax_value field tracked for it is incremented as well; - on each next iteration of tcp_parse_option() checkpoint from a previous iteration can't be reused for state pruning, because PTR_TO_PACKET registers are considered equivalent only if old->umax_value >= cur->umax_value; - on the other hand, the ctx->off is a SCALAR, subject to widen_imprecise_scalars(); - it's exact bounds are eventually forgotten and it is tracked as unknown scalar at entry to tcp_parse_option(); - hence checkpoints created at the start of the function eventually converge. The change is similar to one applied in [0] to xdp_synproxy_kern.c. Comparing before and after with veristat yields following results: File Insns (A) Insns (B) Insns (DIFF) ------------------------------- --------- --------- ----------------- test_tcp_custom_syncookie.bpf.o 466657 12423 -454234 (-97.34%) [0] commit 977bc146d4eb ("selftests/bpf: track tcp payload offset as scalar in xdp_synproxy") Acked-by: Yonghong Song Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240222150300.14909-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/test_tcp_custom_syncookie.c | 83 ++++++++++++++-------- 1 file changed, 53 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c index a5501b29979a..c8e4553648bf 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -10,6 +10,8 @@ #include "test_siphash.h" #include "test_tcp_custom_syncookie.h" +#define MAX_PACKET_OFF 0xffff + /* Hash is calculated for each client and split into ISN and TS. * * MSB LSB @@ -52,16 +54,15 @@ static siphash_key_t test_key_siphash = { struct tcp_syncookie { struct __sk_buff *skb; + void *data; void *data_end; struct ethhdr *eth; struct iphdr *ipv4; struct ipv6hdr *ipv6; struct tcphdr *tcp; - union { - char *ptr; - __be32 *ptr32; - }; + __be32 *ptr32; struct bpf_tcp_req_attrs attrs; + u32 off; u32 cookie; u64 first; }; @@ -70,6 +71,7 @@ bool handled_syn, handled_ack; static int tcp_load_headers(struct tcp_syncookie *ctx) { + ctx->data = (void *)(long)ctx->skb->data; ctx->data_end = (void *)(long)ctx->skb->data_end; ctx->eth = (struct ethhdr *)(long)ctx->skb->data; @@ -134,6 +136,7 @@ static int tcp_reload_headers(struct tcp_syncookie *ctx) if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0)) goto err; + ctx->data = (void *)(long)ctx->skb->data; ctx->data_end = (void *)(long)ctx->skb->data_end; ctx->eth = (struct ethhdr *)(long)ctx->skb->data; if (ctx->ipv4) { @@ -195,47 +198,68 @@ err: return -1; } -static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) +static __always_inline void *next(struct tcp_syncookie *ctx, __u32 sz) { - char opcode, opsize; + __u64 off = ctx->off; + __u8 *data; - if (ctx->ptr + 1 > ctx->data_end) - goto stop; + /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ + if (off > MAX_PACKET_OFF - sz) + return NULL; + + data = ctx->data + off; + barrier_var(data); + if (data + sz >= ctx->data_end) + return NULL; - opcode = *ctx->ptr++; + ctx->off += sz; + return data; +} - if (opcode == TCPOPT_EOL) +static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) +{ + __u8 *opcode, *opsize, *wscale; + __u32 *tsval, *tsecr; + __u16 *mss; + __u32 off; + + off = ctx->off; + opcode = next(ctx, 1); + if (!opcode) goto stop; - if (opcode == TCPOPT_NOP) + if (*opcode == TCPOPT_EOL) + goto stop; + + if (*opcode == TCPOPT_NOP) goto next; - if (ctx->ptr + 1 > ctx->data_end) + opsize = next(ctx, 1); + if (!opsize) goto stop; - opsize = *ctx->ptr++; - - if (opsize < 2) + if (*opsize < 2) goto stop; - switch (opcode) { + switch (*opcode) { case TCPOPT_MSS: - if (opsize == TCPOLEN_MSS && ctx->tcp->syn && - ctx->ptr + (TCPOLEN_MSS - 2) < ctx->data_end) - ctx->attrs.mss = get_unaligned_be16(ctx->ptr); + mss = next(ctx, 2); + if (*opsize == TCPOLEN_MSS && ctx->tcp->syn && mss) + ctx->attrs.mss = get_unaligned_be16(mss); break; case TCPOPT_WINDOW: - if (opsize == TCPOLEN_WINDOW && ctx->tcp->syn && - ctx->ptr + (TCPOLEN_WINDOW - 2) < ctx->data_end) { + wscale = next(ctx, 1); + if (*opsize == TCPOLEN_WINDOW && ctx->tcp->syn && wscale) { ctx->attrs.wscale_ok = 1; - ctx->attrs.snd_wscale = *ctx->ptr; + ctx->attrs.snd_wscale = *wscale; } break; case TCPOPT_TIMESTAMP: - if (opsize == TCPOLEN_TIMESTAMP && - ctx->ptr + (TCPOLEN_TIMESTAMP - 2) < ctx->data_end) { - ctx->attrs.rcv_tsval = get_unaligned_be32(ctx->ptr); - ctx->attrs.rcv_tsecr = get_unaligned_be32(ctx->ptr + 4); + tsval = next(ctx, 4); + tsecr = next(ctx, 4); + if (*opsize == TCPOLEN_TIMESTAMP && tsval && tsecr) { + ctx->attrs.rcv_tsval = get_unaligned_be32(tsval); + ctx->attrs.rcv_tsecr = get_unaligned_be32(tsecr); if (ctx->tcp->syn && ctx->attrs.rcv_tsecr) ctx->attrs.tstamp_ok = 0; @@ -244,13 +268,12 @@ static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) } break; case TCPOPT_SACK_PERM: - if (opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn && - ctx->ptr + (TCPOLEN_SACK_PERM - 2) < ctx->data_end) + if (*opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn) ctx->attrs.sack_ok = 1; break; } - ctx->ptr += opsize - 2; + ctx->off = off + *opsize; next: return 0; stop: @@ -259,7 +282,7 @@ stop: static void tcp_parse_options(struct tcp_syncookie *ctx) { - ctx->ptr = (char *)(ctx->tcp + 1); + ctx->off = (__u8 *)(ctx->tcp + 1) - (__u8 *)ctx->data, bpf_loop(40, tcp_parse_option, ctx, 0); } -- cgit v1.2.3 From 58fd62e0aa50fdd20bc41a01e787001f3af8a925 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Wed, 21 Feb 2024 13:18:38 -0800 Subject: bpf: Clarify batch lookup/lookup_and_delete semantics The batch lookup and lookup_and_delete APIs have two parameters, in_batch and out_batch, to facilitate iterative lookup/lookup_and_deletion operations for supported maps. Except NULL for in_batch at the start of these two batch operations, both parameters need to point to memory equal or larger than the respective map key size, except for various hashmaps (hash, percpu_hash, lru_hash, lru_percpu_hash) where the in_batch/out_batch memory size should be at least 4 bytes. Document these semantics to clarify the API. Signed-off-by: Martin Kelly Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240221211838.1241578-1-martin.kelly@crowdstrike.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 6 +++++- tools/include/uapi/linux/bpf.h | 6 +++++- tools/lib/bpf/bpf.h | 17 ++++++++++++----- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d96708380e52..d2e6c5fcec01 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -617,7 +617,11 @@ union bpf_iter_link_info { * to NULL to begin the batched operation. After each subsequent * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant * *out_batch* as the *in_batch* for the next operation to - * continue iteration from the current point. + * continue iteration from the current point. Both *in_batch* and + * *out_batch* must point to memory large enough to hold a key, + * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters + * must be at least 4 bytes wide regardless of key size. * * The *keys* and *values* are output parameters which must point * to memory large enough to hold *count* items based on the key diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d96708380e52..d2e6c5fcec01 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -617,7 +617,11 @@ union bpf_iter_link_info { * to NULL to begin the batched operation. After each subsequent * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant * *out_batch* as the *in_batch* for the next operation to - * continue iteration from the current point. + * continue iteration from the current point. Both *in_batch* and + * *out_batch* must point to memory large enough to hold a key, + * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters + * must be at least 4 bytes wide regardless of key size. * * The *keys* and *values* are output parameters which must point * to memory large enough to hold *count* items based on the key diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index ab2570d28aec..df0db2f0cdb7 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -190,10 +190,14 @@ LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys, /** * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. * - * The parameter *in_batch* is the address of the first element in the batch to read. - * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent - * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate - * that the batched lookup starts from the beginning of the map. + * The parameter *in_batch* is the address of the first element in the batch to + * read. *out_batch* is an output parameter that should be passed as *in_batch* + * to subsequent calls to **bpf_map_lookup_batch()**. NULL can be passed for + * *in_batch* to indicate that the batched lookup starts from the beginning of + * the map. Both *in_batch* and *out_batch* must point to memory large enough to + * hold a single key, except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which the memory size must be at + * least 4 bytes wide regardless of key size. * * The *keys* and *values* are output parameters which must point to memory large enough to * hold *count* items based on the key and value size of the map *map_fd*. The *keys* @@ -226,7 +230,10 @@ LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, * * @param fd BPF map file descriptor * @param in_batch address of the first element in batch to read, can pass NULL to - * get address of the first element in *out_batch* + * get address of the first element in *out_batch*. If not NULL, must be large + * enough to hold a key. For **BPF_MAP_TYPE_{HASH, PERCPU_HASH, LRU_HASH, + * LRU_PERCPU_HASH}**, the memory size must be at least 4 bytes wide regardless + * of key size. * @param out_batch output parameter that should be passed to next call as *in_batch* * @param keys pointer to an array of *count* keys * @param values pointer to an array large enough for *count* values -- cgit v1.2.3 From e9bbda13a7b876451285ab15fb600b809e5e2290 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 21 Feb 2024 18:11:05 -0800 Subject: selftests/bpf: Test case for lacking CFI stub functions. Ensure struct_ops rejects the registration of struct_ops types without proper CFI stub functions. bpf_test_no_cfi.ko is a module that attempts to register a struct_ops type called "bpf_test_no_cfi_ops" with cfi_stubs of NULL and non-NULL value. The NULL one should fail, and the non-NULL one should succeed. The module can only be loaded successfully if these registrations yield the expected results. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240222021105.1180475-3-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/Makefile | 10 ++- .../testing/selftests/bpf/bpf_test_no_cfi/Makefile | 19 +++++ .../bpf/bpf_test_no_cfi/bpf_test_no_cfi.c | 84 ++++++++++++++++++++++ .../bpf/prog_tests/test_struct_ops_no_cfi.c | 35 +++++++++ tools/testing/selftests/bpf/testing_helpers.c | 4 +- tools/testing/selftests/bpf/testing_helpers.h | 2 + 6 files changed, 151 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile create mode 100644 tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c create mode 100644 tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9be69ff701ba..84cb5500e8ef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -132,7 +132,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ - xdp_features + xdp_features bpf_test_no_cfi.ko TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi @@ -254,6 +254,12 @@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmo $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ +$(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch]) + $(call msg,MOD,,$@) + $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation + $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_test_no_cfi + $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@ + DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool ifneq ($(CROSS_COMPILE),) CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool @@ -631,6 +637,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ flow_dissector_load.h \ ip_check_defrag_frags.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ + $(OUTPUT)/bpf_test_no_cfi.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ @@ -759,6 +766,7 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ feature bpftool \ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \ no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \ + bpf_test_no_cfi.ko \ liburandom_read.so) .PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile new file mode 100644 index 000000000000..ed5143b79edf --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile @@ -0,0 +1,19 @@ +BPF_TEST_NO_CFI_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= $(abspath $(BPF_TEST_NO_CFI_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = bpf_test_no_cfi.ko + +obj-m += bpf_test_no_cfi.o + +all: + +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) clean + diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c new file mode 100644 index 000000000000..b1dd889d5d7d --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include + +struct bpf_test_no_cfi_ops { + void (*fn_1)(void); + void (*fn_2)(void); +}; + +static int dummy_init(struct btf *btf) +{ + return 0; +} + +static int dummy_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + +static int dummy_reg(void *kdata) +{ + return 0; +} + +static void dummy_unreg(void *kdata) +{ +} + +static const struct bpf_verifier_ops dummy_verifier_ops; + +static void bpf_test_no_cfi_ops__fn_1(void) +{ +} + +static void bpf_test_no_cfi_ops__fn_2(void) +{ +} + +static struct bpf_test_no_cfi_ops __test_no_cif_ops = { + .fn_1 = bpf_test_no_cfi_ops__fn_1, + .fn_2 = bpf_test_no_cfi_ops__fn_2, +}; + +static struct bpf_struct_ops test_no_cif_ops = { + .verifier_ops = &dummy_verifier_ops, + .init = dummy_init, + .init_member = dummy_init_member, + .reg = dummy_reg, + .unreg = dummy_unreg, + .name = "bpf_test_no_cfi_ops", + .owner = THIS_MODULE, +}; + +static int bpf_test_no_cfi_init(void) +{ + int ret; + + ret = register_bpf_struct_ops(&test_no_cif_ops, + bpf_test_no_cfi_ops); + if (!ret) + return -EINVAL; + + test_no_cif_ops.cfi_stubs = &__test_no_cif_ops; + ret = register_bpf_struct_ops(&test_no_cif_ops, + bpf_test_no_cfi_ops); + return ret; +} + +static void bpf_test_no_cfi_exit(void) +{ +} + +module_init(bpf_test_no_cfi_init); +module_exit(bpf_test_no_cfi_exit); + +MODULE_AUTHOR("Kuifeng Lee"); +MODULE_DESCRIPTION("BPF no cfi_stubs test module"); +MODULE_LICENSE("Dual BSD/GPL"); + diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c new file mode 100644 index 000000000000..106ea447965a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include + +static void load_bpf_test_no_cfi(void) +{ + int fd; + int err; + + fd = open("bpf_test_no_cfi.ko", O_RDONLY); + if (!ASSERT_GE(fd, 0, "open")) + return; + + /* The module will try to register a struct_ops type without + * cfi_stubs and with cfi_stubs. + * + * The one without cfi_stub should fail. The module will be loaded + * successfully only if the result of the registration is as + * expected, or it fails. + */ + err = finit_module(fd, "", 0); + close(fd); + if (!ASSERT_OK(err, "finit_module")) + return; + + err = delete_module("bpf_test_no_cfi", 0); + ASSERT_OK(err, "delete_module"); +} + +void test_struct_ops_no_cfi(void) +{ + if (test__start_subtest("load_bpf_test_no_cfi")) + load_bpf_test_no_cfi(); +} diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index a59e56d804ee..28b6646662af 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -356,12 +356,12 @@ __u64 read_perf_max_sample_freq(void) return sample_freq; } -static int finit_module(int fd, const char *param_values, int flags) +int finit_module(int fd, const char *param_values, int flags) { return syscall(__NR_finit_module, fd, param_values, flags); } -static int delete_module(const char *name, int flags) +int delete_module(const char *name, int flags) { return syscall(__NR_delete_module, name, flags); } diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index d14de81727e6..d55f6ab12433 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -36,6 +36,8 @@ __u64 read_perf_max_sample_freq(void); int load_bpf_testmod(bool verbose); int unload_bpf_testmod(bool verbose); int kern_sync_rcu(void); +int finit_module(int fd, const char *param_values, int flags); +int delete_module(const char *name, int flags); static inline __u64 get_time_ns(void) { -- cgit v1.2.3 From ac95b1fca034d50d42a058a9476e0fe555653d0d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 22 Feb 2024 14:43:49 +0100 Subject: tools: ynl: allow user to specify flag attr with bool values The flag attr presence in Netlink message indicates value "true", if it is missing in the message it means "false". Allow user to specify attrname with value "true"/"false" in json for flag attrs, treat "false" value properly. Signed-off-by: Jiri Pirko Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20240222134351.224704-2-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index f45ee5f29bed..1c5c7662dc9a 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -459,6 +459,9 @@ class YnlFamily(SpecFamily): attr_payload += self._add_attr(attr['nested-attributes'], subname, subvalue, sub_attrs) elif attr["type"] == 'flag': + if not value: + # If value is absent or false then skip attribute creation. + return b'' attr_payload = b'' elif attr["type"] == 'string': attr_payload = str(value).encode('ascii') + b'\x00' -- cgit v1.2.3 From ffe10a4546feaae085a269f6e99680b9eda7d5a6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 22 Feb 2024 14:43:50 +0100 Subject: tools: ynl: process all scalar types encoding in single elif statement As a preparation to handle enums for scalar values, unify the processing of all scalar types in a single elif statement. Signed-off-by: Jiri Pirko Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240222134351.224704-3-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 1c5c7662dc9a..e459a130170b 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -474,14 +474,14 @@ class YnlFamily(SpecFamily): attr_payload = self._encode_struct(attr.struct_name, value) else: raise Exception(f'Unknown type for binary attribute, value: {value}') - elif attr.is_auto_scalar: + elif attr['type'] in NlAttr.type_formats or attr.is_auto_scalar: scalar = int(value) - real_type = attr["type"][0] + ('32' if scalar.bit_length() <= 32 else '64') - format = NlAttr.get_format(real_type, attr.byte_order) - attr_payload = format.pack(int(value)) - elif attr['type'] in NlAttr.type_formats: - format = NlAttr.get_format(attr['type'], attr.byte_order) - attr_payload = format.pack(int(value)) + if attr.is_auto_scalar: + attr_type = attr["type"][0] + ('32' if scalar.bit_length() <= 32 else '64') + else: + attr_type = attr["type"] + format = NlAttr.get_format(attr_type, attr.byte_order) + attr_payload = format.pack(scalar) elif attr['type'] in "bitfield32": attr_payload = struct.pack("II", int(value["value"]), int(value["selector"])) elif attr['type'] == 'sub-message': -- cgit v1.2.3 From e8a6c515ff5f52e1ee24397116a05cd4fc1bae99 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 22 Feb 2024 14:43:51 +0100 Subject: tools: ynl: allow user to pass enum string instead of scalar value During decoding of messages coming from kernel, attribute values are converted to enum names in case the attribute type is enum of bitfield32. However, when user constructs json message, he has to pass plain scalar values. See "state" "selector" and "value" attributes in following examples: $ sudo ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/dpll.yaml --do pin-set --json '{"id": 0, "parent-device": {"parent-id": 0, "state": 1}}' $ sudo ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml --do port-set --json '{"bus-name": "pci", "dev-name": "0000:08:00.1", "port-index": 98304, "port-function": {"caps": {"selector": 1, "value": 1 }}}' Allow user to pass strings containing enum names, convert them to scalar values to be encoded into Netlink message: $ sudo ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/dpll.yaml --do pin-set --json '{"id": 0, "parent-device": {"parent-id": 0, "state": "connected"}}' $ sudo ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml --do port-set --json '{"bus-name": "pci", "dev-name": "0000:08:00.1", "port-index": 98304, "port-function": {"caps": {"selector": ["roce-bit"], "value": ["roce-bit"] }}}' Signed-off-by: Jiri Pirko Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20240222134351.224704-4-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index e459a130170b..ac55aa5a3083 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -438,6 +438,26 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP, mcast_id) + def _encode_enum(self, attr_spec, value): + enum = self.consts[attr_spec['enum']] + if enum.type == 'flags' or attr_spec.get('enum-as-flags', False): + scalar = 0 + if isinstance(value, str): + value = [value] + for single_value in value: + scalar += enum.entries[single_value].user_value(as_flags = True) + return scalar + else: + return enum.entries[value].user_value() + + def _get_scalar(self, attr_spec, value): + try: + return int(value) + except (ValueError, TypeError) as e: + if 'enum' not in attr_spec: + raise e + return self._encode_enum(attr_spec, value); + def _add_attr(self, space, name, value, search_attrs): try: attr = self.attr_sets[space][name] @@ -475,7 +495,7 @@ class YnlFamily(SpecFamily): else: raise Exception(f'Unknown type for binary attribute, value: {value}') elif attr['type'] in NlAttr.type_formats or attr.is_auto_scalar: - scalar = int(value) + scalar = self._get_scalar(attr, value) if attr.is_auto_scalar: attr_type = attr["type"][0] + ('32' if scalar.bit_length() <= 32 else '64') else: @@ -483,7 +503,9 @@ class YnlFamily(SpecFamily): format = NlAttr.get_format(attr_type, attr.byte_order) attr_payload = format.pack(scalar) elif attr['type'] in "bitfield32": - attr_payload = struct.pack("II", int(value["value"]), int(value["selector"])) + scalar_value = self._get_scalar(attr, value["value"]) + scalar_selector = self._get_scalar(attr, value["selector"]) + attr_payload = struct.pack("II", scalar_value, scalar_selector) elif attr['type'] == 'sub-message': msg_format = self._resolve_selector(attr, search_attrs) attr_payload = b'' -- cgit v1.2.3 From d662c5b3ce6dbed9d0991bc83001bbcc4a9bc2f8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 22 Feb 2024 15:48:31 -0800 Subject: tools: ynl: fix header guards devlink and ethtool have a trailing _ in the header guard. I must have copy/pasted it into new guards, assuming it's a headers_install artifact. This fixes build if system headers are old. Fixes: 8f109e91b852 ("tools: ynl: include dpll and mptcp_pm in C codegen") Acked-by: Nicolas Dichtel Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240222234831.179181-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile.deps | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index e6154a1255f8..7dcec16da509 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -15,9 +15,9 @@ UAPI_PATH:=../../../../include/uapi/ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) -CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H_,dpll.h) +CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) -CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H_,mptcp_pm.h) +CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) -- cgit v1.2.3 From 9da74836740d9c753fb6cd270f7c75c5258836de Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 23 Feb 2024 21:17:53 +0100 Subject: selftests: mptcp: lib: catch duplicated subtest entries It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. When adding a new subtest entry, an error message is printed in case of duplicated entries. If there were duplicated entries and if all features were expected to work, the script exits with an error at the end, after having printed all subtests in the TAP format. Thanks to that, the MPTCP CI will catch such issues early. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-next-20240223-misc-improvements-v1-1-b6c8a10396bd@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 3a2abae5993e..037cb3e84330 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -9,6 +9,7 @@ readonly KSFT_SKIP=4 readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" MPTCP_LIB_SUBTESTS=() +MPTCP_LIB_SUBTESTS_DUPLICATED=0 # only if supported (or forced) and not disabled, see no-color.org if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } && @@ -146,12 +147,26 @@ mptcp_lib_kversion_ge() { mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" } +__mptcp_lib_result_check_duplicated() { + local subtest + + for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do + if [[ "${subtest}" == *" - ${KSFT_TEST}: ${*%% #*}" ]]; then + MPTCP_LIB_SUBTESTS_DUPLICATED=1 + mptcp_lib_print_err "Duplicated entry: ${*}" + break + fi + done +} + __mptcp_lib_result_add() { local result="${1}" shift local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) + __mptcp_lib_result_check_duplicated "${*}" + MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}") } @@ -206,6 +221,12 @@ mptcp_lib_result_print_all_tap() { for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do printf "%s\n" "${subtest}" done + + if [ "${MPTCP_LIB_SUBTESTS_DUPLICATED}" = 1 ] && + mptcp_lib_expect_all_features; then + mptcp_lib_print_err "Duplicated test entries" + exit ${KSFT_FAIL} + fi } # get the value of keyword $1 in the line marked by keyword $2 -- cgit v1.2.3 From 488ccbe76cb4fb3dff87722c35dd5e2ea6c5f8d6 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 23 Feb 2024 21:17:57 +0100 Subject: selftests: mptcp: netlink: drop duplicate var ret The variable 'ret' are defined twice in pm_netlink.sh. This patch drops this duplicate one that has been defined from the beginning, with commit eedbc685321b ("selftests: add PM netlink functional tests") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-next-20240223-misc-improvements-v1-5-b6c8a10396bd@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 71899a3ffa7a..ebfefae71e13 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -28,7 +28,6 @@ sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" err=$(mktemp) -ret=0 cleanup() { -- cgit v1.2.3 From fccf7c922459c588eb71bdda5c53727e539442e8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 23 Feb 2024 21:17:58 +0100 Subject: selftests: mptcp: simult flows: define missing vars The variables 'large', 'small', 'sout', 'cout', 'capout' and 'size' are used in multiple functions, so they should be clearly defined as global variables at the top of the file. This patch redefines them at the beginning of simult_flows.sh. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-next-20240223-misc-improvements-v1-6-b6c8a10396bd@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/simult_flows.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 8f9ddb3ad4fe..ed0165c15a24 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -16,6 +16,12 @@ test_cnt=1 ret=0 bail=0 slack=50 +large="" +small="" +sout="" +cout="" +capout="" +size=0 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ]" -- cgit v1.2.3 From 8c6f6b4bb53a904f922dfb90d566391d3feee32c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 23 Feb 2024 21:17:59 +0100 Subject: selftests: mptcp: join: change capture/checksum as bool To maintain consistency with other scripts, this patch changes vars 'capture' and 'checksum' as bool vars in mptcp_join. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-next-20240223-misc-improvements-v1-7-b6c8a10396bd@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c07386e21e0a..3f198743cb03 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -29,11 +29,11 @@ iptables="iptables" ip6tables="ip6tables" timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) -capture=0 -checksum=0 +capture=false +checksum=false ip_mptcp=0 check_invert=0 -validate_checksum=0 +validate_checksum=false init=0 evts_ns1="" evts_ns2="" @@ -100,7 +100,7 @@ init_partial() ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 - if [ $checksum -eq 1 ]; then + if $checksum; then ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1 fi done @@ -380,7 +380,7 @@ reset_with_checksum() ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable - validate_checksum=1 + validate_checksum=true } reset_with_allow_join_id0() @@ -413,7 +413,7 @@ reset_with_allow_join_id0() setup_fail_rules() { check_invert=1 - validate_checksum=1 + validate_checksum=true local i="$1" local ip="${2:-4}" local tables @@ -1017,7 +1017,7 @@ do_transfer() :> "$sout" :> "$capout" - if [ $capture -eq 1 ]; then + if $capture; then local capuser if [ -z $SUDO_USER ] ; then capuser="" @@ -1119,7 +1119,7 @@ do_transfer() wait $spid local rets=$? - if [ $capture -eq 1 ]; then + if $capture; then sleep 1 kill $cappid fi @@ -1507,7 +1507,7 @@ chk_join_nr() else print_ok fi - if [ $validate_checksum -eq 1 ]; then + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr chk_rst_nr $rst_nr $rst_nr @@ -3664,10 +3664,10 @@ while getopts "${all_tests_args}cCih" opt; do tests+=("${all_tests[${opt}]}") ;; c) - capture=1 + capture=true ;; C) - checksum=1 + checksum=true ;; i) ip_mptcp=1 -- cgit v1.2.3 From e8ddc5f255c3e5371aed621fdda6dce227c69a68 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 23 Feb 2024 21:18:00 +0100 Subject: selftests: mptcp: diag: change timeout_poll to 30 Even if it is set to 100ms from the beginning with commit df62f2ec3df6 ("selftests/mptcp: add diag interface tests"), there is no reason not to have it to 30ms like all the other tests. "diag.sh" is not supposed to be slower than the other ones. To maintain consistency with other scripts, this patch changes it to 30. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-next-20240223-misc-improvements-v1-8-b6c8a10396bd@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 0a58ebb8b04c..8573326d326a 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -8,7 +8,7 @@ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" ksft_skip=4 test_cnt=1 -timeout_poll=100 +timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) ret=0 -- cgit v1.2.3 From b819a8481a19043ca990980dd56b1a41389c665c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 23 Feb 2024 21:06:58 -0800 Subject: selftests: netdevsim: be less selective for FW for the devlink test Commit 6151ff9c7521 ("selftests: netdevsim: use suitable existing dummy file for flash test") introduced a nice trick to the devlink flashing test. Instead of user having to create a file under /lib/firmware we just pick the first one that already exists. Sadly, in AWS Linux there are no files directly under /lib/firmware, only in subdirectories. Don't limit the search to -maxdepth 1. We can use the %P print format to get the correct path for files inside subdirectories: $ find /lib/firmware -type f -printf '%P\n' | head -1 intel-ucode/06-1a-05 The full path is /lib/firmware/intel-ucode/06-1a-05 This works in GNU find, busybox doesn't have printf at all, so we're not making it worse. Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240224050658.930272-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/netdevsim/devlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 46e20b13473c..b5ea2526f23c 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -31,7 +31,7 @@ devlink_wait() fw_flash_test() { - DUMMYFILE=$(find /lib/firmware -maxdepth 1 -type f -printf '%f\n' |head -1) + DUMMYFILE=$(find /lib/firmware -type f -printf '%P\n' | head -1) RET=0 if [ -z "$DUMMYFILE" ] -- cgit v1.2.3 From 22fc0e80aeb5c0c1377e6c02d7248f8fbf5df7fc Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 1 Feb 2024 12:52:25 +0000 Subject: bpf, arm64: support exceptions The prologue generation code has been modified to make the callback program use the stack of the program marked as exception boundary where callee-saved registers are already pushed. As the bpf_throw function never returns, if it clobbers any callee-saved registers, they would remain clobbered. So, the prologue of the exception-boundary program is modified to push R23 and R24 as well, which the callback will then recover in its epilogue. The Procedure Call Standard for the Arm 64-bit Architecture[1] states that registers r19 to r28 should be saved by the callee. BPF programs on ARM64 already save all callee-saved registers except r23 and r24. This patch adds an instruction in prologue of the program to save these two registers and another instruction in the epilogue to recover them. These extra instructions are only added if bpf_throw() is used. Otherwise the emitted prologue/epilogue remains unchanged. [1] https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20240201125225.72796-3-puranjay12@gmail.com Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 87 ++++++++++++++++++++++------ tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 - 2 files changed, 68 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cfd5434de483..20720ec346b8 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -285,7 +285,8 @@ static bool is_lsi_offset(int offset, int scale) /* Tail call offset to jump into */ #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) -static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) +static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, + bool is_exception_cb) { const struct bpf_prog *prog = ctx->prog; const bool is_main_prog = !bpf_is_subprog(prog); @@ -333,19 +334,34 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit(A64_MOV(1, A64_R(9), A64_LR), ctx); emit(A64_NOP, ctx); - /* Sign lr */ - if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) - emit(A64_PACIASP, ctx); - - /* Save FP and LR registers to stay align with ARM64 AAPCS */ - emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); - - /* Save callee-saved registers */ - emit(A64_PUSH(r6, r7, A64_SP), ctx); - emit(A64_PUSH(r8, r9, A64_SP), ctx); - emit(A64_PUSH(fp, tcc, A64_SP), ctx); - emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); + if (!is_exception_cb) { + /* Sign lr */ + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) + emit(A64_PACIASP, ctx); + /* Save FP and LR registers to stay align with ARM64 AAPCS */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* Save callee-saved registers */ + emit(A64_PUSH(r6, r7, A64_SP), ctx); + emit(A64_PUSH(r8, r9, A64_SP), ctx); + emit(A64_PUSH(fp, tcc, A64_SP), ctx); + emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); + } else { + /* + * Exception callback receives FP of Main Program as third + * parameter + */ + emit(A64_MOV(1, A64_FP, A64_R(2)), ctx); + /* + * Main Program already pushed the frame record and the + * callee-saved registers. The exception callback will not push + * anything and re-use the main program's stack. + * + * 10 registers are on the stack + */ + emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx); + } /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); @@ -365,6 +381,20 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit_bti(A64_BTI_J, ctx); } + /* + * Program acting as exception boundary should save all ARM64 + * Callee-saved registers as the exception callback needs to recover + * all ARM64 Callee-saved registers in its epilogue. + */ + if (prog->aux->exception_boundary) { + /* + * As we are pushing two more registers, BPF_FP should be moved + * 16 bytes + */ + emit(A64_SUB_I(1, fp, fp, 16), ctx); + emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx); + } + emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); /* Stack must be multiples of 16B */ @@ -653,7 +683,7 @@ static void build_plt(struct jit_ctx *ctx) plt->target = (u64)&dummy_tramp; } -static void build_epilogue(struct jit_ctx *ctx) +static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb) { const u8 r0 = bpf2a64[BPF_REG_0]; const u8 r6 = bpf2a64[BPF_REG_6]; @@ -666,6 +696,15 @@ static void build_epilogue(struct jit_ctx *ctx) /* We're done with BPF stack */ emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + /* + * Program acting as exception boundary pushes R23 and R24 in addition + * to BPF callee-saved registers. Exception callback uses the boundary + * program's stack frame, so recover these extra registers in the above + * two cases. + */ + if (ctx->prog->aux->exception_boundary || is_exception_cb) + emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx); + /* Restore x27 and x28 */ emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); /* Restore fs (x25) and x26 */ @@ -1575,7 +1614,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * BPF line info needs ctx->offset[i] to be the offset of * instruction[i] in jited image, so build prologue first. */ - if (build_prologue(&ctx, was_classic)) { + if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) { prog = orig_prog; goto out_off; } @@ -1586,7 +1625,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.epilogue_offset = ctx.idx; - build_epilogue(&ctx); + build_epilogue(&ctx, prog->aux->exception_cb); build_plt(&ctx); extable_align = __alignof__(struct exception_table_entry); @@ -1614,7 +1653,7 @@ skip_init_ctx: ctx.idx = 0; ctx.exentry_idx = 0; - build_prologue(&ctx, was_classic); + build_prologue(&ctx, was_classic, prog->aux->exception_cb); if (build_body(&ctx, extra_pass)) { bpf_jit_binary_free(header); @@ -1622,7 +1661,7 @@ skip_init_ctx: goto out_off; } - build_epilogue(&ctx); + build_epilogue(&ctx, prog->aux->exception_cb); build_plt(&ctx); /* 3. Extra pass to validate JITed code. */ @@ -2310,3 +2349,13 @@ bool bpf_jit_supports_ptr_xchg(void) { return true; } + +bool bpf_jit_supports_exceptions(void) +{ + /* We unwind through both kernel frames starting from within bpf_throw + * call and BPF frames. Therefore we require FP unwinder to be enabled + * to walk kernel frames and reach BPF frames in the stack trace. + * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y + */ + return true; +} diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 5c2cc7e8c5d0..0445ac38bc07 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,6 +1,5 @@ bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 -exceptions # JIT does not support calling kfunc bpf_throw: -524 fexit_sleep # The test never returns. The remaining tests cannot start. kprobe_multi_bench_attach # needs CONFIG_FPROBE kprobe_multi_test # needs CONFIG_FPROBE -- cgit v1.2.3 From 3bfe90527d6387e19078c9f774d6328448bac201 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 26 Feb 2024 14:58:06 -0800 Subject: tools: ynl: protect from old OvS headers Since commit 7c59c9c8f202 ("tools: ynl: generate code for ovs families") we need relatively recent OvS headers to get YNL to compile. Add the direct include workaround to fix compilation on less up-to-date OSes like CentOS 9. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20240226225806.1301152-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile.deps | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 7dcec16da509..07373c5a7afe 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -21,3 +21,6 @@ CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) +CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) +CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) +CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) -- cgit v1.2.3 From 21f6986d19b01ecda3d8ae2e79780aa3603d4eeb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:18 -0800 Subject: tools: ynl: give up on libmnl for auto-ints The temporary auto-int helpers are not really correct. We can't treat signed and unsigned ints the same when determining whether we need full 8B. I realized this before sending the patch to add support in libmnl. Unfortunately, that patch has not been merged, so time to fix our local helpers. Use the mnl* name for now, subsequent patches will address that. Acked-by: Nicolas Dichtel Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20240227223032.1835527-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 45 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 7491da8e7555..eaa0d432366c 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -125,20 +125,47 @@ int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd); int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); -#ifndef MNL_HAS_AUTO_SCALARS -static inline uint64_t mnl_attr_get_uint(const struct nlattr *attr) +/* Attribute helpers */ + +static inline __u64 mnl_attr_get_uint(const struct nlattr *attr) +{ + switch (mnl_attr_get_payload_len(attr)) { + case 4: + return mnl_attr_get_u32(attr); + case 8: + return mnl_attr_get_u64(attr); + default: + return 0; + } +} + +static inline __s64 mnl_attr_get_sint(const struct nlattr *attr) { - if (mnl_attr_get_payload_len(attr) == 4) + switch (mnl_attr_get_payload_len(attr)) { + case 4: return mnl_attr_get_u32(attr); - return mnl_attr_get_u64(attr); + case 8: + return mnl_attr_get_u64(attr); + default: + return 0; + } } static inline void -mnl_attr_put_uint(struct nlmsghdr *nlh, uint16_t type, uint64_t data) +mnl_attr_put_uint(struct nlmsghdr *nlh, __u16 type, __u64 data) { - if ((uint32_t)data == (uint64_t)data) - return mnl_attr_put_u32(nlh, type, data); - return mnl_attr_put_u64(nlh, type, data); + if ((__u32)data == (__u64)data) + mnl_attr_put_u32(nlh, type, data); + else + mnl_attr_put_u64(nlh, type, data); +} + +static inline void +mnl_attr_put_sint(struct nlmsghdr *nlh, __u16 type, __s64 data) +{ + if ((__s32)data == (__s64)data) + mnl_attr_put_u32(nlh, type, data); + else + mnl_attr_put_u64(nlh, type, data); } -#endif #endif -- cgit v1.2.3 From 5600c580383acb655f0fd8d4963cbea52f056938 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:19 -0800 Subject: tools: ynl: create local attribute helpers Don't use mnl attr helpers, we're trying to remove the libmnl dependency. Create both signed and unsigned helpers, libmnl had unsigned helpers, so code generator no longer needs the mnl_type() hack. The new helpers are written from first principles, but are hopefully not too buggy. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 196 +++++++++++++++++++++++++++++++++++++++---- tools/net/ynl/lib/ynl.c | 44 +++++----- tools/net/ynl/ynl-gen-c.py | 61 +++++--------- 3 files changed, 227 insertions(+), 74 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index eaa0d432366c..d42d44a77dcb 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -127,45 +127,213 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); /* Attribute helpers */ -static inline __u64 mnl_attr_get_uint(const struct nlattr *attr) +static inline void *ynl_nlmsg_end_addr(const struct nlmsghdr *nlh) { - switch (mnl_attr_get_payload_len(attr)) { + return (char *)nlh + nlh->nlmsg_len; +} + +static inline unsigned int ynl_attr_type(const struct nlattr *attr) +{ + return attr->nla_type & NLA_TYPE_MASK; +} + +static inline unsigned int ynl_attr_data_len(const struct nlattr *attr) +{ + return attr->nla_len - NLA_HDRLEN; +} + +static inline void *ynl_attr_data(const struct nlattr *attr) +{ + return (unsigned char *)attr + NLA_HDRLEN; +} + +static inline struct nlattr * +ynl_attr_nest_start(struct nlmsghdr *nlh, unsigned int attr_type) +{ + struct nlattr *attr; + + attr = ynl_nlmsg_end_addr(nlh); + attr->nla_type = attr_type | NLA_F_NESTED; + nlh->nlmsg_len += NLA_HDRLEN; + + return attr; +} + +static inline void +ynl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *attr) +{ + attr->nla_len = (char *)ynl_nlmsg_end_addr(nlh) - (char *)attr; +} + +static inline void +ynl_attr_put(struct nlmsghdr *nlh, unsigned int attr_type, + const void *value, size_t size) +{ + struct nlattr *attr; + + attr = ynl_nlmsg_end_addr(nlh); + attr->nla_type = attr_type; + attr->nla_len = NLA_HDRLEN + size; + + memcpy(ynl_attr_data(attr), value, size); + + nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); +} + +static inline void +ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) +{ + struct nlattr *attr; + const char *end; + + attr = ynl_nlmsg_end_addr(nlh); + attr->nla_type = attr_type; + + end = stpcpy(ynl_attr_data(attr), str); + attr->nla_len = + NLA_HDRLEN + NLA_ALIGN(end - (char *)ynl_attr_data(attr)); + + nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); +} + +static inline const char *ynl_attr_get_str(const struct nlattr *attr) +{ + return (const char *)ynl_attr_data(attr); +} + +static inline __s8 ynl_attr_get_s8(const struct nlattr *attr) +{ + return *(__s8 *)ynl_attr_data(attr); +} + +static inline __s16 ynl_attr_get_s16(const struct nlattr *attr) +{ + return *(__s16 *)ynl_attr_data(attr); +} + +static inline __s32 ynl_attr_get_s32(const struct nlattr *attr) +{ + return *(__s32 *)ynl_attr_data(attr); +} + +static inline __s64 ynl_attr_get_s64(const struct nlattr *attr) +{ + __s64 tmp; + + memcpy(&tmp, (unsigned char *)(attr + 1), sizeof(tmp)); + return tmp; +} + +static inline __u8 ynl_attr_get_u8(const struct nlattr *attr) +{ + return *(__u8 *)ynl_attr_data(attr); +} + +static inline __u16 ynl_attr_get_u16(const struct nlattr *attr) +{ + return *(__u16 *)ynl_attr_data(attr); +} + +static inline __u32 ynl_attr_get_u32(const struct nlattr *attr) +{ + return *(__u32 *)ynl_attr_data(attr); +} + +static inline __u64 ynl_attr_get_u64(const struct nlattr *attr) +{ + __u64 tmp; + + memcpy(&tmp, (unsigned char *)(attr + 1), sizeof(tmp)); + return tmp; +} + +static inline void +ynl_attr_put_s8(struct nlmsghdr *nlh, unsigned int attr_type, __s8 value) +{ + ynl_attr_put(nlh, attr_type, &value, sizeof(value)); +} + +static inline void +ynl_attr_put_s16(struct nlmsghdr *nlh, unsigned int attr_type, __s16 value) +{ + ynl_attr_put(nlh, attr_type, &value, sizeof(value)); +} + +static inline void +ynl_attr_put_s32(struct nlmsghdr *nlh, unsigned int attr_type, __s32 value) +{ + ynl_attr_put(nlh, attr_type, &value, sizeof(value)); +} + +static inline void +ynl_attr_put_s64(struct nlmsghdr *nlh, unsigned int attr_type, __s64 value) +{ + ynl_attr_put(nlh, attr_type, &value, sizeof(value)); +} + +static inline void +ynl_attr_put_u8(struct nlmsghdr *nlh, unsigned int attr_type, __u8 value) +{ + ynl_attr_put(nlh, attr_type, &value, sizeof(value)); +} + +static inline void +ynl_attr_put_u16(struct nlmsghdr *nlh, unsigned int attr_type, __u16 value) +{ + ynl_attr_put(nlh, attr_type, &value, sizeof(value)); +} + +static inline void +ynl_attr_put_u32(struct nlmsghdr *nlh, unsigned int attr_type, __u32 value) +{ + ynl_attr_put(nlh, attr_type, &value, sizeof(value)); +} + +static inline void +ynl_attr_put_u64(struct nlmsghdr *nlh, unsigned int attr_type, __u64 value) +{ + ynl_attr_put(nlh, attr_type, &value, sizeof(value)); +} + +static inline __u64 ynl_attr_get_uint(const struct nlattr *attr) +{ + switch (ynl_attr_data_len(attr)) { case 4: - return mnl_attr_get_u32(attr); + return ynl_attr_get_u32(attr); case 8: - return mnl_attr_get_u64(attr); + return ynl_attr_get_u64(attr); default: return 0; } } -static inline __s64 mnl_attr_get_sint(const struct nlattr *attr) +static inline __s64 ynl_attr_get_sint(const struct nlattr *attr) { - switch (mnl_attr_get_payload_len(attr)) { + switch (ynl_attr_data_len(attr)) { case 4: - return mnl_attr_get_u32(attr); + return ynl_attr_get_s32(attr); case 8: - return mnl_attr_get_u64(attr); + return ynl_attr_get_s64(attr); default: return 0; } } static inline void -mnl_attr_put_uint(struct nlmsghdr *nlh, __u16 type, __u64 data) +ynl_attr_put_uint(struct nlmsghdr *nlh, __u16 type, __u64 data) { if ((__u32)data == (__u64)data) - mnl_attr_put_u32(nlh, type, data); + ynl_attr_put_u32(nlh, type, data); else - mnl_attr_put_u64(nlh, type, data); + ynl_attr_put_u64(nlh, type, data); } static inline void -mnl_attr_put_sint(struct nlmsghdr *nlh, __u16 type, __s64 data) +ynl_attr_put_sint(struct nlmsghdr *nlh, __u16 type, __s64 data) { if ((__s32)data == (__s64)data) - mnl_attr_put_u32(nlh, type, data); + ynl_attr_put_s32(nlh, type, data); else - mnl_attr_put_u64(nlh, type, data); + ynl_attr_put_s64(nlh, type, data); } #endif diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 6e6d474c8366..b9ed587af676 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -94,7 +94,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, mnl_attr_for_each_payload(start, data_len) { astart_off = (char *)attr - (char *)start; - aend_off = astart_off + mnl_attr_get_payload_len(attr); + aend_off = astart_off + ynl_attr_data_len(attr); if (aend_off <= off) continue; @@ -106,7 +106,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, off -= astart_off; - type = mnl_attr_get_type(attr); + type = ynl_attr_type(attr); if (ynl_err_walk_report_one(policy, type, str, str_sz, &n)) return n; @@ -124,8 +124,8 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, } off -= sizeof(struct nlattr); - start = mnl_attr_get_payload(attr); - end = start + mnl_attr_get_payload_len(attr); + start = ynl_attr_data(attr); + end = start + ynl_attr_data_len(attr); return n + ynl_err_walk(ys, start, end, off, policy->table[type].nest, &str[n], str_sz - n, nest_pol); @@ -153,8 +153,8 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, mnl_attr_for_each(attr, nlh, hlen) { unsigned int len, type; - len = mnl_attr_get_payload_len(attr); - type = mnl_attr_get_type(attr); + len = ynl_attr_data_len(attr); + type = ynl_attr_type(attr); if (type > NLMSGERR_ATTR_MAX) continue; @@ -169,7 +169,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, return MNL_CB_ERROR; break; case NLMSGERR_ATTR_MSG: - str = mnl_attr_get_payload(attr); + str = ynl_attr_get_str(attr); if (str[len - 1]) return MNL_CB_ERROR; break; @@ -185,7 +185,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, unsigned int n, off; void *start, *end; - ys->err.attr_offs = mnl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]); + ys->err.attr_offs = ynl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]); n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ", str ? " (" : ""); @@ -211,7 +211,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, void *start, *end; int n2; - type = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]); + type = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]); n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ", bad_attr[0] ? ", " : (str ? " (" : "")); @@ -222,7 +222,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, nest_pol = ys->req_policy; if (tb[NLMSGERR_ATTR_MISS_NEST]) { - off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]); + off = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]); off -= sizeof(struct nlmsghdr); off -= ys->family->hdr_len; @@ -314,9 +314,9 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) unsigned int type, len; unsigned char *data; - data = mnl_attr_get_payload(attr); - len = mnl_attr_get_payload_len(attr); - type = mnl_attr_get_type(attr); + data = ynl_attr_data(attr); + len = ynl_attr_data_len(attr); + type = ynl_attr_type(attr); if (type > yarg->rsp_policy->max_attr) { yerr(yarg->ys, YNL_ERROR_INTERNAL, "Internal error, validating unknown attribute"); @@ -514,11 +514,11 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) i = 0; mnl_attr_for_each_nested(entry, mcasts) { mnl_attr_for_each_nested(attr, entry) { - if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_ID) - ys->mcast_groups[i].id = mnl_attr_get_u32(attr); - if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) { + if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GRP_ID) + ys->mcast_groups[i].id = ynl_attr_get_u32(attr); + if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) { strncpy(ys->mcast_groups[i].name, - mnl_attr_get_str(attr), + ynl_attr_get_str(attr), GENL_NAMSIZ - 1); ys->mcast_groups[i].name[GENL_NAMSIZ - 1] = 0; } @@ -536,19 +536,19 @@ static int ynl_get_family_info_cb(const struct nlmsghdr *nlh, void *data) bool found_id = true; mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GROUPS) + if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GROUPS) if (ynl_get_family_info_mcast(ys, attr)) return MNL_CB_ERROR; - if (mnl_attr_get_type(attr) != CTRL_ATTR_FAMILY_ID) + if (ynl_attr_type(attr) != CTRL_ATTR_FAMILY_ID) continue; - if (mnl_attr_get_payload_len(attr) != sizeof(__u16)) { + if (ynl_attr_data_len(attr) != sizeof(__u16)) { yerr(ys, YNL_ERROR_ATTR_INVALID, "Invalid family ID"); return MNL_CB_ERROR; } - ys->family_id = mnl_attr_get_u16(attr); + ys->family_id = ynl_attr_get_u16(attr); found_id = true; } @@ -566,7 +566,7 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) int err; nlh = ynl_gemsg_start_req(ys, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 1); - mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name); + ynl_attr_put_str(nlh, CTRL_ATTR_FAMILY_NAME, family_name); err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len); if (err < 0) { diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 7fc1aa788f6f..99a3eb7b158b 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -168,15 +168,6 @@ class Type(SpecAttr): spec = self._attr_policy(policy) cw.p(f"\t[{self.enum_name}] = {spec},") - def _mnl_type(self): - # mnl does not have helpers for signed integer types - # turn signed type into unsigned - # this only makes sense for scalar types - t = self.type - if t[0] == 's': - t = 'u' + t[1:] - return t - def _attr_typol(self): raise Exception(f"Type policy not implemented for class type {self.type}") @@ -192,7 +183,7 @@ class Type(SpecAttr): ri.cw.p(f"{line};") def _attr_put_simple(self, ri, var, put_type): - line = f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name})" + line = f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name})" self._attr_put_line(ri, var, line) def attr_put(self, ri, var): @@ -357,9 +348,6 @@ class TypeScalar(Type): else: self.type_name = '__' + self.type - def mnl_type(self): - return self._mnl_type() - def _attr_policy(self, policy): if 'flags-mask' in self.checks or self.is_bitfield: if self.is_bitfield: @@ -387,10 +375,10 @@ class TypeScalar(Type): return [f'{self.type_name} {self.c_name}{self.byte_order_comment}'] def attr_put(self, ri, var): - self._attr_put_simple(ri, var, self.mnl_type()) + self._attr_put_simple(ri, var, self.type) def _attr_get(self, ri, var): - return f"{var}->{self.c_name} = mnl_attr_get_{self.mnl_type()}(attr);", None, None + return f"{var}->{self.c_name} = ynl_attr_get_{self.type}(attr);", None, None def _setter_lines(self, ri, member, presence): return [f"{member} = {self.c_name};"] @@ -404,7 +392,7 @@ class TypeFlag(Type): return '.type = YNL_PT_FLAG, ' def attr_put(self, ri, var): - self._attr_put_line(ri, var, f"mnl_attr_put(nlh, {self.enum_name}, 0, NULL)") + self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, NULL, 0)") def _attr_get(self, ri, var): return [], None, None @@ -446,15 +434,15 @@ class TypeString(Type): cw.p(f"\t[{self.enum_name}] = {spec},") def attr_put(self, ri, var): - self._attr_put_simple(ri, var, 'strz') + self._attr_put_simple(ri, var, 'str') def _attr_get(self, ri, var): len_mem = var + '->_present.' + self.c_name + '_len' return [f"{len_mem} = len;", f"{var}->{self.c_name} = malloc(len + 1);", - f"memcpy({var}->{self.c_name}, mnl_attr_get_str(attr), len);", + f"memcpy({var}->{self.c_name}, ynl_attr_get_str(attr), len);", f"{var}->{self.c_name}[len] = 0;"], \ - ['len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));'], \ + ['len = strnlen(ynl_attr_get_str(attr), ynl_attr_data_len(attr));'], \ ['unsigned int len;'] def _setter_lines(self, ri, member, presence): @@ -493,15 +481,15 @@ class TypeBinary(Type): return mem def attr_put(self, ri, var): - self._attr_put_line(ri, var, f"mnl_attr_put(nlh, {self.enum_name}, " + - f"{var}->_present.{self.c_name}_len, {var}->{self.c_name})") + self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, " + + f"{var}->{self.c_name}, {var}->_present.{self.c_name}_len)") def _attr_get(self, ri, var): len_mem = var + '->_present.' + self.c_name + '_len' return [f"{len_mem} = len;", f"{var}->{self.c_name} = malloc(len);", - f"memcpy({var}->{self.c_name}, mnl_attr_get_payload(attr), len);"], \ - ['len = mnl_attr_get_payload_len(attr);'], \ + f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \ + ['len = ynl_attr_data_len(attr);'], \ ['unsigned int len;'] def _setter_lines(self, ri, member, presence): @@ -526,11 +514,11 @@ class TypeBitfield32(Type): return f"NLA_POLICY_BITFIELD32({mask})" def attr_put(self, ri, var): - line = f"mnl_attr_put(nlh, {self.enum_name}, sizeof(struct nla_bitfield32), &{var}->{self.c_name})" + line = f"ynl_attr_put(nlh, {self.enum_name}, &{var}->{self.c_name}, sizeof(struct nla_bitfield32))" self._attr_put_line(ri, var, line) def _attr_get(self, ri, var): - return f"memcpy(&{var}->{self.c_name}, mnl_attr_get_payload(attr), sizeof(struct nla_bitfield32));", None, None + return f"memcpy(&{var}->{self.c_name}, ynl_attr_data(attr), sizeof(struct nla_bitfield32));", None, None def _setter_lines(self, ri, member, presence): return [f"memcpy(&{member}, {self.c_name}, sizeof(struct nla_bitfield32));"] @@ -589,9 +577,6 @@ class TypeMultiAttr(Type): def presence_type(self): return 'count' - def mnl_type(self): - return self._mnl_type() - def _complex_member_type(self, ri): if 'type' not in self.attr or self.attr['type'] == 'nest': return self.nested_struct_type @@ -625,9 +610,9 @@ class TypeMultiAttr(Type): def attr_put(self, ri, var): if self.attr['type'] in scalars: - put_type = self.mnl_type() + put_type = self.type ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") - ri.cw.p(f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);") + ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);") elif 'type' not in self.attr or self.attr['type'] == 'nest': ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + @@ -690,8 +675,8 @@ class TypeNestTypeValue(Type): local_vars += [f'__u32 {", ".join(tv_names)};'] for level in self.attr["type-value"]: level = c_lower(level) - get_lines += [f'attr_{level} = mnl_attr_get_payload({prev});'] - get_lines += [f'{level} = mnl_attr_get_type(attr_{level});'] + get_lines += [f'attr_{level} = ynl_attr_data({prev});'] + get_lines += [f'{level} = ynl_attr_type(attr_{level});'] prev = 'attr_' + level tv_args = f", {', '.join(tv_names)}" @@ -1612,12 +1597,12 @@ def put_req_nested(ri, struct): ri.cw.block_start() ri.cw.write_func_lvar('struct nlattr *nest;') - ri.cw.p("nest = mnl_attr_nest_start(nlh, attr_type);") + ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);") for _, arg in struct.member_list(): arg.attr_put(ri, "obj") - ri.cw.p("mnl_attr_nest_end(nlh, nest);") + ri.cw.p("ynl_attr_nest_end(nlh, nest);") ri.cw.nl() ri.cw.p('return 0;') @@ -1674,7 +1659,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.nl() ri.cw.block_start(line=iter_line) - ri.cw.p('unsigned int type = mnl_attr_get_type(attr);') + ri.cw.p('unsigned int type = ynl_attr_type(attr);') ri.cw.nl() first = True @@ -1696,7 +1681,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") ri.cw.block_start(line=f"mnl_attr_for_each_nested(attr, attr_{aspec.c_name})") ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") - ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, mnl_attr_get_type(attr)))") + ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))") ri.cw.p('return MNL_CB_ERROR;') ri.cw.p('i++;') ri.cw.block_end() @@ -1712,13 +1697,13 @@ def _multi_parse(ri, struct, init_lines, local_vars): if 'nested-attributes' in aspec: ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") ri.cw.block_start(line=iter_line) - ri.cw.block_start(line=f"if (mnl_attr_get_type(attr) == {aspec.enum_name})") + ri.cw.block_start(line=f"if (ynl_attr_type(attr) == {aspec.enum_name})") if 'nested-attributes' in aspec: ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr))") ri.cw.p('return MNL_CB_ERROR;') elif aspec.type in scalars: - ri.cw.p(f"dst->{aspec.c_name}[i] = mnl_attr_get_{aspec.mnl_type()}(attr);") + ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.type}(attr);") else: raise Exception('Nest parsing type not supported yet') ri.cw.p('i++;') -- cgit v1.2.3 From 66fcdad0884223eb9dfa9b76161dca7917e6e03b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:20 -0800 Subject: tools: ynl: create local for_each helpers Create ynl_attr_for_each*() iteration helpers. Use them instead of the mnl ones. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 47 ++++++++++++++++++++++++++++++++++++++++++++ tools/net/ynl/lib/ynl.c | 12 +++++------ tools/net/ynl/ynl-gen-c.py | 8 ++++---- 3 files changed, 57 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index d42d44a77dcb..a339732450c3 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -147,6 +147,53 @@ static inline void *ynl_attr_data(const struct nlattr *attr) return (unsigned char *)attr + NLA_HDRLEN; } +static inline void *ynl_attr_data_end(const struct nlattr *attr) +{ + return ynl_attr_data(attr) + ynl_attr_data_len(attr); +} + +#define ynl_attr_for_each(attr, nlh, fixed_hdr_sz) \ + for ((attr) = ynl_attr_first(nlh, (nlh)->nlmsg_len, \ + NLMSG_HDRLEN + fixed_hdr_sz); attr; \ + (attr) = ynl_attr_next(ynl_nlmsg_end_addr(nlh), attr)) + +#define ynl_attr_for_each_nested(attr, outer) \ + for ((attr) = ynl_attr_first(outer, outer->nla_len, \ + sizeof(struct nlattr)); attr; \ + (attr) = ynl_attr_next(ynl_attr_data_end(outer), attr)) + +#define ynl_attr_for_each_payload(start, len, attr) \ + for ((attr) = ynl_attr_first(start, len, 0); attr; \ + (attr) = ynl_attr_next(start + len, attr)) + +static inline struct nlattr * +ynl_attr_if_good(const void *end, struct nlattr *attr) +{ + if (attr + 1 > (const struct nlattr *)end) + return NULL; + if (ynl_attr_data_end(attr) > end) + return NULL; + return attr; +} + +static inline struct nlattr * +ynl_attr_next(const void *end, const struct nlattr *prev) +{ + struct nlattr *attr; + + attr = (void *)((char *)prev + NLA_ALIGN(prev->nla_len)); + return ynl_attr_if_good(end, attr); +} + +static inline struct nlattr * +ynl_attr_first(const void *start, size_t len, size_t skip) +{ + struct nlattr *attr; + + attr = (void *)((char *)start + NLMSG_ALIGN(skip)); + return ynl_attr_if_good(start + len, attr); +} + static inline struct nlattr * ynl_attr_nest_start(struct nlmsghdr *nlh, unsigned int attr_type) { diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index b9ed587af676..0f71e69b70c0 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -92,7 +92,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, data_len = end - start; - mnl_attr_for_each_payload(start, data_len) { + ynl_attr_for_each_payload(start, data_len, attr) { astart_off = (char *)attr - (char *)start; aend_off = astart_off + ynl_attr_data_len(attr); if (aend_off <= off) @@ -150,7 +150,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, return MNL_CB_OK; } - mnl_attr_for_each(attr, nlh, hlen) { + ynl_attr_for_each(attr, nlh, hlen) { unsigned int len, type; len = ynl_attr_data_len(attr); @@ -500,7 +500,7 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) const struct nlattr *entry, *attr; unsigned int i; - mnl_attr_for_each_nested(attr, mcasts) + ynl_attr_for_each_nested(attr, mcasts) ys->n_mcast_groups++; if (!ys->n_mcast_groups) @@ -512,8 +512,8 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) return MNL_CB_ERROR; i = 0; - mnl_attr_for_each_nested(entry, mcasts) { - mnl_attr_for_each_nested(attr, entry) { + ynl_attr_for_each_nested(entry, mcasts) { + ynl_attr_for_each_nested(attr, entry) { if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GRP_ID) ys->mcast_groups[i].id = ynl_attr_get_u32(attr); if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) { @@ -535,7 +535,7 @@ static int ynl_get_family_info_cb(const struct nlmsghdr *nlh, void *data) const struct nlattr *attr; bool found_id = true; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + ynl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GROUPS) if (ynl_get_family_info_mcast(ys, attr)) return MNL_CB_ERROR; diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 99a3eb7b158b..eabce06f03d9 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -650,7 +650,7 @@ class TypeArrayNest(Type): def _attr_get(self, ri, var): local_vars = ['const struct nlattr *attr2;'] get_lines = [f'attr_{self.c_name} = attr;', - 'mnl_attr_for_each_nested(attr2, attr)', + 'ynl_attr_for_each_nested(attr2, attr)', f'\t{var}->n_{self.c_name}++;'] return get_lines, None, local_vars @@ -1612,11 +1612,11 @@ def put_req_nested(ri, struct): def _multi_parse(ri, struct, init_lines, local_vars): if struct.nested: - iter_line = "mnl_attr_for_each_nested(attr, nested)" + iter_line = "ynl_attr_for_each_nested(attr, nested)" else: if ri.fixed_hdr: local_vars += ['void *hdr;'] - iter_line = "mnl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)" + iter_line = "ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)" array_nests = set() multi_attrs = set() @@ -1679,7 +1679,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};") ri.cw.p('i = 0;') ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") - ri.cw.block_start(line=f"mnl_attr_for_each_nested(attr, attr_{aspec.c_name})") + ri.cw.block_start(line=f"ynl_attr_for_each_nested(attr, attr_{aspec.c_name})") ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))") ri.cw.p('return MNL_CB_ERROR;') -- cgit v1.2.3 From 0b3ece44220887e7cf1e7469867fdd8ce9986c16 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:21 -0800 Subject: tools: ynl: create local nlmsg access helpers Create helpers for accessing payloads of struct nlmsg. Use them instead of the libmnl ones. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 39 ++++++++++++++++++++++++++++++++++++++- tools/net/ynl/lib/ynl.c | 24 +++++++++++------------- tools/net/ynl/ynl-gen-c.py | 6 +++--- 3 files changed, 52 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index a339732450c3..1dfa09497be8 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -125,13 +125,50 @@ int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd); int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); -/* Attribute helpers */ +/* Netlink message handling helpers */ + +static inline struct nlmsghdr *ynl_nlmsg_put_header(void *buf) +{ + struct nlmsghdr *nlh = buf; + + memset(nlh, 0, sizeof(*nlh)); + nlh->nlmsg_len = NLMSG_HDRLEN; + + return nlh; +} + +static inline unsigned int ynl_nlmsg_data_len(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_len - NLMSG_HDRLEN; +} + +static inline void *ynl_nlmsg_data(const struct nlmsghdr *nlh) +{ + return (unsigned char *)nlh + NLMSG_HDRLEN; +} + +static inline void * +ynl_nlmsg_data_offset(const struct nlmsghdr *nlh, unsigned int offset) +{ + return (unsigned char *)nlh + NLMSG_HDRLEN + offset; +} static inline void *ynl_nlmsg_end_addr(const struct nlmsghdr *nlh) { return (char *)nlh + nlh->nlmsg_len; } +static inline void * +ynl_nlmsg_put_extra_header(struct nlmsghdr *nlh, unsigned int size) +{ + void *tail = ynl_nlmsg_end_addr(nlh); + + nlh->nlmsg_len += NLMSG_ALIGN(size); + return tail; +} + +/* Netlink attribute helpers */ + static inline unsigned int ynl_attr_type(const struct nlattr *attr) { return attr->nla_type & NLA_TYPE_MASK; diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 0f71e69b70c0..5f303d6e751f 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -190,9 +190,8 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ", str ? " (" : ""); - start = mnl_nlmsg_get_payload_offset(ys->nlh, - ys->family->hdr_len); - end = mnl_nlmsg_get_payload_tail(ys->nlh); + start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len); + end = ynl_nlmsg_end_addr(ys->nlh); off = ys->err.attr_offs; off -= sizeof(struct nlmsghdr); @@ -216,9 +215,8 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ", bad_attr[0] ? ", " : (str ? " (" : "")); - start = mnl_nlmsg_get_payload_offset(ys->nlh, - ys->family->hdr_len); - end = mnl_nlmsg_get_payload_tail(ys->nlh); + start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len); + end = ynl_nlmsg_end_addr(ys->nlh); nest_pol = ys->req_policy; if (tb[NLMSGERR_ATTR_MISS_NEST]) { @@ -259,7 +257,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, static int ynl_cb_error(const struct nlmsghdr *nlh, void *data) { - const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh); + const struct nlmsgerr *err = ynl_nlmsg_data(nlh); struct ynl_parse_arg *yarg = data; unsigned int hlen; int code; @@ -270,7 +268,7 @@ static int ynl_cb_error(const struct nlmsghdr *nlh, void *data) hlen = sizeof(*err); if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) - hlen += mnl_nlmsg_get_payload_len(&err->msg); + hlen += ynl_nlmsg_data_len(&err->msg); ynl_ext_ack_check(yarg->ys, nlh, hlen); @@ -413,7 +411,7 @@ struct nlmsghdr *ynl_msg_start(struct ynl_sock *ys, __u32 id, __u16 flags) ynl_err_reset(ys); - nlh = ys->nlh = mnl_nlmsg_put_header(ys->tx_buf); + nlh = ys->nlh = ynl_nlmsg_put_header(ys->tx_buf); nlh->nlmsg_type = id; nlh->nlmsg_flags = flags; nlh->nlmsg_seq = ++ys->seq; @@ -435,7 +433,7 @@ ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags, gehdr.cmd = cmd; gehdr.version = version; - data = mnl_nlmsg_put_extra_header(nlh, sizeof(gehdr)); + data = ynl_nlmsg_put_extra_header(nlh, sizeof(gehdr)); memcpy(data, &gehdr, sizeof(gehdr)); return nlh; @@ -718,7 +716,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh) struct genlmsghdr *gehdr; int ret; - gehdr = mnl_nlmsg_get_payload(nlh); + gehdr = ynl_nlmsg_data(nlh); if (gehdr->cmd >= ys->family->ntf_info_size) return MNL_CB_ERROR; info = &ys->family->ntf_info[gehdr->cmd]; @@ -808,13 +806,13 @@ ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd) { struct genlmsghdr *gehdr; - if (mnl_nlmsg_get_payload_len(nlh) < sizeof(*gehdr)) { + if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) { yerr(ys, YNL_ERROR_INV_RESP, "Kernel responded with truncated message"); return -1; } - gehdr = mnl_nlmsg_get_payload(nlh); + gehdr = ynl_nlmsg_data(nlh); if (gehdr->cmd != rsp_cmd) return ynl_ntf_parse(ys, nlh); diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index eabce06f03d9..90d7bf4849fc 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1650,7 +1650,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.p(f'dst->{arg} = {arg};') if ri.fixed_hdr: - ri.cw.p('hdr = mnl_nlmsg_get_payload_offset(nlh, sizeof(struct genlmsghdr));') + ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));') ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));") for anest in sorted(all_multi): aspec = struct[anest] @@ -1794,7 +1794,7 @@ def print_req(ri): if ri.fixed_hdr: ri.cw.p("hdr_len = sizeof(req->_hdr);") - ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);") + ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);") ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);") ri.cw.nl() @@ -1857,7 +1857,7 @@ def print_dump(ri): if ri.fixed_hdr: ri.cw.p("hdr_len = sizeof(req->_hdr);") - ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);") + ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);") ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);") ri.cw.nl() -- cgit v1.2.3 From 7600875f295f4591eebbb3744ad5de3b4f8e4117 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:22 -0800 Subject: tools: ynl: create local ARRAY_SIZE() helper libc doesn't have an ARRAY_SIZE() create one locally. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 3 +++ tools/net/ynl/ynl-gen-c.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 1dfa09497be8..7f24d07692bf 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -27,6 +27,9 @@ enum ynl_policy_type { YNL_PT_BITFIELD32, }; +#define YNL_ARRAY_SIZE(array) (sizeof(array) ? \ + sizeof(array) / sizeof(array[0]) : 0) + struct ynl_policy_attr { enum ynl_policy_type type; unsigned int len; diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 90d7bf4849fc..407902b903e0 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1535,7 +1535,7 @@ def _put_enum_to_str_helper(cw, render_name, map_name, arg_name, enum=None): cw.block_start() if enum and enum.type == 'flags': cw.p(f'{arg_name} = ffs({arg_name}) - 1;') - cw.p(f'if ({arg_name} < 0 || {arg_name} >= (int)MNL_ARRAY_SIZE({map_name}))') + cw.p(f'if ({arg_name} < 0 || {arg_name} >= (int)YNL_ARRAY_SIZE({map_name}))') cw.p('return NULL;') cw.p(f'return {map_name}[{arg_name}];') cw.block_end() @@ -2569,7 +2569,7 @@ def render_user_family(family, cw, prototype): cw.p('.hdr_len\t= sizeof(struct genlmsghdr),') if family.ntfs: cw.p(f".ntf_info\t= {family['name']}_ntf_info,") - cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),") + cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family['name']}_ntf_info),") cw.block_end(line=';') -- cgit v1.2.3 From d62c5d487cfe41b2d47e790acc72a062308fb0f0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:23 -0800 Subject: tools: ynl: make yarg the first member of struct ynl_dump_state All YNL parsing code expects a pointer to struct ynl_parse_arg AKA yarg. For dump was pass in struct ynl_dump_state, which works fine, because struct ynl_dump_state and struct ynl_parse_arg have identical layout for the members that matter.. but it's a bit hacky. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 3 +-- tools/net/ynl/lib/ynl.c | 5 ++--- tools/net/ynl/ynl-gen-c.py | 5 +++-- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 7f24d07692bf..c44b53c8d084 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -104,8 +104,7 @@ struct ynl_req_state { }; struct ynl_dump_state { - struct ynl_sock *ys; - struct ynl_policy_nest *rsp_policy; + struct ynl_parse_arg yarg; void *first; struct ynl_dump_list_type *last; size_t alloc_sz; diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 5f303d6e751f..88456c7bb1ec 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -864,7 +864,7 @@ static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data) struct ynl_parse_arg yarg = {}; int ret; - ret = ynl_check_alien(ds->ys, nlh, ds->rsp_cmd); + ret = ynl_check_alien(ds->yarg.ys, nlh, ds->rsp_cmd); if (ret) return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK; @@ -878,8 +878,7 @@ static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data) ds->last->next = obj; ds->last = obj; - yarg.ys = ds->ys; - yarg.rsp_policy = ds->rsp_policy; + yarg = ds->yarg; yarg.data = &obj->data; return ds->cb(nlh, &yarg); diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 407902b903e0..6f57c9f00e7a 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1844,14 +1844,15 @@ def print_dump(ri): ri.cw.write_func_lvar(local_vars) - ri.cw.p('yds.ys = ys;') + ri.cw.p('yds.yarg.ys = ys;') + ri.cw.p(f"yds.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") + ri.cw.p("yds.yarg.data = NULL;") ri.cw.p(f"yds.alloc_sz = sizeof({type_name(ri, rdir(direction))});") ri.cw.p(f"yds.cb = {op_prefix(ri, 'reply', deref=True)}_parse;") if ri.op.value is not None: ri.cw.p(f'yds.rsp_cmd = {ri.op.enum_name};') else: ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};') - ri.cw.p(f"yds.rsp_policy = &{ri.struct['reply'].render_name}_nest;") ri.cw.nl() ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") -- cgit v1.2.3 From 9c29a113165fc473e6f91f40e59ece94d287f95d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:24 -0800 Subject: tools: ynl-gen: remove unused parse code Commit f2ba1e5e2208 ("tools: ynl-gen: stop generating common notification handlers") removed the last caller of the parse_cb_run() helper. We no longer need to export ynl_cb_array. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 2 -- tools/net/ynl/lib/ynl.c | 2 +- tools/net/ynl/ynl-gen-c.py | 8 -------- 3 files changed, 1 insertion(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index c44b53c8d084..ef16fcbd9f68 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -83,8 +83,6 @@ struct ynl_ntf_base_type { unsigned char data[] __attribute__((aligned(8))); }; -extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE]; - struct nlmsghdr * ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); struct nlmsghdr * diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 88456c7bb1ec..ad77ce6a1128 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -297,7 +297,7 @@ static int ynl_cb_noop(const struct nlmsghdr *nlh, void *data) return MNL_CB_OK; } -mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE] = { +static mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE] = { [NLMSG_NOOP] = ynl_cb_noop, [NLMSG_ERROR] = ynl_cb_error, [NLMSG_DONE] = ynl_cb_done, diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 6f57c9f00e7a..289a04f2cfaa 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -40,14 +40,6 @@ class BaseNlLib: def get_family_id(self): return 'ys->family_id' - def parse_cb_run(self, cb, data, is_dump=False, indent=1): - ind = '\n\t\t' + '\t' * indent + ' ' - if is_dump: - return f"mnl_cb_run2(ys->rx_buf, len, 0, 0, {cb}, {data},{ind}ynl_cb_array, NLMSG_MIN_TYPE)" - else: - return f"mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,{ind}{cb}, {data},{ind}" + \ - "ynl_cb_array, NLMSG_MIN_TYPE)" - class Type(SpecAttr): def __init__(self, family, attr_set, attr, value): -- cgit v1.2.3 From 2f22f0b313f4c11e524c68e34165e62d8276e442 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:25 -0800 Subject: tools: ynl: wrap recv() + mnl_cb_run2() into a single helper All callers to mnl_cb_run2() call mnl_socket_recvfrom() right before. Wrap the two in a helper, take typed arguments (struct ynl_parse_arg), instead of hoping that all callers remember that parser error handling requires yarg. In case of ynl_sock_read_family() we will no longer check for kernel returning no data, but that would be a kernel bug, not worth complicating the code to catch this. Calling mnl_cb_run2() on an empty buffer is legal and results in STOP (1). Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 56 ++++++++++++++++--------------------------------- 1 file changed, 18 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index ad77ce6a1128..7c0f404526a0 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -491,6 +491,19 @@ int ynl_cb_null(const struct nlmsghdr *nlh, void *data) return MNL_CB_ERROR; } +static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, mnl_cb_t cb) +{ + struct ynl_sock *ys = yarg->ys; + ssize_t len; + + len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE); + if (len < 0) + return len; + + return mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, + cb, yarg, ynl_cb_array, NLMSG_MIN_TYPE); +} + /* Init/fini and genetlink boiler plate */ static int ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) @@ -572,14 +585,7 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) return err; } - err = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE); - if (err <= 0) { - perr(ys, "failed to receive the socket family info"); - return err; - } - err = mnl_cb_run2(ys->rx_buf, err, ys->seq, ys->portid, - ynl_get_family_info_cb, &yarg, - ynl_cb_array, ARRAY_SIZE(ynl_cb_array)); + err = ynl_sock_read_msgs(&yarg, ynl_get_family_info_cb); if (err < 0) { free(ys->mcast_groups); perr(ys, "failed to receive the socket family info - no such family?"); @@ -755,7 +761,6 @@ static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data) int ynl_ntf_check(struct ynl_sock *ys) { struct ynl_parse_arg yarg = { .ys = ys, }; - ssize_t len; int err; do { @@ -770,14 +775,7 @@ int ynl_ntf_check(struct ynl_sock *ys) if (err < 1) return err; - len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, - MNL_SOCKET_BUFFER_SIZE); - if (len < 0) - return len; - - err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, - ynl_ntf_trampoline, &yarg, - ynl_cb_array, NLMSG_MIN_TYPE); + err = ynl_sock_read_msgs(&yarg, ynl_ntf_trampoline); if (err < 0) return err; } while (err > 0); @@ -834,7 +832,6 @@ static int ynl_req_trampoline(const struct nlmsghdr *nlh, void *data) int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, struct ynl_req_state *yrs) { - ssize_t len; int err; err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len); @@ -842,19 +839,10 @@ int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, return err; do { - len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, - MNL_SOCKET_BUFFER_SIZE); - if (len < 0) - return len; - - err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, - ynl_req_trampoline, yrs, - ynl_cb_array, NLMSG_MIN_TYPE); - if (err < 0) - return err; + err = ynl_sock_read_msgs(&yrs->yarg, ynl_req_trampoline); } while (err > 0); - return 0; + return err; } static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data) @@ -896,7 +884,6 @@ static void *ynl_dump_end(struct ynl_dump_state *ds) int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, struct ynl_dump_state *yds) { - ssize_t len; int err; err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len); @@ -904,14 +891,7 @@ int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, return err; do { - len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, - MNL_SOCKET_BUFFER_SIZE); - if (len < 0) - goto err_close_list; - - err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, - ynl_dump_trampoline, yds, - ynl_cb_array, NLMSG_MIN_TYPE); + err = ynl_sock_read_msgs(&yds->yarg, ynl_dump_trampoline); if (err < 0) goto err_close_list; } while (err > 0); -- cgit v1.2.3 From 1621378aab19e6197fe95dc76d1825ef91d40e49 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:26 -0800 Subject: tools: ynl: use ynl_sock_read_msgs() for ACK handling ynl_recv_ack() is simple and it's the only user of mnl_cb_run(). Now that ynl_sock_read_msgs() exists it's actually less code to use ynl_sock_read_msgs() instead of being special. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-10-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 3 --- tools/net/ynl/lib/ynl.c | 34 ++++++++++++++-------------------- 2 files changed, 14 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index ef16fcbd9f68..42f7d29fbee0 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -90,9 +90,6 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr); -int ynl_recv_ack(struct ynl_sock *ys, int ret); -int ynl_cb_null(const struct nlmsghdr *nlh, void *data); - /* YNL specific helpers used by the auto-generated code */ struct ynl_req_state { diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 7c0f404526a0..f830990b3f4a 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -462,26 +462,7 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version) cmd, version); } -int ynl_recv_ack(struct ynl_sock *ys, int ret) -{ - struct ynl_parse_arg yarg = { .ys = ys, }; - - if (!ret) { - yerr(ys, YNL_ERROR_EXPECT_ACK, - "Expecting an ACK but nothing received"); - return -1; - } - - ret = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE); - if (ret < 0) { - perr(ys, "Socket receive failed"); - return ret; - } - return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid, - ynl_cb_null, &yarg); -} - -int ynl_cb_null(const struct nlmsghdr *nlh, void *data) +static int ynl_cb_null(const struct nlmsghdr *nlh, void *data) { struct ynl_parse_arg *yarg = data; @@ -504,6 +485,19 @@ static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, mnl_cb_t cb) cb, yarg, ynl_cb_array, NLMSG_MIN_TYPE); } +static int ynl_recv_ack(struct ynl_sock *ys, int ret) +{ + struct ynl_parse_arg yarg = { .ys = ys, }; + + if (!ret) { + yerr(ys, YNL_ERROR_EXPECT_ACK, + "Expecting an ACK but nothing received"); + return -1; + } + + return ynl_sock_read_msgs(&yarg, ynl_cb_null); +} + /* Init/fini and genetlink boiler plate */ static int ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) -- cgit v1.2.3 From 766c4b5460f4edf3fd51ef8696f89fa207bf95f8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:27 -0800 Subject: tools: ynl: stop using mnl_cb_run2() There's only one set of callbacks in YNL, for netlink control messages, and most of them are trivial. So implement the message walking directly without depending on mnl_cb_run2(). Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-11-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 63 ++++++++++++++++++++++++++++++++++--------------- tools/net/ynl/lib/ynl.h | 1 + 2 files changed, 45 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index f830990b3f4a..fd658aaff345 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -255,10 +255,10 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, return MNL_CB_OK; } -static int ynl_cb_error(const struct nlmsghdr *nlh, void *data) +static int +ynl_cb_error(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) { const struct nlmsgerr *err = ynl_nlmsg_data(nlh); - struct ynl_parse_arg *yarg = data; unsigned int hlen; int code; @@ -275,9 +275,8 @@ static int ynl_cb_error(const struct nlmsghdr *nlh, void *data) return code ? MNL_CB_ERROR : MNL_CB_STOP; } -static int ynl_cb_done(const struct nlmsghdr *nlh, void *data) +static int ynl_cb_done(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) { - struct ynl_parse_arg *yarg = data; int err; err = *(int *)NLMSG_DATA(nlh); @@ -292,18 +291,6 @@ static int ynl_cb_done(const struct nlmsghdr *nlh, void *data) return MNL_CB_STOP; } -static int ynl_cb_noop(const struct nlmsghdr *nlh, void *data) -{ - return MNL_CB_OK; -} - -static mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE] = { - [NLMSG_NOOP] = ynl_cb_noop, - [NLMSG_ERROR] = ynl_cb_error, - [NLMSG_DONE] = ynl_cb_done, - [NLMSG_OVERRUN] = ynl_cb_noop, -}; - /* Attribute validation */ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) @@ -475,14 +462,52 @@ static int ynl_cb_null(const struct nlmsghdr *nlh, void *data) static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, mnl_cb_t cb) { struct ynl_sock *ys = yarg->ys; - ssize_t len; + const struct nlmsghdr *nlh; + ssize_t len, rem; + int ret; len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE); if (len < 0) return len; - return mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, - cb, yarg, ynl_cb_array, NLMSG_MIN_TYPE); + ret = MNL_CB_STOP; + for (rem = len; rem > 0; NLMSG_NEXT(nlh, rem)) { + nlh = (struct nlmsghdr *)&ys->rx_buf[len - rem]; + if (!NLMSG_OK(nlh, rem)) { + yerr(yarg->ys, YNL_ERROR_INV_RESP, + "Invalid message or trailing data in the response."); + return MNL_CB_ERROR; + } + + if (nlh->nlmsg_flags & NLM_F_DUMP_INTR) { + /* TODO: handle this better */ + yerr(yarg->ys, YNL_ERROR_DUMP_INTER, + "Dump interrupted / inconsistent, please retry."); + return MNL_CB_ERROR; + } + + switch (nlh->nlmsg_type) { + case 0: + yerr(yarg->ys, YNL_ERROR_INV_RESP, + "Invalid message type in the response."); + return MNL_CB_ERROR; + case NLMSG_NOOP: + case NLMSG_OVERRUN ... NLMSG_MIN_TYPE - 1: + ret = MNL_CB_OK; + break; + case NLMSG_ERROR: + ret = ynl_cb_error(nlh, yarg); + break; + case NLMSG_DONE: + ret = ynl_cb_done(nlh, yarg); + break; + default: + ret = cb(nlh, yarg); + break; + } + } + + return ret; } static int ynl_recv_ack(struct ynl_sock *ys, int ret) diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index ce77a6d76ce0..4849c142fce0 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -12,6 +12,7 @@ enum ynl_error_code { YNL_ERROR_NONE = 0, __YNL_ERRNO_END = 4096, YNL_ERROR_INTERNAL, + YNL_ERROR_DUMP_INTER, YNL_ERROR_EXPECT_ACK, YNL_ERROR_EXPECT_MSG, YNL_ERROR_UNEXPECT_MSG, -- cgit v1.2.3 From dd0973d71e1feb36fd396aea7094f89888be7e42 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:28 -0800 Subject: tools: ynl: switch away from mnl_cb_t All YNL parsing callbacks take struct ynl_parse_arg as the argument. Make that official by using a local callback type instead of mnl_cb_t. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-12-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 11 ++++++++--- tools/net/ynl/lib/ynl.c | 25 ++++++++++++------------- tools/net/ynl/ynl-gen-c.py | 3 +-- 3 files changed, 21 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 42f7d29fbee0..5c0bd90c116b 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -6,6 +6,8 @@ #include #include +struct ynl_parse_arg; + /* * YNL internals / low level stuff */ @@ -30,6 +32,9 @@ enum ynl_policy_type { #define YNL_ARRAY_SIZE(array) (sizeof(array) ? \ sizeof(array) / sizeof(array[0]) : 0) +typedef int (*ynl_parse_cb_t)(const struct nlmsghdr *nlh, + struct ynl_parse_arg *yarg); + struct ynl_policy_attr { enum ynl_policy_type type; unsigned int len; @@ -94,7 +99,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr); struct ynl_req_state { struct ynl_parse_arg yarg; - mnl_cb_t cb; + ynl_parse_cb_t cb; __u32 rsp_cmd; }; @@ -103,13 +108,13 @@ struct ynl_dump_state { void *first; struct ynl_dump_list_type *last; size_t alloc_sz; - mnl_cb_t cb; + ynl_parse_cb_t cb; __u32 rsp_cmd; }; struct ynl_ntf_info { struct ynl_policy_nest *policy; - mnl_cb_t cb; + ynl_parse_cb_t cb; size_t alloc_sz; void (*free)(struct ynl_ntf_base_type *ntf); }; diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index fd658aaff345..27fb596aa791 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -449,17 +449,15 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version) cmd, version); } -static int ynl_cb_null(const struct nlmsghdr *nlh, void *data) +static int ynl_cb_null(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) { - struct ynl_parse_arg *yarg = data; - yerr(yarg->ys, YNL_ERROR_UNEXPECT_MSG, "Received a message when none were expected"); return MNL_CB_ERROR; } -static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, mnl_cb_t cb) +static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb) { struct ynl_sock *ys = yarg->ys; const struct nlmsghdr *nlh; @@ -558,9 +556,9 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) return 0; } -static int ynl_get_family_info_cb(const struct nlmsghdr *nlh, void *data) +static int +ynl_get_family_info_cb(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) { - struct ynl_parse_arg *yarg = data; struct ynl_sock *ys = yarg->ys; const struct nlattr *attr; bool found_id = true; @@ -770,10 +768,9 @@ err_free: return MNL_CB_ERROR; } -static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data) +static int +ynl_ntf_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) { - struct ynl_parse_arg *yarg = data; - return ynl_ntf_parse(yarg->ys, nlh); } @@ -836,9 +833,10 @@ ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd) return 0; } -static int ynl_req_trampoline(const struct nlmsghdr *nlh, void *data) +static +int ynl_req_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) { - struct ynl_req_state *yrs = data; + struct ynl_req_state *yrs = (void *)yarg; int ret; ret = ynl_check_alien(yrs->yarg.ys, nlh, yrs->rsp_cmd); @@ -864,9 +862,10 @@ int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, return err; } -static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data) +static int +ynl_dump_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *data) { - struct ynl_dump_state *ds = data; + struct ynl_dump_state *ds = (void *)data; struct ynl_dump_list_type *obj; struct ynl_parse_arg yarg = {}; int ret; diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 289a04f2cfaa..15a9d3b2eed3 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1737,10 +1737,9 @@ def parse_rsp_msg(ri, deref=False): return func_args = ['const struct nlmsghdr *nlh', - 'void *data'] + 'struct ynl_parse_arg *yarg'] local_vars = [f'{type_name(ri, "reply", deref=deref)} *dst;', - 'struct ynl_parse_arg *yarg = data;', 'const struct nlattr *attr;'] init_lines = ['dst = yarg->data;'] -- cgit v1.2.3 From 50042e8051fe6246e188b24f0b9bed7822582435 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:29 -0800 Subject: tools: ynl: switch away from MNL_CB_* Create a local version of the MNL_CB_* parser control values. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-13-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 6 +++++ tools/net/ynl/lib/ynl.c | 54 ++++++++++++++++++++++---------------------- tools/net/ynl/ynl-gen-c.py | 14 ++++++------ 3 files changed, 40 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 5c0bd90c116b..a3a9367aacbe 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -29,6 +29,12 @@ enum ynl_policy_type { YNL_PT_BITFIELD32, }; +enum ynl_parse_result { + YNL_PARSE_CB_ERROR = -1, + YNL_PARSE_CB_STOP = 0, + YNL_PARSE_CB_OK = 1, +}; + #define YNL_ARRAY_SIZE(array) (sizeof(array) ? \ sizeof(array) / sizeof(array[0]) : 0) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 27fb596aa791..968ab679b5e3 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -147,7 +147,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) { yerr_msg(ys, "%s", strerror(ys->err.code)); - return MNL_CB_OK; + return YNL_PARSE_CB_OK; } ynl_attr_for_each(attr, nlh, hlen) { @@ -166,12 +166,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, case NLMSGERR_ATTR_MISS_TYPE: case NLMSGERR_ATTR_MISS_NEST: if (len != sizeof(__u32)) - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; break; case NLMSGERR_ATTR_MSG: str = ynl_attr_get_str(attr); if (str[len - 1]) - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; break; default: break; @@ -252,7 +252,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, else yerr_msg(ys, "%s", strerror(ys->err.code)); - return MNL_CB_OK; + return YNL_PARSE_CB_OK; } static int @@ -272,7 +272,7 @@ ynl_cb_error(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) ynl_ext_ack_check(yarg->ys, nlh, hlen); - return code ? MNL_CB_ERROR : MNL_CB_STOP; + return code ? YNL_PARSE_CB_ERROR : YNL_PARSE_CB_STOP; } static int ynl_cb_done(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) @@ -286,9 +286,9 @@ static int ynl_cb_done(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) ynl_ext_ack_check(yarg->ys, nlh, sizeof(int)); - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; } - return MNL_CB_STOP; + return YNL_PARSE_CB_STOP; } /* Attribute validation */ @@ -454,7 +454,7 @@ static int ynl_cb_null(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) yerr(yarg->ys, YNL_ERROR_UNEXPECT_MSG, "Received a message when none were expected"); - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; } static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb) @@ -468,30 +468,30 @@ static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb) if (len < 0) return len; - ret = MNL_CB_STOP; + ret = YNL_PARSE_CB_STOP; for (rem = len; rem > 0; NLMSG_NEXT(nlh, rem)) { nlh = (struct nlmsghdr *)&ys->rx_buf[len - rem]; if (!NLMSG_OK(nlh, rem)) { yerr(yarg->ys, YNL_ERROR_INV_RESP, "Invalid message or trailing data in the response."); - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; } if (nlh->nlmsg_flags & NLM_F_DUMP_INTR) { /* TODO: handle this better */ yerr(yarg->ys, YNL_ERROR_DUMP_INTER, "Dump interrupted / inconsistent, please retry."); - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; } switch (nlh->nlmsg_type) { case 0: yerr(yarg->ys, YNL_ERROR_INV_RESP, "Invalid message type in the response."); - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; case NLMSG_NOOP: case NLMSG_OVERRUN ... NLMSG_MIN_TYPE - 1: - ret = MNL_CB_OK; + ret = YNL_PARSE_CB_OK; break; case NLMSG_ERROR: ret = ynl_cb_error(nlh, yarg); @@ -537,7 +537,7 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) ys->mcast_groups = calloc(ys->n_mcast_groups, sizeof(*ys->mcast_groups)); if (!ys->mcast_groups) - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; i = 0; ynl_attr_for_each_nested(entry, mcasts) { @@ -566,14 +566,14 @@ ynl_get_family_info_cb(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) ynl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GROUPS) if (ynl_get_family_info_mcast(ys, attr)) - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; if (ynl_attr_type(attr) != CTRL_ATTR_FAMILY_ID) continue; if (ynl_attr_data_len(attr) != sizeof(__u16)) { yerr(ys, YNL_ERROR_ATTR_INVALID, "Invalid family ID"); - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; } ys->family_id = ynl_attr_get_u16(attr); @@ -582,9 +582,9 @@ ynl_get_family_info_cb(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) if (!found_id) { yerr(ys, YNL_ERROR_ATTR_MISSING, "Family ID missing"); - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; } - return MNL_CB_OK; + return YNL_PARSE_CB_OK; } static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) @@ -741,10 +741,10 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh) gehdr = ynl_nlmsg_data(nlh); if (gehdr->cmd >= ys->family->ntf_info_size) - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; info = &ys->family->ntf_info[gehdr->cmd]; if (!info->cb) - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; rsp = calloc(1, info->alloc_sz); rsp->free = info->free; @@ -752,7 +752,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh) yarg.rsp_policy = info->policy; ret = info->cb(nlh, &yarg); - if (ret <= MNL_CB_STOP) + if (ret <= YNL_PARSE_CB_STOP) goto err_free; rsp->family = nlh->nlmsg_type; @@ -761,11 +761,11 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh) *ys->ntf_last_next = rsp; ys->ntf_last_next = &rsp->next; - return MNL_CB_OK; + return YNL_PARSE_CB_OK; err_free: info->free(rsp); - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; } static int @@ -812,7 +812,7 @@ void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd) int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg) { yerr(yarg->ys, YNL_ERROR_INV_RESP, "Error parsing response: %s", msg); - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; } static int @@ -841,7 +841,7 @@ int ynl_req_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) ret = ynl_check_alien(yrs->yarg.ys, nlh, yrs->rsp_cmd); if (ret) - return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK; + return ret < 0 ? YNL_PARSE_CB_ERROR : YNL_PARSE_CB_OK; return yrs->cb(nlh, &yrs->yarg); } @@ -872,11 +872,11 @@ ynl_dump_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *data) ret = ynl_check_alien(ds->yarg.ys, nlh, ds->rsp_cmd); if (ret) - return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK; + return ret < 0 ? YNL_PARSE_CB_ERROR : YNL_PARSE_CB_OK; obj = calloc(1, ds->alloc_sz); if (!obj) - return MNL_CB_ERROR; + return YNL_PARSE_CB_ERROR; if (!ds->first) ds->first = obj; diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 15a9d3b2eed3..375d5f5e3052 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -200,7 +200,7 @@ class Type(SpecAttr): if not self.is_multi_val(): ri.cw.p("if (ynl_attr_validate(yarg, attr))") - ri.cw.p("return MNL_CB_ERROR;") + ri.cw.p("return YNL_PARSE_CB_ERROR;") if self.presence_type() == 'bit': ri.cw.p(f"{var}->_present.{self.c_name} = 1;") @@ -247,7 +247,7 @@ class TypeUnused(Type): return [] def _attr_get(self, ri, var): - return ['return MNL_CB_ERROR;'], None, None + return ['return YNL_PARSE_CB_ERROR;'], None, None def _attr_typol(self): return '.type = YNL_PT_REJECT, ' @@ -543,7 +543,7 @@ class TypeNest(Type): def _attr_get(self, ri, var): get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))", - "return MNL_CB_ERROR;"] + "return YNL_PARSE_CB_ERROR;"] init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;", f"parg.data = &{var}->{self.c_name};"] return get_lines, init_lines, None @@ -1674,7 +1674,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.block_start(line=f"ynl_attr_for_each_nested(attr, attr_{aspec.c_name})") ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))") - ri.cw.p('return MNL_CB_ERROR;') + ri.cw.p('return YNL_PARSE_CB_ERROR;') ri.cw.p('i++;') ri.cw.block_end() ri.cw.block_end() @@ -1693,7 +1693,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): if 'nested-attributes' in aspec: ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr))") - ri.cw.p('return MNL_CB_ERROR;') + ri.cw.p('return YNL_PARSE_CB_ERROR;') elif aspec.type in scalars: ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.type}(attr);") else: @@ -1707,7 +1707,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): if struct.nested: ri.cw.p('return 0;') else: - ri.cw.p('return MNL_CB_OK;') + ri.cw.p('return YNL_PARSE_CB_OK;') ri.cw.block_end() ri.cw.nl() @@ -1750,7 +1750,7 @@ def parse_rsp_msg(ri, deref=False): else: # Empty reply ri.cw.block_start() - ri.cw.p('return MNL_CB_OK;') + ri.cw.p('return YNL_PARSE_CB_OK;') ri.cw.block_end() ri.cw.nl() -- cgit v1.2.3 From 5ac6868daa0e34856506baf43c89e6d4fd5635c8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:30 -0800 Subject: tools: ynl: stop using mnl socket helpers Most libmnl socket helpers can be replaced by direct calls to the underlying libc API. We need portid, the netlink manpage suggests we bind() address of zero. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-14-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 2 ++ tools/net/ynl/lib/ynl.c | 60 ++++++++++++++++++++++++++++---------------- tools/net/ynl/lib/ynl.h | 2 +- 3 files changed, 42 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index a3a9367aacbe..5150ef1dacda 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -35,6 +35,8 @@ enum ynl_parse_result { YNL_PARSE_CB_OK = 1, }; +#define YNL_SOCKET_BUFFER_SIZE (1 << 17) + #define YNL_ARRAY_SIZE(array) (sizeof(array) ? \ sizeof(array) / sizeof(array[0]) : 0) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 968ab679b5e3..9d028929117a 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -3,10 +3,12 @@ #include #include #include +#include +#include #include - #include #include +#include #include "ynl.h" @@ -464,7 +466,7 @@ static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb) ssize_t len, rem; int ret; - len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE); + len = recv(ys->socket, ys->rx_buf, YNL_SOCKET_BUFFER_SIZE, 0); if (len < 0) return len; @@ -596,7 +598,7 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) nlh = ynl_gemsg_start_req(ys, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 1); ynl_attr_put_str(nlh, CTRL_ATTR_FAMILY_NAME, family_name); - err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len); + err = send(ys->socket, nlh, nlh->nlmsg_len, 0); if (err < 0) { perr(ys, "failed to request socket family info"); return err; @@ -621,38 +623,54 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) struct ynl_sock * ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse) { + struct sockaddr_nl addr; struct ynl_sock *ys; + socklen_t addrlen; int one = 1; - ys = malloc(sizeof(*ys) + 2 * MNL_SOCKET_BUFFER_SIZE); + ys = malloc(sizeof(*ys) + 2 * YNL_SOCKET_BUFFER_SIZE); if (!ys) return NULL; memset(ys, 0, sizeof(*ys)); ys->family = yf; ys->tx_buf = &ys->raw_buf[0]; - ys->rx_buf = &ys->raw_buf[MNL_SOCKET_BUFFER_SIZE]; + ys->rx_buf = &ys->raw_buf[YNL_SOCKET_BUFFER_SIZE]; ys->ntf_last_next = &ys->ntf_first; - ys->sock = mnl_socket_open(NETLINK_GENERIC); - if (!ys->sock) { + ys->socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (ys->socket < 0) { __perr(yse, "failed to create a netlink socket"); goto err_free_sock; } - if (mnl_socket_setsockopt(ys->sock, NETLINK_CAP_ACK, - &one, sizeof(one))) { + if (setsockopt(ys->socket, SOL_NETLINK, NETLINK_CAP_ACK, + &one, sizeof(one))) { __perr(yse, "failed to enable netlink ACK"); goto err_close_sock; } - if (mnl_socket_setsockopt(ys->sock, NETLINK_EXT_ACK, - &one, sizeof(one))) { + if (setsockopt(ys->socket, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one))) { __perr(yse, "failed to enable netlink ext ACK"); goto err_close_sock; } + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + if (bind(ys->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + __perr(yse, "unable to bind to a socket address"); + goto err_close_sock;; + } + + memset(&addr, 0, sizeof(addr)); + addrlen = sizeof(addr); + if (getsockname(ys->socket, (struct sockaddr *)&addr, &addrlen) < 0) { + __perr(yse, "unable to read socket address"); + goto err_close_sock;; + } + ys->portid = addr.nl_pid; ys->seq = random(); - ys->portid = mnl_socket_get_portid(ys->sock); + if (ynl_sock_read_family(ys, yf->name)) { if (yse) @@ -663,7 +681,7 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse) return ys; err_close_sock: - mnl_socket_close(ys->sock); + close(ys->socket); err_free_sock: free(ys); return NULL; @@ -673,7 +691,7 @@ void ynl_sock_destroy(struct ynl_sock *ys) { struct ynl_ntf_base_type *ntf; - mnl_socket_close(ys->sock); + close(ys->socket); while ((ntf = ynl_ntf_dequeue(ys))) ynl_ntf_free(ntf); free(ys->mcast_groups); @@ -700,9 +718,9 @@ int ynl_subscribe(struct ynl_sock *ys, const char *grp_name) return -1; } - err = mnl_socket_setsockopt(ys->sock, NETLINK_ADD_MEMBERSHIP, - &ys->mcast_groups[i].id, - sizeof(ys->mcast_groups[i].id)); + err = setsockopt(ys->socket, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, + &ys->mcast_groups[i].id, + sizeof(ys->mcast_groups[i].id)); if (err < 0) { perr(ys, "Subscribing to multicast group failed"); return -1; @@ -713,7 +731,7 @@ int ynl_subscribe(struct ynl_sock *ys, const char *grp_name) int ynl_socket_get_fd(struct ynl_sock *ys) { - return mnl_socket_get_fd(ys->sock); + return ys->socket; } struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys) @@ -785,7 +803,7 @@ int ynl_ntf_check(struct ynl_sock *ys) */ struct pollfd pfd = { }; - pfd.fd = mnl_socket_get_fd(ys->sock); + pfd.fd = ys->socket; pfd.events = POLLIN; err = poll(&pfd, 1, 1); if (err < 1) @@ -851,7 +869,7 @@ int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, { int err; - err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len); + err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0); if (err < 0) return err; @@ -904,7 +922,7 @@ int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, { int err; - err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len); + err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0); if (err < 0) return err; diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index 4849c142fce0..dbeeef8ce91a 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -59,7 +59,7 @@ struct ynl_sock { /* private: */ const struct ynl_family *family; - struct mnl_socket *sock; + int socket; __u32 seq; __u32 portid; __u16 family_id; -- cgit v1.2.3 From 73395b43819b00365f0971e0e0e023d9fb341346 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:31 -0800 Subject: tools: ynl: remove the libmnl dependency We don't use libmnl any more. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-15-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 4 +--- tools/net/ynl/lib/ynl.c | 1 - tools/net/ynl/samples/Makefile | 2 +- tools/net/ynl/ynl-gen-c.py | 1 - 4 files changed, 2 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 5150ef1dacda..a8099fab035d 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -2,8 +2,8 @@ #ifndef __YNL_C_PRIV_H #define __YNL_C_PRIV_H 1 +#include #include -#include #include struct ynl_parse_arg; @@ -12,8 +12,6 @@ struct ynl_parse_arg; * YNL internals / low level stuff */ -/* Generic mnl helper code */ - enum ynl_policy_type { YNL_PT_REJECT = 1, YNL_PT_IGNORE, diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 9d028929117a..f8a66ae88ba9 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index 28bdb1557a54..1d33e98e3ffe 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -9,7 +9,7 @@ ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan endif -LDLIBS=-lmnl ../lib/ynl.a ../generated/protos.a +LDLIBS=../lib/ynl.a ../generated/protos.a SRCS=$(wildcard *.c) BINS=$(patsubst %.c,%,${SRCS}) diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 375d5f5e3052..2f5febfe66a1 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2677,7 +2677,6 @@ def main(): if args.mode == "user": if not args.header: - cw.p("#include ") cw.p("#include ") cw.nl() for one in args.user_header: -- cgit v1.2.3 From 7c4a38bf1eba9398d03e706823ec3b08b0a960de Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Feb 2024 14:30:32 -0800 Subject: tools: ynl: use MSG_DONTWAIT for getting notifications To stick to libmnl wrappers in the past we had to use poll() to check if there are any outstanding notifications on the socket. This is no longer necessary, we can use MSG_DONTWAIT. Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240227223032.1835527-16-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index f8a66ae88ba9..86729119e1ef 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -458,16 +458,20 @@ static int ynl_cb_null(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) return YNL_PARSE_CB_ERROR; } -static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb) +static int +__ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb, int flags) { struct ynl_sock *ys = yarg->ys; const struct nlmsghdr *nlh; ssize_t len, rem; int ret; - len = recv(ys->socket, ys->rx_buf, YNL_SOCKET_BUFFER_SIZE, 0); - if (len < 0) + len = recv(ys->socket, ys->rx_buf, YNL_SOCKET_BUFFER_SIZE, flags); + if (len < 0) { + if (flags & MSG_DONTWAIT && errno == EAGAIN) + return YNL_PARSE_CB_STOP; return len; + } ret = YNL_PARSE_CB_STOP; for (rem = len; rem > 0; NLMSG_NEXT(nlh, rem)) { @@ -509,6 +513,11 @@ static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb) return ret; } +static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb) +{ + return __ynl_sock_read_msgs(yarg, cb, 0); +} + static int ynl_recv_ack(struct ynl_sock *ys, int ret) { struct ynl_parse_arg yarg = { .ys = ys, }; @@ -797,18 +806,8 @@ int ynl_ntf_check(struct ynl_sock *ys) int err; do { - /* libmnl doesn't let us pass flags to the recv to make - * it non-blocking so we need to poll() or peek() :| - */ - struct pollfd pfd = { }; - - pfd.fd = ys->socket; - pfd.events = POLLIN; - err = poll(&pfd, 1, 1); - if (err < 1) - return err; - - err = ynl_sock_read_msgs(&yarg, ynl_ntf_trampoline); + err = __ynl_sock_read_msgs(&yarg, ynl_ntf_trampoline, + MSG_DONTWAIT); if (err < 0) return err; } while (err > 0); -- cgit v1.2.3 From 8a7746982ed797e1f511dece56c6675f0f845d6f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 27 Feb 2024 19:04:18 +0200 Subject: selftests: vxlan_mdb: Avoid duplicate test names Rename some test cases to avoid overlapping test names which is problematic for the kernel test robot. No changes in the test's logic. Suggested-by: Yujie Liu Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20240227170418.491442-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/test_vxlan_mdb.sh | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 84a05a9e46d8..74ff9fb2a6f0 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -1014,10 +1014,10 @@ flush() run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" - log_test $? 254 "Flush by port" + log_test $? 254 "Flush by port - matching" run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0" - log_test $? 255 "Flush by wrong port" + log_test $? 255 "Flush by port - non-matching" # Check that when flushing by source VNI only entries programmed with # the specified source VNI are flushed and the rest are not. @@ -1030,9 +1030,9 @@ flush() run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" - log_test $? 254 "Flush by specified source VNI" + log_test $? 254 "Flush by source VNI - matching" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011" - log_test $? 0 "Flush by unspecified source VNI" + log_test $? 0 "Flush by source VNI - non-matching" run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" @@ -1058,9 +1058,9 @@ flush() run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\"" - log_test $? 1 "Flush by specified routing protocol" + log_test $? 1 "Flush by routing protocol - matching" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\"" - log_test $? 0 "Flush by unspecified routing protocol" + log_test $? 0 "Flush by routing protocol - non-matching" run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" @@ -1075,9 +1075,9 @@ flush() run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" - log_test $? 1 "Flush by specified destination IP - IPv4" + log_test $? 1 "Flush by IPv4 destination IP - matching" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" - log_test $? 0 "Flush by unspecified destination IP - IPv4" + log_test $? 0 "Flush by IPv4 destination IP - non-matching" run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" @@ -1089,9 +1089,9 @@ flush() run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2" - log_test $? 1 "Flush by specified destination IP - IPv6" + log_test $? 1 "Flush by IPv6 destination IP - matching" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1" - log_test $? 0 "Flush by unspecified destination IP - IPv6" + log_test $? 0 "Flush by IPv6 destination IP - non-matching" run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" @@ -1104,9 +1104,9 @@ flush() run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\"" - log_test $? 1 "Flush by specified UDP destination port" + log_test $? 1 "Flush by UDP destination port - matching" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\"" - log_test $? 0 "Flush by unspecified UDP destination port" + log_test $? 0 "Flush by UDP destination port - non-matching" run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" @@ -1121,9 +1121,9 @@ flush() run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" - log_test $? 1 "Flush by device's UDP destination port" + log_test $? 1 "Flush by device's UDP destination port - matching" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" - log_test $? 0 "Flush by unspecified UDP destination port" + log_test $? 0 "Flush by device's UDP destination port - non-matching" run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" @@ -1136,9 +1136,9 @@ flush() run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\"" - log_test $? 1 "Flush by specified destination VNI" + log_test $? 1 "Flush by destination VNI - matching" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\"" - log_test $? 0 "Flush by unspecified destination VNI" + log_test $? 0 "Flush by destination VNI - non-matching" run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" @@ -1153,9 +1153,9 @@ flush() run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" - log_test $? 1 "Flush by destination VNI equal to source VNI" + log_test $? 1 "Flush by destination VNI equal to source VNI - matching" run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" - log_test $? 0 "Flush by unspecified destination VNI" + log_test $? 0 "Flush by destination VNI equal to source VNI - non-matching" run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" -- cgit v1.2.3 From 896880ff30866f386ebed14ab81ce1ad3710cfc4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 22 Feb 2024 07:56:15 -0800 Subject: bpf: Replace bpf_lpm_trie_key 0-length array with flexible array Replace deprecated 0-length array in struct bpf_lpm_trie_key with flexible array. Found with GCC 13: ../kernel/bpf/lpm_trie.c:207:51: warning: array subscript i is outside array bounds of 'const __u8[0]' {aka 'const unsigned char[]'} [-Warray-bounds=] 207 | *(__be16 *)&key->data[i]); | ^~~~~~~~~~~~~ ../include/uapi/linux/swab.h:102:54: note: in definition of macro '__swab16' 102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x)) | ^ ../include/linux/byteorder/generic.h:97:21: note: in expansion of macro '__be16_to_cpu' 97 | #define be16_to_cpu __be16_to_cpu | ^~~~~~~~~~~~~ ../kernel/bpf/lpm_trie.c:206:28: note: in expansion of macro 'be16_to_cpu' 206 | u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^ | ^~~~~~~~~~~ In file included from ../include/linux/bpf.h:7: ../include/uapi/linux/bpf.h:82:17: note: while referencing 'data' 82 | __u8 data[0]; /* Arbitrary size */ | ^~~~ And found at run-time under CONFIG_FORTIFY_SOURCE: UBSAN: array-index-out-of-bounds in kernel/bpf/lpm_trie.c:218:49 index 0 is out of range for type '__u8 [*]' Changing struct bpf_lpm_trie_key is difficult since has been used by userspace. For example, in Cilium: struct egress_gw_policy_key { struct bpf_lpm_trie_key lpm_key; __u32 saddr; __u32 daddr; }; While direct references to the "data" member haven't been found, there are static initializers what include the final member. For example, the "{}" here: struct egress_gw_policy_key in_key = { .lpm_key = { 32 + 24, {} }, .saddr = CLIENT_IP, .daddr = EXTERNAL_SVC_IP & 0Xffffff, }; To avoid the build time and run time warnings seen with a 0-sized trailing array for struct bpf_lpm_trie_key, introduce a new struct that correctly uses a flexible array for the trailing bytes, struct bpf_lpm_trie_key_u8. As part of this, include the "header" portion (which is just the "prefixlen" member), so it can be used by anything building a bpf_lpr_trie_key that has trailing members that aren't a u8 flexible array (like the self-test[1]), which is named struct bpf_lpm_trie_key_hdr. Unfortunately, C++ refuses to parse the __struct_group() helper, so it is not possible to define struct bpf_lpm_trie_key_hdr directly in struct bpf_lpm_trie_key_u8, so we must open-code the union directly. Adjust the kernel code to use struct bpf_lpm_trie_key_u8 through-out, and for the selftest to use struct bpf_lpm_trie_key_hdr. Add a comment to the UAPI header directing folks to the two new options. Reported-by: Mark Rutland Signed-off-by: Kees Cook Signed-off-by: Daniel Borkmann Acked-by: Gustavo A. R. Silva Closes: https://paste.debian.net/hidden/ca500597/ Link: https://lore.kernel.org/all/202206281009.4332AA33@keescook/ [1] Link: https://lore.kernel.org/bpf/20240222155612.it.533-kees@kernel.org --- Documentation/bpf/map_lpm_trie.rst | 2 +- include/uapi/linux/bpf.h | 19 ++++++++++++++++++- kernel/bpf/lpm_trie.c | 20 ++++++++++---------- samples/bpf/map_perf_test_user.c | 2 +- samples/bpf/xdp_router_ipv4_user.c | 2 +- tools/include/uapi/linux/bpf.h | 19 ++++++++++++++++++- tools/testing/selftests/bpf/progs/map_ptr_kern.c | 2 +- tools/testing/selftests/bpf/test_lpm_map.c | 18 +++++++++--------- 8 files changed, 59 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst index 74d64a30f500..f9cd579496c9 100644 --- a/Documentation/bpf/map_lpm_trie.rst +++ b/Documentation/bpf/map_lpm_trie.rst @@ -17,7 +17,7 @@ significant byte. LPM tries may be created with a maximum prefix length that is a multiple of 8, in the range from 8 to 2048. The key used for lookup and update -operations is a ``struct bpf_lpm_trie_key``, extended by +operations is a ``struct bpf_lpm_trie_key_u8``, extended by ``max_prefixlen/8`` bytes. - For IPv4 addresses the data length is 4 bytes diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d2e6c5fcec01..a241f407c234 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -77,12 +77,29 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; -/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for + * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for + * the trailing flexible array member) instead. + */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ __u8 data[0]; /* Arbitrary size */ }; +/* Header for bpf_lpm_trie_key structs */ +struct bpf_lpm_trie_key_hdr { + __u32 prefixlen; +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */ +struct bpf_lpm_trie_key_u8 { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + __u8 data[]; /* Arbitrary size */ +}; + struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ __u32 attach_type; /* program attach type (enum bpf_attach_type) */ diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index b32be680da6c..050fe1ebf0f7 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -164,13 +164,13 @@ static inline int extract_bit(const u8 *data, size_t index) */ static size_t longest_prefix_match(const struct lpm_trie *trie, const struct lpm_trie_node *node, - const struct bpf_lpm_trie_key *key) + const struct bpf_lpm_trie_key_u8 *key) { u32 limit = min(node->prefixlen, key->prefixlen); u32 prefixlen = 0, i = 0; BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32)); - BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32)); + BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key_u8, data) % sizeof(u32)); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT) @@ -229,7 +229,7 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node *node, *found = NULL; - struct bpf_lpm_trie_key *key = _key; + struct bpf_lpm_trie_key_u8 *key = _key; if (key->prefixlen > trie->max_prefixlen) return NULL; @@ -309,7 +309,7 @@ static long trie_update_elem(struct bpf_map *map, struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; struct lpm_trie_node __rcu **slot; - struct bpf_lpm_trie_key *key = _key; + struct bpf_lpm_trie_key_u8 *key = _key; unsigned long irq_flags; unsigned int next_bit; size_t matchlen = 0; @@ -437,7 +437,7 @@ out: static long trie_delete_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); - struct bpf_lpm_trie_key *key = _key; + struct bpf_lpm_trie_key_u8 *key = _key; struct lpm_trie_node __rcu **trim, **trim2; struct lpm_trie_node *node, *parent; unsigned long irq_flags; @@ -536,7 +536,7 @@ out: sizeof(struct lpm_trie_node)) #define LPM_VAL_SIZE_MIN 1 -#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key) + (X)) +#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key_u8) + (X)) #define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX) #define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) @@ -565,7 +565,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) /* copy mandatory map attributes */ bpf_map_init_from_attr(&trie->map, attr); trie->data_size = attr->key_size - - offsetof(struct bpf_lpm_trie_key, data); + offsetof(struct bpf_lpm_trie_key_u8, data); trie->max_prefixlen = trie->data_size * 8; spin_lock_init(&trie->lock); @@ -616,7 +616,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) { struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root; struct lpm_trie *trie = container_of(map, struct lpm_trie, map); - struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; + struct bpf_lpm_trie_key_u8 *key = _key, *next_key = _next_key; struct lpm_trie_node **node_stack = NULL; int err = 0, stack_ptr = -1; unsigned int next_bit; @@ -703,7 +703,7 @@ find_leftmost: } do_copy: next_key->prefixlen = next_node->prefixlen; - memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data), + memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key_u8, data), next_node->data, trie->data_size); free_stack: kfree(node_stack); @@ -715,7 +715,7 @@ static int trie_check_btf(const struct bpf_map *map, const struct btf_type *key_type, const struct btf_type *value_type) { - /* Keys must have struct bpf_lpm_trie_key embedded. */ + /* Keys must have struct bpf_lpm_trie_key_u8 embedded. */ return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ? -EINVAL : 0; } diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index d2fbcf963cdf..07ff471ed6ae 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -370,7 +370,7 @@ static void run_perf_test(int tasks) static void fill_lpm_trie(void) { - struct bpf_lpm_trie_key *key; + struct bpf_lpm_trie_key_u8 *key; unsigned long value = 0; unsigned int i; int r; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 9d41db09c480..266fdd0b025d 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -91,7 +91,7 @@ static int recv_msg(struct sockaddr_nl sock_addr, int sock) static void read_route(struct nlmsghdr *nh, int nll) { char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24]; - struct bpf_lpm_trie_key *prefix_key; + struct bpf_lpm_trie_key_u8 *prefix_key; struct rtattr *rt_attr; struct rtmsg *rt_msg; int rtm_family; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d2e6c5fcec01..a241f407c234 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -77,12 +77,29 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; -/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for + * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for + * the trailing flexible array member) instead. + */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ __u8 data[0]; /* Arbitrary size */ }; +/* Header for bpf_lpm_trie_key structs */ +struct bpf_lpm_trie_key_hdr { + __u32 prefixlen; +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */ +struct bpf_lpm_trie_key_u8 { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + __u8 data[]; /* Arbitrary size */ +}; + struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ __u32 attach_type; /* program attach type (enum bpf_attach_type) */ diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index 3325da17ec81..efaf622c28dd 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -316,7 +316,7 @@ struct lpm_trie { } __attribute__((preserve_access_index)); struct lpm_key { - struct bpf_lpm_trie_key trie_key; + struct bpf_lpm_trie_key_hdr trie_key; __u32 data; }; diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index c028d621c744..d98c72dc563e 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -211,7 +211,7 @@ static void test_lpm_map(int keysize) volatile size_t n_matches, n_matches_after_delete; size_t i, j, n_nodes, n_lookups; struct tlpm_node *t, *list = NULL; - struct bpf_lpm_trie_key *key; + struct bpf_lpm_trie_key_u8 *key; uint8_t *data, *value; int r, map; @@ -331,8 +331,8 @@ static void test_lpm_map(int keysize) static void test_lpm_ipaddr(void) { LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); - struct bpf_lpm_trie_key *key_ipv4; - struct bpf_lpm_trie_key *key_ipv6; + struct bpf_lpm_trie_key_u8 *key_ipv4; + struct bpf_lpm_trie_key_u8 *key_ipv6; size_t key_size_ipv4; size_t key_size_ipv6; int map_fd_ipv4; @@ -423,7 +423,7 @@ static void test_lpm_ipaddr(void) static void test_lpm_delete(void) { LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); - struct bpf_lpm_trie_key *key; + struct bpf_lpm_trie_key_u8 *key; size_t key_size; int map_fd; __u64 value; @@ -532,7 +532,7 @@ static void test_lpm_delete(void) static void test_lpm_get_next_key(void) { LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); - struct bpf_lpm_trie_key *key_p, *next_key_p; + struct bpf_lpm_trie_key_u8 *key_p, *next_key_p; size_t key_size; __u32 value = 0; int map_fd; @@ -693,9 +693,9 @@ static void *lpm_test_command(void *arg) { int i, j, ret, iter, key_size; struct lpm_mt_test_info *info = arg; - struct bpf_lpm_trie_key *key_p; + struct bpf_lpm_trie_key_u8 *key_p; - key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32); + key_size = sizeof(*key_p) + sizeof(__u32); key_p = alloca(key_size); for (iter = 0; iter < info->iter; iter++) for (i = 0; i < MAX_TEST_KEYS; i++) { @@ -717,7 +717,7 @@ static void *lpm_test_command(void *arg) ret = bpf_map_lookup_elem(info->map_fd, key_p, &value); assert(ret == 0 || errno == ENOENT); } else { - struct bpf_lpm_trie_key *next_key_p = alloca(key_size); + struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size); ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p); assert(ret == 0 || errno == ENOENT || errno == ENOMEM); } @@ -752,7 +752,7 @@ static void test_lpm_multi_thread(void) /* create a trie */ value_size = sizeof(__u32); - key_size = sizeof(struct bpf_lpm_trie_key) + value_size; + key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size; map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts); /* create 4 threads to test update, delete, lookup and get_next_key */ -- cgit v1.2.3 From 3644d285462a60c80ac225d508fcfe705640d2b4 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 28 Feb 2024 22:45:19 -0800 Subject: libbpf: Set btf_value_type_id of struct bpf_map for struct_ops. For a struct_ops map, btf_value_type_id is the type ID of it's struct type. This value is required by bpftool to generate skeleton including pointers of shadow types. The code generator gets the type ID from bpf_map__btf_value_type_id() in order to get the type information of the struct type of a map. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240229064523.2091270-2-thinker.li@gmail.com --- tools/lib/bpf/libbpf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 01f407591a92..c759628f4250 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1229,6 +1229,7 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, map->name = strdup(var_name); if (!map->name) return -ENOMEM; + map->btf_value_type_id = type_id; map->def.type = BPF_MAP_TYPE_STRUCT_OPS; map->def.key_size = sizeof(int); @@ -4857,6 +4858,10 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; + break; + case BPF_MAP_TYPE_STRUCT_OPS: + create_attr.btf_value_type_id = 0; + break; default: break; } -- cgit v1.2.3 From 69e4a9d2b3f5adf5af4feeab0a9f505da971265a Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 28 Feb 2024 22:45:20 -0800 Subject: libbpf: Convert st_ops->data to shadow type. Convert st_ops->data to the shadow type of the struct_ops map. The shadow type of a struct_ops type is a variant of the original struct type providing a way to access/change the values in the maps of the struct_ops type. bpf_map__initial_value() will return st_ops->data for struct_ops types. The skeleton is going to use it as the pointer to the shadow type of the original struct type. One of the main differences between the original struct type and the shadow type is that all function pointers of the shadow type are converted to pointers of struct bpf_program. Users can replace these bpf_program pointers with other BPF programs. The st_ops->progs[] will be updated before updating the value of a map to reflect the changes made by users. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240229064523.2091270-3-thinker.li@gmail.com --- tools/lib/bpf/libbpf.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index c759628f4250..6c2979f1b471 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1014,6 +1014,19 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map) return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; } +static bool is_valid_st_ops_program(struct bpf_object *obj, + const struct bpf_program *prog) +{ + int i; + + for (i = 0; i < obj->nr_programs; i++) { + if (&obj->programs[i] == prog) + return prog->type == BPF_PROG_TYPE_STRUCT_OPS; + } + + return false; +} + /* Init the map's fields that depend on kern_btf */ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) { @@ -1102,9 +1115,16 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) if (btf_is_ptr(mtype)) { struct bpf_program *prog; - prog = st_ops->progs[i]; + /* Update the value from the shadow type */ + prog = *(void **)mdata; + st_ops->progs[i] = prog; if (!prog) continue; + if (!is_valid_st_ops_program(obj, prog)) { + pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n", + map->name, mname); + return -ENOTSUP; + } kern_mtype = skip_mods_and_typedefs(kern_btf, kern_mtype->type, @@ -9310,7 +9330,9 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, return NULL; } -/* Collect the reloc from ELF and populate the st_ops->progs[] */ +/* Collect the reloc from ELF, populate the st_ops->progs[], and update + * st_ops->data for shadow type. + */ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { @@ -9424,6 +9446,14 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } st_ops->progs[member_idx] = prog; + + /* st_ops->data will be exposed to users, being returned by + * bpf_map__initial_value() as a pointer to the shadow + * type. All function pointers in the original struct type + * should be converted to a pointer to struct bpf_program + * in the shadow type. + */ + *((struct bpf_program **)(st_ops->data + moff)) = prog; } return 0; @@ -9882,6 +9912,12 @@ int bpf_map__set_initial_value(struct bpf_map *map, void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) { + if (bpf_map__is_struct_ops(map)) { + if (psize) + *psize = map->def.value_size; + return map->st_ops->data; + } + if (!map->mmaped) return NULL; *psize = map->def.value_size; -- cgit v1.2.3 From a7b0fa352eafef95bd0d736ca94965d3f884ad18 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 28 Feb 2024 22:45:21 -0800 Subject: bpftool: Generated shadow variables for struct_ops maps. Declares and defines a pointer of the shadow type for each struct_ops map. The code generator will create an anonymous struct type as the shadow type for each struct_ops map. The shadow type is translated from the original struct type of the map. The user of the skeleton use pointers of them to access the values of struct_ops maps. However, shadow types only supports certain types of fields, including scalar types and function pointers. Any fields of unsupported types are translated into an array of characters to occupy the space of the original field. Function pointers are translated into pointers of the struct bpf_program. Additionally, padding fields are generated to occupy the space between two consecutive fields. The pointers of shadow types of struct_osp maps are initialized when *__open_opts() in skeletons are called. For a map called FOO, the user can access it through the pointer at skel->struct_ops.FOO. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240229064523.2091270-4-thinker.li@gmail.com --- tools/bpf/bpftool/gen.c | 237 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 236 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index a9334c57e859..1f579eacd9d4 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -55,6 +55,20 @@ static bool str_has_suffix(const char *str, const char *suffix) return true; } +static const struct btf_type * +resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) +{ + const struct btf_type *t; + + t = skip_mods_and_typedefs(btf, id, NULL); + if (!btf_is_ptr(t)) + return NULL; + + t = skip_mods_and_typedefs(btf, t->type, res_id); + + return btf_is_func_proto(t) ? t : NULL; +} + static void get_obj_name(char *name, const char *file) { char file_copy[PATH_MAX]; @@ -909,6 +923,207 @@ codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_li } } +static int walk_st_ops_shadow_vars(struct btf *btf, const char *ident, + const struct btf_type *map_type, __u32 map_type_id) +{ + LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts, .indent_level = 3); + const struct btf_type *member_type; + __u32 offset, next_offset = 0; + const struct btf_member *m; + struct btf_dump *d = NULL; + const char *member_name; + __u32 member_type_id; + int i, err = 0, n; + int size; + + d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); + if (!d) + return -errno; + + n = btf_vlen(map_type); + for (i = 0, m = btf_members(map_type); i < n; i++, m++) { + member_type = skip_mods_and_typedefs(btf, m->type, &member_type_id); + member_name = btf__name_by_offset(btf, m->name_off); + + offset = m->offset / 8; + if (next_offset < offset) + printf("\t\t\tchar __padding_%d[%d];\n", i, offset - next_offset); + + switch (btf_kind(member_type)) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* scalar type */ + printf("\t\t\t"); + opts.field_name = member_name; + err = btf_dump__emit_type_decl(d, member_type_id, &opts); + if (err) { + p_err("Failed to emit type declaration for %s: %d", member_name, err); + goto out; + } + printf(";\n"); + + size = btf__resolve_size(btf, member_type_id); + if (size < 0) { + p_err("Failed to resolve size of %s: %d\n", member_name, size); + err = size; + goto out; + } + + next_offset = offset + size; + break; + + case BTF_KIND_PTR: + if (resolve_func_ptr(btf, m->type, NULL)) { + /* Function pointer */ + printf("\t\t\tstruct bpf_program *%s;\n", member_name); + + next_offset = offset + sizeof(void *); + break; + } + /* All pointer types are unsupported except for + * function pointers. + */ + fallthrough; + + default: + /* Unsupported types + * + * Types other than scalar types and function + * pointers are currently not supported in order to + * prevent conflicts in the generated code caused + * by multiple definitions. For instance, if the + * struct type FOO is used in a struct_ops map, + * bpftool has to generate definitions for FOO, + * which may result in conflicts if FOO is defined + * in different skeleton files. + */ + size = btf__resolve_size(btf, member_type_id); + if (size < 0) { + p_err("Failed to resolve size of %s: %d\n", member_name, size); + err = size; + goto out; + } + printf("\t\t\tchar __unsupported_%d[%d];\n", i, size); + + next_offset = offset + size; + break; + } + } + + /* Cannot fail since it must be a struct type */ + size = btf__resolve_size(btf, map_type_id); + if (next_offset < (__u32)size) + printf("\t\t\tchar __padding_end[%d];\n", size - next_offset); + +out: + btf_dump__free(d); + + return err; +} + +/* Generate the pointer of the shadow type for a struct_ops map. + * + * This function adds a pointer of the shadow type for a struct_ops map. + * The members of a struct_ops map can be exported through a pointer to a + * shadow type. The user can access these members through the pointer. + * + * A shadow type includes not all members, only members of some types. + * They are scalar types and function pointers. The function pointers are + * translated to the pointer of the struct bpf_program. The scalar types + * are translated to the original type without any modifiers. + * + * Unsupported types will be translated to a char array to occupy the same + * space as the original field, being renamed as __unsupported_*. The user + * should treat these fields as opaque data. + */ +static int gen_st_ops_shadow_type(const char *obj_name, struct btf *btf, const char *ident, + const struct bpf_map *map) +{ + const struct btf_type *map_type; + const char *type_name; + __u32 map_type_id; + int err; + + map_type_id = bpf_map__btf_value_type_id(map); + if (map_type_id == 0) + return -EINVAL; + map_type = btf__type_by_id(btf, map_type_id); + if (!map_type) + return -EINVAL; + + type_name = btf__name_by_offset(btf, map_type->name_off); + + printf("\t\tstruct %s__%s__%s {\n", obj_name, ident, type_name); + + err = walk_st_ops_shadow_vars(btf, ident, map_type, map_type_id); + if (err) + return err; + + printf("\t\t} *%s;\n", ident); + + return 0; +} + +static int gen_st_ops_shadow(const char *obj_name, struct btf *btf, struct bpf_object *obj) +{ + int err, st_ops_cnt = 0; + struct bpf_map *map; + char ident[256]; + + if (!btf) + return 0; + + /* Generate the pointers to shadow types of + * struct_ops maps. + */ + bpf_object__for_each_map(map, obj) { + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + + if (st_ops_cnt == 0) /* first struct_ops map */ + printf("\tstruct {\n"); + st_ops_cnt++; + + err = gen_st_ops_shadow_type(obj_name, btf, ident, map); + if (err) + return err; + } + + if (st_ops_cnt) + printf("\t} struct_ops;\n"); + + return 0; +} + +/* Generate the code to initialize the pointers of shadow types. */ +static void gen_st_ops_shadow_init(struct btf *btf, struct bpf_object *obj) +{ + struct bpf_map *map; + char ident[256]; + + if (!btf) + return; + + /* Initialize the pointers to_ops shadow types of + * struct_ops maps. + */ + bpf_object__for_each_map(map, obj) { + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + codegen("\ + \n\ + obj->struct_ops.%1$s = bpf_map__initial_value(obj->maps.%1$s, NULL);\n\ + \n\ + ", ident); + } +} + static int do_skeleton(int argc, char **argv) { char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; @@ -1052,6 +1267,11 @@ static int do_skeleton(int argc, char **argv) printf("\t} maps;\n"); } + btf = bpf_object__btf(obj); + err = gen_st_ops_shadow(obj_name, btf, obj); + if (err) + goto out; + if (prog_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { @@ -1075,7 +1295,6 @@ static int do_skeleton(int argc, char **argv) printf("\t} links;\n"); } - btf = bpf_object__btf(obj); if (btf) { err = codegen_datasecs(obj, obj_name); if (err) @@ -1133,6 +1352,12 @@ static int do_skeleton(int argc, char **argv) if (err) \n\ goto err_out; \n\ \n\ + ", obj_name); + + gen_st_ops_shadow_init(btf, obj); + + codegen("\ + \n\ return obj; \n\ err_out: \n\ %1$s__destroy(obj); \n\ @@ -1442,6 +1667,10 @@ static int do_subskeleton(int argc, char **argv) printf("\t} maps;\n"); } + err = gen_st_ops_shadow(obj_name, btf, obj); + if (err) + goto out; + if (prog_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { @@ -1553,6 +1782,12 @@ static int do_subskeleton(int argc, char **argv) if (err) \n\ goto err; \n\ \n\ + "); + + gen_st_ops_shadow_init(btf, obj); + + codegen("\ + \n\ return obj; \n\ err: \n\ %1$s__destroy(obj); \n\ -- cgit v1.2.3 From f2e81192e07e87897ff1296c96775eceea8f582a Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 28 Feb 2024 22:45:22 -0800 Subject: bpftool: Add an example for struct_ops map and shadow type. The example in bpftool-gen.8 explains how to use the pointer of the shadow type to change the value of a field of a struct_ops map. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240229064523.2091270-5-thinker.li@gmail.com --- tools/bpf/bpftool/Documentation/bpftool-gen.rst | 58 ++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index 5006e724d1bc..5e60825818dd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -257,18 +257,48 @@ EXAMPLES return 0; } -This is example BPF application with two BPF programs and a mix of BPF maps -and global variables. Source code is split across two source code files. +**$ cat example3.bpf.c** + +:: + + #include + #include + #include + /* This header file is provided by the bpf_testmod module. */ + #include "bpf_testmod.h" + + int test_2_result = 0; + + /* bpf_Testmod.ko calls this function, passing a "4" + * and testmod_map->data. + */ + SEC("struct_ops/test_2") + void BPF_PROG(test_2, int a, int b) + { + test_2_result = a + b; + } + + SEC(".struct_ops") + struct bpf_testmod_ops testmod_map = { + .test_2 = (void *)test_2, + .data = 0x1, + }; + +This is example BPF application with three BPF programs and a mix of BPF +maps and global variables. Source code is split across three source code +files. **$ clang --target=bpf -g example1.bpf.c -o example1.bpf.o** **$ clang --target=bpf -g example2.bpf.c -o example2.bpf.o** -**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o** +**$ clang --target=bpf -g example3.bpf.c -o example3.bpf.o** + +**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o example3.bpf.o** -This set of commands compiles *example1.bpf.c* and *example2.bpf.c* -individually and then statically links respective object files into the final -BPF ELF object file *example.bpf.o*. +This set of commands compiles *example1.bpf.c*, *example2.bpf.c* and +*example3.bpf.c* individually and then statically links respective object +files into the final BPF ELF object file *example.bpf.o*. **$ bpftool gen skeleton example.bpf.o name example | tee example.skel.h** @@ -291,7 +321,15 @@ BPF ELF object file *example.bpf.o*. struct bpf_map *data; struct bpf_map *bss; struct bpf_map *my_map; + struct bpf_map *testmod_map; } maps; + struct { + struct example__testmod_map__bpf_testmod_ops { + const struct bpf_program *test_1; + const struct bpf_program *test_2; + int data; + } *testmod_map; + } struct_ops; struct { struct bpf_program *handle_sys_enter; struct bpf_program *handle_sys_exit; @@ -304,6 +342,7 @@ BPF ELF object file *example.bpf.o*. struct { int x; } data; + int test_2_result; } *bss; struct example__data { _Bool global_flag; @@ -342,10 +381,16 @@ BPF ELF object file *example.bpf.o*. skel->rodata->param1 = 128; + /* Change the value through the pointer of shadow type */ + skel->struct_ops.testmod_map->data = 13; + err = example__load(skel); if (err) goto cleanup; + /* The result of the function test_2() */ + printf("test_2_result: %d\n", skel->bss->test_2_result); + err = example__attach(skel); if (err) goto cleanup; @@ -372,6 +417,7 @@ BPF ELF object file *example.bpf.o*. :: + test_2_result: 17 my_map name: my_map sys_enter prog FD: 8 my_static_var: 7 -- cgit v1.2.3 From 0623e73317940d052216fb6eef4efd55a0a7f602 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 28 Feb 2024 22:45:23 -0800 Subject: selftests/bpf: Test if shadow types work correctly. Change the values of fields, including scalar types and function pointers, and check if the struct_ops map works as expected. The test changes the field "test_2" of "testmod_1" from the pointer to test_2() to pointer to test_3() and the field "data" to 13. The function test_2() and test_3() both compute a new value for "test_2_result", but in different way. By checking the value of "test_2_result", it ensures the struct_ops map works as expected with changes through shadow types. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240229064523.2091270-6-thinker.li@gmail.com --- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 11 ++++++++++- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h | 8 ++++++++ .../selftests/bpf/prog_tests/test_struct_ops_module.c | 19 +++++++++++++++---- tools/testing/selftests/bpf/progs/struct_ops_module.c | 8 ++++++++ 4 files changed, 41 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 66787e99ba1b..098ddd067224 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -539,6 +539,15 @@ static int bpf_testmod_ops_init_member(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata) { + if (member->offset == offsetof(struct bpf_testmod_ops, data) * 8) { + /* For data fields, this function has to copy it and return + * 1 to indicate that the data has been handled by the + * struct_ops type, or the verifier will reject the map if + * the value of the data field is not zero. + */ + ((struct bpf_testmod_ops *)kdata)->data = ((struct bpf_testmod_ops *)udata)->data; + return 1; + } return 0; } @@ -559,7 +568,7 @@ static int bpf_dummy_reg(void *kdata) * initialized, so we need to check for NULL. */ if (ops->test_2) - ops->test_2(4, 3); + ops->test_2(4, ops->data); return 0; } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index c3b0cf788f9f..971458acfac3 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -35,6 +35,14 @@ struct bpf_testmod_ops { void (*test_2)(int a, int b); /* Used to test nullable arguments. */ int (*test_maybe_null)(int dummy, struct task_struct *task); + + /* The following fields are used to test shadow copies. */ + char onebyte; + struct { + int a; + int b; + } unsupported; + int data; }; #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c index 8d833f0c7580..7d6facf46ebb 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -32,17 +32,23 @@ cleanup: static void test_struct_ops_load(void) { - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); struct struct_ops_module *skel; struct bpf_map_info info = {}; struct bpf_link *link; int err; u32 len; - skel = struct_ops_module__open_opts(&opts); + skel = struct_ops_module__open(); if (!ASSERT_OK_PTR(skel, "struct_ops_module_open")) return; + skel->struct_ops.testmod_1->data = 13; + skel->struct_ops.testmod_1->test_2 = skel->progs.test_3; + /* Since test_2() is not being used, it should be disabled from + * auto-loading, or it will fail to load. + */ + bpf_program__set_autoload(skel->progs.test_2, false); + err = struct_ops_module__load(skel); if (!ASSERT_OK(err, "struct_ops_module_load")) goto cleanup; @@ -56,8 +62,13 @@ static void test_struct_ops_load(void) link = bpf_map__attach_struct_ops(skel->maps.testmod_1); ASSERT_OK_PTR(link, "attach_test_mod_1"); - /* test_2() will be called from bpf_dummy_reg() in bpf_testmod.c */ - ASSERT_EQ(skel->bss->test_2_result, 7, "test_2_result"); + /* test_3() will be called from bpf_dummy_reg() in bpf_testmod.c + * + * In bpf_testmod.c it will pass 4 and 13 (the value of data) to + * .test_2. So, the value of test_2_result should be 20 (4 + 13 + + * 3). + */ + ASSERT_EQ(skel->bss->test_2_result, 20, "check_shadow_variables"); bpf_link__destroy(link); diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c index b78746b3cef3..25952fa09348 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_module.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c @@ -21,9 +21,17 @@ void BPF_PROG(test_2, int a, int b) test_2_result = a + b; } +SEC("struct_ops/test_3") +int BPF_PROG(test_3, int a, int b) +{ + test_2_result = a + b + 3; + return a + b + 3; +} + SEC(".struct_ops.link") struct bpf_testmod_ops testmod_1 = { .test_1 = (void *)test_1, .test_2 = (void *)test_2, + .data = 0x1, }; -- cgit v1.2.3 From e74048650eaff6674895f2220d0545f5e5920cd6 Mon Sep 17 00:00:00 2001 From: Mickaël Salaün Date: Wed, 28 Feb 2024 16:59:08 -0800 Subject: selftests/landlock: Redefine TEST_F() as TEST_F_FORK() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This has the effect of creating a new test process for either TEST_F() or TEST_F_FORK(), which doesn't change tests but will ease potential backports. See next commit for the TEST_F_FORK() merge into TEST_F(). Cc: Günther Noack Cc: Kees Cook Cc: Shuah Khan Cc: Will Drewry Signed-off-by: Mickaël Salaün Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/landlock/common.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index e64bbdf0e86e..f40146d40763 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -37,7 +37,7 @@ struct __test_metadata *_metadata, \ FIXTURE_DATA(fixture_name) *self, \ const FIXTURE_VARIANT(fixture_name) *variant); \ - TEST_F(fixture_name, test_name) \ + __TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT) \ { \ int status; \ const pid_t child = fork(); \ @@ -80,6 +80,10 @@ __attribute__((unused)) *variant) /* clang-format on */ +/* Makes backporting easier. */ +#undef TEST_F +#define TEST_F(fixture_name, test_name) TEST_F_FORK(fixture_name, test_name) + #ifndef landlock_create_ruleset static inline int landlock_create_ruleset(const struct landlock_ruleset_attr *const attr, -- cgit v1.2.3 From 0710a1a73fb45033ebb06073e374ab7d44a05f15 Mon Sep 17 00:00:00 2001 From: Mickaël Salaün Date: Wed, 28 Feb 2024 16:59:09 -0800 Subject: selftests/harness: Merge TEST_F_FORK() into TEST_F() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace Landlock-specific TEST_F_FORK() with an improved TEST_F() which brings four related changes: Run TEST_F()'s tests in a grandchild process to make it possible to drop privileges and delegate teardown to the parent. Compared to TEST_F_FORK(), simplify handling of the test grandchild process thanks to vfork(2), and makes it generic (e.g. no explicit conversion between exit code and _metadata). Compared to TEST_F_FORK(), run teardown even when tests failed with an assert thanks to commit 63e6b2a42342 ("selftests/harness: Run TEARDOWN for ASSERT failures"). Simplify the test harness code by removing the no_print and step fields which are not used. I added this feature just after I made kselftest_harness.h more broadly available but this step counter remained even though it wasn't needed after all. See commit 369130b63178 ("selftests: Enhance kselftest_harness.h to print which assert failed"). Replace spaces with tabs in one line of __TEST_F_IMPL(). Cc: Günther Noack Cc: Shuah Khan Cc: Will Drewry Signed-off-by: Mickaël Salaün Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest_harness.h | 56 ++++++++++++-------------- tools/testing/selftests/landlock/common.h | 62 +---------------------------- 2 files changed, 27 insertions(+), 91 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index e05ac8261046..ad49832457af 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -95,14 +95,6 @@ * E.g., #define TH_LOG_ENABLED 1 * * If no definition is provided, logging is enabled by default. - * - * If there is no way to print an error message for the process running the - * test (e.g. not allowed to write to stderr), it is still possible to get the - * ASSERT_* number for which the test failed. This behavior can be enabled by - * writing `_metadata->no_print = true;` before the check sequence that is - * unable to print. When an error occur, instead of printing an error message - * and calling `abort(3)`, the test process call `_exit(2)` with the assert - * number as argument, which is then printed by the parent process. */ #define TH_LOG(fmt, ...) do { \ if (TH_LOG_ENABLED) \ @@ -363,6 +355,11 @@ * Defines a test that depends on a fixture (e.g., is part of a test case). * Very similar to TEST() except that *self* is the setup instance of fixture's * datatype exposed for use by the implementation. + * + * The @test_name code is run in a separate process sharing the same memory + * (i.e. vfork), which means that the test process can update its privileges + * without impacting the related FIXTURE_TEARDOWN() (e.g. to remove files from + * a directory where write access was dropped). */ #define TEST_F(fixture_name, test_name) \ __TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT) @@ -384,15 +381,28 @@ { \ /* fixture data is alloced, setup, and torn down per call. */ \ FIXTURE_DATA(fixture_name) self; \ + pid_t child = 1; \ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \ if (setjmp(_metadata->env) == 0) { \ fixture_name##_setup(_metadata, &self, variant->data); \ /* Let setup failure terminate early. */ \ - if (!_metadata->passed || _metadata->skip) \ + if (!_metadata->passed || _metadata->skip) \ return; \ _metadata->setup_completed = true; \ - fixture_name##_##test_name(_metadata, &self, variant->data); \ + /* Use the same _metadata. */ \ + child = vfork(); \ + if (child == 0) { \ + fixture_name##_##test_name(_metadata, &self, variant->data); \ + _exit(0); \ + } \ + if (child < 0) { \ + ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \ + _metadata->passed = 0; \ + } \ } \ + if (child == 0) \ + /* Child failed and updated the shared _metadata. */ \ + _exit(0); \ if (_metadata->setup_completed) \ fixture_name##_teardown(_metadata, &self, variant->data); \ __test_check_assert(_metadata); \ @@ -694,18 +704,12 @@ for (; _metadata->trigger; _metadata->trigger = \ __bail(_assert, _metadata)) -#define __INC_STEP(_metadata) \ - /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \ - if (_metadata->passed && _metadata->step < 253) \ - _metadata->step++; - #define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) #define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ - if (_assert) __INC_STEP(_metadata); \ if (!(__exp _t __seen)) { \ /* Report with actual signedness to avoid weird output. */ \ switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \ @@ -751,7 +755,6 @@ #define __EXPECT_STR(_expected, _seen, _t, _assert) do { \ const char *__exp = (_expected); \ const char *__seen = (_seen); \ - if (_assert) __INC_STEP(_metadata); \ if (!(strcmp(__exp, __seen) _t 0)) { \ __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \ _metadata->passed = 0; \ @@ -837,8 +840,6 @@ struct __test_metadata { int trigger; /* extra handler after the evaluation */ int timeout; /* seconds to wait for test timeout */ bool timed_out; /* did this test timeout instead of exiting? */ - __u8 step; - bool no_print; /* manual trigger when TH_LOG_STREAM is not available */ bool aborted; /* stopped test due to failed ASSERT */ bool setup_completed; /* did setup finish? */ jmp_buf env; /* for exiting out of test early */ @@ -873,11 +874,8 @@ static inline int __bail(int for_realz, struct __test_metadata *t) static inline void __test_check_assert(struct __test_metadata *t) { - if (t->aborted) { - if (t->no_print) - _exit(t->step); + if (t->aborted) abort(); - } } struct __test_metadata *__active_test; @@ -954,13 +952,12 @@ void __wait_for_test(struct __test_metadata *t) case 0: t->passed = 1; break; - /* Other failure, assume step report. */ + /* Failure */ default: t->passed = 0; fprintf(TH_LOG_STREAM, - "# %s: Test failed at step #%d\n", - t->name, - WEXITSTATUS(status)); + "# %s: Test failed\n", + t->name); } } } else if (WIFSIGNALED(status)) { @@ -1114,8 +1111,6 @@ void __run_test(struct __fixture_metadata *f, t->passed = 1; t->skip = 0; t->trigger = 0; - t->step = 1; - t->no_print = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); ksft_print_msg(" RUN %s%s%s.%s ...\n", @@ -1137,8 +1132,7 @@ void __run_test(struct __fixture_metadata *f, /* Pass is exit 0 */ if (t->passed) _exit(0); - /* Something else happened, report the step. */ - _exit(t->step); + _exit(1); } else { __wait_for_test(t); } diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index f40146d40763..401e2eb092a3 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -23,66 +23,8 @@ #define __maybe_unused __attribute__((__unused__)) #endif -/* - * TEST_F_FORK() is useful when a test drop privileges but the corresponding - * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory - * where write actions are denied). For convenience, FIXTURE_TEARDOWN() is - * also called when the test failed, but not when FIXTURE_SETUP() failed. For - * this to be possible, we must not call abort() but instead exit smoothly - * (hence the step print). - */ -/* clang-format off */ -#define TEST_F_FORK(fixture_name, test_name) \ - static void fixture_name##_##test_name##_child( \ - struct __test_metadata *_metadata, \ - FIXTURE_DATA(fixture_name) *self, \ - const FIXTURE_VARIANT(fixture_name) *variant); \ - __TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT) \ - { \ - int status; \ - const pid_t child = fork(); \ - if (child < 0) \ - abort(); \ - if (child == 0) { \ - _metadata->no_print = 1; \ - fixture_name##_##test_name##_child(_metadata, self, variant); \ - if (_metadata->skip) \ - _exit(255); \ - if (_metadata->passed) \ - _exit(0); \ - _exit(_metadata->step); \ - } \ - if (child != waitpid(child, &status, 0)) \ - abort(); \ - if (WIFSIGNALED(status) || !WIFEXITED(status)) { \ - _metadata->passed = 0; \ - _metadata->step = 1; \ - return; \ - } \ - switch (WEXITSTATUS(status)) { \ - case 0: \ - _metadata->passed = 1; \ - break; \ - case 255: \ - _metadata->passed = 1; \ - _metadata->skip = 1; \ - break; \ - default: \ - _metadata->passed = 0; \ - _metadata->step = WEXITSTATUS(status); \ - break; \ - } \ - } \ - static void fixture_name##_##test_name##_child( \ - struct __test_metadata __attribute__((unused)) *_metadata, \ - FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ - const FIXTURE_VARIANT(fixture_name) \ - __attribute__((unused)) *variant) -/* clang-format on */ - -/* Makes backporting easier. */ -#undef TEST_F -#define TEST_F(fixture_name, test_name) TEST_F_FORK(fixture_name, test_name) +/* TEST_F_FORK() should not be used for new tests. */ +#define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name) #ifndef landlock_create_ruleset static inline int -- cgit v1.2.3 From a724707976b0bcf5eb6f4637dead0cc380659940 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:10 -0800 Subject: selftests: kselftest_harness: use KSFT_* exit codes Now that we no longer need low exit codes to communicate assertion steps - use normal KSFT exit codes. Acked-by: Kees Cook Tested-by: Jakub Sitnicki Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest_harness.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index ad49832457af..62ce258b0853 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -936,7 +936,7 @@ void __wait_for_test(struct __test_metadata *t) fprintf(TH_LOG_STREAM, "# %s: Test terminated by timeout\n", t->name); } else if (WIFEXITED(status)) { - if (WEXITSTATUS(status) == 255) { + if (WEXITSTATUS(status) == KSFT_SKIP) { /* SKIP */ t->passed = 1; t->skip = 1; @@ -949,7 +949,7 @@ void __wait_for_test(struct __test_metadata *t) } else { switch (WEXITSTATUS(status)) { /* Success */ - case 0: + case KSFT_PASS: t->passed = 1; break; /* Failure */ @@ -1128,11 +1128,10 @@ void __run_test(struct __fixture_metadata *f, setpgrp(); t->fn(t, variant); if (t->skip) - _exit(255); - /* Pass is exit 0 */ + _exit(KSFT_SKIP); if (t->passed) - _exit(0); - _exit(1); + _exit(KSFT_PASS); + _exit(KSFT_FAIL); } else { __wait_for_test(t); } -- cgit v1.2.3 From 38c957f07038278b98af7223a411c0daa54bd1b4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:11 -0800 Subject: selftests: kselftest_harness: generate test name once Since we added variant support generating full test case name takes 4 string arguments. We're about to need it in another two places. Stop the duplication and print once into a temporary buffer. Suggested-by: Jakub Sitnicki Acked-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest_harness.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 62ce258b0853..4a2bda6a67ed 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -1107,14 +1108,18 @@ void __run_test(struct __fixture_metadata *f, struct __fixture_variant_metadata *variant, struct __test_metadata *t) { + char test_name[LINE_MAX]; + /* reset test struct */ t->passed = 1; t->skip = 0; t->trigger = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); - ksft_print_msg(" RUN %s%s%s.%s ...\n", - f->name, variant->name[0] ? "." : "", variant->name, t->name); + snprintf(test_name, sizeof(test_name), "%s%s%s.%s", + f->name, variant->name[0] ? "." : "", variant->name, t->name); + + ksft_print_msg(" RUN %s ...\n", test_name); /* Make sure output buffers are flushed before fork */ fflush(stdout); @@ -1135,15 +1140,14 @@ void __run_test(struct __fixture_metadata *f, } else { __wait_for_test(t); } - ksft_print_msg(" %4s %s%s%s.%s\n", t->passed ? "OK" : "FAIL", - f->name, variant->name[0] ? "." : "", variant->name, t->name); + ksft_print_msg(" %4s %s\n", + t->passed ? "OK" : "FAIL", test_name); if (t->skip) ksft_test_result_skip("%s\n", t->results->reason[0] ? t->results->reason : "unknown"); else - ksft_test_result(t->passed, "%s%s%s.%s\n", - f->name, variant->name[0] ? "." : "", variant->name, t->name); + ksft_test_result(t->passed, "%s\n", test_name); } static int test_harness_run(int argc, char **argv) -- cgit v1.2.3 From 69fe8ec4f673b5b35a34718dc39a5131bfc55e36 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:12 -0800 Subject: selftests: kselftest_harness: save full exit code in metadata Instead of tracking passed = 0/1 rename the field to exit_code and invert the values so that they match the KSFT_* exit codes. This will allow us to fold SKIP / XFAIL into the same value. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest_harness.h | 48 +++++++++++++++----------- tools/testing/selftests/landlock/base_test.c | 2 +- tools/testing/selftests/landlock/fs_test.c | 4 +-- tools/testing/selftests/landlock/net_test.c | 4 +-- tools/testing/selftests/landlock/ptrace_test.c | 7 ++-- tools/testing/selftests/net/tls.c | 2 +- tools/testing/selftests/seccomp/seccomp_bpf.c | 9 +++-- 7 files changed, 41 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 4a2bda6a67ed..d90d4a9039ee 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -128,7 +128,7 @@ fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ _metadata->results->reason); \ } \ - _metadata->passed = 1; \ + _metadata->exit_code = KSFT_PASS; \ _metadata->skip = 1; \ _metadata->trigger = 0; \ statement; \ @@ -387,7 +387,7 @@ if (setjmp(_metadata->env) == 0) { \ fixture_name##_setup(_metadata, &self, variant->data); \ /* Let setup failure terminate early. */ \ - if (!_metadata->passed || _metadata->skip) \ + if (!__test_passed(_metadata) || _metadata->skip) \ return; \ _metadata->setup_completed = true; \ /* Use the same _metadata. */ \ @@ -398,7 +398,7 @@ } \ if (child < 0) { \ ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \ - _metadata->passed = 0; \ + _metadata->exit_code = KSFT_FAIL; \ } \ } \ if (child == 0) \ @@ -747,7 +747,7 @@ break; \ } \ } \ - _metadata->passed = 0; \ + _metadata->exit_code = KSFT_FAIL; \ /* Ensure the optional handler is triggered */ \ _metadata->trigger = 1; \ } \ @@ -758,7 +758,7 @@ const char *__seen = (_seen); \ if (!(strcmp(__exp, __seen) _t 0)) { \ __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \ - _metadata->passed = 0; \ + _metadata->exit_code = KSFT_FAIL; \ _metadata->trigger = 1; \ } \ } while (0); OPTIONAL_HANDLER(_assert) @@ -836,7 +836,7 @@ struct __test_metadata { pid_t pid; /* pid of test when being run */ struct __fixture_metadata *fixture; int termsig; - int passed; + int exit_code; int skip; /* did SKIP get used? */ int trigger; /* extra handler after the evaluation */ int timeout; /* seconds to wait for test timeout */ @@ -848,6 +848,12 @@ struct __test_metadata { struct __test_metadata *prev, *next; }; +static inline bool __test_passed(struct __test_metadata *metadata) +{ + return metadata->exit_code != KSFT_FAIL && + metadata->exit_code <= KSFT_SKIP; +} + /* * Since constructors are called in reverse order, reverse the test * list so tests are run in source declaration order. @@ -912,7 +918,7 @@ void __wait_for_test(struct __test_metadata *t) int status; if (sigaction(SIGALRM, &action, &saved_action)) { - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: unable to install SIGALRM handler\n", t->name); @@ -924,7 +930,7 @@ void __wait_for_test(struct __test_metadata *t) waitpid(t->pid, &status, 0); alarm(0); if (sigaction(SIGALRM, &saved_action, NULL)) { - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: unable to uninstall SIGALRM handler\n", t->name); @@ -933,16 +939,16 @@ void __wait_for_test(struct __test_metadata *t) __active_test = NULL; if (t->timed_out) { - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test terminated by timeout\n", t->name); } else if (WIFEXITED(status)) { if (WEXITSTATUS(status) == KSFT_SKIP) { /* SKIP */ - t->passed = 1; + t->exit_code = KSFT_PASS; t->skip = 1; } else if (t->termsig != -1) { - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test exited normally instead of by signal (code: %d)\n", t->name, @@ -951,24 +957,24 @@ void __wait_for_test(struct __test_metadata *t) switch (WEXITSTATUS(status)) { /* Success */ case KSFT_PASS: - t->passed = 1; + t->exit_code = KSFT_PASS; break; /* Failure */ default: - t->passed = 0; + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test failed\n", t->name); } } } else if (WIFSIGNALED(status)) { - t->passed = 0; + t->exit_code = KSFT_FAIL; if (WTERMSIG(status) == SIGABRT) { fprintf(TH_LOG_STREAM, "# %s: Test terminated by assertion\n", t->name); } else if (WTERMSIG(status) == t->termsig) { - t->passed = 1; + t->exit_code = KSFT_PASS; } else { fprintf(TH_LOG_STREAM, "# %s: Test terminated unexpectedly by signal %d\n", @@ -1111,7 +1117,7 @@ void __run_test(struct __fixture_metadata *f, char test_name[LINE_MAX]; /* reset test struct */ - t->passed = 1; + t->exit_code = KSFT_PASS; t->skip = 0; t->trigger = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); @@ -1128,26 +1134,26 @@ void __run_test(struct __fixture_metadata *f, t->pid = fork(); if (t->pid < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); - t->passed = 0; + t->exit_code = KSFT_FAIL; } else if (t->pid == 0) { setpgrp(); t->fn(t, variant); if (t->skip) _exit(KSFT_SKIP); - if (t->passed) + if (__test_passed(t)) _exit(KSFT_PASS); _exit(KSFT_FAIL); } else { __wait_for_test(t); } ksft_print_msg(" %4s %s\n", - t->passed ? "OK" : "FAIL", test_name); + __test_passed(t) ? "OK" : "FAIL", test_name); if (t->skip) ksft_test_result_skip("%s\n", t->results->reason[0] ? t->results->reason : "unknown"); else - ksft_test_result(t->passed, "%s\n", test_name); + ksft_test_result(__test_passed(t), "%s\n", test_name); } static int test_harness_run(int argc, char **argv) @@ -1195,7 +1201,7 @@ static int test_harness_run(int argc, char **argv) t->results = results; __run_test(f, v, t); t->results = NULL; - if (t->passed) + if (__test_passed(t)) pass_count++; else ret = 1; diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 646f778dfb1e..a6f89aaea77d 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -307,7 +307,7 @@ TEST(ruleset_fd_transfer) dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC); ASSERT_LE(0, dir_fd); ASSERT_EQ(0, close(dir_fd)); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 2d6d9b43d958..98817a14c91b 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -1964,7 +1964,7 @@ static void test_execute(struct __test_metadata *const _metadata, const int err, strerror(errno)); }; ASSERT_EQ(err, errno); - _exit(_metadata->passed ? 2 : 1); + _exit(__test_passed(_metadata) ? 2 : 1); return; } ASSERT_EQ(child, waitpid(child, &status, 0)); @@ -3807,7 +3807,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes) ASSERT_EQ(0, close(socket_fds[0])); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 936cfc879f1d..f21cfbbc3638 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -539,7 +539,7 @@ static void test_bind_and_connect(struct __test_metadata *const _metadata, } EXPECT_EQ(0, close(connect_fd)); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } @@ -834,7 +834,7 @@ TEST_F(protocol, connect_unspec) } EXPECT_EQ(0, close(connect_fd)); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c index 55e7871631a1..a19db4d0b3bd 100644 --- a/tools/testing/selftests/landlock/ptrace_test.c +++ b/tools/testing/selftests/landlock/ptrace_test.c @@ -314,7 +314,7 @@ TEST_F(hierarchy, trace) ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); if (variant->domain_both) { create_domain(_metadata); - if (!_metadata->passed) + if (!__test_passed(_metadata)) /* Aborts before forking. */ return; } @@ -375,7 +375,7 @@ TEST_F(hierarchy, trace) /* Waits for the parent PTRACE_ATTACH test. */ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); - _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + _exit(_metadata->exit_code); return; } @@ -430,9 +430,10 @@ TEST_F(hierarchy, trace) /* Signals that the parent PTRACE_ATTACH test is done. */ ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); ASSERT_EQ(child, waitpid(child, &status, 0)); + if (WIFSIGNALED(status) || !WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) - _metadata->passed = 0; + _metadata->exit_code = KSFT_FAIL; } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index b95c249f81c2..c6eda21cefb6 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1927,7 +1927,7 @@ TEST_F(tls_err, poll_partial_rec_async) pfd.events = POLLIN; EXPECT_EQ(poll(&pfd, 1, 20), 1); - exit(!_metadata->passed); + exit(!__test_passed(_metadata)); } } diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 38f651469968..1027e6170186 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1576,7 +1576,7 @@ void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, ASSERT_EQ(0, ret); } /* Directly report the status of our test harness results. */ - syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + syscall(__NR_exit, _metadata->exit_code); } /* Common tracer setup/teardown functions. */ @@ -1623,7 +1623,7 @@ void teardown_trace_fixture(struct __test_metadata *_metadata, ASSERT_EQ(0, kill(tracer, SIGUSR1)); ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); if (WEXITSTATUS(status)) - _metadata->passed = 0; + _metadata->exit_code = KSFT_FAIL; } } @@ -3088,8 +3088,7 @@ TEST(syscall_restart) } /* Directly report the status of our test harness results. */ - syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS - : EXIT_FAILURE); + syscall(__NR_exit, _metadata->exit_code); } EXPECT_EQ(0, close(pipefd[0])); @@ -3174,7 +3173,7 @@ TEST(syscall_restart) ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); if (WIFSIGNALED(status) || WEXITSTATUS(status)) - _metadata->passed = 0; + _metadata->exit_code = KSFT_FAIL; } TEST_SIGNAL(filter_flag_log, SIGSYS) -- cgit v1.2.3 From 796a344fa4315f9c1f2258de14c1d20cbbef79a8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:13 -0800 Subject: selftests: kselftest_harness: use exit code to store skip We always use skip in combination with exit_code being 0 (KSFT_PASS). This are basic KSFT / KTAP semantics. Store the right KSFT_* code in exit_code directly. This makes it easier to support tests reporting other extended KSFT_* codes like XFAIL / XPASS. Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest_harness.h | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index d90d4a9039ee..5a48177c8c00 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -128,8 +128,7 @@ fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ _metadata->results->reason); \ } \ - _metadata->exit_code = KSFT_PASS; \ - _metadata->skip = 1; \ + _metadata->exit_code = KSFT_SKIP; \ _metadata->trigger = 0; \ statement; \ } while (0) @@ -387,7 +386,7 @@ if (setjmp(_metadata->env) == 0) { \ fixture_name##_setup(_metadata, &self, variant->data); \ /* Let setup failure terminate early. */ \ - if (!__test_passed(_metadata) || _metadata->skip) \ + if (_metadata->exit_code) \ return; \ _metadata->setup_completed = true; \ /* Use the same _metadata. */ \ @@ -837,7 +836,6 @@ struct __test_metadata { struct __fixture_metadata *fixture; int termsig; int exit_code; - int skip; /* did SKIP get used? */ int trigger; /* extra handler after the evaluation */ int timeout; /* seconds to wait for test timeout */ bool timed_out; /* did this test timeout instead of exiting? */ @@ -944,9 +942,7 @@ void __wait_for_test(struct __test_metadata *t) "# %s: Test terminated by timeout\n", t->name); } else if (WIFEXITED(status)) { if (WEXITSTATUS(status) == KSFT_SKIP) { - /* SKIP */ - t->exit_code = KSFT_PASS; - t->skip = 1; + t->exit_code = WEXITSTATUS(status); } else if (t->termsig != -1) { t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, @@ -1118,7 +1114,6 @@ void __run_test(struct __fixture_metadata *f, /* reset test struct */ t->exit_code = KSFT_PASS; - t->skip = 0; t->trigger = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); @@ -1138,18 +1133,14 @@ void __run_test(struct __fixture_metadata *f, } else if (t->pid == 0) { setpgrp(); t->fn(t, variant); - if (t->skip) - _exit(KSFT_SKIP); - if (__test_passed(t)) - _exit(KSFT_PASS); - _exit(KSFT_FAIL); + _exit(t->exit_code); } else { __wait_for_test(t); } ksft_print_msg(" %4s %s\n", __test_passed(t) ? "OK" : "FAIL", test_name); - if (t->skip) + if (t->exit_code == KSFT_SKIP) ksft_test_result_skip("%s\n", t->results->reason[0] ? t->results->reason : "unknown"); else -- cgit v1.2.3 From fa1a53d83674b3aa089b8661ee9f884c024f6e95 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:14 -0800 Subject: selftests: kselftest: add ksft_test_result_code(), handling all exit codes For generic test harness code it's more useful to deal with exit codes directly, rather than having to switch on them and call the right ksft_test_result_*() helper. Add such function to kselftest.h. Note that "directive" and "diagnostic" are what ktap docs call those parts of the message. Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest.h | 39 +++++++++++++++++++++++++++++ tools/testing/selftests/kselftest_harness.h | 9 +++++-- 2 files changed, 46 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index a781e6311810..12ad7f8dfe3a 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -25,6 +25,7 @@ * ksft_test_result_skip(fmt, ...); * ksft_test_result_xfail(fmt, ...); * ksft_test_result_error(fmt, ...); + * ksft_test_result_code(exit_code, test_name, fmt, ...); * * When all tests are finished, clean up and exit the program with one of: * @@ -254,6 +255,44 @@ static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...) va_end(args); } +static inline __printf(2, 3) +void ksft_test_result_code(int exit_code, const char *msg, ...) +{ + const char *tap_code = "ok"; + const char *directive = ""; + int saved_errno = errno; + va_list args; + + switch (exit_code) { + case KSFT_PASS: + ksft_cnt.ksft_pass++; + break; + case KSFT_XFAIL: + directive = " # XFAIL "; + ksft_cnt.ksft_xfail++; + break; + case KSFT_XPASS: + directive = " # XPASS "; + ksft_cnt.ksft_xpass++; + break; + case KSFT_SKIP: + directive = " # SKIP "; + ksft_cnt.ksft_xskip++; + break; + case KSFT_FAIL: + default: + tap_code = "not ok"; + ksft_cnt.ksft_fail++; + break; + } + + va_start(args, msg); + printf("%s %u%s", tap_code, ksft_test_num(), directive); + errno = saved_errno; + vprintf(msg, args); + va_end(args); +} + static inline int ksft_exit_pass(void) { ksft_print_cnts(); diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 5a48177c8c00..4fb30fcc7774 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -1111,6 +1111,7 @@ void __run_test(struct __fixture_metadata *f, struct __test_metadata *t) { char test_name[LINE_MAX]; + const char *diagnostic; /* reset test struct */ t->exit_code = KSFT_PASS; @@ -1140,9 +1141,13 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg(" %4s %s\n", __test_passed(t) ? "OK" : "FAIL", test_name); + if (t->results->reason[0]) + diagnostic = t->results->reason; + else + diagnostic = "unknown"; + if (t->exit_code == KSFT_SKIP) - ksft_test_result_skip("%s\n", t->results->reason[0] ? - t->results->reason : "unknown"); + ksft_test_result_code(t->exit_code, "%s\n", diagnostic); else ksft_test_result(__test_passed(t), "%s\n", test_name); } -- cgit v1.2.3 From 732e2035280bf499a4b002677e2a1845f557d403 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:15 -0800 Subject: selftests: kselftest_harness: print test name for SKIP Jakub points out that for parsers it's rather useful to always have the test name on the result line. Currently if we SKIP (or soon XFAIL or XPASS), we will print: ok 17 # SKIP SCTP doesn't support IP_BIND_ADDRESS_NO_PORT ^ no test name Always print the test name. KTAP format seems to allow or even call for it, per: https://docs.kernel.org/dev-tools/ktap.html Suggested-by: Jakub Sitnicki Link: https://lore.kernel.org/all/87jzn6lnou.fsf@cloudflare.com/ Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest.h | 7 ++++--- tools/testing/selftests/kselftest_harness.h | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 12ad7f8dfe3a..25e29626566e 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -255,8 +255,9 @@ static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...) va_end(args); } -static inline __printf(2, 3) -void ksft_test_result_code(int exit_code, const char *msg, ...) +static inline __printf(3, 4) +void ksft_test_result_code(int exit_code, const char *test_name, + const char *msg, ...) { const char *tap_code = "ok"; const char *directive = ""; @@ -287,7 +288,7 @@ void ksft_test_result_code(int exit_code, const char *msg, ...) } va_start(args, msg); - printf("%s %u%s", tap_code, ksft_test_num(), directive); + printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive); errno = saved_errno; vprintf(msg, args); va_end(args); diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 4fb30fcc7774..82377051aa54 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -1147,7 +1147,8 @@ void __run_test(struct __fixture_metadata *f, diagnostic = "unknown"; if (t->exit_code == KSFT_SKIP) - ksft_test_result_code(t->exit_code, "%s\n", diagnostic); + ksft_test_result_code(t->exit_code, test_name, + "%s\n", diagnostic); else ksft_test_result(__test_passed(t), "%s\n", test_name); } -- cgit v1.2.3 From 42ab727eb95fcff83e7a67fcdbf467bbcd53451e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:16 -0800 Subject: selftests: kselftest_harness: separate diagnostic message with # in ksft_test_result_code() According to the spec we should always print a # if we add a diagnostic message. Having the caller pass in the new line as part of diagnostic message makes handling this a bit counter-intuitive, so append the new line in the helper. Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest.h | 5 +++++ tools/testing/selftests/kselftest_harness.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 25e29626566e..541bf192e30e 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -287,10 +287,15 @@ void ksft_test_result_code(int exit_code, const char *test_name, break; } + /* Docs seem to call for double space if directive is absent */ + if (!directive[0] && msg[0]) + directive = " # "; + va_start(args, msg); printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive); errno = saved_errno; vprintf(msg, args); + printf("\n"); va_end(args); } diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 82377051aa54..5b0592e4b7a4 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -1148,7 +1148,7 @@ void __run_test(struct __fixture_metadata *f, if (t->exit_code == KSFT_SKIP) ksft_test_result_code(t->exit_code, test_name, - "%s\n", diagnostic); + "%s", diagnostic); else ksft_test_result(__test_passed(t), "%s\n", test_name); } -- cgit v1.2.3 From 378193eff3399acb8529d0b553709e8f91d34fe3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:17 -0800 Subject: selftests: kselftest_harness: let PASS / FAIL provide diagnostic Switch to printing KTAP line for PASS / FAIL with ksft_test_result_code(), this gives us the ability to report diagnostic messages. Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest_harness.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 5b0592e4b7a4..b643a577f9e1 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -1143,14 +1143,13 @@ void __run_test(struct __fixture_metadata *f, if (t->results->reason[0]) diagnostic = t->results->reason; + else if (t->exit_code == KSFT_PASS || t->exit_code == KSFT_FAIL) + diagnostic = NULL; else diagnostic = "unknown"; - if (t->exit_code == KSFT_SKIP) - ksft_test_result_code(t->exit_code, test_name, - "%s", diagnostic); - else - ksft_test_result(__test_passed(t), "%s\n", test_name); + ksft_test_result_code(t->exit_code, test_name, + diagnostic ? "%s" : "", diagnostic); } static int test_harness_run(int argc, char **argv) -- cgit v1.2.3 From 2709473c938602c3955b87000009e9bf3cd9bb32 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:18 -0800 Subject: selftests: kselftest_harness: support using xfail Currently some tests report skip for things they expect to fail e.g. when given combination of parameters is known to be unsupported. This is confusing because in an ideal test environment and fully featured kernel no tests should be skipped. Selftest summary line already includes xfail and xpass counters, e.g.: Totals: pass:725 fail:0 xfail:0 xpass:0 skip:0 error:0 but there's no way to use it from within the harness. Add a new per-fixture+variant combination list of test cases we expect to fail. Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest_harness.h | 49 ++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index b643a577f9e1..634be793ad58 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -803,6 +803,37 @@ struct __fixture_metadata { .prev = &_fixture_global, }; +struct __test_xfail { + struct __fixture_metadata *fixture; + struct __fixture_variant_metadata *variant; + struct __test_metadata *test; + struct __test_xfail *prev, *next; +}; + +/** + * XFAIL_ADD() - mark variant + test case combination as expected to fail + * @fixture_name: name of the fixture + * @variant_name: name of the variant + * @test_name: name of the test case + * + * Mark a combination of variant + test case for a given fixture as expected + * to fail. Tests marked this way will report XPASS / XFAIL return codes, + * instead of PASS / FAIL,and use respective counters. + */ +#define XFAIL_ADD(fixture_name, variant_name, test_name) \ + static struct __test_xfail \ + _##fixture_name##_##variant_name##_##test_name##_xfail = \ + { \ + .fixture = &_##fixture_name##_fixture_object, \ + .variant = &_##fixture_name##_##variant_name##_object, \ + .test = &_##fixture_name##_##test_name##_object, \ + }; \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \ + { \ + __register_xfail(&_##fixture_name##_##variant_name##_##test_name##_xfail); \ + } + static struct __fixture_metadata *__fixture_list = &_fixture_global; static int __constructor_order; @@ -817,6 +848,7 @@ static inline void __register_fixture(struct __fixture_metadata *f) struct __fixture_variant_metadata { const char *name; const void *data; + struct __test_xfail *xfails; struct __fixture_variant_metadata *prev, *next; }; @@ -866,6 +898,11 @@ static inline void __register_test(struct __test_metadata *t) __LIST_APPEND(t->fixture->tests, t); } +static inline void __register_xfail(struct __test_xfail *xf) +{ + __LIST_APPEND(xf->variant->xfails, xf); +} + static inline int __bail(int for_realz, struct __test_metadata *t) { /* if this is ASSERT, return immediately. */ @@ -941,7 +978,9 @@ void __wait_for_test(struct __test_metadata *t) fprintf(TH_LOG_STREAM, "# %s: Test terminated by timeout\n", t->name); } else if (WIFEXITED(status)) { - if (WEXITSTATUS(status) == KSFT_SKIP) { + if (WEXITSTATUS(status) == KSFT_SKIP || + WEXITSTATUS(status) == KSFT_XPASS || + WEXITSTATUS(status) == KSFT_XFAIL) { t->exit_code = WEXITSTATUS(status); } else if (t->termsig != -1) { t->exit_code = KSFT_FAIL; @@ -1110,6 +1149,7 @@ void __run_test(struct __fixture_metadata *f, struct __fixture_variant_metadata *variant, struct __test_metadata *t) { + struct __test_xfail *xfail; char test_name[LINE_MAX]; const char *diagnostic; @@ -1141,6 +1181,13 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg(" %4s %s\n", __test_passed(t) ? "OK" : "FAIL", test_name); + /* Check if we're expecting this test to fail */ + for (xfail = variant->xfails; xfail; xfail = xfail->next) + if (xfail->test == t) + break; + if (xfail) + t->exit_code = __test_passed(t) ? KSFT_XPASS : KSFT_XFAIL; + if (t->results->reason[0]) diagnostic = t->results->reason; else if (t->exit_code == KSFT_PASS || t->exit_code == KSFT_FAIL) -- cgit v1.2.3 From c05bf0e933129dbb71d057949d90531b03462538 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Feb 2024 16:59:19 -0800 Subject: selftests: ip_local_port_range: use XFAIL instead of SKIP SCTP does not support IP_LOCAL_PORT_RANGE and we know it, so use XFAIL instead of SKIP. Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/net/ip_local_port_range.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c index 6ebd58869a63..193b82745fd8 100644 --- a/tools/testing/selftests/net/ip_local_port_range.c +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -365,9 +365,6 @@ TEST_F(ip_local_port_range, late_bind) __u32 range; __u16 port; - if (variant->so_protocol == IPPROTO_SCTP) - SKIP(return, "SCTP doesn't support IP_BIND_ADDRESS_NO_PORT"); - fd = socket(variant->so_domain, variant->so_type, 0); ASSERT_GE(fd, 0) TH_LOG("socket failed"); @@ -414,6 +411,9 @@ TEST_F(ip_local_port_range, late_bind) ASSERT_TRUE(!err) TH_LOG("close failed"); } +XFAIL_ADD(ip_local_port_range, ip4_stcp, late_bind); +XFAIL_ADD(ip_local_port_range, ip6_stcp, late_bind); + TEST_F(ip_local_port_range, get_port_range) { __u16 lo, hi; -- cgit v1.2.3 From dfb429ea4f2db6a946ada94cce8955e87906e6b5 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 28 Feb 2024 15:22:52 -0800 Subject: netdevsim: add selftest for forwarding skb between connected ports Connect two netdevsim ports in different namespaces together, then send packets between them using socat. Signed-off-by: David Wei Reviewed-by: Maciek Machnikowski Signed-off-by: David S. Miller --- .../selftests/drivers/net/netdevsim/Makefile | 1 + .../selftests/drivers/net/netdevsim/peer.sh | 143 +++++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netdevsim/peer.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index 7a29a05bea8b..5bace0b7fb57 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -10,6 +10,7 @@ TEST_PROGS = devlink.sh \ fib.sh \ hw_stats_l3.sh \ nexthop.sh \ + peer.sh \ psample.sh \ tc-mq-visibility.sh \ udp_tunnel_nic.sh \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh new file mode 100755 index 000000000000..aed62d9e6c0a --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ../../../net/net_helper.sh + +NSIM_DEV_1_ID=$((256 + RANDOM % 256)) +NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID +NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +socat_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + return 0 +} + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_DEV_1_SYS/net -exec basename {} \;) + NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_DEV_2_SYS/net -exec basename {} \;) + + ip link set $NSIM_DEV_1_NAME netns nssv + ip link set $NSIM_DEV_2_NAME netns nscl + + ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME + ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME + + ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up + ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +socat_check || exit 4 + +modprobe netdevsim + +# linking + +echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_DEV_1_FD=$((256 + RANDOM % 256)) +exec {NSIM_DEV_1_FD} $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with non-existent netdevsim should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with non-existent netnsid should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with self should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +# argument error checking + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "invalid arg should fail" + cleanup_ns + exit 1 +fi + +# send/recv packets + +tmp_file=$(mktemp) +ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file & +pid=$! +res=0 + +wait_local_port_listen nssv 1234 tcp + +echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234 + +count=$(cat $tmp_file | wc -c) +if [[ $count -ne 3 ]]; then + echo "expected 3 bytes, got $count" + res=1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL + +kill $pid +echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit $res -- cgit v1.2.3 From 8ee60f9c41fb01440e8a8f97127869c9b1978362 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 28 Feb 2024 15:22:53 -0800 Subject: netdevsim: fix rtnetlink.sh selftest I cleared IFF_NOARP flag from netdevsim dev->flags in order to support skb forwarding. This breaks the rtnetlink.sh selftest kci_test_ipsec_offload() test because ipsec does not connect to peers it cannot transmit to. Fix the issue by adding a neigh entry manually. ipsec_offload test now successfully pass. Signed-off-by: David Wei Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 874a2952aa8e..bdf6f10d0558 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -801,6 +801,8 @@ kci_test_ipsec_offload() end_test "FAIL: ipsec_offload SA offload missing from list output" fi + # we didn't create a peer, make sure we can Tx + ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55 # use ping to exercise the Tx path ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null -- cgit v1.2.3 From 9963b77e25c63b3f1fee306a8e83e9e0b45ebad0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:31 +0100 Subject: selftests: mptcp: add userspace pm subflow flag This patch adds the address flag MPTCP_PM_ADDR_FLAG_SUBFLOW in csf() in pm_nl_ctl.c when subflow is created by a userspace PM. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 49369c4a5f26..e97856323ec3 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -453,6 +453,7 @@ int csf(int fd, int pm_family, int argc, char *argv[]) char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024]; + u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW; const char *params[5]; struct nlmsghdr *nh; struct rtattr *addr; @@ -558,6 +559,13 @@ int csf(int fd, int pm_family, int argc, char *argv[]) off += NLMSG_ALIGN(rta->rta_len); } + /* addr flags */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + addr->rta_len = off - addr_start; } -- cgit v1.2.3 From 950c332125f66d7419a24fc933f05606ae199b40 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:32 +0100 Subject: selftests: mptcp: add token for dump_addr The command dump_addr() of pm_nl_ctl can be used like this in in-kernel PM: pm_nl_ctl dump This patch adds token argument for it to support userspace PM: pm_nl_ctl dump token $token If 'token $token' is passed to dump_addr(), copy it into the kernel netlink. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index e97856323ec3..8d7d1b4ed28e 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -1127,8 +1127,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) 1024]; pid_t pid = getpid(); struct nlmsghdr *nh; + u_int32_t token = 0; + struct rtattr *rta; int off = 0; + if (argc != 2 && argc != 4) + syntax(argv); + + if (argc == 4 && !strcmp(argv[2], "token")) + token = strtoul(argv[3], NULL, 10); + memset(data, 0, sizeof(data)); nh = (void *)data; off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR, @@ -1138,6 +1146,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) nh->nlmsg_pid = pid; nh->nlmsg_len = off; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } -- cgit v1.2.3 From 2d0c1d27ea4eced07bd841d1cfccb838b437dfae Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:33 +0100 Subject: selftests: mptcp: add mptcp_lib_check_output helper Extract the main part of check() in pm_netlink.sh into a new helper named mptcp_lib_check_output in mptcp_lib.sh. This helper will be used for userspace dump addresses tests. Co-developed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 23 +++++++++++++++++++++++ tools/testing/selftests/net/mptcp/pm_netlink.sh | 18 +++++++----------- 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 108a1e12436c..438f557aac90 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -319,3 +319,26 @@ mptcp_lib_wait_local_port_listen() { sleep 0.1 done } + +mptcp_lib_check_output() { + local err="${1}" + local cmd="${2}" + local expected="${3}" + local cmd_ret=0 + local out + + if ! out=$(${cmd} 2>"${err}"); then + cmd_ret=${?} + fi + + if [ ${cmd_ret} -ne 0 ]; then + mptcp_lib_print_err "[FAIL] command execution '${cmd}' stderr" + cat "${err}" + return 2 + elif [ "${out}" = "${expected}" ]; then + return 0 + else + mptcp_lib_print_err "[FAIL] expected '${expected}' got '${out}'" + return 1 + fi +} diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index ebfefae71e13..705106d60db5 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -54,21 +54,17 @@ check() local cmd="$1" local expected="$2" local msg="$3" - local out=`$cmd 2>$err` - local cmd_ret=$? + local rc=0 printf "%-50s" "$msg" - if [ $cmd_ret -ne 0 ]; then - echo "[FAIL] command execution '$cmd' stderr " - cat $err - mptcp_lib_result_fail "${msg} # error ${cmd_ret}" + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + mptcp_lib_result_fail "${msg} # error ${rc}" ret=1 - elif [ "$out" = "$expected" ]; then - echo "[ OK ]" + elif [ ${rc} -eq 0 ]; then + mptcp_lib_print_ok "[ OK ]" mptcp_lib_result_pass "${msg}" - else - echo -n "[FAIL] " - echo "expected '$expected' got '$out'" + elif [ ${rc} -eq 1 ]; then mptcp_lib_result_fail "${msg} # different output" ret=1 fi -- cgit v1.2.3 From 38f027fca1b724c6814fff4b8ad16b59c14a3e2a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:34 +0100 Subject: selftests: mptcp: dump userspace addrs list This patch adds a new helper userspace_pm_dump() to dump addresses for the userspace PM. Use this helper to check whether an ID 0 subflow is listed in the output of dump command after creating an ID 0 subflow in "userspace pm create id 0 subflow" test. Dump userspace PM addresses list in "userspace pm add & remove address" test and in "userspace pm create destroy subflow" test. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 61 +++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 1267d5708e13..8b6430642706 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -21,6 +21,7 @@ cinfail="" cinsent="" tmpfile="" cout="" +err="" capout="" ns1="" ns2="" @@ -189,6 +190,7 @@ init() { cin=$(mktemp) cinsent=$(mktemp) cout=$(mktemp) + err=$(mktemp) evts_ns1=$(mktemp) evts_ns2=$(mktemp) @@ -204,6 +206,7 @@ cleanup() rm -f "$sin" "$sout" "$cinsent" "$cinfail" rm -f "$tmpfile" rm -rf $evts_ns1 $evts_ns2 + rm -f "$err" cleanup_partial } @@ -3356,6 +3359,50 @@ userspace_pm_rm_sf() wait_rm_sf $1 "${cnt}" } +check_output() +{ + local cmd="$1" + local expected="$2" + local msg="$3" + local rc=0 + + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + fail_test "fail to check output # error ${rc}" + elif [ ${rc} -eq 0 ]; then + print_ok + elif [ ${rc} -eq 1 ]; then + fail_test "fail to check output # different output" + fi +} + +# $1: ns +userspace_pm_dump() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl dump token $tk +} + +userspace_pm_chk_dump_addr() +{ + local ns="${1}" + local exp="${2}" + local check="${3}" + + print_check "dump addrs ${check}" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then + check_output "userspace_pm_dump ${ns}" "${exp}" + else + print_skip + fi +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3447,10 +3494,16 @@ userspace_tests() chk_mptcp_info subflows 2 subflows 2 chk_subflows_total 3 3 chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + userspace_pm_chk_dump_addr "${ns1}" \ + $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \ + "signal" userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns1}" \ + "id 20 flags signal 10.0.3.1" "after rm_addr 10" userspace_pm_rm_addr $ns1 20 userspace_pm_rm_sf $ns1 10.0.3.1 $SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" chk_rm_nr 2 2 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 @@ -3471,8 +3524,14 @@ userspace_tests() chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 20 flags subflow 10.0.3.2" \ + "subflow" userspace_pm_rm_addr $ns2 20 userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns2}" \ + "" \ + "after rm_addr 20" chk_rm_nr 1 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 @@ -3492,6 +3551,8 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 userspace_pm_add_sf $ns2 10.0.3.2 0 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 0 flags subflow 10.0.3.2" "id 0 subflow" chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 -- cgit v1.2.3 From b055671b39363ada7270b815448042aade4d60eb Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:38 +0100 Subject: selftests: mptcp: add token for get_addr The command get_addr() of pm_nl_ctl can be used like this in in-kernel PM: pm_nl_ctl get $id This patch adds token argument for it to support userspace PM: pm_nl_ctl get $id token $token If 'token $token' is passed to get_addr(), copy it into the kernel netlink. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 8d7d1b4ed28e..7426a2cbd4a0 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -1087,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int32_t token = 0; int nest_start; u_int8_t id; int off = 0; @@ -1097,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) MPTCP_PM_VER); /* the only argument is the address id */ - if (argc != 3) + if (argc != 3 && argc != 5) syntax(argv); id = atoi(argv[2]); + if (argc == 5 && !strcmp(argv[3], "token")) + token = strtoul(argv[4], NULL, 10); nest_start = off; nest = (void *)(data + off); @@ -1116,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) off += NLMSG_ALIGN(rta->rta_len); nest->rta_len = off - nest_start; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } -- cgit v1.2.3 From 4cc5cc7ca052c816e20ed0cbc160299b454cbb75 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:39 +0100 Subject: selftests: mptcp: userspace pm get addr tests This patch adds a new helper userspace_pm_get_addr() in mptcp_join.sh. In it, parse the token value from the output of 'pm_nl_ctl events', then pass it to pm_nl_ctl get_addr command. Use this helper in userspace pm dump tests. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 8b6430642706..955ee651dcd5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3388,6 +3388,18 @@ userspace_pm_dump() ip netns exec $1 ./pm_nl_ctl dump token $tk } +# $1: ns ; $2: id +userspace_pm_get_addr() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl get $2 token $tk +} + userspace_pm_chk_dump_addr() { local ns="${1}" @@ -3403,6 +3415,21 @@ userspace_pm_chk_dump_addr() fi } +userspace_pm_chk_get_addr() +{ + local ns="${1}" + local id="${2}" + local exp="${3}" + + print_check "get id ${id} addr" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then + check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}" + else + print_skip + fi +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3497,6 +3524,8 @@ userspace_tests() userspace_pm_chk_dump_addr "${ns1}" \ $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \ "signal" + userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" + userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" \ @@ -3527,6 +3556,7 @@ userspace_tests() userspace_pm_chk_dump_addr "${ns2}" \ "id 20 flags subflow 10.0.3.2" \ "subflow" + userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" userspace_pm_rm_addr $ns2 20 userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns2}" \ -- cgit v1.2.3 From 25703adf45f8430ec59effa20920c80139d13cdc Mon Sep 17 00:00:00 2001 From: Chen Shen Date: Sat, 2 Mar 2024 14:22:18 +0800 Subject: libbpf: Correct debug message in btf__load_vmlinux_btf In the function btf__load_vmlinux_btf, the debug message incorrectly refers to 'path' instead of 'sysfs_btf_path'. Signed-off-by: Chen Shen Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20240302062218.3587-1-peterchenshen@gmail.com --- tools/lib/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index a17b4c9c4213..2d0840ef599a 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -4968,7 +4968,7 @@ struct btf *btf__load_vmlinux_btf(void) pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err); return libbpf_err_ptr(err); } - pr_debug("loaded kernel BTF from '%s'\n", path); + pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path); return btf; } -- cgit v1.2.3 From 8f79870ec8a9409983ad5981e1b7d599cbf047bd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 1 Mar 2024 13:45:51 -0800 Subject: selftests/bpf: Extend uprobe/uretprobe triggering benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Settle on three "flavors" of uprobe/uretprobe, installed on different kinds of instruction: nop, push, and ret. All three are testing different internal code paths emulating or single-stepping instructions, so are interesting to compare and benchmark separately. To ensure `push rbp` instruction we ensure that uprobe_target_push() is not a leaf function by calling (global __weak) noop function and returning something afterwards (if we don't do that, compiler will just do a tail call optimization). Also, we need to make sure that compiler isn't skipping frame pointer generation, so let's add `-fno-omit-frame-pointers` to Makefile. Just to give an idea of where we currently stand in terms of relative performance of different uprobe/uretprobe cases vs a cheap syscall (getpgid()) baseline, here are results from my local machine: $ benchs/run_bench_uprobes.sh base : 1.561 ± 0.020M/s uprobe-nop : 0.947 ± 0.007M/s uprobe-push : 0.951 ± 0.004M/s uprobe-ret : 0.443 ± 0.007M/s uretprobe-nop : 0.471 ± 0.013M/s uretprobe-push : 0.483 ± 0.004M/s uretprobe-ret : 0.306 ± 0.007M/s Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240301214551.1686095-1-andrii@kernel.org --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/bench.c | 20 ++-- tools/testing/selftests/bpf/benchs/bench_trigger.c | 118 ++++++++++++++------- .../selftests/bpf/benchs/run_bench_uprobes.sh | 9 ++ 4 files changed, 103 insertions(+), 46 deletions(-) create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 84cb5500e8ef..3b9eb40d6343 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -34,7 +34,7 @@ LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null) LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) CFLAGS += -g $(OPT_FLAGS) -rdynamic \ - -Wall -Werror \ + -Wall -Werror -fno-omit-frame-pointer \ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 1724d50ba942..60df99b6ac72 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -499,10 +499,12 @@ extern const struct bench bench_trig_fentry; extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; extern const struct bench bench_trig_uprobe_base; -extern const struct bench bench_trig_uprobe_with_nop; -extern const struct bench bench_trig_uretprobe_with_nop; -extern const struct bench bench_trig_uprobe_without_nop; -extern const struct bench bench_trig_uretprobe_without_nop; +extern const struct bench bench_trig_uprobe_nop; +extern const struct bench bench_trig_uretprobe_nop; +extern const struct bench bench_trig_uprobe_push; +extern const struct bench bench_trig_uretprobe_push; +extern const struct bench bench_trig_uprobe_ret; +extern const struct bench bench_trig_uretprobe_ret; extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; extern const struct bench bench_pb_libbpf; @@ -541,10 +543,12 @@ static const struct bench *benchs[] = { &bench_trig_fentry_sleep, &bench_trig_fmodret, &bench_trig_uprobe_base, - &bench_trig_uprobe_with_nop, - &bench_trig_uretprobe_with_nop, - &bench_trig_uprobe_without_nop, - &bench_trig_uretprobe_without_nop, + &bench_trig_uprobe_nop, + &bench_trig_uretprobe_nop, + &bench_trig_uprobe_push, + &bench_trig_uretprobe_push, + &bench_trig_uprobe_ret, + &bench_trig_uretprobe_ret, &bench_rb_libbpf, &bench_rb_custom, &bench_pb_libbpf, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index dbd362771d6a..064a1ef7a6fb 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -113,12 +113,25 @@ static void trigger_fmodret_setup(void) * GCC doesn't generate stack setup preample for these functions due to them * having no input arguments and doing nothing in the body. */ -__weak void uprobe_target_with_nop(void) +__weak void uprobe_target_nop(void) { asm volatile ("nop"); } -__weak void uprobe_target_without_nop(void) +__weak void opaque_noop_func(void) +{ +} + +__weak int uprobe_target_push(void) +{ + /* overhead of function call is negligible compared to uprobe + * triggering, so this shouldn't affect benchmark results much + */ + opaque_noop_func(); + return 1; +} + +__weak void uprobe_target_ret(void) { asm volatile (""); } @@ -126,27 +139,34 @@ __weak void uprobe_target_without_nop(void) static void *uprobe_base_producer(void *input) { while (true) { - uprobe_target_with_nop(); + uprobe_target_nop(); atomic_inc(&base_hits.value); } return NULL; } -static void *uprobe_producer_with_nop(void *input) +static void *uprobe_producer_nop(void *input) +{ + while (true) + uprobe_target_nop(); + return NULL; +} + +static void *uprobe_producer_push(void *input) { while (true) - uprobe_target_with_nop(); + uprobe_target_push(); return NULL; } -static void *uprobe_producer_without_nop(void *input) +static void *uprobe_producer_ret(void *input) { while (true) - uprobe_target_without_nop(); + uprobe_target_ret(); return NULL; } -static void usetup(bool use_retprobe, bool use_nop) +static void usetup(bool use_retprobe, void *target_addr) { size_t uprobe_offset; struct bpf_link *link; @@ -159,11 +179,7 @@ static void usetup(bool use_retprobe, bool use_nop) exit(1); } - if (use_nop) - uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop); - else - uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop); - + uprobe_offset = get_uprobe_offset(target_addr); link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, use_retprobe, -1 /* all PIDs */, @@ -176,24 +192,34 @@ static void usetup(bool use_retprobe, bool use_nop) ctx.skel->links.bench_trigger_uprobe = link; } -static void uprobe_setup_with_nop(void) +static void uprobe_setup_nop(void) +{ + usetup(false, &uprobe_target_nop); +} + +static void uretprobe_setup_nop(void) +{ + usetup(true, &uprobe_target_nop); +} + +static void uprobe_setup_push(void) { - usetup(false, true); + usetup(false, &uprobe_target_push); } -static void uretprobe_setup_with_nop(void) +static void uretprobe_setup_push(void) { - usetup(true, true); + usetup(true, &uprobe_target_push); } -static void uprobe_setup_without_nop(void) +static void uprobe_setup_ret(void) { - usetup(false, false); + usetup(false, &uprobe_target_ret); } -static void uretprobe_setup_without_nop(void) +static void uretprobe_setup_ret(void) { - usetup(true, false); + usetup(true, &uprobe_target_ret); } const struct bench bench_trig_base = { @@ -274,37 +300,55 @@ const struct bench bench_trig_uprobe_base = { .report_final = hits_drops_report_final, }; -const struct bench bench_trig_uprobe_with_nop = { - .name = "trig-uprobe-with-nop", - .setup = uprobe_setup_with_nop, - .producer_thread = uprobe_producer_with_nop, +const struct bench bench_trig_uprobe_nop = { + .name = "trig-uprobe-nop", + .setup = uprobe_setup_nop, + .producer_thread = uprobe_producer_nop, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uretprobe_nop = { + .name = "trig-uretprobe-nop", + .setup = uretprobe_setup_nop, + .producer_thread = uprobe_producer_nop, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uprobe_push = { + .name = "trig-uprobe-push", + .setup = uprobe_setup_push, + .producer_thread = uprobe_producer_push, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; -const struct bench bench_trig_uretprobe_with_nop = { - .name = "trig-uretprobe-with-nop", - .setup = uretprobe_setup_with_nop, - .producer_thread = uprobe_producer_with_nop, +const struct bench bench_trig_uretprobe_push = { + .name = "trig-uretprobe-push", + .setup = uretprobe_setup_push, + .producer_thread = uprobe_producer_push, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; -const struct bench bench_trig_uprobe_without_nop = { - .name = "trig-uprobe-without-nop", - .setup = uprobe_setup_without_nop, - .producer_thread = uprobe_producer_without_nop, +const struct bench bench_trig_uprobe_ret = { + .name = "trig-uprobe-ret", + .setup = uprobe_setup_ret, + .producer_thread = uprobe_producer_ret, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; -const struct bench bench_trig_uretprobe_without_nop = { - .name = "trig-uretprobe-without-nop", - .setup = uretprobe_setup_without_nop, - .producer_thread = uprobe_producer_without_nop, +const struct bench bench_trig_uretprobe_ret = { + .name = "trig-uretprobe-ret", + .setup = uretprobe_setup_ret, + .producer_thread = uprobe_producer_ret, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh new file mode 100755 index 000000000000..9bdcc74e03a4 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -eufo pipefail + +for i in base {uprobe,uretprobe}-{nop,push,ret} +do + summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) + printf "%-15s: %s\n" $i "$summary" +done -- cgit v1.2.3 From 01031fd473059bf69bb6edc6d51d4bd58ad92e50 Mon Sep 17 00:00:00 2001 From: Song Yoong Siang Date: Sun, 3 Mar 2024 16:32:24 +0800 Subject: selftests/bpf: xdp_hw_metadata reduce sleep interval In current ping-pong design, xdp_hw_metadata will wait until the packet transmission completely done, then only start to receive the next packet. The current sleep interval is 10ms, which is unnecessary large. Typically, a NIC does not need such a long time to transmit a packet. Furthermore, during this 10ms sleep time, the app is unable to receive incoming packets. Therefore, this commit reduce sleep interval to 10us, so that xdp_hw_metadata is able to support periodic packets with shorter interval. 10us * 500 = 5ms should be enough for packet transmission and status retrieval. Signed-off-by: Song Yoong Siang Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20240303083225.1184165-2-yoong.siang.song@intel.com --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 878d68db0325..bdf5d8180067 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -480,7 +480,7 @@ peek: for (int j = 0; j < 500; j++) { if (complete_tx(xsk, clock_id)) break; - usleep(10*1000); + usleep(10); } } } -- cgit v1.2.3 From 93bc28d859e57f1a654d3b63600d14c85c5630a4 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Sat, 24 Feb 2024 14:34:18 -0800 Subject: selftests/bpf: Test struct_ops maps with a large number of struct_ops program. Create and load a struct_ops map with a large number of struct_ops programs to generate trampolines taking a size over multiple pages. The map includes 40 programs. Their trampolines takes 6.6k+, more than 1.5 pages, on x86. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240224223418.526631-4-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/bpf_testmod/bpf_testmod.h | 44 +++++++++ .../bpf/prog_tests/test_struct_ops_multi_pages.c | 30 ++++++ .../selftests/bpf/progs/struct_ops_multi_pages.c | 102 +++++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index 971458acfac3..622d24e29d35 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -43,6 +43,50 @@ struct bpf_testmod_ops { int b; } unsupported; int data; + + /* The following pointers are used to test the maps having multiple + * pages of trampolines. + */ + int (*tramp_1)(int value); + int (*tramp_2)(int value); + int (*tramp_3)(int value); + int (*tramp_4)(int value); + int (*tramp_5)(int value); + int (*tramp_6)(int value); + int (*tramp_7)(int value); + int (*tramp_8)(int value); + int (*tramp_9)(int value); + int (*tramp_10)(int value); + int (*tramp_11)(int value); + int (*tramp_12)(int value); + int (*tramp_13)(int value); + int (*tramp_14)(int value); + int (*tramp_15)(int value); + int (*tramp_16)(int value); + int (*tramp_17)(int value); + int (*tramp_18)(int value); + int (*tramp_19)(int value); + int (*tramp_20)(int value); + int (*tramp_21)(int value); + int (*tramp_22)(int value); + int (*tramp_23)(int value); + int (*tramp_24)(int value); + int (*tramp_25)(int value); + int (*tramp_26)(int value); + int (*tramp_27)(int value); + int (*tramp_28)(int value); + int (*tramp_29)(int value); + int (*tramp_30)(int value); + int (*tramp_31)(int value); + int (*tramp_32)(int value); + int (*tramp_33)(int value); + int (*tramp_34)(int value); + int (*tramp_35)(int value); + int (*tramp_36)(int value); + int (*tramp_37)(int value); + int (*tramp_38)(int value); + int (*tramp_39)(int value); + int (*tramp_40)(int value); }; #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c new file mode 100644 index 000000000000..645d32b5160c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include + +#include "struct_ops_multi_pages.skel.h" + +static void do_struct_ops_multi_pages(void) +{ + struct struct_ops_multi_pages *skel; + struct bpf_link *link; + + /* The size of all trampolines of skel->maps.multi_pages should be + * over 1 page (at least for x86). + */ + skel = struct_ops_multi_pages__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_multi_pages_open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.multi_pages); + ASSERT_OK_PTR(link, "attach_multi_pages"); + + bpf_link__destroy(link); + struct_ops_multi_pages__destroy(skel); +} + +void test_struct_ops_multi_pages(void) +{ + if (test__start_subtest("multi_pages")) + do_struct_ops_multi_pages(); +} diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c new file mode 100644 index 000000000000..9efcc6e4d356 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#define TRAMP(x) \ + SEC("struct_ops/tramp_" #x) \ + int BPF_PROG(tramp_ ## x, int a) \ + { \ + return a; \ + } + +TRAMP(1) +TRAMP(2) +TRAMP(3) +TRAMP(4) +TRAMP(5) +TRAMP(6) +TRAMP(7) +TRAMP(8) +TRAMP(9) +TRAMP(10) +TRAMP(11) +TRAMP(12) +TRAMP(13) +TRAMP(14) +TRAMP(15) +TRAMP(16) +TRAMP(17) +TRAMP(18) +TRAMP(19) +TRAMP(20) +TRAMP(21) +TRAMP(22) +TRAMP(23) +TRAMP(24) +TRAMP(25) +TRAMP(26) +TRAMP(27) +TRAMP(28) +TRAMP(29) +TRAMP(30) +TRAMP(31) +TRAMP(32) +TRAMP(33) +TRAMP(34) +TRAMP(35) +TRAMP(36) +TRAMP(37) +TRAMP(38) +TRAMP(39) +TRAMP(40) + +#define F_TRAMP(x) .tramp_ ## x = (void *)tramp_ ## x + +SEC(".struct_ops.link") +struct bpf_testmod_ops multi_pages = { + F_TRAMP(1), + F_TRAMP(2), + F_TRAMP(3), + F_TRAMP(4), + F_TRAMP(5), + F_TRAMP(6), + F_TRAMP(7), + F_TRAMP(8), + F_TRAMP(9), + F_TRAMP(10), + F_TRAMP(11), + F_TRAMP(12), + F_TRAMP(13), + F_TRAMP(14), + F_TRAMP(15), + F_TRAMP(16), + F_TRAMP(17), + F_TRAMP(18), + F_TRAMP(19), + F_TRAMP(20), + F_TRAMP(21), + F_TRAMP(22), + F_TRAMP(23), + F_TRAMP(24), + F_TRAMP(25), + F_TRAMP(26), + F_TRAMP(27), + F_TRAMP(28), + F_TRAMP(29), + F_TRAMP(30), + F_TRAMP(31), + F_TRAMP(32), + F_TRAMP(33), + F_TRAMP(34), + F_TRAMP(35), + F_TRAMP(36), + F_TRAMP(37), + F_TRAMP(38), + F_TRAMP(39), + F_TRAMP(40), +}; -- cgit v1.2.3 From fb0f02308126736c015afc0cac3c0e8712443299 Mon Sep 17 00:00:00 2001 From: Prabhav Kumar Vaish Date: Wed, 28 Feb 2024 17:37:01 +0530 Subject: selftests: net: Correct couple of spelling mistakes Changes : - "excercise" is corrected to "exercise" in drivers/net/mlxsw/spectrum-2/tc_flower.sh - "mutliple" is corrected to "multiple" in drivers/net/netdevsim/ethtool-fec.sh Signed-off-by: Prabhav Kumar Vaish Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240228120701.422264-1-pvkumar5749404@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh | 2 +- tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 616d3581419c..31252bc8775e 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -869,7 +869,7 @@ bloom_simple_test() bloom_complex_test() { # Bloom filter index computation is affected from region ID, eRP - # ID and from the region key size. In order to excercise those parts + # ID and from the region key size. In order to exercise those parts # of the Bloom filter code, use a series of regions, each with a # different key size and send packet that should hit all of them. local index diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh index 7d7829f57550..6c52ce1b0450 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh @@ -49,7 +49,7 @@ for o in llrs rs; do Active FEC encoding: ${o^^}" done -# Test mutliple bits +# Test multiple bits $ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) -- cgit v1.2.3 From dcfaf1f758ee74cf059acf6e33faf83f988fad4a Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 29 Feb 2024 11:38:25 -0300 Subject: selftests/tc-testing: require an up to date iproute2 for blockcast tests Add the dependsOn test check for all the mirred blockcast tests. It will prevent the issue reported by LKFT which happens when an older iproute2 is used to run the current tdc. Tests are skipped if the dependsOn check fails. Reported-by: Linux Kernel Functional Testing Signed-off-by: Pedro Tammela Link: https://lore.kernel.org/r/20240229143825.1373550-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index 795cf1ce8af0..b73bd255ea36 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -657,6 +657,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -711,6 +712,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -765,6 +767,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -819,6 +822,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -873,6 +877,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -937,6 +942,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -995,6 +1001,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, -- cgit v1.2.3 From c5d3705cfd938f6ddd8fa2ff74689cc9b52c00ca Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 28 Feb 2024 17:30:12 +0800 Subject: tools: virtio: introduce vhost_net_test introduce vhost_net_test for both vhost_net tx and rx basing on virtio_test to test vhost_net changing in the kernel. Steps for vhost_net tx testing: 1. Prepare a out buf. 2. Kick the vhost_net to do tx processing. 3. Do the receiving in the tun side. 4. verify the data received by tun is correct. Steps for vhost_net rx testing: 1. Prepare a in buf. 2. Do the sending in the tun side. 3. Kick the vhost_net to do rx processing. 4. verify the data received by vhost_net is correct. Signed-off-by: Yunsheng Lin Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- tools/virtio/.gitignore | 1 + tools/virtio/Makefile | 8 +- tools/virtio/linux/virtio_config.h | 4 + tools/virtio/vhost_net_test.c | 532 +++++++++++++++++++++++++++++++++++++ 4 files changed, 542 insertions(+), 3 deletions(-) create mode 100644 tools/virtio/vhost_net_test.c (limited to 'tools') diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore index 9934d48d9a55..7e47b281c442 100644 --- a/tools/virtio/.gitignore +++ b/tools/virtio/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only *.d virtio_test +vhost_net_test vringh_test virtio-trace/trace-agent diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index d128925980e0..e25e99c1c3b7 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 all: test mod -test: virtio_test vringh_test +test: virtio_test vringh_test vhost_net_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o +vhost_net_test: virtio_ring.o vhost_net_test.o try-run = $(shell set -e; \ if ($(1)) >/dev/null 2>&1; \ @@ -49,6 +50,7 @@ oot-clean: OOT_BUILD+=clean .PHONY: all test mod clean vhost oot oot-clean oot-build clean: - ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ - vhost_test/Module.symvers vhost_test/modules.order *.d + ${RM} *.o vringh_test virtio_test vhost_net_test vhost_test/*.o \ + vhost_test/.*.cmd vhost_test/Module.symvers \ + vhost_test/modules.order *.d -include *.d diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index 2a8a70e2a950..42a564f22f2d 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VIRTIO_CONFIG_H +#define LINUX_VIRTIO_CONFIG_H #include #include #include @@ -95,3 +97,5 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) { return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } + +#endif diff --git a/tools/virtio/vhost_net_test.c b/tools/virtio/vhost_net_test.c new file mode 100644 index 000000000000..389d99a6d7c7 --- /dev/null +++ b/tools/virtio/vhost_net_test.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HDR_LEN sizeof(struct virtio_net_hdr_mrg_rxbuf) +#define TEST_BUF_LEN 256 +#define TEST_PTYPE ETH_P_LOOPBACK +#define DESC_NUM 256 + +/* Used by implementation of kmalloc() in tools/virtio/linux/kernel.h */ +void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; + +struct vq_info { + int kick; + int call; + int idx; + long started; + long completed; + struct pollfd fds; + void *ring; + /* copy used for control */ + struct vring vring; + struct virtqueue *vq; +}; + +struct vdev_info { + struct virtio_device vdev; + int control; + struct vq_info vqs[2]; + int nvqs; + void *buf; + size_t buf_size; + char *test_buf; + char *res_buf; + struct vhost_memory *mem; + int sock; + int ifindex; + unsigned char mac[ETHER_ADDR_LEN]; +}; + +static int tun_alloc(struct vdev_info *dev, char *tun_name) +{ + struct ifreq ifr; + int len = HDR_LEN; + int fd, e; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + perror("Cannot open /dev/net/tun"); + return fd; + } + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ); + + e = ioctl(fd, TUNSETIFF, &ifr); + if (e < 0) { + perror("ioctl[TUNSETIFF]"); + close(fd); + return e; + } + + e = ioctl(fd, TUNSETVNETHDRSZ, &len); + if (e < 0) { + perror("ioctl[TUNSETVNETHDRSZ]"); + close(fd); + return e; + } + + e = ioctl(fd, SIOCGIFHWADDR, &ifr); + if (e < 0) { + perror("ioctl[SIOCGIFHWADDR]"); + close(fd); + return e; + } + + memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); + return fd; +} + +static void vdev_create_socket(struct vdev_info *dev, char *tun_name) +{ + struct ifreq ifr; + + dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE)); + assert(dev->sock != -1); + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ); + assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0); + + dev->ifindex = ifr.ifr_ifindex; + + /* Set the flags that bring the device up */ + assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0); + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0); +} + +static void vdev_send_packet(struct vdev_info *dev) +{ + char *sendbuf = dev->test_buf + HDR_LEN; + struct sockaddr_ll saddrll = {0}; + int sockfd = dev->sock; + int ret; + + saddrll.sll_family = PF_PACKET; + saddrll.sll_ifindex = dev->ifindex; + saddrll.sll_halen = ETH_ALEN; + saddrll.sll_protocol = htons(TEST_PTYPE); + + ret = sendto(sockfd, sendbuf, TEST_BUF_LEN, 0, + (struct sockaddr *)&saddrll, + sizeof(struct sockaddr_ll)); + assert(ret >= 0); +} + +static bool vq_notify(struct virtqueue *vq) +{ + struct vq_info *info = vq->priv; + unsigned long long v = 1; + int r; + + r = write(info->kick, &v, sizeof(v)); + assert(r == sizeof(v)); + + return true; +} + +static void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) +{ + struct vhost_vring_addr addr = { + .index = info->idx, + .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, + .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail, + .used_user_addr = (uint64_t)(unsigned long)info->vring.used, + }; + struct vhost_vring_state state = { .index = info->idx }; + struct vhost_vring_file file = { .index = info->idx }; + int r; + + state.num = info->vring.num; + r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); + assert(r >= 0); + + state.num = 0; + r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); + assert(r >= 0); + + r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); + assert(r >= 0); + + file.fd = info->kick; + r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); + assert(r >= 0); +} + +static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) +{ + if (info->vq) + vring_del_virtqueue(info->vq); + + memset(info->ring, 0, vring_size(num, 4096)); + vring_init(&info->vring, num, info->ring, 4096); + info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false, + info->ring, vq_notify, NULL, "test"); + assert(info->vq); + info->vq->priv = info; +} + +static void vq_info_add(struct vdev_info *dev, int idx, int num, int fd) +{ + struct vhost_vring_file backend = { .index = idx, .fd = fd }; + struct vq_info *info = &dev->vqs[idx]; + int r; + + info->idx = idx; + info->kick = eventfd(0, EFD_NONBLOCK); + r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); + assert(r >= 0); + vq_reset(info, num, &dev->vdev); + vhost_vq_setup(dev, info); + + r = ioctl(dev->control, VHOST_NET_SET_BACKEND, &backend); + assert(!r); +} + +static void vdev_info_init(struct vdev_info *dev, unsigned long long features) +{ + struct ether_header *eh; + int i, r; + + dev->vdev.features = features; + INIT_LIST_HEAD(&dev->vdev.vqs); + spin_lock_init(&dev->vdev.vqs_list_lock); + + dev->buf_size = (HDR_LEN + TEST_BUF_LEN) * 2; + dev->buf = malloc(dev->buf_size); + assert(dev->buf); + dev->test_buf = dev->buf; + dev->res_buf = dev->test_buf + HDR_LEN + TEST_BUF_LEN; + + memset(dev->test_buf, 0, HDR_LEN + TEST_BUF_LEN); + eh = (struct ether_header *)(dev->test_buf + HDR_LEN); + eh->ether_type = htons(TEST_PTYPE); + memcpy(eh->ether_dhost, dev->mac, ETHER_ADDR_LEN); + memcpy(eh->ether_shost, dev->mac, ETHER_ADDR_LEN); + + for (i = sizeof(*eh); i < TEST_BUF_LEN; i++) + dev->test_buf[i + HDR_LEN] = (char)i; + + dev->control = open("/dev/vhost-net", O_RDWR); + assert(dev->control >= 0); + + r = ioctl(dev->control, VHOST_SET_OWNER, NULL); + assert(r >= 0); + + dev->mem = malloc(offsetof(struct vhost_memory, regions) + + sizeof(dev->mem->regions[0])); + assert(dev->mem); + memset(dev->mem, 0, offsetof(struct vhost_memory, regions) + + sizeof(dev->mem->regions[0])); + dev->mem->nregions = 1; + dev->mem->regions[0].guest_phys_addr = (long)dev->buf; + dev->mem->regions[0].userspace_addr = (long)dev->buf; + dev->mem->regions[0].memory_size = dev->buf_size; + + r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + assert(r >= 0); + + r = ioctl(dev->control, VHOST_SET_FEATURES, &features); + assert(r >= 0); + + dev->nvqs = 2; +} + +static void wait_for_interrupt(struct vq_info *vq) +{ + unsigned long long val; + + poll(&vq->fds, 1, 100); + + if (vq->fds.revents & POLLIN) + read(vq->fds.fd, &val, sizeof(val)); +} + +static void verify_res_buf(char *res_buf) +{ + int i; + + for (i = ETHER_HDR_LEN; i < TEST_BUF_LEN; i++) + assert(res_buf[i] == (char)i); +} + +static void run_tx_test(struct vdev_info *dev, struct vq_info *vq, + bool delayed, int bufs) +{ + long long spurious = 0; + struct scatterlist sl; + unsigned int len; + int r; + + for (;;) { + long started_before = vq->started; + long completed_before = vq->completed; + + virtqueue_disable_cb(vq->vq); + do { + while (vq->started < bufs && + (vq->started - vq->completed) < 1) { + sg_init_one(&sl, dev->test_buf, HDR_LEN + TEST_BUF_LEN); + r = virtqueue_add_outbuf(vq->vq, &sl, 1, + dev->test_buf + vq->started, + GFP_ATOMIC); + if (unlikely(r != 0)) + break; + + ++vq->started; + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (vq->started >= bufs) + r = -1; + + /* Flush out completed bufs if any */ + while (virtqueue_get_buf(vq->vq, &len)) { + int n; + + n = recvfrom(dev->sock, dev->res_buf, TEST_BUF_LEN, 0, NULL, NULL); + assert(n == TEST_BUF_LEN); + verify_res_buf(dev->res_buf); + + ++vq->completed; + r = 0; + } + } while (r == 0); + + if (vq->completed == completed_before && vq->started == started_before) + ++spurious; + + assert(vq->completed <= bufs); + assert(vq->started <= bufs); + if (vq->completed == bufs) + break; + + if (delayed) { + if (virtqueue_enable_cb_delayed(vq->vq)) + wait_for_interrupt(vq); + } else { + if (virtqueue_enable_cb(vq->vq)) + wait_for_interrupt(vq); + } + } + printf("TX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, vq->started, vq->completed); +} + +static void run_rx_test(struct vdev_info *dev, struct vq_info *vq, + bool delayed, int bufs) +{ + long long spurious = 0; + struct scatterlist sl; + unsigned int len; + int r; + + for (;;) { + long started_before = vq->started; + long completed_before = vq->completed; + + do { + while (vq->started < bufs && + (vq->started - vq->completed) < 1) { + sg_init_one(&sl, dev->res_buf, HDR_LEN + TEST_BUF_LEN); + + r = virtqueue_add_inbuf(vq->vq, &sl, 1, + dev->res_buf + vq->started, + GFP_ATOMIC); + if (unlikely(r != 0)) + break; + + ++vq->started; + + vdev_send_packet(dev); + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (vq->started >= bufs) + r = -1; + + /* Flush out completed bufs if any */ + while (virtqueue_get_buf(vq->vq, &len)) { + struct ether_header *eh; + + eh = (struct ether_header *)(dev->res_buf + HDR_LEN); + + /* tun netdev is up and running, only handle the + * TEST_PTYPE packet. + */ + if (eh->ether_type == htons(TEST_PTYPE)) { + assert(len == TEST_BUF_LEN + HDR_LEN); + verify_res_buf(dev->res_buf + HDR_LEN); + } + + ++vq->completed; + r = 0; + } + } while (r == 0); + + if (vq->completed == completed_before && vq->started == started_before) + ++spurious; + + assert(vq->completed <= bufs); + assert(vq->started <= bufs); + if (vq->completed == bufs) + break; + } + + printf("RX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, vq->started, vq->completed); +} + +static const char optstring[] = "h"; +static const struct option longopts[] = { + { + .name = "help", + .val = 'h', + }, + { + .name = "event-idx", + .val = 'E', + }, + { + .name = "no-event-idx", + .val = 'e', + }, + { + .name = "indirect", + .val = 'I', + }, + { + .name = "no-indirect", + .val = 'i', + }, + { + .name = "virtio-1", + .val = '1', + }, + { + .name = "no-virtio-1", + .val = '0', + }, + { + .name = "delayed-interrupt", + .val = 'D', + }, + { + .name = "no-delayed-interrupt", + .val = 'd', + }, + { + .name = "buf-num", + .val = 'n', + .has_arg = required_argument, + }, + { + .name = "batch", + .val = 'b', + .has_arg = required_argument, + }, + { + } +}; + +static void help(int status) +{ + fprintf(stderr, "Usage: vhost_net_test [--help]" + " [--no-indirect]" + " [--no-event-idx]" + " [--no-virtio-1]" + " [--delayed-interrupt]" + " [--buf-num]" + "\n"); + + exit(status); +} + +int main(int argc, char **argv) +{ + unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); + char tun_name[IFNAMSIZ]; + long nbufs = 0x100000; + struct vdev_info dev; + bool delayed = false; + int o, fd; + + for (;;) { + o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(2); + case 'e': + features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); + break; + case 'h': + help(0); + case 'i': + features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); + break; + case '0': + features &= ~(1ULL << VIRTIO_F_VERSION_1); + break; + case 'D': + delayed = true; + break; + case 'n': + nbufs = strtol(optarg, NULL, 10); + assert(nbufs > 0); + break; + default: + assert(0); + break; + } + } + +done: + memset(&dev, 0, sizeof(dev)); + snprintf(tun_name, IFNAMSIZ, "tun_%d", getpid()); + + fd = tun_alloc(&dev, tun_name); + assert(fd >= 0); + + vdev_info_init(&dev, features); + vq_info_add(&dev, 0, DESC_NUM, fd); + vq_info_add(&dev, 1, DESC_NUM, fd); + vdev_create_socket(&dev, tun_name); + + run_rx_test(&dev, &dev.vqs[0], delayed, nbufs); + run_tx_test(&dev, &dev.vqs[1], delayed, nbufs); + + return 0; +} -- cgit v1.2.3 From 7b2d64f93319e0d03c1b68ecfdccd5d168cb5c31 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 4 Mar 2024 11:56:07 +0200 Subject: selftests: forwarding: Remove IPv6 L3 multipath hash tests The multipath tests currently test both the L3 and L4 multipath hash policies for IPv6, but only the L4 policy for IPv4. The reason is mostly historic: When the initial multipath test was added (router_multipath.sh) the IPv6 L4 policy did not exist and was later added to the test. The other multipath tests copied this pattern although there is little value in testing both policies. Align the IPv4 and IPv6 tests and only test the L4 policy. On my system, this reduces the run time of router_multipath.sh by 89% because of the repeated ping6 invocations to randomize the flow label. Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20240304095612.462900-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/gre_multipath_nh.sh | 37 -------------------- .../net/forwarding/gre_multipath_nh_res.sh | 38 --------------------- .../selftests/net/forwarding/router_mpath_nh.sh | 35 +------------------ .../selftests/net/forwarding/router_multipath.sh | 39 +--------------------- 4 files changed, 2 insertions(+), 147 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh index d03aa2cab9fd..62281898e7a4 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh @@ -64,7 +64,6 @@ ALL_TESTS=" ping_ipv6 multipath_ipv4 multipath_ipv6 - multipath_ipv6_l4 " NUM_NETIFS=6 @@ -264,34 +263,6 @@ multipath6_test() local weight1=$1; shift local weight2=$1; shift - sysctl_set net.ipv6.fib_multipath_hash_policy 0 - ip nexthop replace id 103 group 101,$weight1/102,$weight2 - - local t0_111=$(tc_rule_stats_get $ul2 111 ingress) - local t0_222=$(tc_rule_stats_get $ul2 222 ingress) - - # Generate 16384 echo requests, each with a random flow label. - for ((i=0; i < 16384; ++i)); do - ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null - done - - local t1_111=$(tc_rule_stats_get $ul2 111 ingress) - local t1_222=$(tc_rule_stats_get $ul2 222 ingress) - - local d111=$((t1_111 - t0_111)) - local d222=$((t1_222 - t0_222)) - multipath_eval "$what" $weight1 $weight2 $d111 $d222 - - ip nexthop replace id 103 group 101/102 - sysctl_restore net.ipv6.fib_multipath_hash_policy -} - -multipath6_l4_test() -{ - local what=$1; shift - local weight1=$1; shift - local weight2=$1; shift - sysctl_set net.ipv6.fib_multipath_hash_policy 1 ip nexthop replace id 103 group 101,$weight1/102,$weight2 @@ -339,14 +310,6 @@ multipath_ipv6() multipath6_test "Weighted MP 11:45" 11 45 } -multipath_ipv6_l4() -{ - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 -} - trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh index 088b65e64d66..2085111bcd67 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh @@ -64,7 +64,6 @@ ALL_TESTS=" ping_ipv6 multipath_ipv4 multipath_ipv6 - multipath_ipv6_l4 " NUM_NETIFS=6 @@ -267,35 +266,6 @@ multipath6_test() local weight1=$1; shift local weight2=$1; shift - sysctl_set net.ipv6.fib_multipath_hash_policy 0 - ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ - type resilient - - local t0_111=$(tc_rule_stats_get $ul2 111 ingress) - local t0_222=$(tc_rule_stats_get $ul2 222 ingress) - - # Generate 16384 echo requests, each with a random flow label. - for ((i=0; i < 16384; ++i)); do - ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null - done - - local t1_111=$(tc_rule_stats_get $ul2 111 ingress) - local t1_222=$(tc_rule_stats_get $ul2 222 ingress) - - local d111=$((t1_111 - t0_111)) - local d222=$((t1_222 - t0_222)) - multipath_eval "$what" $weight1 $weight2 $d111 $d222 - - ip nexthop replace id 103 group 101/102 type resilient - sysctl_restore net.ipv6.fib_multipath_hash_policy -} - -multipath6_l4_test() -{ - local what=$1; shift - local weight1=$1; shift - local weight2=$1; shift - sysctl_set net.ipv6.fib_multipath_hash_policy 1 ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ type resilient @@ -344,14 +314,6 @@ multipath_ipv6() multipath6_test "Weighted MP 11:45" 11 45 } -multipath_ipv6_l4() -{ - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 -} - trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index a0d612e04990..2ef469ff3bc4 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -218,7 +218,7 @@ multipath4_test() sysctl_restore net.ipv4.fib_multipath_hash_policy } -multipath6_l4_test() +multipath6_test() { local desc="$1" local weight_rp12=$2 @@ -251,34 +251,6 @@ multipath6_l4_test() sysctl_restore net.ipv6.fib_multipath_hash_policy } -multipath6_test() -{ - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 - local t0_rp12 t0_rp13 t1_rp12 t1_rp13 - local packets_rp12 packets_rp13 - - ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13 - - t0_rp12=$(link_stats_tx_packets_get $rp12) - t0_rp13=$(link_stats_tx_packets_get $rp13) - - # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done - - t1_rp12=$(link_stats_tx_packets_get $rp12) - t1_rp13=$(link_stats_tx_packets_get $rp13) - - let "packets_rp12 = $t1_rp12 - $t0_rp12" - let "packets_rp13 = $t1_rp13 - $t0_rp13" - multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 - - ip nexthop replace id 106 group 104/105 -} - multipath_test() { log_info "Running IPv4 multipath tests" @@ -301,11 +273,6 @@ multipath_test() multipath6_test "ECMP" 1 1 multipath6_test "Weighted MP 2:1" 2 1 multipath6_test "Weighted MP 11:45" 11 45 - - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 } ping_ipv4_blackhole() diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 464821c587a5..a4eceeb5c06e 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -195,7 +195,7 @@ multipath4_test() sysctl_restore net.ipv4.fib_multipath_hash_policy } -multipath6_l4_test() +multipath6_test() { local desc="$1" local weight_rp12=$2 @@ -232,38 +232,6 @@ multipath6_l4_test() sysctl_restore net.ipv6.fib_multipath_hash_policy } -multipath6_test() -{ - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 - local t0_rp12 t0_rp13 t1_rp12 t1_rp13 - local packets_rp12 packets_rp13 - - ip route replace 2001:db8:2::/64 vrf vrf-r1 \ - nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ - nexthop via fe80:3::23 dev $rp13 weight $weight_rp13 - - t0_rp12=$(link_stats_tx_packets_get $rp12) - t0_rp13=$(link_stats_tx_packets_get $rp13) - - # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null - done - - t1_rp12=$(link_stats_tx_packets_get $rp12) - t1_rp13=$(link_stats_tx_packets_get $rp13) - - let "packets_rp12 = $t1_rp12 - $t0_rp12" - let "packets_rp13 = $t1_rp13 - $t0_rp13" - multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 - - ip route replace 2001:db8:2::/64 vrf vrf-r1 \ - nexthop via fe80:2::22 dev $rp12 \ - nexthop via fe80:3::23 dev $rp13 -} - multipath_test() { log_info "Running IPv4 multipath tests" @@ -275,11 +243,6 @@ multipath_test() multipath6_test "ECMP" 1 1 multipath6_test "Weighted MP 2:1" 2 1 multipath6_test "Weighted MP 11:45" 11 45 - - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 } setup_prepare() -- cgit v1.2.3 From 748d27447daa3f2bdf519f2a97647ff69fd1b70b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 4 Mar 2024 11:56:08 +0200 Subject: selftests: forwarding: Parametrize mausezahn delay The various multipath tests use mausezahn to generate different flows and check how they are distributed between the available nexthops. The tool is currently invoked with an hard coded transmission delay of 1 ms. This is unnecessary when the tests are run with veth pairs and needlessly prolongs the tests. Parametrize this delay and default it to 0 us. It can be overridden using the forwarding.config file. On my system, this reduces the run time of router_multipath.sh by 93%. Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20240304095612.462900-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/custom_multipath_hash.sh | 16 ++++++++-------- .../selftests/net/forwarding/forwarding.config.sample | 2 ++ .../net/forwarding/gre_custom_multipath_hash.sh | 16 ++++++++-------- .../selftests/net/forwarding/gre_inner_v4_multipath.sh | 2 +- .../selftests/net/forwarding/gre_inner_v6_multipath.sh | 2 +- tools/testing/selftests/net/forwarding/gre_multipath.sh | 2 +- .../testing/selftests/net/forwarding/gre_multipath_nh.sh | 4 ++-- .../selftests/net/forwarding/gre_multipath_nh_res.sh | 4 ++-- .../net/forwarding/ip6gre_custom_multipath_hash.sh | 16 ++++++++-------- .../net/forwarding/ip6gre_inner_v4_multipath.sh | 2 +- .../net/forwarding/ip6gre_inner_v6_multipath.sh | 2 +- tools/testing/selftests/net/forwarding/ip6gre_lib.sh | 4 ++-- tools/testing/selftests/net/forwarding/lib.sh | 1 + .../testing/selftests/net/forwarding/router_mpath_nh.sh | 4 ++-- .../selftests/net/forwarding/router_mpath_nh_res.sh | 4 ++-- .../testing/selftests/net/forwarding/router_multipath.sh | 4 ++-- 16 files changed, 44 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 56eb83d1a3bd..1783c10215e5 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -183,42 +183,42 @@ send_src_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_src_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } send_src_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_flowlabel() @@ -234,14 +234,14 @@ send_src_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:4::2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:4::2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } custom_hash_test() diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index 4a546509de90..1fc4f0242fc5 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -28,6 +28,8 @@ PING=ping PING6=ping6 # Packet generator. Some distributions use 'mz'. MZ=mausezahn +# mausezahn delay between transmissions in microseconds. +MZ_DELAY=0 # Time to wait after interfaces participating in the test are all UP WAIT_TIME=5 # Whether to pause on failure or not. diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index 0446db9c6f74..9788bd0f6e8b 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -278,42 +278,42 @@ send_src_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_src_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } send_src_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_flowlabel() @@ -329,14 +329,14 @@ send_src_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } custom_hash_test() diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh index e4009f658003..efca6114a3ce 100755 --- a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh @@ -267,7 +267,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh index e449475c4d3e..e5e911ce1562 100755 --- a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh @@ -268,7 +268,7 @@ multipath6_test() ip vrf exec v$h1 \ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \ -B "2001:db8:2::2-2001:db8:2::1e" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh index a8d8e8b3dc81..57531c1d884d 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh @@ -220,7 +220,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh index 62281898e7a4..7d5b2b9cc133 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh @@ -244,7 +244,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) @@ -271,7 +271,7 @@ multipath6_test() ip vrf exec v$h1 \ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh index 2085111bcd67..370f9925302d 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh @@ -247,7 +247,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) @@ -275,7 +275,7 @@ multipath6_test() ip vrf exec v$h1 \ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index d40183b4eccc..2ab9eaaa5532 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -280,42 +280,42 @@ send_src_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_src_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp4() { ip vrf exec v$h1 $MZ $h1 -q -p 64 \ -A 198.51.100.2 -B 203.0.113.2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } send_src_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_dst_ipv6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ - -d 1msec -c 50 -t udp "sp=20000,dp=30000" + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" } send_flowlabel() @@ -331,14 +331,14 @@ send_src_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=0-32768,dp=30000" + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" } send_dst_udp6() { ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=20000,dp=0-32768" + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" } custom_hash_test() diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh index a257979d3fc5..32d1461f37b7 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh @@ -266,7 +266,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh index d208f5243ade..eb4e50df5337 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh @@ -267,7 +267,7 @@ multipath6_test() ip vrf exec v$h1 \ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \ -B "2001:db8:2::2-2001:db8:2::1e" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh index 58a3597037b1..24f4ab328bd2 100644 --- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -356,7 +356,7 @@ test_traffic_ip4ip6() flower $TC_FLAG dst_ip 203.0.113.1 action pass $MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \ - -B 203.0.113.1 -t ip -q -d 1msec + -B 203.0.113.1 -t ip -q -d $MZ_DELAY # Check ports after encap and after decap. tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 @@ -389,7 +389,7 @@ test_traffic_ip6ip6() flower $TC_FLAG dst_ip 2001:db8:2::1 action pass $MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \ - -B 2001:db8:2::1 -t ip -q -d 1msec + -B 2001:db8:2::1 -t ip -q -d $MZ_DELAY # Check ports after encap and after decap. tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index db3688f52888..d1bf39eaf2b3 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -8,6 +8,7 @@ PING=${PING:=ping} PING6=${PING6:=ping6} MZ=${MZ:=mausezahn} +MZ_DELAY=${MZ_DELAY:=0} ARPING=${ARPING:=arping} TEAMD=${TEAMD:=teamd} WAIT_TIME=${WAIT_TIME:=5} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 2ef469ff3bc4..982e0d098ea9 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -204,7 +204,7 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -237,7 +237,7 @@ multipath6_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh index cb08ffe2356a..a60ff54723b7 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -205,7 +205,7 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -235,7 +235,7 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index a4eceeb5c06e..e2be354167a1 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -179,7 +179,7 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -216,7 +216,7 @@ multipath6_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) -- cgit v1.2.3 From 4aca9eae6f7bfea30467fce6b1062f1bf5f21e9b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 4 Mar 2024 11:56:09 +0200 Subject: selftests: forwarding: Make tc-police pass on debug kernels The test configures a policer with a rate of 80Mbps and expects to measure a rate close to it. This is a too high rate for debug kernels, causing the test to fail [1]. Fix by reducing the rate to 10Mbps. [1] # ./tc_police.sh TEST: police on rx [FAIL] Expected rate 76.2Mbps, got 29.6Mbps, which is -61% off. Required accuracy is +-10%. TEST: police on tx [FAIL] Expected rate 76.2Mbps, got 30.4Mbps, which is -60% off. Required accuracy is +-10%. Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20240304095612.462900-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/tc_police.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 0a51eef21b9e..5103f64a71d6 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -140,7 +140,7 @@ police_common_test() sleep 10 local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -157,7 +157,7 @@ police_rx_test() # Rule to police traffic destined to $h2 on ingress of $rp1 tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/ok + action police rate 10mbit burst 16k conform-exceed drop/ok police_common_test "police on rx" @@ -169,7 +169,7 @@ police_tx_test() # Rule to police traffic destined to $h2 on egress of $rp2 tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/ok + action police rate 10mbit burst 16k conform-exceed drop/ok police_common_test "police on tx" @@ -190,7 +190,7 @@ police_shared_common_test() sleep 10 local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -211,7 +211,7 @@ police_shared_test() # Rule to police traffic destined to $h2 on ingress of $rp1 tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/ok \ + action police rate 10mbit burst 16k conform-exceed drop/ok \ index 10 # Rule to police a different flow destined to $h2 on egress of $rp2 @@ -250,7 +250,7 @@ police_mirror_common_test() # Rule to police traffic destined to $h2 and mirror to $h3 tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/pipe \ + action police rate 10mbit burst 16k conform-exceed drop/pipe \ action mirred egress mirror dev $rp3 mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ @@ -260,7 +260,7 @@ police_mirror_common_test() sleep 10 local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -270,7 +270,7 @@ police_mirror_common_test() sleep 10 local t1=$(tc_rule_stats_get $h3 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) -- cgit v1.2.3 From dfbab74044be66852ebb9d4b2cbef2f082e82531 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 4 Mar 2024 11:56:10 +0200 Subject: selftests: forwarding: Make vxlan-bridge-1q pass on debug kernels The ageing time used by the test is too short for debug kernels and results in entries being aged out prematurely [1]. Fix by increasing the ageing time. [1] # ./vxlan_bridge_1q.sh [...] INFO: learning vlan 10 TEST: VXLAN: flood before learning [ OK ] TEST: VXLAN: show learned FDB entry [ OK ] TEST: VXLAN: learned FDB entry [FAIL] swp4: Expected to capture 0 packets, got 10. RTNETLINK answers: No such file or directory TEST: VXLAN: deletion of learned FDB entry [ OK ] TEST: VXLAN: Ageing of learned FDB entry [FAIL] swp4: Expected to capture 0 packets, got 10. TEST: VXLAN: learning toggling on bridge port [ OK ] [...] Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20240304095612.462900-5-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index a596bbf3ed6a..fb9a34cb50c6 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -750,7 +750,7 @@ __test_learning() expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 vxlan_flood_test $mac $dst $vid "${expects[@]}" - sleep 20 + sleep 60 bridge fdb show brport $vx | grep $mac | grep -q self check_fail $? @@ -796,11 +796,11 @@ test_learning() local dst=192.0.2.100 local vid=10 - # Enable learning on the VxLAN devices and set ageing time to 10 seconds - ip link set dev br1 type bridge ageing_time 1000 - ip link set dev vx10 type vxlan ageing 10 + # Enable learning on the VxLAN devices and set ageing time to 30 seconds + ip link set dev br1 type bridge ageing_time 3000 + ip link set dev vx10 type vxlan ageing 30 ip link set dev vx10 type vxlan learning - ip link set dev vx20 type vxlan ageing 10 + ip link set dev vx20 type vxlan ageing 30 ip link set dev vx20 type vxlan learning reapply_config -- cgit v1.2.3 From f0008b04977a741b2fb9c5e220120f82a0e50aa8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 4 Mar 2024 11:56:11 +0200 Subject: selftests: forwarding: Make VXLAN ECN encap tests more robust These tests sometimes fail on the netdev CI because the expected number of packets is larger than expected [1]. Make the tests more robust by specifically matching on VXLAN encapsulated packets and allowing up to five stray packets instead of just two. [1] [...] # TEST: VXLAN: ECN encap: 0x00->0x00 [FAIL] # v1: Expected to capture 10 packets, got 13. # TEST: VXLAN: ECN encap: 0x01->0x01 [ OK ] # TEST: VXLAN: ECN encap: 0x02->0x02 [ OK ] # TEST: VXLAN: ECN encap: 0x03->0x02 [ OK ] [...] Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20240304095612.462900-6-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh | 4 ++-- tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index eb307ca37bfa..6f0a2e452ba1 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -495,7 +495,7 @@ vxlan_ping_test() local delta=$((t1 - t0)) # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) + ((expect <= delta && delta <= expect + 5)) check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." } @@ -532,7 +532,7 @@ __test_ecn_encap() RET=0 tc filter add dev v1 egress pref 77 prot ip \ - flower ip_tos $tos action pass + flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass sleep 1 vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10 tc filter del dev v1 egress pref 77 prot ip diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh index ac97f07e5ce8..a0bb4524e1e9 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh @@ -616,7 +616,7 @@ vxlan_ping_test() local delta=$((t1 - t0)) # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) + ((expect <= delta && delta <= expect + 5)) check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." } @@ -653,7 +653,7 @@ __test_ecn_encap() RET=0 tc filter add dev v1 egress pref 77 protocol ipv6 \ - flower ip_tos $tos action pass + flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass sleep 1 vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10 tc filter del dev v1 egress pref 77 protocol ipv6 -- cgit v1.2.3 From 35df2ce896dc098554152db063aff1b52e09457a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 4 Mar 2024 11:56:12 +0200 Subject: selftests: forwarding: Make {, ip6}gre-inner-v6-multipath tests more robust These tests generate various IPv6 flows, encapsulate them in GRE packets and check that the encapsulated packets are distributed between the available nexthops according to the configured weights. Unlike the corresponding IPv4 tests, these tests sometimes fail in the netdev CI because of large discrepancies between the expected and measured ratios [1]. This can be explained by the fact that the IPv4 tests generate about 3,600 different flows whereas the IPv6 tests only generate about 784 different flows (potentially by mistake). Fix by aligning the IPv6 tests to the IPv4 ones and increase the number of generated flows. [1] [...] # TEST: ping [ OK ] # INFO: Running IPv6 over GRE over IPv4 multipath tests # TEST: ECMP [FAIL] # Too large discrepancy between expected and measured ratios # INFO: Expected ratio 1.00 Measured ratio 1.18 [...] Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20240304095612.462900-7-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh | 4 ++-- tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh index e5e911ce1562..a71ad39fc0c3 100755 --- a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh @@ -266,8 +266,8 @@ multipath6_test() local t0_222=$(tc_rule_stats_get $ul32 222 ingress) ip vrf exec v$h1 \ - $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \ - -B "2001:db8:2::2-2001:db8:2::1e" \ + $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \ + -B "2001:db8:2::2-2001:db8:2::3e" \ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh index eb4e50df5337..e1a4b50505f5 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh @@ -265,8 +265,8 @@ multipath6_test() local t0_222=$(tc_rule_stats_get $ul32 222 ingress) ip vrf exec v$h1 \ - $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \ - -B "2001:db8:2::2-2001:db8:2::1e" \ + $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \ + -B "2001:db8:2::2-2001:db8:2::3e" \ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 -- cgit v1.2.3 From e3350ba4a5b7573d4e14ee1dff8c414646435a04 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 15:36:20 -0800 Subject: selftests: avoid using SKIP(exit()) in harness fixure setup selftest harness uses various exit codes to signal test results. Avoid calling exit() directly, otherwise tests may get broken by harness refactoring (like the commit under Fixes). SKIP() will instruct the harness that the test shouldn't run, it used to not be the case, but that has been fixed. So just return, no need to exit. Note that for hmm-tests this actually changes the result from pass to skip. Which seems fair, the test is skipped, after all. Reported-by: Mark Brown Link: https://lore.kernel.org/all/05f7bf89-04a5-4b65-bf59-c19456aeb1f0@sirena.org.uk Fixes: a724707976b0 ("selftests: kselftest_harness: use KSFT_* exit codes") Reviewed-by: Kees Cook Reviewed-by: Mark Brown Tested-by: Mark Brown Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240304233621.646054-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/alsa/test-pcmtest-driver.c | 4 ++-- tools/testing/selftests/mm/hmm-tests.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c index a52ecd43dbe3..ca81afa4ee90 100644 --- a/tools/testing/selftests/alsa/test-pcmtest-driver.c +++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c @@ -127,11 +127,11 @@ FIXTURE_SETUP(pcmtest) { int err; if (geteuid()) - SKIP(exit(-1), "This test needs root to run!"); + SKIP(return, "This test needs root to run!"); err = read_patterns(); if (err) - SKIP(exit(-1), "Can't read patterns. Probably, module isn't loaded"); + SKIP(return, "Can't read patterns. Probably, module isn't loaded"); card_name = malloc(127); ASSERT_NE(card_name, NULL); diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index 20294553a5dd..d2cfc9b494a0 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -138,7 +138,7 @@ FIXTURE_SETUP(hmm) self->fd = hmm_open(variant->device_number); if (self->fd < 0 && hmm_is_coherent_type(variant->device_number)) - SKIP(exit(0), "DEVICE_COHERENT not available"); + SKIP(return, "DEVICE_COHERENT not available"); ASSERT_GE(self->fd, 0); } @@ -149,7 +149,7 @@ FIXTURE_SETUP(hmm2) self->fd0 = hmm_open(variant->device_number0); if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0)) - SKIP(exit(0), "DEVICE_COHERENT not available"); + SKIP(return, "DEVICE_COHERENT not available"); ASSERT_GE(self->fd0, 0); self->fd1 = hmm_open(variant->device_number1); ASSERT_GE(self->fd1, 0); -- cgit v1.2.3 From 4e887471e8e3a513607495d18333c44f59a82c5a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 21:13:26 -0800 Subject: tools: ynl: rename make hardclean -> distclean The make target to remove all generated files used to be called "hardclean" because it deleted files which were tracked by git. We no longer track generated user space files, so use the more common "distclean" name. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- tools/net/ynl/Makefile | 2 +- tools/net/ynl/generated/Makefile | 4 ++-- tools/net/ynl/lib/Makefile | 2 +- tools/net/ynl/samples/Makefile | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index da1aa10bbcc3..1874296665e1 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -11,7 +11,7 @@ $(SUBDIRS): $(MAKE) -C $@ ; \ fi -clean hardclean: +clean distclean: @for dir in $(SUBDIRS) ; do \ if [ -f "$$dir/Makefile" ] ; then \ $(MAKE) -C $$dir $@; \ diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 7135028cb449..713f5fb9cc2d 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -43,11 +43,11 @@ protos.a: $(OBJS) clean: rm -f *.o -hardclean: clean +distclean: clean rm -f *.c *.h *.a regen: @../ynl-regen.sh -.PHONY: all clean hardclean regen +.PHONY: all clean distclean regen .DEFAULT_GOAL: all diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index d2e50fd0a52d..2201dafc62b3 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -18,7 +18,7 @@ ynl.a: $(OBJS) clean: rm -f *.o *.d *~ -hardclean: clean +distclean: clean rm -f *.a %.o: %.c diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index 1d33e98e3ffe..3e81432f7b27 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -28,7 +28,7 @@ $(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS) clean: rm -f *.o *.d *~ -hardclean: clean +distclean: clean rm -f $(BINS) .PHONY: all clean -- cgit v1.2.3 From 1d8617b2a610f36e361a91846725c065dc233541 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 21:13:27 -0800 Subject: tools: ynl: add distclean to .PHONY in all makefiles Donald points out most YNL makefiles are missing distclean in .PHONY, even tho generated/Makefile does list it. Suggested-by: Donald Hunter Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- tools/net/ynl/Makefile | 2 +- tools/net/ynl/lib/Makefile | 2 +- tools/net/ynl/samples/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index 1874296665e1..8e9e09d84e26 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -18,4 +18,4 @@ clean distclean: fi \ done -.PHONY: clean all $(SUBDIRS) +.PHONY: all clean distclean $(SUBDIRS) diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index 2201dafc62b3..1507833d05c5 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -24,5 +24,5 @@ distclean: clean %.o: %.c $(COMPILE.c) -MMD -c -o $@ $< -.PHONY: all clean +.PHONY: all clean distclean .DEFAULT_GOAL=all diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index 3e81432f7b27..e194a7565861 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -31,5 +31,5 @@ clean: distclean: clean rm -f $(BINS) -.PHONY: all clean +.PHONY: all clean distclean .DEFAULT_GOAL=all -- cgit v1.2.3 From 72fa191bfdf611e5362e71d4cacae26c4c8d302c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 21:13:28 -0800 Subject: tools: ynl: remove __pycache__ during clean Build process uses python to generate the user space code. Remove __pycache__ on make clean. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- tools/net/ynl/lib/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index 1507833d05c5..dfff3ecd1cba 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -17,6 +17,7 @@ ynl.a: $(OBJS) ar rcs $@ $(OBJS) clean: rm -f *.o *.d *~ + rm -rf __pycache__ distclean: clean rm -f *.a -- cgit v1.2.3 From 7df7231d6a6b68b4b68fb4e38a4639997a208fd2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 21:33:07 -0800 Subject: tools: ynl: move the new line in NlMsg __repr__ We add the new line even if message has no error or extack, which leads to print(nl_msg) ending with two new lines. Reviewed-by: Donald Hunter Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/net/ynl/lib/ynl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index ac55aa5a3083..92ade9105f31 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -213,11 +213,11 @@ class NlMsg: return self.nl_type def __repr__(self): - msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}\n" + msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}" if self.error: - msg += '\terror: ' + str(self.error) + msg += '\n\terror: ' + str(self.error) if self.extack: - msg += '\textack: ' + repr(self.extack) + msg += '\n\textack: ' + repr(self.extack) return msg -- cgit v1.2.3 From 7c93a88785dae6b61dc736b46594d088989e484b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 21:33:08 -0800 Subject: tools: ynl: allow setting recv() size Make the size of the buffer we use for recv() configurable. The details of the buffer sizing in netlink are somewhat arcane, we could spend a lot of time polishing this API. Let's just leave some hopefully helpful comments for now. This is a for-developers-only feature, anyway. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- tools/net/ynl/lib/ynl.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 92ade9105f31..c3ff5be33e4e 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -84,6 +84,10 @@ class NlError(Exception): return f"Netlink error: {os.strerror(-self.nl_msg.error)}\n{self.nl_msg}" +class ConfigError(Exception): + pass + + class NlAttr: ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little']) type_formats = { @@ -400,7 +404,8 @@ class SpaceAttrs: class YnlFamily(SpecFamily): - def __init__(self, def_path, schema=None, process_unknown=False): + def __init__(self, def_path, schema=None, process_unknown=False, + recv_size=0): super().__init__(def_path, schema) self.include_raw = False @@ -415,6 +420,16 @@ class YnlFamily(SpecFamily): except KeyError: raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel") + # Note that netlink will use conservative (min) message size for + # the first dump recv() on the socket, our setting will only matter + # from the second recv() on. + self._recv_size = recv_size if recv_size else 131072 + # Netlink will always allocate at least PAGE_SIZE - sizeof(skb_shinfo) + # for a message, so smaller receive sizes will lead to truncation. + # Note that the min size for other families may be larger than 4k! + if self._recv_size < 4000: + raise ConfigError() + self.sock = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, self.nlproto.proto_num) self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1) self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_EXT_ACK, 1) @@ -799,7 +814,7 @@ class YnlFamily(SpecFamily): def check_ntf(self): while True: try: - reply = self.sock.recv(128 * 1024, socket.MSG_DONTWAIT) + reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT) except BlockingIOError: return @@ -854,7 +869,7 @@ class YnlFamily(SpecFamily): done = False rsp = [] while not done: - reply = self.sock.recv(128 * 1024) + reply = self.sock.recv(self._recv_size) nms = NlMsgs(reply, attr_space=op.attr_set) for nl_msg in nms: if nl_msg.extack: -- cgit v1.2.3 From a6a41521f95e5263a52ac79c02167a59ccc7183b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 21:33:09 -0800 Subject: tools: ynl: support debug printing messages For manual debug, allow printing the netlink level messages to stderr. Reviewed-by: Donald Hunter Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/net/ynl/lib/ynl.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index c3ff5be33e4e..239e22b7a85f 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -7,6 +7,7 @@ import random import socket import struct from struct import Struct +import sys import yaml import ipaddress import uuid @@ -420,6 +421,7 @@ class YnlFamily(SpecFamily): except KeyError: raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel") + self._recv_dbg = False # Note that netlink will use conservative (min) message size for # the first dump recv() on the socket, our setting will only matter # from the second recv() on. @@ -453,6 +455,17 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP, mcast_id) + def set_recv_dbg(self, enabled): + self._recv_dbg = enabled + + def _recv_dbg_print(self, reply, nl_msgs): + if not self._recv_dbg: + return + print("Recv: read", len(reply), "bytes,", + len(nl_msgs.msgs), "messages", file=sys.stderr) + for nl_msg in nl_msgs: + print(" ", nl_msg, file=sys.stderr) + def _encode_enum(self, attr_spec, value): enum = self.consts[attr_spec['enum']] if enum.type == 'flags' or attr_spec.get('enum-as-flags', False): @@ -819,6 +832,7 @@ class YnlFamily(SpecFamily): return nms = NlMsgs(reply) + self._recv_dbg_print(reply, nms) for nl_msg in nms: if nl_msg.error: print("Netlink error in ntf!?", os.strerror(-nl_msg.error)) @@ -871,6 +885,7 @@ class YnlFamily(SpecFamily): while not done: reply = self.sock.recv(self._recv_size) nms = NlMsgs(reply, attr_space=op.attr_set) + self._recv_dbg_print(reply, nms) for nl_msg in nms: if nl_msg.extack: self._decode_extack(msg, op, nl_msg.extack) -- cgit v1.2.3 From c0111878d45e3bb8779886fbf956b574bac8a3aa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 21:33:10 -0800 Subject: tools: ynl: add --dbg-small-recv for easier kernel testing Most "production" netlink clients use large buffers to make dump efficient, which means that handling of dump continuation in the kernel is not very well tested. Add an option for debugging / testing handling of dumps. It enables printing of extra netlink-level debug and lowers the recv() buffer size in one go. When used without any argument (--dbg-small-recv) it picks a very small default (4000), explicit size can be set, too (--dbg-small-recv 5000). Example: $ ./cli.py [...] --dbg-small-recv Recv: read 3712 bytes, 29 messages nl_len = 128 (112) nl_flags = 0x0 nl_type = 19 [...] nl_len = 128 (112) nl_flags = 0x0 nl_type = 19 Recv: read 3968 bytes, 31 messages nl_len = 128 (112) nl_flags = 0x0 nl_type = 19 [...] nl_len = 128 (112) nl_flags = 0x0 nl_type = 19 Recv: read 532 bytes, 5 messages nl_len = 128 (112) nl_flags = 0x0 nl_type = 19 [...] nl_len = 128 (112) nl_flags = 0x0 nl_type = 19 nl_len = 20 (4) nl_flags = 0x2 nl_type = 3 (the [...] are edits to shorten the commit message). Note that the first message of the dump is sized conservatively by the kernel. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- tools/net/ynl/cli.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index 0f8239979670..e8a65fbc3698 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -38,6 +38,8 @@ def main(): const=Netlink.NLM_F_APPEND) parser.add_argument('--process-unknown', action=argparse.BooleanOptionalAction) parser.add_argument('--output-json', action='store_true') + parser.add_argument('--dbg-small-recv', default=0, const=4000, + action='store', nargs='?', type=int) args = parser.parse_args() def output(msg): @@ -53,7 +55,10 @@ def main(): if args.json_text: attrs = json.loads(args.json_text) - ynl = YnlFamily(args.spec, args.schema, args.process_unknown) + ynl = YnlFamily(args.spec, args.schema, args.process_unknown, + recv_size=args.dbg_small_recv) + if args.dbg_small_recv: + ynl.set_recv_dbg(True) if args.ntf: ynl.ntf_subscribe(args.ntf) -- cgit v1.2.3 From 011832b97b311bb9e3c27945bc0d1089a14209c9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 5 Mar 2024 19:19:26 -0800 Subject: bpf: Introduce may_goto instruction Introduce may_goto instruction that from the verifier pov is similar to open coded iterators bpf_for()/bpf_repeat() and bpf_loop() helper, but it doesn't iterate any objects. In assembly 'may_goto' is a nop most of the time until bpf runtime has to terminate the program for whatever reason. In the current implementation may_goto has a hidden counter, but other mechanisms can be used. For programs written in C the later patch introduces 'cond_break' macro that combines 'may_goto' with 'break' statement and has similar semantics: cond_break is a nop until bpf runtime has to break out of this loop. It can be used in any normal "for" or "while" loop, like for (i = zero; i < cnt; cond_break, i++) { The verifier recognizes that may_goto is used in the program, reserves additional 8 bytes of stack, initializes them in subprog prologue, and replaces may_goto instruction with: aux_reg = *(u64 *)(fp - 40) if aux_reg == 0 goto pc+off aux_reg -= 1 *(u64 *)(fp - 40) = aux_reg may_goto instruction can be used by LLVM to implement __builtin_memcpy, __builtin_strcmp. may_goto is not a full substitute for bpf_for() macro. bpf_for() doesn't have induction variable that verifiers sees, so 'i' in bpf_for(i, 0, 100) is seen as imprecise and bounded. But when the code is written as: for (i = 0; i < 100; cond_break, i++) the verifier see 'i' as precise constant zero, hence cond_break (aka may_goto) doesn't help to converge the loop. A static or global variable can be used as a workaround: static int zero = 0; for (i = zero; i < 100; cond_break, i++) // works! may_goto works well with arena pointers that don't need to be bounds checked on access. Load/store from arena returns imprecise unbounded scalar and loops with may_goto pass the verifier. Reserve new opcode BPF_JMP | BPF_JCOND for may_goto insn. JCOND stands for conditional pseudo jump. Since goto_or_nop insn was proposed, it may use the same opcode. may_goto vs goto_or_nop can be distinguished by src_reg: code = BPF_JMP | BPF_JCOND src_reg = 0 - may_goto src_reg = 1 - goto_or_nop Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Eduard Zingerman Acked-by: John Fastabend Tested-by: John Fastabend Link: https://lore.kernel.org/bpf/20240306031929.42666-2-alexei.starovoitov@gmail.com --- include/linux/bpf_verifier.h | 2 + include/uapi/linux/bpf.h | 5 ++ kernel/bpf/core.c | 1 + kernel/bpf/disasm.c | 4 + kernel/bpf/verifier.c | 163 +++++++++++++++++++++++++++++++++-------- tools/include/uapi/linux/bpf.h | 5 ++ 6 files changed, 150 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 84365e6dd85d..4b0f6600e499 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -449,6 +449,7 @@ struct bpf_verifier_state { u32 jmp_history_cnt; u32 dfs_depth; u32 callback_unroll_depth; + u32 may_goto_depth; }; #define bpf_get_spilled_reg(slot, frame, mask) \ @@ -619,6 +620,7 @@ struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ + u16 stack_extra; bool has_tail_call: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a241f407c234..85ec7fc799d7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -42,6 +42,7 @@ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ #define BPF_JSLT 0xc0 /* SLT is signed, '<' */ #define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ +#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ @@ -50,6 +51,10 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +enum bpf_cond_pseudo_jmp { + BPF_MAY_GOTO = 0, +}; + /* Register numbers */ enum { BPF_REG_0 = 0, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 71c459a51d9e..9ee4536d0a09 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1675,6 +1675,7 @@ bool bpf_opcode_in_insntable(u8 code) [BPF_LD | BPF_IND | BPF_B] = true, [BPF_LD | BPF_IND | BPF_H] = true, [BPF_LD | BPF_IND | BPF_W] = true, + [BPF_JMP | BPF_JCOND] = true, }; #undef BPF_INSN_3_TBL #undef BPF_INSN_2_TBL diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 49940c26a227..82b2dbdd048f 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -322,6 +322,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, } else if (insn->code == (BPF_JMP | BPF_JA)) { verbose(cbs->private_data, "(%02x) goto pc%+d\n", insn->code, insn->off); + } else if (insn->code == (BPF_JMP | BPF_JCOND) && + insn->src_reg == BPF_MAY_GOTO) { + verbose(cbs->private_data, "(%02x) may_goto pc%+d\n", + insn->code, insn->off); } else if (insn->code == (BPF_JMP32 | BPF_JA)) { verbose(cbs->private_data, "(%02x) gotol pc%+d\n", insn->code, insn->imm); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4dd84e13bbfe..8030b50d3b45 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -533,6 +533,16 @@ static bool is_async_callback_calling_insn(struct bpf_insn *insn) return bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm); } +static bool is_may_goto_insn(struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO; +} + +static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx) +{ + return is_may_goto_insn(&env->prog->insnsi[insn_idx]); +} + static bool is_storage_get_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_storage_get || @@ -1429,6 +1439,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->dfs_depth = src->dfs_depth; dst_state->callback_unroll_depth = src->callback_unroll_depth; dst_state->used_as_loop_entry = src->used_as_loop_entry; + dst_state->may_goto_depth = src->may_goto_depth; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -14871,11 +14882,36 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, int err; /* Only conditional jumps are expected to reach here. */ - if (opcode == BPF_JA || opcode > BPF_JSLE) { + if (opcode == BPF_JA || opcode > BPF_JCOND) { verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); return -EINVAL; } + if (opcode == BPF_JCOND) { + struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; + int idx = *insn_idx; + + if (insn->code != (BPF_JMP | BPF_JCOND) || + insn->src_reg != BPF_MAY_GOTO || + insn->dst_reg || insn->imm || insn->off == 0) { + verbose(env, "invalid may_goto off %d imm %d\n", + insn->off, insn->imm); + return -EINVAL; + } + prev_st = find_prev_entry(env, cur_st->parent, idx); + + /* branch out 'fallthrough' insn as a new state to explore */ + queued_st = push_stack(env, idx + 1, idx, false); + if (!queued_st) + return -ENOMEM; + + queued_st->may_goto_depth++; + if (prev_st) + widen_imprecise_scalars(env, prev_st, queued_st); + *insn_idx += insn->off; + return 0; + } + /* check src2 operand */ err = check_reg_arg(env, insn->dst_reg, SRC_OP); if (err) @@ -15659,6 +15695,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env) default: /* conditional jump with two edges */ mark_prune_point(env, t); + if (is_may_goto_insn(insn)) + mark_force_checkpoint(env, t); ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret) @@ -17135,6 +17173,13 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) } goto skip_inf_loop_check; } + if (is_may_goto_insn_at(env, insn_idx)) { + if (states_equal(env, &sl->state, cur, true)) { + update_loop_entry(cur, &sl->state); + goto hit; + } + goto skip_inf_loop_check; + } if (calls_callback(env, insn_idx)) { if (states_equal(env, &sl->state, cur, true)) goto hit; @@ -17144,6 +17189,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) if (states_maybe_looping(&sl->state, cur) && states_equal(env, &sl->state, cur, true) && !iter_active_depths_differ(&sl->state, cur) && + sl->state.may_goto_depth == cur->may_goto_depth && sl->state.callback_unroll_depth == cur->callback_unroll_depth) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); @@ -19408,7 +19454,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env) struct bpf_insn insn_buf[16]; struct bpf_prog *new_prog; struct bpf_map *map_ptr; - int i, ret, cnt, delta = 0; + int i, ret, cnt, delta = 0, cur_subprog = 0; + struct bpf_subprog_info *subprogs = env->subprog_info; + u16 stack_depth = subprogs[cur_subprog].stack_depth; + u16 stack_depth_extra = 0; if (env->seen_exception && !env->exception_callback_subprog) { struct bpf_insn patch[] = { @@ -19428,7 +19477,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) mark_subprog_exc_cb(env, env->exception_callback_subprog); } - for (i = 0; i < insn_cnt; i++, insn++) { + for (i = 0; i < insn_cnt;) { /* Make divide-by-zero exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || @@ -19467,7 +19516,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ @@ -19487,7 +19536,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Rewrite pointer arithmetic to mitigate speculation attacks. */ @@ -19502,7 +19551,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) aux = &env->insn_aux_data[i + delta]; if (!aux->alu_state || aux->alu_state == BPF_ALU_NON_POINTER) - continue; + goto next_insn; isneg = aux->alu_state & BPF_ALU_NEG_VALUE; issrc = (aux->alu_state & BPF_ALU_SANITIZE) == @@ -19540,19 +19589,39 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; + } + + if (is_may_goto_insn(insn)) { + int stack_off = -stack_depth - 8; + + stack_depth_extra = 8; + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off); + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2); + insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); + insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off); + cnt = 4; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; } if (insn->code != (BPF_JMP | BPF_CALL)) - continue; + goto next_insn; if (insn->src_reg == BPF_PSEUDO_CALL) - continue; + goto next_insn; if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt); if (ret) return ret; if (cnt == 0) - continue; + goto next_insn; new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) @@ -19561,7 +19630,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } if (insn->imm == BPF_FUNC_get_route_realm) @@ -19609,11 +19678,11 @@ static int do_misc_fixups(struct bpf_verifier_env *env) } insn->imm = ret + 1; - continue; + goto next_insn; } if (!bpf_map_ptr_unpriv(aux)) - continue; + goto next_insn; /* instead of changing every JIT dealing with tail_call * emit two extra insns: @@ -19642,7 +19711,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } if (insn->imm == BPF_FUNC_timer_set_callback) { @@ -19754,7 +19823,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, @@ -19785,31 +19854,31 @@ patch_map_ops_generic: switch (insn->imm) { case BPF_FUNC_map_lookup_elem: insn->imm = BPF_CALL_IMM(ops->map_lookup_elem); - continue; + goto next_insn; case BPF_FUNC_map_update_elem: insn->imm = BPF_CALL_IMM(ops->map_update_elem); - continue; + goto next_insn; case BPF_FUNC_map_delete_elem: insn->imm = BPF_CALL_IMM(ops->map_delete_elem); - continue; + goto next_insn; case BPF_FUNC_map_push_elem: insn->imm = BPF_CALL_IMM(ops->map_push_elem); - continue; + goto next_insn; case BPF_FUNC_map_pop_elem: insn->imm = BPF_CALL_IMM(ops->map_pop_elem); - continue; + goto next_insn; case BPF_FUNC_map_peek_elem: insn->imm = BPF_CALL_IMM(ops->map_peek_elem); - continue; + goto next_insn; case BPF_FUNC_redirect_map: insn->imm = BPF_CALL_IMM(ops->map_redirect); - continue; + goto next_insn; case BPF_FUNC_for_each_map_elem: insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); - continue; + goto next_insn; case BPF_FUNC_map_lookup_percpu_elem: insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); - continue; + goto next_insn; } goto patch_call_imm; @@ -19837,7 +19906,7 @@ patch_map_ops_generic: delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Implement bpf_get_func_arg inline. */ @@ -19862,7 +19931,7 @@ patch_map_ops_generic: delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Implement bpf_get_func_ret inline. */ @@ -19890,7 +19959,7 @@ patch_map_ops_generic: delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Implement get_func_arg_cnt inline. */ @@ -19905,7 +19974,7 @@ patch_map_ops_generic: env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Implement bpf_get_func_ip inline. */ @@ -19920,7 +19989,7 @@ patch_map_ops_generic: env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Implement bpf_kptr_xchg inline */ @@ -19938,7 +20007,7 @@ patch_map_ops_generic: delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); @@ -19952,6 +20021,40 @@ patch_call_imm: return -EFAULT; } insn->imm = fn->func - __bpf_call_base; +next_insn: + if (subprogs[cur_subprog + 1].start == i + delta + 1) { + subprogs[cur_subprog].stack_depth += stack_depth_extra; + subprogs[cur_subprog].stack_extra = stack_depth_extra; + cur_subprog++; + stack_depth = subprogs[cur_subprog].stack_depth; + stack_depth_extra = 0; + } + i++; + insn++; + } + + env->prog->aux->stack_depth = subprogs[0].stack_depth; + for (i = 0; i < env->subprog_cnt; i++) { + int subprog_start = subprogs[i].start; + int stack_slots = subprogs[i].stack_extra / 8; + + if (!stack_slots) + continue; + if (stack_slots > 1) { + verbose(env, "verifier bug: stack_slots supports may_goto only\n"); + return -EFAULT; + } + + /* Add ST insn to subprog prologue to init extra stack */ + insn_buf[0] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, + -subprogs[i].stack_depth, BPF_MAX_LOOPS); + /* Copy first actual insn to preserve it */ + insn_buf[1] = env->prog->insnsi[subprog_start]; + + new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, 2); + if (!new_prog) + return -ENOMEM; + env->prog = prog = new_prog; } /* Since poke tab is now finalized, publish aux to tracker. */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a241f407c234..85ec7fc799d7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -42,6 +42,7 @@ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ #define BPF_JSLT 0xc0 /* SLT is signed, '<' */ #define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ +#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ @@ -50,6 +51,10 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +enum bpf_cond_pseudo_jmp { + BPF_MAY_GOTO = 0, +}; + /* Register numbers */ enum { BPF_REG_0 = 0, -- cgit v1.2.3 From 06375801525717173aee790310b7d959bb77879b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 5 Mar 2024 19:19:28 -0800 Subject: bpf: Add cond_break macro Use may_goto instruction to implement cond_break macro. Ideally the macro should be written as: asm volatile goto(".byte 0xe5; .byte 0; .short %l[l_break] ... .long 0; but LLVM doesn't recognize fixup of 2 byte PC relative yet. Hence use asm volatile goto(".byte 0xe5; .byte 0; .long %l[l_break] ... .short 0; that produces correct asm on little endian. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Acked-by: John Fastabend Tested-by: John Fastabend Link: https://lore.kernel.org/bpf/20240306031929.42666-4-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/bpf_experimental.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 0d749006d107..bc9a0832ae72 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -326,6 +326,18 @@ l_true: \ }) #endif +#define cond_break \ + ({ __label__ l_break, l_continue; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: break; \ + l_continue:; \ + }) + #ifndef bpf_nop_mov #define bpf_nop_mov(var) \ asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var)) -- cgit v1.2.3 From 0c8bbf990bddef1a4f32889b18a4a016d9bd2cfd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 5 Mar 2024 19:19:29 -0800 Subject: selftests/bpf: Test may_goto Add tests for may_goto instruction via cond_break macro. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Tested-by: John Fastabend Link: https://lore.kernel.org/bpf/20240306031929.42666-5-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../bpf/progs/verifier_iterating_callbacks.c | 103 ++++++++++++++++++++- 2 files changed, 101 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 1a63996c0304..cb810a98e78f 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -3,3 +3,4 @@ exceptions # JIT does not support calling kfunc bpf_throw (exceptions) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) +verifier_iterating_callbacks diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index 5905e036e0ea..04cdbce4652f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 - -#include -#include #include "bpf_misc.h" +#include "bpf_experimental.h" struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -239,4 +237,103 @@ int bpf_loop_iter_limit_nested(void *unused) return 1000 * a + b + c; } +#define ARR_SZ 1000000 +int zero; +char arr[ARR_SZ]; + +SEC("socket") +__success __retval(0xd495cdc0) +int cond_break1(const void *ctx) +{ + unsigned long i; + unsigned int sum = 0; + + for (i = zero; i < ARR_SZ; cond_break, i++) + sum += i; + for (i = zero; i < ARR_SZ; i++) { + barrier_var(i); + sum += i + arr[i]; + cond_break; + } + + return sum; +} + +SEC("socket") +__success __retval(999000000) +int cond_break2(const void *ctx) +{ + int i, j; + int sum = 0; + + for (i = zero; i < 1000; cond_break, i++) + for (j = zero; j < 1000; j++) { + sum += i + j; + cond_break; + } + + return sum; +} + +static __noinline int loop(void) +{ + int i, sum = 0; + + for (i = zero; i <= 1000000; i++, cond_break) + sum += i; + + return sum; +} + +SEC("socket") +__success __retval(0x6a5a2920) +int cond_break3(const void *ctx) +{ + return loop(); +} + +SEC("socket") +__success __retval(1) +int cond_break4(const void *ctx) +{ + int cnt = zero; + + for (;;) { + /* should eventually break out of the loop */ + cond_break; + cnt++; + } + /* if we looped a bit, it's a success */ + return cnt > 1 ? 1 : 0; +} + +static __noinline int static_subprog(void) +{ + int cnt = zero; + + for (;;) { + cond_break; + cnt++; + } + + return cnt; +} + +SEC("socket") +__success __retval(1) +int cond_break5(const void *ctx) +{ + int cnt1 = zero, cnt2; + + for (;;) { + cond_break; + cnt1++; + } + + cnt2 = static_subprog(); + + /* main and subprog have to loop a bit */ + return cnt1 > 1 && cnt2 > 1 ? 1 : 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From a2a5172cf1eb39472bd2038079f65c6f676906a5 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:15 +0200 Subject: libbpf: Allow version suffixes (___smth) for struct_ops types E.g. allow the following struct_ops definitions: struct bpf_testmod_ops___v1 { int (*test)(void); }; struct bpf_testmod_ops___v2 { int (*test)(void); }; SEC(".struct_ops.link") struct bpf_testmod_ops___v1 a = { .test = ... } SEC(".struct_ops.link") struct bpf_testmod_ops___v2 b = { .test = ... } Where both bpf_testmod_ops__v1 and bpf_testmod_ops__v2 would be resolved as 'struct bpf_testmod_ops' from kernel BTF. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-2-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6c2979f1b471..e2a4c409980b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -948,7 +948,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, const char *name, __u32 kind); static int -find_struct_ops_kern_types(struct bpf_object *obj, const char *tname, +find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw, struct module_btf **mod_btf, const struct btf_type **type, __u32 *type_id, const struct btf_type **vtype, __u32 *vtype_id, @@ -958,8 +958,12 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname, const struct btf_member *kern_data_member; struct btf *btf; __s32 kern_vtype_id, kern_type_id; + char tname[256]; __u32 i; + snprintf(tname, sizeof(tname), "%.*s", + (int)bpf_core_essential_name_len(tname_raw), tname_raw); + kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT, &btf, mod_btf); if (kern_type_id < 0) { -- cgit v1.2.3 From d9ab2f76ef5abb76190ffb42d83bdc6caede807e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:16 +0200 Subject: libbpf: Tie struct_ops programs to kernel BTF ids, not to local ids Enforce the following existing limitation on struct_ops programs based on kernel BTF id instead of program-local BTF id: struct_ops BPF prog can be re-used between multiple .struct_ops & .struct_ops.link as long as it's the same struct_ops struct definition and the same function pointer field This allows reusing same BPF program for versioned struct_ops map definitions, e.g.: SEC("struct_ops/test") int BPF_PROG(foo) { ... } struct some_ops___v1 { int (*test)(void); }; struct some_ops___v2 { int (*test)(void); }; SEC(".struct_ops.link") struct some_ops___v1 a = { .test = foo } SEC(".struct_ops.link") struct some_ops___v2 b = { .test = foo } Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-3-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2a4c409980b..2c0cb72bc7a4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1146,8 +1146,32 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) if (mod_btf) prog->attach_btf_obj_fd = mod_btf->fd; - prog->attach_btf_id = kern_type_id; - prog->expected_attach_type = kern_member_idx; + + /* if we haven't yet processed this BPF program, record proper + * attach_btf_id and member_idx + */ + if (!prog->attach_btf_id) { + prog->attach_btf_id = kern_type_id; + prog->expected_attach_type = kern_member_idx; + } + + /* struct_ops BPF prog can be re-used between multiple + * .struct_ops & .struct_ops.link as long as it's the + * same struct_ops struct definition and the same + * function pointer field + */ + if (prog->attach_btf_id != kern_type_id) { + pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n", + map->name, mname, prog->name, prog->sec_name, prog->type, + prog->attach_btf_id, kern_type_id); + return -EINVAL; + } + if (prog->expected_attach_type != kern_member_idx) { + pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n", + map->name, mname, prog->name, prog->sec_name, prog->type, + prog->expected_attach_type, kern_member_idx); + return -EINVAL; + } st_ops->kern_func_off[i] = kern_data_off + kern_moff; @@ -9428,27 +9452,6 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -EINVAL; } - /* if we haven't yet processed this BPF program, record proper - * attach_btf_id and member_idx - */ - if (!prog->attach_btf_id) { - prog->attach_btf_id = st_ops->type_id; - prog->expected_attach_type = member_idx; - } - - /* struct_ops BPF prog can be re-used between multiple - * .struct_ops & .struct_ops.link as long as it's the - * same struct_ops struct definition and the same - * function pointer field - */ - if (prog->attach_btf_id != st_ops->type_id || - prog->expected_attach_type != member_idx) { - pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", - map->name, prog->name, prog->sec_name, prog->type, - prog->attach_btf_id, prog->expected_attach_type, name); - return -EINVAL; - } - st_ops->progs[member_idx] = prog; /* st_ops->data will be exposed to users, being returned by -- cgit v1.2.3 From 8db052615a9780b45e26d6afabc7abefe1ba20ac Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:17 +0200 Subject: libbpf: Honor autocreate flag for struct_ops maps Skip load steps for struct_ops maps not marked for automatic creation. This should allow to load bpf object in situations like below: SEC("struct_ops/foo") int BPF_PROG(foo) { ... } SEC("struct_ops/bar") int BPF_PROG(bar) { ... } struct test_ops___v1 { int (*foo)(void); }; struct test_ops___v2 { int (*foo)(void); int (*does_not_exist)(void); }; SEC(".struct_ops.link") struct test_ops___v1 map_for_old = { .test_1 = (void *)foo }; SEC(".struct_ops.link") struct test_ops___v2 map_for_new = { .test_1 = (void *)foo, .does_not_exist = (void *)bar }; Suppose program is loaded on old kernel that does not have definition for 'does_not_exist' struct_ops member. After this commit it would be possible to load such object file after the following tweaks: bpf_program__set_autoload(skel->progs.bar, false); bpf_map__set_autocreate(skel->maps.map_for_new, false); Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20240306104529.6453-4-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2c0cb72bc7a4..1fb9a4f237b4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1212,6 +1212,9 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) if (!bpf_map__is_struct_ops(map)) continue; + if (!map->autocreate) + continue; + err = bpf_map__init_kern_struct_ops(map); if (err) return err; @@ -8133,11 +8136,20 @@ static void bpf_map_prepare_vdata(const struct bpf_map *map) static int bpf_object_prepare_struct_ops(struct bpf_object *obj) { + struct bpf_map *map; int i; - for (i = 0; i < obj->nr_maps; i++) - if (bpf_map__is_struct_ops(&obj->maps[i])) - bpf_map_prepare_vdata(&obj->maps[i]); + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + + if (!bpf_map__is_struct_ops(map)) + continue; + + if (!map->autocreate) + continue; + + bpf_map_prepare_vdata(map); + } return 0; } -- cgit v1.2.3 From 5bab7a277ca8d4ef377a50a6678577b5bd7f74d8 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:18 +0200 Subject: selftests/bpf: Test struct_ops map definition with type suffix Extend struct_ops_module test case to check if it is possible to use '___' suffixes for struct_ops type specification. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20240306104529.6453-5-eddyz87@gmail.com --- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 2 ++ .../bpf/prog_tests/test_struct_ops_module.c | 33 ++++++++++++++++------ .../selftests/bpf/progs/struct_ops_module.c | 21 +++++++++++++- 3 files changed, 46 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 098ddd067224..b9b488d6d53a 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -564,6 +564,8 @@ static int bpf_dummy_reg(void *kdata) { struct bpf_testmod_ops *ops = kdata; + if (ops->test_1) + ops->test_1(); /* Some test cases (ex. struct_ops_maybe_null) may not have test_2 * initialized, so we need to check for NULL. */ diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c index 7d6facf46ebb..ee5372c7f2c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -30,11 +30,29 @@ cleanup: close(fd); } +static int attach_ops_and_check(struct struct_ops_module *skel, + struct bpf_map *map, + int expected_test_2_result) +{ + struct bpf_link *link; + + link = bpf_map__attach_struct_ops(map); + ASSERT_OK_PTR(link, "attach_test_mod_1"); + if (!link) + return -1; + + /* test_{1,2}() would be called from bpf_dummy_reg() in bpf_testmod.c */ + ASSERT_EQ(skel->bss->test_1_result, 0xdeadbeef, "test_1_result"); + ASSERT_EQ(skel->bss->test_2_result, expected_test_2_result, "test_2_result"); + + bpf_link__destroy(link); + return 0; +} + static void test_struct_ops_load(void) { struct struct_ops_module *skel; struct bpf_map_info info = {}; - struct bpf_link *link; int err; u32 len; @@ -59,20 +77,17 @@ static void test_struct_ops_load(void) if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) goto cleanup; - link = bpf_map__attach_struct_ops(skel->maps.testmod_1); - ASSERT_OK_PTR(link, "attach_test_mod_1"); - + check_map_info(&info); /* test_3() will be called from bpf_dummy_reg() in bpf_testmod.c * * In bpf_testmod.c it will pass 4 and 13 (the value of data) to * .test_2. So, the value of test_2_result should be 20 (4 + 13 + * 3). */ - ASSERT_EQ(skel->bss->test_2_result, 20, "check_shadow_variables"); - - bpf_link__destroy(link); - - check_map_info(&info); + if (!attach_ops_and_check(skel, skel->maps.testmod_1, 20)) + goto cleanup; + if (!attach_ops_and_check(skel, skel->maps.testmod_2, 12)) + goto cleanup; cleanup: struct_ops_module__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c index 25952fa09348..026cabfa7f1f 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_module.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c @@ -7,12 +7,14 @@ char _license[] SEC("license") = "GPL"; +int test_1_result = 0; int test_2_result = 0; SEC("struct_ops/test_1") int BPF_PROG(test_1) { - return 0xdeadbeef; + test_1_result = 0xdeadbeef; + return 0; } SEC("struct_ops/test_2") @@ -35,3 +37,20 @@ struct bpf_testmod_ops testmod_1 = { .data = 0x1, }; +SEC("struct_ops/test_2") +void BPF_PROG(test_2_v2, int a, int b) +{ + test_2_result = a * b; +} + +struct bpf_testmod_ops___v2 { + int (*test_1)(void); + void (*test_2)(int a, int b); + int (*test_maybe_null)(int dummy, struct task_struct *task); +}; + +SEC(".struct_ops.link") +struct bpf_testmod_ops___v2 testmod_2 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2_v2, +}; -- cgit v1.2.3 From c8617e8bcf8d1ef357fadf5c96bd86b9952fb93f Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:19 +0200 Subject: selftests/bpf: Utility functions to capture libbpf log in test_progs Several test_progs tests already capture libbpf log in order to check for some expected output, e.g bpf_tcp_ca.c, kfunc_dynptr_param.c, log_buf.c and a few others. This commit provides a, hopefully, simple API to capture libbpf log w/o necessity to define new print callback in each test: /* Creates a global memstream capturing INFO and WARN level output * passed to libbpf_print_fn. * Returns 0 on success, negative value on failure. * On failure the description is printed using PRINT_FAIL and * current test case is marked as fail. */ int start_libbpf_log_capture(void) /* Destroys global memstream created by start_libbpf_log_capture(). * Returns a pointer to captured data which has to be freed. * Returned buffer is null terminated. */ char *stop_libbpf_log_capture(void) The intended usage is as follows: if (start_libbpf_log_capture()) return; use_libbpf(); char *log = stop_libbpf_log_capture(); ASSERT_HAS_SUBSTR(log, "... expected ...", "expected some message"); free(log); As a safety measure, free(start_libbpf_log_capture()) is invoked in the epilogue of the test_progs.c:run_one_test(). Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-6-eddyz87@gmail.com --- tools/testing/selftests/bpf/test_progs.c | 59 ++++++++++++++++++++++++++++++++ tools/testing/selftests/bpf/test_progs.h | 3 ++ 2 files changed, 62 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 808550986f30..89ff704e9dad 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -683,11 +683,69 @@ static const struct argp_option opts[] = { {}, }; +static FILE *libbpf_capture_stream; + +static struct { + char *buf; + size_t buf_sz; +} libbpf_output_capture; + +/* Creates a global memstream capturing INFO and WARN level output + * passed to libbpf_print_fn. + * Returns 0 on success, negative value on failure. + * On failure the description is printed using PRINT_FAIL and + * current test case is marked as fail. + */ +int start_libbpf_log_capture(void) +{ + if (libbpf_capture_stream) { + PRINT_FAIL("%s: libbpf_capture_stream != NULL\n", __func__); + return -EINVAL; + } + + libbpf_capture_stream = open_memstream(&libbpf_output_capture.buf, + &libbpf_output_capture.buf_sz); + if (!libbpf_capture_stream) { + PRINT_FAIL("%s: open_memstream failed errno=%d\n", __func__, errno); + return -EINVAL; + } + + return 0; +} + +/* Destroys global memstream created by start_libbpf_log_capture(). + * Returns a pointer to captured data which has to be freed. + * Returned buffer is null terminated. + */ +char *stop_libbpf_log_capture(void) +{ + char *buf; + + if (!libbpf_capture_stream) + return NULL; + + fputc(0, libbpf_capture_stream); + fclose(libbpf_capture_stream); + libbpf_capture_stream = NULL; + /* get 'buf' after fclose(), see open_memstream() documentation */ + buf = libbpf_output_capture.buf; + memset(&libbpf_output_capture, 0, sizeof(libbpf_output_capture)); + return buf; +} + static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) { + if (libbpf_capture_stream && level != LIBBPF_DEBUG) { + va_list args2; + + va_copy(args2, args); + vfprintf(libbpf_capture_stream, format, args2); + } + if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG) return 0; + vfprintf(stdout, format, args); return 0; } @@ -1081,6 +1139,7 @@ static void run_one_test(int test_num) cleanup_cgroup_environment(); stdio_restore(); + free(stop_libbpf_log_capture()); dump_test_log(test, state, false, false, NULL); } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 80df51244886..0ba5a20b19ba 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -397,6 +397,9 @@ int test__join_cgroup(const char *path); system(cmd); \ }) +int start_libbpf_log_capture(void); +char *stop_libbpf_log_capture(void); + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; -- cgit v1.2.3 From c1b93c07b3ac3204c6a42a7f7b6217e36f44df4f Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:20 +0200 Subject: selftests/bpf: Bad_struct_ops test When loading struct_ops programs kernel requires BTF id of the struct_ops type and member index for attachment point inside that type. This makes impossible to use same BPF program in several struct_ops maps that have different struct_ops type. Check if libbpf rejects such BPF objects files. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-7-eddyz87@gmail.com --- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 24 +++++++++++++++ .../selftests/bpf/bpf_testmod/bpf_testmod.h | 4 +++ .../selftests/bpf/prog_tests/bad_struct_ops.c | 35 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/bad_struct_ops.c | 25 ++++++++++++++++ 4 files changed, 88 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c create mode 100644 tools/testing/selftests/bpf/progs/bad_struct_ops.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index b9b488d6d53a..39ad96a18123 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -611,6 +611,29 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = { .owner = THIS_MODULE, }; +static int bpf_dummy_reg2(void *kdata) +{ + struct bpf_testmod_ops2 *ops = kdata; + + ops->test_1(); + return 0; +} + +static struct bpf_testmod_ops2 __bpf_testmod_ops2 = { + .test_1 = bpf_testmod_test_1, +}; + +struct bpf_struct_ops bpf_testmod_ops2 = { + .verifier_ops = &bpf_testmod_verifier_ops, + .init = bpf_testmod_ops_init, + .init_member = bpf_testmod_ops_init_member, + .reg = bpf_dummy_reg2, + .unreg = bpf_dummy_unreg, + .cfi_stubs = &__bpf_testmod_ops2, + .name = "bpf_testmod_ops2", + .owner = THIS_MODULE, +}; + extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) @@ -622,6 +645,7 @@ static int bpf_testmod_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); + ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index 622d24e29d35..23fa1872ee67 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -89,4 +89,8 @@ struct bpf_testmod_ops { int (*tramp_40)(int value); }; +struct bpf_testmod_ops2 { + int (*test_1)(void); +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c new file mode 100644 index 000000000000..9f5dbefa0dd9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "bad_struct_ops.skel.h" + +static void invalid_prog_reuse(void) +{ + struct bad_struct_ops *skel; + char *log = NULL; + int err; + + skel = bad_struct_ops__open(); + if (!ASSERT_OK_PTR(skel, "bad_struct_ops__open")) + return; + + if (start_libbpf_log_capture()) + goto cleanup; + + err = bad_struct_ops__load(skel); + log = stop_libbpf_log_capture(); + ASSERT_ERR(err, "bad_struct_ops__load should fail"); + ASSERT_HAS_SUBSTR(log, + "struct_ops init_kern testmod_2 func ptr test_1: invalid reuse of prog test_1", + "expected init_kern message"); + +cleanup: + free(log); + bad_struct_ops__destroy(skel); +} + +void test_bad_struct_ops(void) +{ + if (test__start_subtest("invalid_prog_reuse")) + invalid_prog_reuse(); +} diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops.c b/tools/testing/selftests/bpf/progs/bad_struct_ops.c new file mode 100644 index 000000000000..b7e175cd0af0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bad_struct_ops.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops/test_1") +int BPF_PROG(test_1) { return 0; } + +SEC("struct_ops/test_2") +int BPF_PROG(test_2) { return 0; } + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2 +}; + +SEC(".struct_ops.link") +struct bpf_testmod_ops2 testmod_2 = { + .test_1 = (void *)test_1 +}; -- cgit v1.2.3 From 1863acccdf936c6917398165ca97e252d02fa6dd Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:21 +0200 Subject: selftests/bpf: Test autocreate behavior for struct_ops maps Check that bpf_map__set_autocreate() can be used to disable automatic creation for struct_ops maps. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-8-eddyz87@gmail.com --- .../bpf/prog_tests/struct_ops_autocreate.c | 76 ++++++++++++++++++++++ .../selftests/bpf/progs/struct_ops_autocreate.c | 42 ++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_autocreate.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c new file mode 100644 index 000000000000..a5c8bb9fcb2c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "struct_ops_autocreate.skel.h" + +static void cant_load_full_object(void) +{ + struct struct_ops_autocreate *skel; + char *log = NULL; + int err; + + skel = struct_ops_autocreate__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open")) + return; + + if (start_libbpf_log_capture()) + goto cleanup; + /* The testmod_2 map BTF type (struct bpf_testmod_ops___v2) doesn't + * match the BTF of the actual struct bpf_testmod_ops defined in the + * kernel, so we should fail to load it if we don't disable autocreate + * for that map. + */ + err = struct_ops_autocreate__load(skel); + log = stop_libbpf_log_capture(); + if (!ASSERT_ERR(err, "struct_ops_autocreate__load")) + goto cleanup; + + ASSERT_HAS_SUBSTR(log, "libbpf: struct_ops init_kern", "init_kern message"); + ASSERT_EQ(err, -ENOTSUP, "errno should be ENOTSUP"); + +cleanup: + free(log); + struct_ops_autocreate__destroy(skel); +} + +static void can_load_partial_object(void) +{ + struct struct_ops_autocreate *skel; + struct bpf_link *link = NULL; + int err; + + skel = struct_ops_autocreate__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts")) + return; + + err = bpf_program__set_autoload(skel->progs.test_2, false); + if (!ASSERT_OK(err, "bpf_program__set_autoload")) + goto cleanup; + + err = bpf_map__set_autocreate(skel->maps.testmod_2, false); + if (!ASSERT_OK(err, "bpf_map__set_autocreate")) + goto cleanup; + + err = struct_ops_autocreate__load(skel); + if (ASSERT_OK(err, "struct_ops_autocreate__load")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) + goto cleanup; + + /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */ + ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result"); + +cleanup: + bpf_link__destroy(link); + struct_ops_autocreate__destroy(skel); +} + +void test_struct_ops_autocreate(void) +{ + if (test__start_subtest("cant_load_full_object")) + cant_load_full_object(); + if (test__start_subtest("can_load_partial_object")) + can_load_partial_object(); +} diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c new file mode 100644 index 000000000000..9a951ee6f55c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +int test_1_result = 0; + +SEC("struct_ops/test_1") +int BPF_PROG(test_1) +{ + test_1_result = 42; + return 0; +} + +SEC("struct_ops/test_1") +int BPF_PROG(test_2) +{ + return 0; +} + +struct bpf_testmod_ops___v1 { + int (*test_1)(void); +}; + +struct bpf_testmod_ops___v2 { + int (*test_1)(void); + int (*does_not_exist)(void); +}; + +SEC(".struct_ops.link") +struct bpf_testmod_ops___v1 testmod_1 = { + .test_1 = (void *)test_1 +}; + +SEC(".struct_ops.link") +struct bpf_testmod_ops___v2 testmod_2 = { + .test_1 = (void *)test_1, + .does_not_exist = (void *)test_2 +}; -- cgit v1.2.3 From fe9d049c3da06373a1a35914b7f695509e4cb1fe Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:22 +0200 Subject: libbpf: Sync progs autoload with maps autocreate for struct_ops maps Automatically select which struct_ops programs to load depending on which struct_ops maps are selected for automatic creation. E.g. for the BPF code below: SEC("struct_ops/test_1") int BPF_PROG(foo) { ... } SEC("struct_ops/test_2") int BPF_PROG(bar) { ... } SEC(".struct_ops.link") struct test_ops___v1 A = { .foo = (void *)foo }; SEC(".struct_ops.link") struct test_ops___v2 B = { .foo = (void *)foo, .bar = (void *)bar, }; And the following libbpf API calls: bpf_map__set_autocreate(skel->maps.A, true); bpf_map__set_autocreate(skel->maps.B, false); The autoload would be enabled for program 'foo' and disabled for program 'bar'. During load, for each struct_ops program P, referenced from some struct_ops map M: - set P.autoload = true if M.autocreate is true for some M; - set P.autoload = false if M.autocreate is false for all M; - don't change P.autoload, if P is not referenced from any map. Do this after bpf_object__init_kern_struct_ops_maps() to make sure that shadow vars assignment is done. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-9-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1fb9a4f237b4..595c287d84ef 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1031,6 +1031,48 @@ static bool is_valid_st_ops_program(struct bpf_object *obj, return false; } +/* For each struct_ops program P, referenced from some struct_ops map M, + * enable P.autoload if there are Ms for which M.autocreate is true, + * disable P.autoload if for all Ms M.autocreate is false. + * Don't change P.autoload for programs that are not referenced from any maps. + */ +static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj) +{ + struct bpf_program *prog, *slot_prog; + struct bpf_map *map; + int i, j, k, vlen; + + for (i = 0; i < obj->nr_programs; ++i) { + int should_load = false; + int use_cnt = 0; + + prog = &obj->programs[i]; + if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) + continue; + + for (j = 0; j < obj->nr_maps; ++j) { + map = &obj->maps[j]; + if (!bpf_map__is_struct_ops(map)) + continue; + + vlen = btf_vlen(map->st_ops->type); + for (k = 0; k < vlen; ++k) { + slot_prog = map->st_ops->progs[k]; + if (prog != slot_prog) + continue; + + use_cnt++; + if (map->autocreate) + should_load = true; + } + } + if (use_cnt) + prog->autoload = should_load; + } + + return 0; +} + /* Init the map's fields that depend on kern_btf */ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) { @@ -8175,6 +8217,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); + err = err ? : bpf_object_adjust_struct_ops_autoload(obj); err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__create_maps(obj); -- cgit v1.2.3 From 651d49f15b2a84b3bcfe950fa99c3672b9619dbd Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:23 +0200 Subject: selftests/bpf: Verify struct_ops autoload/autocreate sync Check that autocreate flags of struct_ops map cause autoload of struct_ops corresponding programs: - when struct_ops program is referenced only from a map for which autocreate is set to false, that program should not be loaded; - when struct_ops program with autoload == false is set to be used from a map with autocreate == true using shadow var, that program should be loaded; - when struct_ops program is not referenced from any map object load should fail. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-10-eddyz87@gmail.com --- .../selftests/bpf/prog_tests/bad_struct_ops.c | 32 ++++++++++++++ .../bpf/prog_tests/struct_ops_autocreate.c | 51 ++++++++++++++++++++-- .../testing/selftests/bpf/progs/bad_struct_ops2.c | 14 ++++++ .../selftests/bpf/progs/struct_ops_autocreate2.c | 32 ++++++++++++++ 4 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/bad_struct_ops2.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c index 9f5dbefa0dd9..6a707213e46b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c +++ b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c @@ -2,6 +2,7 @@ #include #include "bad_struct_ops.skel.h" +#include "bad_struct_ops2.skel.h" static void invalid_prog_reuse(void) { @@ -28,8 +29,39 @@ cleanup: bad_struct_ops__destroy(skel); } +static void unused_program(void) +{ + struct bad_struct_ops2 *skel; + char *log = NULL; + int err; + + skel = bad_struct_ops2__open(); + if (!ASSERT_OK_PTR(skel, "bad_struct_ops2__open")) + return; + + /* struct_ops programs not referenced from any maps are open + * with autoload set to true. + */ + ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo autoload == true"); + + if (start_libbpf_log_capture()) + goto cleanup; + + err = bad_struct_ops2__load(skel); + ASSERT_ERR(err, "bad_struct_ops2__load should fail"); + log = stop_libbpf_log_capture(); + ASSERT_HAS_SUBSTR(log, "prog 'foo': failed to load", + "message about 'foo' failing to load"); + +cleanup: + free(log); + bad_struct_ops2__destroy(skel); +} + void test_bad_struct_ops(void) { if (test__start_subtest("invalid_prog_reuse")) invalid_prog_reuse(); + if (test__start_subtest("unused_program")) + unused_program(); } diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c index a5c8bb9fcb2c..78948dc6445c 100644 --- a/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c +++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c @@ -2,6 +2,7 @@ #include #include "struct_ops_autocreate.skel.h" +#include "struct_ops_autocreate2.skel.h" static void cant_load_full_object(void) { @@ -43,18 +44,20 @@ static void can_load_partial_object(void) if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts")) return; - err = bpf_program__set_autoload(skel->progs.test_2, false); - if (!ASSERT_OK(err, "bpf_program__set_autoload")) - goto cleanup; - err = bpf_map__set_autocreate(skel->maps.testmod_2, false); if (!ASSERT_OK(err, "bpf_map__set_autocreate")) goto cleanup; + ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 default autoload"); + ASSERT_TRUE(bpf_program__autoload(skel->progs.test_2), "test_2 default autoload"); + err = struct_ops_autocreate__load(skel); if (ASSERT_OK(err, "struct_ops_autocreate__load")) goto cleanup; + ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 actual autoload"); + ASSERT_FALSE(bpf_program__autoload(skel->progs.test_2), "test_2 actual autoload"); + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) goto cleanup; @@ -67,10 +70,50 @@ cleanup: struct_ops_autocreate__destroy(skel); } +/* Swap test_mod1->test_1 program from 'bar' to 'foo' using shadow vars. + * test_mod1 load should enable autoload for 'foo'. + */ +static void autoload_and_shadow_vars(void) +{ + struct struct_ops_autocreate2 *skel = NULL; + struct bpf_link *link = NULL; + int err; + + skel = struct_ops_autocreate2__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts")) + return; + + ASSERT_FALSE(bpf_program__autoload(skel->progs.foo), "foo default autoload"); + ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar default autoload"); + + /* loading map testmod_1 would switch foo's autoload to true */ + skel->struct_ops.testmod_1->test_1 = skel->progs.foo; + + err = struct_ops_autocreate2__load(skel); + if (ASSERT_OK(err, "struct_ops_autocreate__load")) + goto cleanup; + + ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo actual autoload"); + ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar actual autoload"); + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) + goto cleanup; + + /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */ + err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result"); + +cleanup: + bpf_link__destroy(link); + struct_ops_autocreate2__destroy(skel); +} + void test_struct_ops_autocreate(void) { if (test__start_subtest("cant_load_full_object")) cant_load_full_object(); if (test__start_subtest("can_load_partial_object")) can_load_partial_object(); + if (test__start_subtest("autoload_and_shadow_vars")) + autoload_and_shadow_vars(); } diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops2.c b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c new file mode 100644 index 000000000000..64a95f6be86d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +char _license[] SEC("license") = "GPL"; + +/* This is an unused struct_ops program, it lacks corresponding + * struct_ops map, which provides attachment information. + * W/o additional configuration attempt to load such + * BPF object file would fail. + */ +SEC("struct_ops/foo") +void foo(void) {} diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c new file mode 100644 index 000000000000..6049d9c902d3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +int test_1_result = 0; + +SEC("?struct_ops/test_1") +int BPF_PROG(foo) +{ + test_1_result = 42; + return 0; +} + +SEC("?struct_ops/test_1") +int BPF_PROG(bar) +{ + test_1_result = 24; + return 0; +} + +struct bpf_testmod_ops { + int (*test_1)(void); +}; + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_1 = { + .test_1 = (void *)bar +}; -- cgit v1.2.3 From 240bf8a5162e8c43cf368909582a01082d494d79 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:24 +0200 Subject: libbpf: Replace elf_state->st_ops_* fields with SEC_ST_OPS sec_type The next patch would add two new section names for struct_ops maps. To make working with multiple struct_ops sections more convenient: - remove fields like elf_state->st_ops_{shndx,link_shndx}; - mark section descriptions hosting struct_ops as elf_sec_desc->sec_type == SEC_ST_OPS; After these changes struct_ops sections could be processed uniformly by iterating bpf_object->efile.secs entries. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-11-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 61 ++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 595c287d84ef..cff72de42bf4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -612,6 +612,7 @@ enum sec_type { SEC_BSS, SEC_DATA, SEC_RODATA, + SEC_ST_OPS, }; struct elf_sec_desc { @@ -627,8 +628,6 @@ struct elf_state { Elf *elf; Elf64_Ehdr *ehdr; Elf_Data *symbols; - Elf_Data *st_ops_data; - Elf_Data *st_ops_link_data; size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct elf_sec_desc *secs; @@ -637,8 +636,7 @@ struct elf_state { __u32 btf_maps_sec_btf_id; int text_shndx; int symbols_shndx; - int st_ops_shndx; - int st_ops_link_shndx; + bool has_st_ops; }; struct usdt_manager; @@ -1266,7 +1264,7 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) } static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, - int shndx, Elf_Data *data, __u32 map_flags) + int shndx, Elf_Data *data) { const struct btf_type *type, *datasec; const struct btf_var_secinfo *vsi; @@ -1328,7 +1326,7 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, map->def.key_size = sizeof(int); map->def.value_size = type->size; map->def.max_entries = 1; - map->def.map_flags = map_flags; + map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -1363,15 +1361,25 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, static int bpf_object_init_struct_ops(struct bpf_object *obj) { - int err; + const char *sec_name; + int sec_idx, err; - err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx, - obj->efile.st_ops_data, 0); - err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC, - obj->efile.st_ops_link_shndx, - obj->efile.st_ops_link_data, - BPF_F_LINK); - return err; + for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) { + struct elf_sec_desc *desc = &obj->efile.secs[sec_idx]; + + if (desc->sec_type != SEC_ST_OPS) + continue; + + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + if (!sec_name) + return -LIBBPF_ERRNO__FORMAT; + + err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data); + if (err) + return err; + } + + return 0; } static struct bpf_object *bpf_object__new(const char *path, @@ -1409,8 +1417,6 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.btf_maps_shndx = -1; - obj->efile.st_ops_shndx = -1; - obj->efile.st_ops_link_shndx = -1; obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); @@ -1427,8 +1433,6 @@ static void bpf_object__elf_finish(struct bpf_object *obj) elf_end(obj->efile.elf); obj->efile.elf = NULL; obj->efile.symbols = NULL; - obj->efile.st_ops_data = NULL; - obj->efile.st_ops_link_data = NULL; zfree(&obj->efile.secs); obj->efile.sec_cnt = 0; @@ -2973,14 +2977,13 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) static bool libbpf_needs_btf(const struct bpf_object *obj) { return obj->efile.btf_maps_shndx >= 0 || - obj->efile.st_ops_shndx >= 0 || - obj->efile.st_ops_link_shndx >= 0 || + obj->efile.has_st_ops || obj->nr_extern > 0; } static bool kernel_needs_btf(const struct bpf_object *obj) { - return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0; + return obj->efile.has_st_ops; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -3681,12 +3684,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RODATA; sec_desc->shdr = sh; sec_desc->data = data; - } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { - obj->efile.st_ops_data = data; - obj->efile.st_ops_shndx = idx; - } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { - obj->efile.st_ops_link_data = data; - obj->efile.st_ops_link_shndx = idx; + } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || + strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { + sec_desc->sec_type = SEC_ST_OPS; + sec_desc->shdr = sh; + sec_desc->data = data; + obj->efile.has_st_ops = true; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -6999,12 +7002,12 @@ static int bpf_object__collect_relos(struct bpf_object *obj) data = sec_desc->data; idx = shdr->sh_info; - if (shdr->sh_type != SHT_REL) { + if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) { pr_warn("internal error at %d\n", __LINE__); return -LIBBPF_ERRNO__INTERNAL; } - if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx) + if (obj->efile.secs[idx].sec_type == SEC_ST_OPS) err = bpf_object__collect_st_ops_relos(obj, shdr, data); else if (idx == obj->efile.btf_maps_shndx) err = bpf_object__collect_map_relos(obj, shdr, data); -- cgit v1.2.3 From 5ad0ecbe056a4ea5ffaa73e58503a2f87b119a59 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:25 +0200 Subject: libbpf: Struct_ops in SEC("?.struct_ops") / SEC("?.struct_ops.link") Allow using two new section names for struct_ops maps: - SEC("?.struct_ops") - SEC("?.struct_ops.link") To specify maps that have bpf_map->autocreate == false after open. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-12-eddyz87@gmail.com --- tools/lib/bpf/libbpf.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cff72de42bf4..ec0f508b853d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1322,6 +1322,15 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, return -ENOMEM; map->btf_value_type_id = type_id; + /* Follow same convention as for programs autoload: + * SEC("?.struct_ops") means map is not created by default. + */ + if (sec_name[0] == '?') { + map->autocreate = false; + /* from now on forget there was ? in section name */ + sec_name++; + } + map->def.type = BPF_MAP_TYPE_STRUCT_OPS; map->def.key_size = sizeof(int); map->def.value_size = type->size; @@ -3685,7 +3694,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->shdr = sh; sec_desc->data = data; } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || - strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { + strcmp(name, STRUCT_OPS_LINK_SEC) == 0 || + strcmp(name, "?" STRUCT_OPS_SEC) == 0 || + strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) { sec_desc->sec_type = SEC_ST_OPS; sec_desc->shdr = sh; sec_desc->data = data; @@ -3705,6 +3716,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && + strcmp(name, ".rel?" STRUCT_OPS_SEC) && + strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", idx, name, targ_sec_idx, -- cgit v1.2.3 From 6ebaa3fb88bbe4c33a0e01ce27007e1dd4fd133c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:26 +0200 Subject: libbpf: Rewrite btf datasec names starting from '?' Optional struct_ops maps are defined using question mark at the start of the section name, e.g.: SEC("?.struct_ops") struct test_ops optional_map = { ... }; This commit teaches libbpf to detect if kernel allows '?' prefix in datasec names, and if it doesn't then to rewrite such names by replacing '?' with '_', e.g.: DATASEC ?.struct_ops -> DATASEC _.struct_ops Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-13-eddyz87@gmail.com --- tools/lib/bpf/features.c | 22 ++++++++++++++++++++++ tools/lib/bpf/libbpf.c | 19 +++++++++++++++++-- tools/lib/bpf/libbpf_internal.h | 2 ++ 3 files changed, 41 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 6b0738ad7063..4e783cc7fc4b 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -147,6 +147,25 @@ static int probe_kern_btf_datasec(int token_fd) strs, sizeof(strs), token_fd)); } +static int probe_kern_btf_qmark_datasec(int token_fd) +{ + static const char strs[] = "\0x\0?.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC ?.data */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + static int probe_kern_btf_float(int token_fd) { static const char strs[] = "\0float"; @@ -534,6 +553,9 @@ static struct kern_feature_desc { [FEAT_ARG_CTX_TAG] = { "kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag, }, + [FEAT_BTF_QMARK_DATASEC] = { + "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ec0f508b853d..672fca94ff53 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2869,6 +2869,11 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) return sh->sh_flags & SHF_EXECINSTR; } +static bool starts_with_qmark(const char *s) +{ + return s && s[0] == '?'; +} + static bool btf_needs_sanitization(struct bpf_object *obj) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); @@ -2878,9 +2883,10 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); return !has_func || !has_datasec || !has_func_global || !has_float || - !has_decl_tag || !has_type_tag || !has_enum64; + !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; } static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2892,6 +2898,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); int enum64_placeholder_id = 0; struct btf_type *t; int i, j, vlen; @@ -2918,7 +2925,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) name = (char *)btf__name_by_offset(btf, t->name_off); while (*name) { - if (*name == '.') + if (*name == '.' || *name == '?') *name = '_'; name++; } @@ -2933,6 +2940,14 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) vt = (void *)btf__type_by_id(btf, v->type); m->name_off = vt->name_off; } + } else if (!has_qmark_datasec && btf_is_datasec(t) && + starts_with_qmark(btf__name_by_offset(btf, t->name_off))) { + /* replace '?' prefix with '_' for DATASEC names */ + char *name; + + name = (char *)btf__name_by_offset(btf, t->name_off); + if (name[0] == '?') + name[0] = '_'; } else if (!has_func && btf_is_func_proto(t)) { /* replace FUNC_PROTO with ENUM */ vlen = btf_vlen(t); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index ad936ac5e639..864b36177424 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -374,6 +374,8 @@ enum kern_feature_id { FEAT_UPROBE_MULTI_LINK, /* Kernel supports arg:ctx tag (__arg_ctx) for global subprogs natively */ FEAT_ARG_CTX_TAG, + /* Kernel supports '?' at the front of datasec names */ + FEAT_BTF_QMARK_DATASEC, __FEAT_CNT, }; -- cgit v1.2.3 From 733e5e875444fc5afc9b72714f0ecaca629ccf8a Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:27 +0200 Subject: selftests/bpf: Test case for SEC("?.struct_ops") Check that "?.struct_ops" and "?.struct_ops.link" section names define struct_ops maps with autocreate == false after open. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-14-eddyz87@gmail.com --- .../bpf/prog_tests/struct_ops_autocreate.c | 52 +++++++++++++++++++--- .../selftests/bpf/progs/struct_ops_autocreate.c | 10 +++++ 2 files changed, 56 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c index 78948dc6445c..a5cc593c1e1d 100644 --- a/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c +++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c @@ -34,10 +34,24 @@ cleanup: struct_ops_autocreate__destroy(skel); } +static int check_test_1_link(struct struct_ops_autocreate *skel, struct bpf_map *map) +{ + struct bpf_link *link; + int err; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) + return -1; + + /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */ + err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result"); + bpf_link__destroy(link); + return err; +} + static void can_load_partial_object(void) { struct struct_ops_autocreate *skel; - struct bpf_link *link = NULL; int err; skel = struct_ops_autocreate__open(); @@ -58,15 +72,39 @@ static void can_load_partial_object(void) ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 actual autoload"); ASSERT_FALSE(bpf_program__autoload(skel->progs.test_2), "test_2 actual autoload"); - link = bpf_map__attach_struct_ops(skel->maps.testmod_1); - if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) + check_test_1_link(skel, skel->maps.testmod_1); + +cleanup: + struct_ops_autocreate__destroy(skel); +} + +static void optional_maps(void) +{ + struct struct_ops_autocreate *skel; + int err; + + skel = struct_ops_autocreate__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open")) + return; + + ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_1), "testmod_1 autocreate"); + ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_2), "testmod_2 autocreate"); + ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map), "optional_map autocreate"); + ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map2), "optional_map2 autocreate"); + + err = bpf_map__set_autocreate(skel->maps.testmod_1, false); + err |= bpf_map__set_autocreate(skel->maps.testmod_2, false); + err |= bpf_map__set_autocreate(skel->maps.optional_map2, true); + if (!ASSERT_OK(err, "bpf_map__set_autocreate")) goto cleanup; - /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */ - ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result"); + err = struct_ops_autocreate__load(skel); + if (ASSERT_OK(err, "struct_ops_autocreate__load")) + goto cleanup; + + check_test_1_link(skel, skel->maps.optional_map2); cleanup: - bpf_link__destroy(link); struct_ops_autocreate__destroy(skel); } @@ -116,4 +154,6 @@ void test_struct_ops_autocreate(void) can_load_partial_object(); if (test__start_subtest("autoload_and_shadow_vars")) autoload_and_shadow_vars(); + if (test__start_subtest("optional_maps")) + optional_maps(); } diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c index 9a951ee6f55c..ba10c3896213 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c @@ -40,3 +40,13 @@ struct bpf_testmod_ops___v2 testmod_2 = { .test_1 = (void *)test_1, .does_not_exist = (void *)test_2 }; + +SEC("?.struct_ops") +struct bpf_testmod_ops___v1 optional_map = { + .test_1 = (void *)test_1, +}; + +SEC("?.struct_ops.link") +struct bpf_testmod_ops___v1 optional_map2 = { + .test_1 = (void *)test_1, +}; -- cgit v1.2.3 From 5208930a909ad618363471e2872d79abef103626 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 6 Mar 2024 12:45:29 +0200 Subject: selftests/bpf: Test cases for '?' in BTF names Two test cases to verify that '?' and other printable characters are allowed in BTF DATASEC names: - DATASEC with name "?.foo bar:buz" should be accepted; - type with name "?foo" should be rejected. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240306104529.6453-16-eddyz87@gmail.com --- tools/testing/selftests/bpf/prog_tests/btf.c | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 816145bcb647..00965a6e83bb 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3535,6 +3535,32 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 1, .max_entries = 1, }, +{ + .descr = "datasec: name '?.foo bar:buz' is ok", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC ?.data */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0x\0?.foo bar:buz"), +}, +{ + .descr = "type name '?foo' is not ok", + .raw_types = { + /* union ?foo; */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0?foo"), + .err_str = "Invalid name", + .btf_load_err = true, +}, { .descr = "float test #1, well-formed", @@ -4363,6 +4389,9 @@ static void do_test_raw(unsigned int test_num) if (err || btf_fd < 0) goto done; + if (!test->map_type) + goto done; + opts.btf_fd = btf_fd; opts.btf_key_type_id = test->key_type_id; opts.btf_value_type_id = test->value_type_id; -- cgit v1.2.3 From 41cca0542d7c79b6be3920a917f643c075ad1561 Mon Sep 17 00:00:00 2001 From: Mickaël Salaün Date: Tue, 5 Mar 2024 21:10:29 +0100 Subject: selftests/harness: Fix TEST_F()'s vfork handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always run fixture setup in the grandchild process, and by default also run the teardown in the same process. However, this change makes it possible to run the teardown in a parent process when _metadata->teardown_parent is set to true (e.g. in fixture setup). Fix TEST_SIGNAL() by forwarding grandchild's signal to its parent. Fix seccomp tests by running the test setup in the parent of the test thread, as expected by the related test code. Fix Landlock tests by waiting for the grandchild before processing _metadata. Use of exit(3) in tests should be OK because the environment in which the vfork(2) call happen is already dedicated to the running test (with flushed stdio, setpgrp() call), see __run_test() and the call to fork(2) just before running the setup/test/teardown. Even if the test configures its own exit handlers, they will not be run by the parent because it never calls exit(3), and the test function either ends with a call to _exit(2) or a signal. Cc: Günther Noack Cc: Shuah Khan Cc: Will Drewry Fixes: 0710a1a73fb4 ("selftests/harness: Merge TEST_F_FORK() into TEST_F()") Reviewed-by: Kees Cook Tested-by: Kees Cook Signed-off-by: Mickaël Salaün Reported-by: Mark Brown Tested-by: Mark Brown Link: https://lore.kernel.org/r/20240305201029.1331333-1-mic@digikod.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/kselftest_harness.h | 28 +++++++++++++++++----------- tools/testing/selftests/landlock/fs_test.c | 22 ++++++++++------------ 2 files changed, 27 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 634be793ad58..4fd735e48ee7 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -382,29 +382,33 @@ /* fixture data is alloced, setup, and torn down per call. */ \ FIXTURE_DATA(fixture_name) self; \ pid_t child = 1; \ + int status = 0; \ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \ if (setjmp(_metadata->env) == 0) { \ - fixture_name##_setup(_metadata, &self, variant->data); \ - /* Let setup failure terminate early. */ \ - if (_metadata->exit_code) \ - return; \ - _metadata->setup_completed = true; \ /* Use the same _metadata. */ \ child = vfork(); \ if (child == 0) { \ + fixture_name##_setup(_metadata, &self, variant->data); \ + /* Let setup failure terminate early. */ \ + if (_metadata->exit_code) \ + _exit(0); \ + _metadata->setup_completed = true; \ fixture_name##_##test_name(_metadata, &self, variant->data); \ - _exit(0); \ - } \ - if (child < 0) { \ + } else if (child < 0 || child != waitpid(child, &status, 0)) { \ ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \ _metadata->exit_code = KSFT_FAIL; \ } \ } \ - if (child == 0) \ - /* Child failed and updated the shared _metadata. */ \ + if (child == 0) { \ + if (_metadata->setup_completed && !_metadata->teardown_parent) \ + fixture_name##_teardown(_metadata, &self, variant->data); \ _exit(0); \ - if (_metadata->setup_completed) \ + } \ + if (_metadata->setup_completed && _metadata->teardown_parent) \ fixture_name##_teardown(_metadata, &self, variant->data); \ + if (!WIFEXITED(status) && WIFSIGNALED(status)) \ + /* Forward signal to __wait_for_test(). */ \ + kill(getpid(), WTERMSIG(status)); \ __test_check_assert(_metadata); \ } \ static struct __test_metadata \ @@ -414,6 +418,7 @@ .fixture = &_##fixture_name##_fixture_object, \ .termsig = signal, \ .timeout = tmout, \ + .teardown_parent = false, \ }; \ static void __attribute__((constructor)) \ _register_##fixture_name##_##test_name(void) \ @@ -873,6 +878,7 @@ struct __test_metadata { bool timed_out; /* did this test timeout instead of exiting? */ bool aborted; /* stopped test due to failed ASSERT */ bool setup_completed; /* did setup finish? */ + bool teardown_parent; /* run teardown in a parent process */ jmp_buf env; /* for exiting out of test early */ struct __test_results *results; struct __test_metadata *prev, *next; diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 98817a14c91b..9a6036fbf289 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -285,6 +285,8 @@ static void prepare_layout_opt(struct __test_metadata *const _metadata, static void prepare_layout(struct __test_metadata *const _metadata) { + _metadata->teardown_parent = true; + prepare_layout_opt(_metadata, &mnt_tmp); } @@ -3861,9 +3863,7 @@ FIXTURE_SETUP(layout1_bind) FIXTURE_TEARDOWN(layout1_bind) { - set_cap(_metadata, CAP_SYS_ADMIN); - EXPECT_EQ(0, umount(dir_s2d2)); - clear_cap(_metadata, CAP_SYS_ADMIN); + /* umount(dir_s2d2)) is handled by namespace lifetime. */ remove_layout1(_metadata); @@ -4276,9 +4276,8 @@ FIXTURE_TEARDOWN(layout2_overlay) EXPECT_EQ(0, remove_path(lower_fl1)); EXPECT_EQ(0, remove_path(lower_do1_fo2)); EXPECT_EQ(0, remove_path(lower_fo1)); - set_cap(_metadata, CAP_SYS_ADMIN); - EXPECT_EQ(0, umount(LOWER_BASE)); - clear_cap(_metadata, CAP_SYS_ADMIN); + + /* umount(LOWER_BASE)) is handled by namespace lifetime. */ EXPECT_EQ(0, remove_path(LOWER_BASE)); EXPECT_EQ(0, remove_path(upper_do1_fu3)); @@ -4287,14 +4286,11 @@ FIXTURE_TEARDOWN(layout2_overlay) EXPECT_EQ(0, remove_path(upper_do1_fo2)); EXPECT_EQ(0, remove_path(upper_fo1)); EXPECT_EQ(0, remove_path(UPPER_WORK "/work")); - set_cap(_metadata, CAP_SYS_ADMIN); - EXPECT_EQ(0, umount(UPPER_BASE)); - clear_cap(_metadata, CAP_SYS_ADMIN); + + /* umount(UPPER_BASE)) is handled by namespace lifetime. */ EXPECT_EQ(0, remove_path(UPPER_BASE)); - set_cap(_metadata, CAP_SYS_ADMIN); - EXPECT_EQ(0, umount(MERGE_DATA)); - clear_cap(_metadata, CAP_SYS_ADMIN); + /* umount(MERGE_DATA)) is handled by namespace lifetime. */ EXPECT_EQ(0, remove_path(MERGE_DATA)); cleanup_layout(_metadata); @@ -4691,6 +4687,8 @@ FIXTURE_SETUP(layout3_fs) SKIP(return, "this filesystem is not supported (setup)"); } + _metadata->teardown_parent = true; + slash = strrchr(variant->file_path, '/'); ASSERT_NE(slash, NULL); dir_len = (size_t)slash - (size_t)variant->file_path; -- cgit v1.2.3 From 15d2540e0d626b7960c9cafb3c825554fdf1a2b4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Mar 2024 10:50:00 -0800 Subject: tools: ynl: check for overflow of constructed messages Donald points out that we don't check for overflows. Stash the length of the message on nlmsg_pid (nlmsg_seq would do as well). This allows the attribute helpers to remain self-contained (no extra arguments). Also let the put helpers continue to return nothing. The error is checked only in (newly introduced) ynl_msg_end(). Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20240305185000.964773-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 34 ++++++++++++++++++++++++++++++---- tools/net/ynl/lib/ynl.c | 36 ++++++++++++++++++++++++++++++++++++ tools/net/ynl/lib/ynl.h | 2 ++ 3 files changed, 68 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index a8099fab035d..6cf890080dc0 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -135,6 +135,8 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); /* Netlink message handling helpers */ +#define YNL_MSG_OVERFLOW 1 + static inline struct nlmsghdr *ynl_nlmsg_put_header(void *buf) { struct nlmsghdr *nlh = buf; @@ -239,11 +241,29 @@ ynl_attr_first(const void *start, size_t len, size_t skip) return ynl_attr_if_good(start + len, attr); } +static inline bool +__ynl_attr_put_overflow(struct nlmsghdr *nlh, size_t size) +{ + bool o; + + /* ynl_msg_start() stashed buffer length in nlmsg_pid. */ + o = nlh->nlmsg_len + NLA_HDRLEN + NLMSG_ALIGN(size) > nlh->nlmsg_pid; + if (o) + /* YNL_MSG_OVERFLOW is < NLMSG_HDRLEN, all subsequent checks + * are guaranteed to fail. + */ + nlh->nlmsg_pid = YNL_MSG_OVERFLOW; + return o; +} + static inline struct nlattr * ynl_attr_nest_start(struct nlmsghdr *nlh, unsigned int attr_type) { struct nlattr *attr; + if (__ynl_attr_put_overflow(nlh, 0)) + return ynl_nlmsg_end_addr(nlh) - NLA_HDRLEN; + attr = ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type | NLA_F_NESTED; nlh->nlmsg_len += NLA_HDRLEN; @@ -263,6 +283,9 @@ ynl_attr_put(struct nlmsghdr *nlh, unsigned int attr_type, { struct nlattr *attr; + if (__ynl_attr_put_overflow(nlh, size)) + return; + attr = ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; attr->nla_len = NLA_HDRLEN + size; @@ -276,14 +299,17 @@ static inline void ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) { struct nlattr *attr; - const char *end; + size_t len; + + len = strlen(str); + if (__ynl_attr_put_overflow(nlh, len)) + return; attr = ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; - end = stpcpy(ynl_attr_data(attr), str); - attr->nla_len = - NLA_HDRLEN + NLA_ALIGN(end - (char *)ynl_attr_data(attr)); + strcpy(ynl_attr_data(attr), str); + attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len); nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); } diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 484070492b17..b9e77af5af5f 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -404,9 +404,33 @@ struct nlmsghdr *ynl_msg_start(struct ynl_sock *ys, __u32 id, __u16 flags) nlh->nlmsg_flags = flags; nlh->nlmsg_seq = ++ys->seq; + /* This is a local YNL hack for length checking, we put the buffer + * length in nlmsg_pid, since messages sent to the kernel always use + * PID 0. Message needs to be terminated with ynl_msg_end(). + */ + nlh->nlmsg_pid = YNL_SOCKET_BUFFER_SIZE; + return nlh; } +static int ynl_msg_end(struct ynl_sock *ys, struct nlmsghdr *nlh) +{ + /* We stash buffer length in nlmsg_pid. */ + if (nlh->nlmsg_pid == 0) { + yerr(ys, YNL_ERROR_INPUT_INVALID, + "Unknown input buffer length"); + return -EINVAL; + } + if (nlh->nlmsg_pid == YNL_MSG_OVERFLOW) { + yerr(ys, YNL_ERROR_INPUT_TOO_BIG, + "Constructred message longer than internal buffer"); + return -EMSGSIZE; + } + + nlh->nlmsg_pid = 0; + return 0; +} + struct nlmsghdr * ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags, __u8 cmd, __u8 version) @@ -607,6 +631,10 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) nlh = ynl_gemsg_start_req(ys, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 1); ynl_attr_put_str(nlh, CTRL_ATTR_FAMILY_NAME, family_name); + err = ynl_msg_end(ys, nlh); + if (err < 0) + return err; + err = send(ys->socket, nlh, nlh->nlmsg_len, 0); if (err < 0) { perr(ys, "failed to request socket family info"); @@ -868,6 +896,10 @@ int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, { int err; + err = ynl_msg_end(ys, req_nlh); + if (err < 0) + return err; + err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0); if (err < 0) return err; @@ -921,6 +953,10 @@ int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, { int err; + err = ynl_msg_end(ys, req_nlh); + if (err < 0) + return err; + err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0); if (err < 0) return err; diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index dbeeef8ce91a..9842e85a8c57 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -20,6 +20,8 @@ enum ynl_error_code { YNL_ERROR_ATTR_INVALID, YNL_ERROR_UNKNOWN_NTF, YNL_ERROR_INV_RESP, + YNL_ERROR_INPUT_INVALID, + YNL_ERROR_INPUT_TOO_BIG, }; /** -- cgit v1.2.3 From d147357e2e5977c5fe9218457a1e359fd1d36609 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Mar 2024 19:12:26 -0800 Subject: libbpf: Allow specifying 64-bit integers in map BTF. __uint() macro that is used to specify map attributes like: __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); It is limited to 32-bit, since BTF_KIND_ARRAY has u32 "number of elements" field in "struct btf_array". Introduce __ulong() macro that allows specifying values bigger than 32-bit. In map definition "map_extra" is the only u64 field, so far. Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20240307031228.42896-5-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/bpf_helpers.h | 1 + tools/lib/bpf/libbpf.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 79eaa581be98..112b1504e072 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -13,6 +13,7 @@ #define __uint(name, val) int (*name)[val] #define __type(name, val) typeof(val) *name #define __array(name, val) typeof(val) *name[] +#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name /* * Helper macro to place programs, maps, license in diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 672fca94ff53..567ad367e7aa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2335,6 +2335,46 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return true; } +static bool get_map_field_long(const char *map_name, const struct btf *btf, + const struct btf_member *m, __u64 *res) +{ + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); + const char *name = btf__name_by_offset(btf, m->name_off); + + if (btf_is_ptr(t)) { + __u32 res32; + bool ret; + + ret = get_map_field_int(map_name, btf, m, &res32); + if (ret) + *res = (__u64)res32; + return ret; + } + + if (!btf_is_enum(t) && !btf_is_enum64(t)) { + pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n", + map_name, name, btf_kind_str(t)); + return false; + } + + if (btf_vlen(t) != 1) { + pr_warn("map '%s': attr '%s': invalid __ulong\n", + map_name, name); + return false; + } + + if (btf_is_enum(t)) { + const struct btf_enum *e = btf_enum(t); + + *res = e->val; + } else { + const struct btf_enum64 *e = btf_enum64(t); + + *res = btf_enum64_value(e); + } + return true; +} + static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) { int len; @@ -2568,9 +2608,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; } else if (strcmp(name, "map_extra") == 0) { - __u32 map_extra; + __u64 map_extra; - if (!get_map_field_int(map_name, btf, m, &map_extra)) + if (!get_map_field_long(map_name, btf, m, &map_extra)) return -EINVAL; map_def->map_extra = map_extra; map_def->parts |= MAP_DEF_MAP_EXTRA; -- cgit v1.2.3 From 1576b07961971d4eeb0e269c7133e9a6d430daf8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 6 Mar 2024 19:12:27 -0800 Subject: bpftool: rename is_internal_mmapable_map into is_mmapable_map It's not restricted to working with "internal" maps, it cares about any map that can be mmap'ed. Reflect that in more succinct and generic name. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Quentin Monnet Link: https://lore.kernel.org/r/20240307031228.42896-6-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- tools/bpf/bpftool/gen.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 1f579eacd9d4..a3d72be347b0 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -248,7 +248,7 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map return NULL; } -static bool is_internal_mmapable_map(const struct bpf_map *map, char *buf, size_t sz) +static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz) { if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) return false; @@ -274,7 +274,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ - if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident))) + if (!is_mmapable_map(map, map_ident, sizeof(map_ident))) continue; sec = find_type_for_map(btf, map_ident); @@ -327,7 +327,7 @@ static int codegen_subskel_datasecs(struct bpf_object *obj, const char *obj_name bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ - if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident))) + if (!is_mmapable_map(map, map_ident, sizeof(map_ident))) continue; sec = find_type_for_map(btf, map_ident); @@ -504,7 +504,7 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name) ", obj_name); bpf_object__for_each_map(map, obj) { - if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident))) + if (!is_mmapable_map(map, map_ident, sizeof(map_ident))) continue; sec = find_type_for_map(btf, map_ident); @@ -720,7 +720,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h const void *mmap_data = NULL; size_t mmap_size = 0; - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; codegen("\ @@ -782,7 +782,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h bpf_object__for_each_map(map, obj) { const char *mmap_flags; - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG) @@ -871,7 +871,7 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) ", i, bpf_map__name(map), i, ident); /* memory-mapped internal maps */ - if (mmaped && is_internal_mmapable_map(map, ident, sizeof(ident))) { + if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) { printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", i, ident); } @@ -1617,7 +1617,7 @@ static int do_subskeleton(int argc, char **argv) /* Also count all maps that have a name */ map_cnt++; - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; map_type_id = bpf_map__btf_value_type_id(map); @@ -1739,7 +1739,7 @@ static int do_subskeleton(int argc, char **argv) /* walk through each symbol and emit the runtime representation */ bpf_object__for_each_map(map, obj) { - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; map_type_id = bpf_map__btf_value_type_id(map); -- cgit v1.2.3 From cecbc52c46e235bd651f7b3ebbf481764846a2d9 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Wed, 6 Mar 2024 23:10:41 +0000 Subject: tools/net/ynl: Fix extack decoding for netlink-raw Extack decoding was using a hard-coded msg header size of 20 but netlink-raw has a header size of 16. Use a protocol specific msghdr_size() when decoding the attr offssets. Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20240306231046.97158-2-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 239e22b7a85f..b810a478a304 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -353,6 +353,9 @@ class NetlinkProtocol: raise Exception(f'Multicast group "{mcast_name}" not present in the spec') return mcast_groups[mcast_name].value + def msghdr_size(self): + return 16 + class GenlProtocol(NetlinkProtocol): def __init__(self, family_name): @@ -378,6 +381,8 @@ class GenlProtocol(NetlinkProtocol): raise Exception(f'Multicast group "{mcast_name}" not present in the family') return self.genl_family['mcast'][mcast_name] + def msghdr_size(self): + return super().msghdr_size() + 4 class SpaceAttrs: @@ -721,7 +726,7 @@ class YnlFamily(SpecFamily): return msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set)) - offset = 20 + self._struct_size(op.fixed_header) + offset = self.nlproto.msghdr_size() + self._struct_size(op.fixed_header) path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset, extack['bad-attr-offs']) if path: -- cgit v1.2.3 From 771b7012e5f3a49739dab4be60b87517a249a1df Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Wed, 6 Mar 2024 23:10:42 +0000 Subject: tools/net/ynl: Report netlink errors without stacktrace ynl does not handle NlError exceptions so they get reported like program failures. Handle the NlError exceptions and report the netlink errors more cleanly. Example now: Netlink error: No such file or directory nl_len = 44 (28) nl_flags = 0x300 nl_type = 2 error: -2 extack: {'bad-attr': '.op'} Example before: Traceback (most recent call last): File "/home/donaldh/net-next/./tools/net/ynl/cli.py", line 81, in main() File "/home/donaldh/net-next/./tools/net/ynl/cli.py", line 69, in main reply = ynl.dump(args.dump, attrs) ^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/donaldh/net-next/tools/net/ynl/lib/ynl.py", line 906, in dump return self._op(method, vals, [], dump=True) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/donaldh/net-next/tools/net/ynl/lib/ynl.py", line 872, in _op raise NlError(nl_msg) lib.ynl.NlError: Netlink error: No such file or directory nl_len = 44 (28) nl_flags = 0x300 nl_type = 2 error: -2 extack: {'bad-attr': '.op'} Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20240306231046.97158-3-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/cli.py | 18 +++++++++++------- tools/net/ynl/lib/__init__.py | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index e8a65fbc3698..f131e33ac3ee 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -6,7 +6,7 @@ import json import pprint import time -from lib import YnlFamily, Netlink +from lib import YnlFamily, Netlink, NlError class YnlEncoder(json.JSONEncoder): @@ -66,12 +66,16 @@ def main(): if args.sleep: time.sleep(args.sleep) - if args.do: - reply = ynl.do(args.do, attrs, args.flags) - output(reply) - if args.dump: - reply = ynl.dump(args.dump, attrs) - output(reply) + try: + if args.do: + reply = ynl.do(args.do, attrs, args.flags) + output(reply) + if args.dump: + reply = ynl.dump(args.dump, attrs) + output(reply) + except NlError as e: + print(e) + exit(1) if args.ntf: ynl.check_ntf() diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py index f7eaa07783e7..9137b83e580a 100644 --- a/tools/net/ynl/lib/__init__.py +++ b/tools/net/ynl/lib/__init__.py @@ -2,7 +2,7 @@ from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \ SpecFamily, SpecOperation -from .ynl import YnlFamily, Netlink +from .ynl import YnlFamily, Netlink, NlError __all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet", - "SpecFamily", "SpecOperation", "YnlFamily", "Netlink"] + "SpecFamily", "SpecOperation", "YnlFamily", "Netlink", "NlError"] -- cgit v1.2.3 From 6fe7de5e9c08dd6838149a50e468bc8f94e4fdbe Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Wed, 6 Mar 2024 23:10:43 +0000 Subject: tools/net/ynl: Fix c codegen for array-nest ynl-gen-c generates e.g. 'calloc(mcast_groups, sizeof(*dst->mcast_groups))' for array-nest attrs when it should be 'n_mcast_groups'. Add a 'n_' prefix in the generated code for array-nests. Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20240306231046.97158-4-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 2f5febfe66a1..67bfaff05154 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1667,7 +1667,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): aspec = struct[anest] ri.cw.block_start(line=f"if (n_{aspec.c_name})") - ri.cw.p(f"dst->{aspec.c_name} = calloc({aspec.c_name}, sizeof(*dst->{aspec.c_name}));") + ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};") ri.cw.p('i = 0;') ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") -- cgit v1.2.3 From b6e6a76dec330dc0c3ed3149acc2669f3ebfb3cb Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Wed, 6 Mar 2024 23:10:44 +0000 Subject: tools/net/ynl: Add nest-type-value decoding The nlctrl genetlink-legacy family uses nest-type-value encoding as described in Documentation/userspace-api/netlink/genetlink-legacy.rst Add nest-type-value decoding to ynl. Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20240306231046.97158-5-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index b810a478a304..2d7fdd903d9e 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -595,6 +595,16 @@ class YnlFamily(SpecFamily): decoded.append({ item.type: subattrs }) return decoded + def _decode_nest_type_value(self, attr, attr_spec): + decoded = {} + value = attr + for name in attr_spec['type-value']: + value = NlAttr(value.raw, 0) + decoded[name] = value.type + subattrs = self._decode(NlAttrs(value.raw), attr_spec['nested-attributes']) + decoded.update(subattrs) + return decoded + def _decode_unknown(self, attr): if attr.is_nest: return self._decode(NlAttrs(attr.raw), None) @@ -686,6 +696,8 @@ class YnlFamily(SpecFamily): decoded = {"value": value, "selector": selector} elif attr_spec["type"] == 'sub-message': decoded = self._decode_sub_msg(attr, attr_spec, search_attrs) + elif attr_spec["type"] == 'nest-type-value': + decoded = self._decode_nest_type_value(attr, attr_spec) else: if not self.process_unknown: raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') -- cgit v1.2.3 From 4af9a0bee116e927a88da903a9244bb7d03483e3 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Tue, 5 Mar 2024 10:39:49 -0800 Subject: selftests/net: fix waiting time for ipv6_gc test in fib_tests.sh. ipv6_gc fails occasionally. According to the study, fib6_run_gc() using jiffies_round() to round the GC interval could increase the waiting time up to 750ms (3/4 seconds). The timer has a granularity of 512ms at the range 4s to 32s. That means a route with an expiration time E seconds can wait for more than E * 2 + 1 seconds if the GC interval is also E seconds. E * 2 + 2 seconds should be enough for waiting for removing routes. Also remove a check immediately after replacing 5 routes since it is very likely to remove some of routes before completing the last route with a slow environment. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240305183949.258473-1-thinker.li@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 3ec1050e47a2..73895711cdf4 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -789,6 +789,7 @@ fib6_gc_test() set -e EXPIRE=5 + GC_WAIT_TIME=$((EXPIRE * 2 + 2)) # Check expiration of routes every $EXPIRE seconds (GC) $NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE @@ -805,7 +806,7 @@ fib6_gc_test() $IP -6 route add 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - sleep $(($EXPIRE * 2 + 1)) + sleep $GC_WAIT_TIME $NS_EXEC sysctl -wq net.ipv6.route.flush=1 check_rt_num 0 $($IP -6 route list |grep expires|wc -l) log_test $ret 0 "ipv6 route garbage collection" @@ -823,7 +824,8 @@ fib6_gc_test() $IP -6 route add 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - sleep $(($EXPIRE * 2 + 1)) + # Wait for GC + sleep $GC_WAIT_TIME check_rt_num 0 $($IP -6 route list |grep expires|wc -l) log_test $ret 0 "ipv6 route garbage collection (with permanent routes)" @@ -840,10 +842,8 @@ fib6_gc_test() $IP -6 route replace 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - check_rt_num_clean 5 $($IP -6 route list |grep expires|wc -l) || return # Wait for GC - sleep $(($EXPIRE * 2 + 1)) - $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + sleep $GC_WAIT_TIME check_rt_num 0 $($IP -6 route list |grep expires|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with expires)" @@ -863,8 +863,7 @@ fib6_gc_test() check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return # Wait for GC - sleep $(($EXPIRE * 2 + 1)) - + sleep $GC_WAIT_TIME check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" @@ -901,9 +900,7 @@ fib6_gc_test() check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return # Wait for GC - sleep $(($EXPIRE * 2 + 1)) - - $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + sleep $GC_WAIT_TIME check_rt_num 0 $($IP -6 route list |grep expires|wc -l) log_test $ret 0 "ipv6 route garbage collection (RA message)" -- cgit v1.2.3 From 7c2eac649054c04007f61fcc90a42f6800727efc Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 6 Mar 2024 10:42:50 +0100 Subject: selftests: mptcp: stop forcing iptables-legacy Commit 0c4cd3f86a40 ("selftests: mptcp: join: use 'iptables-legacy' if available") and commit a5a5990c099d ("selftests: mptcp: sockopt: use 'iptables-legacy' if available") forced using iptables-legacy if available. This was needed because of some issues that were visible when testing the kselftests on a v5.15.x with iptables-nft as default backend. It looks like these errors are no longer present. As mentioned by Pablo [1], the errors were maybe due to missing kernel config. We can then use iptables-nft if it is the default one, instead of using a legacy tool. We can then check the variables iptables and ip6tables are valid. We can keep the variables to easily change it later or add options. Link: https://lore.kernel.org/netdev/ZbFiixyMFpQnxzCH@calendula/ [1] Suggested-by: Pablo Neira Ayuso Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-1-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 12 ++++-------- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 12 ++++-------- 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 955ee651dcd5..6a4af95cd4c3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -167,15 +167,11 @@ check_tools() exit $ksft_skip fi - # Use the legacy version if available to support old kernel versions - if iptables-legacy -V &> /dev/null; then - iptables="iptables-legacy" - ip6tables="ip6tables-legacy" - elif ! iptables -V &> /dev/null; then - echo "SKIP: Could not run all tests without iptables tool" + if ! "${iptables}" -V &> /dev/null; then + echo "SKIP: Could not run all tests without ${iptables} tool" exit $ksft_skip - elif ! ip6tables -V &> /dev/null; then - echo "SKIP: Could not run all tests without ip6tables tool" + elif ! "${ip6tables}" -V &> /dev/null; then + echo "SKIP: Could not run all tests without ${ip6tables} tool" exit $ksft_skip fi } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index c643872ddf47..dac8e1fc7143 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -96,15 +96,11 @@ if [ $? -ne 0 ];then exit $ksft_skip fi -# Use the legacy version if available to support old kernel versions -if iptables-legacy -V &> /dev/null; then - iptables="iptables-legacy" - ip6tables="ip6tables-legacy" -elif ! iptables -V &> /dev/null; then - echo "SKIP: Could not run all tests without iptables tool" +if ! "${iptables}" -V &> /dev/null; then + echo "SKIP: Could not run all tests without ${iptables} tool" exit $ksft_skip -elif ! ip6tables -V &> /dev/null; then - echo "SKIP: Could not run all tests without ip6tables tool" +elif ! "${ip6tables}" -V &> /dev/null; then + echo "SKIP: Could not run all tests without ${ip6tables} tool" exit $ksft_skip fi -- cgit v1.2.3 From 3fb8c33ef4b90f4cc83e635ca716a831d5251bc7 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 6 Mar 2024 10:42:51 +0100 Subject: selftests: mptcp: add mptcp_lib_check_tools helper This patch exports check_tools() helper from mptcp_join.sh into mptcp_lib.sh as a public one mptcp_lib_check_tools(). The arguments "ip", "ss", "iptables" and "ip6tables" are passed into this helper to indicate whether to check ip tool, ss tool, iptables and ip6tables tools. This helper can be used in every scripts. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-2-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 12 +-------- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 7 +---- tools/testing/selftests/net/mptcp/mptcp_join.sh | 28 +++---------------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 31 ++++++++++++++++++++++ tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 15 +---------- tools/testing/selftests/net/mptcp/pm_netlink.sh | 7 +---- tools/testing/selftests/net/mptcp/simult_flows.sh | 7 +---- tools/testing/selftests/net/mptcp/userspace_pm.sh | 6 +---- 8 files changed, 40 insertions(+), 73 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index cc8bdf95b104..16c115709a37 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -34,17 +34,7 @@ cleanup() } mptcp_lib_check_mptcp - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi -ss -h | grep -q MPTCP -if [ $? -ne 0 ];then - echo "SKIP: ss tool does not support MPTCP" - exit $ksft_skip -fi +mptcp_lib_check_tools ip ss get_msk_inuse() { diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 7898d62fce0b..ea52110c3fbc 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -147,12 +147,7 @@ cleanup() mptcp_lib_check_mptcp mptcp_lib_check_kallsyms - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +mptcp_lib_check_tools ip sin=$(mktemp) sout=$(mktemp) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 6a4af95cd4c3..aedc5698f26a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -152,34 +152,12 @@ cleanup_partial() done } -check_tools() -{ - mptcp_lib_check_mptcp - mptcp_lib_check_kallsyms - - if ! ip -Version &> /dev/null; then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip - fi - - if ! ss -h | grep -q MPTCP; then - echo "SKIP: ss tool does not support MPTCP" - exit $ksft_skip - fi - - if ! "${iptables}" -V &> /dev/null; then - echo "SKIP: Could not run all tests without ${iptables} tool" - exit $ksft_skip - elif ! "${ip6tables}" -V &> /dev/null; then - echo "SKIP: Could not run all tests without ${ip6tables} tool" - exit $ksft_skip - fi -} - init() { init=1 - check_tools + mptcp_lib_check_mptcp + mptcp_lib_check_kallsyms + mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}" sin=$(mktemp) sout=$(mktemp) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 438f557aac90..b1fe354cfe96 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -342,3 +342,34 @@ mptcp_lib_check_output() { return 1 fi } + +mptcp_lib_check_tools() { + local tool + + for tool in "${@}"; do + case "${tool}" in + "ip") + if ! ip -Version &> /dev/null; then + mptcp_lib_print_warn "SKIP: Could not run test without ip tool" + exit ${KSFT_SKIP} + fi + ;; + "ss") + if ! ss -h | grep -q MPTCP; then + mptcp_lib_print_warn "SKIP: ss tool does not support MPTCP" + exit ${KSFT_SKIP} + fi + ;; + "iptables"* | "ip6tables"*) + if ! "${tool}" -V &> /dev/null; then + mptcp_lib_print_warn "SKIP: Could not run all tests without ${tool}" + exit ${KSFT_SKIP} + fi + ;; + *) + mptcp_lib_print_err "Internal error: unsupported tool: ${tool}" + exit ${KSFT_FAIL} + ;; + esac + done +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index dac8e1fc7143..fd7de1b3dc55 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -89,20 +89,7 @@ cleanup() mptcp_lib_check_mptcp mptcp_lib_check_kallsyms - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -if ! "${iptables}" -V &> /dev/null; then - echo "SKIP: Could not run all tests without ${iptables} tool" - exit $ksft_skip -elif ! "${ip6tables}" -V &> /dev/null; then - echo "SKIP: Could not run all tests without ${ip6tables} tool" - exit $ksft_skip -fi +mptcp_lib_check_tools ip "${iptables}" "${ip6tables}" check_mark() { diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 705106d60db5..1ec9d8622fc9 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -36,12 +36,7 @@ cleanup() } mptcp_lib_check_mptcp - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +mptcp_lib_check_tools ip trap cleanup EXIT diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ed0165c15a24..dbbb13d1d74e 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -43,12 +43,7 @@ cleanup() } mptcp_lib_check_mptcp - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +mptcp_lib_check_tools ip # "$ns1" ns2 ns3 # ns1eth1 ns2eth1 ns2eth3 ns3eth1 diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 1b94a75604fe..629fc5d0ecc5 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -17,11 +17,7 @@ if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then echo "userspace pm tests are not supported by the kernel: SKIP" exit ${KSFT_SKIP} fi - -if ! ip -Version &> /dev/null; then - echo "SKIP: Cannot not run test without ip tool" - exit ${KSFT_SKIP} -fi +mptcp_lib_check_tools ip ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED REMOVED=7 # MPTCP_EVENT_REMOVED -- cgit v1.2.3 From 4214aac14e51f931e19e39083432e93abb938ab0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 6 Mar 2024 10:42:52 +0100 Subject: selftests: mptcp: add local variables rndh This patch adds local variables rndh in do_transfer() functions both in mptcp_connect.sh and simult_flows.sh, setting it with ${ns1:4}, not the global variable rndh. The global one is hidden in the next commit. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-3-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 1 + tools/testing/selftests/net/mptcp/simult_flows.sh | 1 + 2 files changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index ea52110c3fbc..b609649311f6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -348,6 +348,7 @@ do_transfer() if $capture; then local capuser + local rndh="${connector_ns:4}" if [ -z $SUDO_USER ] ; then capuser="" else diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index dbbb13d1d74e..3d08116500a4 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -139,6 +139,7 @@ do_transfer() if $capture; then local capuser + local rndh="${ns1:4}" if [ -z $SUDO_USER ] ; then capuser="" else -- cgit v1.2.3 From 3a0f9bed3c28811d692f59b2db35e6c3e259ab43 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 6 Mar 2024 10:42:53 +0100 Subject: selftests: mptcp: add mptcp_lib_ns_init/exit helpers Add helpers mptcp_lib_ns_init() and mptcp_lib_ns_exit() in mptcp_lib.sh to initialize and delete the given namespaces. Then every test script can invoke these helpers and use all namespaces. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-4-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 9 +++------ tools/testing/selftests/net/mptcp/mptcp_connect.sh | 23 +++++++++------------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 11 ++--------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 21 ++++++++++++++++++++ tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 17 ++++++---------- tools/testing/selftests/net/mptcp/pm_netlink.sh | 9 +++------ tools/testing/selftests/net/mptcp/simult_flows.sh | 16 +++++---------- tools/testing/selftests/net/mptcp/userspace_pm.sh | 14 ++++--------- 8 files changed, 53 insertions(+), 67 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 16c115709a37..c3b69b64042c 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -3,9 +3,7 @@ . "$(dirname "${0}")/mptcp_lib.sh" -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns="ns1-$rndh" +ns="" ksft_skip=4 test_cnt=1 timeout_poll=30 @@ -30,7 +28,7 @@ cleanup() { ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null - ip netns del $ns + mptcp_lib_ns_exit "${ns}" } mptcp_lib_check_mptcp @@ -214,8 +212,7 @@ wait_connected() } trap cleanup EXIT -ip netns add $ns -ip -n $ns link set dev lo up +mptcp_lib_ns_init ns echo "a" | \ timeout ${timeout_test} \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b609649311f6..92f6260ba9f3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -121,12 +121,10 @@ while getopts "$optstring" option;do esac done -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" -ns2="ns2-$rndh" -ns3="ns3-$rndh" -ns4="ns4-$rndh" +ns1="" +ns2="" +ns3="" +ns4="" TEST_COUNT=0 TEST_GROUP="" @@ -140,9 +138,9 @@ cleanup() local netns for netns in "$ns1" "$ns2" "$ns3" "$ns4";do - ip netns del $netns rm -f /tmp/$netns.{nstat,out} done + mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" "${ns4}" } mptcp_lib_check_mptcp @@ -158,10 +156,7 @@ cin_disconnect="$cin".disconnect cout_disconnect="$cout".disconnect trap cleanup EXIT -for i in "$ns1" "$ns2" "$ns3" "$ns4";do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done +mptcp_lib_ns_init ns1 ns2 ns3 ns4 # "$ns1" ns2 ns3 ns4 # ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3 @@ -251,8 +246,8 @@ fi check_mptcp_disabled() { - local disabled_ns="ns_disabled-$rndh" - ip netns add ${disabled_ns} || exit $ksft_skip + local disabled_ns + mptcp_lib_ns_init disabled_ns # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then @@ -266,7 +261,7 @@ check_mptcp_disabled() local err=0 LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ grep -q "^socket: Protocol not available$" && err=1 - ip netns delete ${disabled_ns} + mptcp_lib_ns_exit "${disabled_ns}" if [ ${err} -eq 0 ]; then echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index aedc5698f26a..612470244c58 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -86,17 +86,10 @@ init_partial() { capout=$(mktemp) - local sec rndh - sec=$(date +%s) - rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) - - ns1="ns1-$rndh" - ns2="ns2-$rndh" + mptcp_lib_ns_init ns1 ns2 local netns for netns in "$ns1" "$ns2"; do - ip netns add $netns || exit $ksft_skip - ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 @@ -147,9 +140,9 @@ cleanup_partial() local netns for netns in "$ns1" "$ns2"; do - ip netns del $netns rm -f /tmp/$netns.{nstat,out} done + mptcp_lib_ns_exit "${ns1}" "${ns2}" } init() { diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index b1fe354cfe96..17d609368603 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -373,3 +373,24 @@ mptcp_lib_check_tools() { esac done } +mptcp_lib_ns_init() { + local sec rndh + + sec=$(date +%s) + rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX) + + local netns + for netns in "${@}"; do + eval "${netns}=${netns}-${rndh}" + + ip netns add "${!netns}" || exit ${KSFT_SKIP} + ip -net "${!netns}" link set lo up + done +} + +mptcp_lib_ns_exit() { + local netns + for netns in "${@}"; do + ip netns del "${netns}" + done +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index fd7de1b3dc55..5fa5fa8cab71 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -14,11 +14,9 @@ timeout_test=$((timeout_poll * 2 + 1)) iptables="iptables" ip6tables="ip6tables" -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" -ns2="ns2-$rndh" -ns_sbox="ns_sbox-$rndh" +ns1="" +ns2="" +ns_sbox="" add_mark_rules() { @@ -40,10 +38,10 @@ add_mark_rules() init() { + mptcp_lib_ns_init ns1 ns2 ns_sbox + local netns for netns in "$ns1" "$ns2" "$ns_sbox";do - ip netns add $netns || exit $ksft_skip - ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 @@ -79,10 +77,7 @@ init() cleanup() { - local netns - for netns in "$ns1" "$ns2" "$ns_sbox"; do - ip netns del $netns - done + mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}" rm -f "$cin" "$cout" rm -f "$sin" "$sout" } diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 1ec9d8622fc9..30ec0ec3d68f 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -24,15 +24,13 @@ while getopts "$optstring" option;do esac done -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" +ns1="" err=$(mktemp) cleanup() { rm -f $err - ip netns del $ns1 + mptcp_lib_ns_exit "${ns1}" } mptcp_lib_check_mptcp @@ -40,8 +38,7 @@ mptcp_lib_check_tools ip trap cleanup EXIT -ip netns add $ns1 || exit $ksft_skip -ip -net $ns1 link set lo up +mptcp_lib_ns_init ns1 ip netns exec $ns1 sysctl -q net.mptcp.enabled=1 check() diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 3d08116500a4..2aeebb80da07 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -3,11 +3,9 @@ . "$(dirname "${0}")/mptcp_lib.sh" -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" -ns2="ns2-$rndh" -ns3="ns3-$rndh" +ns1="" +ns2="" +ns3="" capture=false ksft_skip=4 timeout_poll=30 @@ -36,10 +34,7 @@ cleanup() rm -f "$large" "$small" rm -f "$capout" - local netns - for netns in "$ns1" "$ns2" "$ns3";do - ip netns del $netns - done + mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" } mptcp_lib_check_mptcp @@ -65,9 +60,8 @@ setup() trap cleanup EXIT + mptcp_lib_ns_init ns1 ns2 ns3 for i in "$ns1" "$ns2" "$ns3";do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0 done diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 629fc5d0ecc5..e3092696d2de 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -50,10 +50,8 @@ app6_port=50004 client_addr_id=${RANDOM:0:2} server_addr_id=${RANDOM:0:2} -sec=$(date +%s) -rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" -ns2="ns2-$rndh" +ns1="" +ns2="" ret=0 test_name="" @@ -118,10 +116,7 @@ cleanup() mptcp_lib_kill_wait $pid done - local netns - for netns in "$ns1" "$ns2" ;do - ip netns del "$netns" - done + mptcp_lib_ns_exit "${ns1}" "${ns2}" rm -rf $file $client_evts $server_evts @@ -131,9 +126,8 @@ cleanup() trap cleanup EXIT # Create and configure network namespaces for testing +mptcp_lib_ns_init ns1 ns2 for i in "$ns1" "$ns2" ;do - ip netns add "$i" || exit 1 - ip -net "$i" link set lo up ip netns exec "$i" sysctl -q net.mptcp.enabled=1 ip netns exec "$i" sysctl -q net.mptcp.pm_type=1 done -- cgit v1.2.3 From df8d3ba55b4fa1d6aed8449971ee50757cb0732f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 6 Mar 2024 10:42:54 +0100 Subject: selftests: mptcp: more operations in ns_init/exit Set more the default sysctl values in mptcp_lib_ns_init(). It is fine to do that everywhere, because they could be overridden latter if needed. mptcp_lib_ns_exit() now also try to remove temp netns files used for the stats even for selftests not using them. That's fine to do that because these files have a unique name. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-5-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 4 ---- tools/testing/selftests/net/mptcp/mptcp_join.sh | 7 ------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 4 ++++ tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 7 ------- tools/testing/selftests/net/mptcp/pm_netlink.sh | 1 - tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ---- tools/testing/selftests/net/mptcp/userspace_pm.sh | 1 - 7 files changed, 4 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 92f6260ba9f3..b53ae64ec08c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -136,10 +136,6 @@ cleanup() rm -f "$sin" "$sout" rm -f "$capout" - local netns - for netns in "$ns1" "$ns2" "$ns3" "$ns4";do - rm -f /tmp/$netns.{nstat,out} - done mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" "${ns4}" } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 612470244c58..2d9cf6f3bbf3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -90,10 +90,7 @@ init_partial() local netns for netns in "$ns1" "$ns2"; do - ip netns exec $netns sysctl -q net.mptcp.enabled=1 ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true - ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 if $checksum; then ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1 fi @@ -138,10 +135,6 @@ cleanup_partial() { rm -f "$capout" - local netns - for netns in "$ns1" "$ns2"; do - rm -f /tmp/$netns.{nstat,out} - done mptcp_lib_ns_exit "${ns1}" "${ns2}" } diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 17d609368603..5f44a4ebd185 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -385,6 +385,9 @@ mptcp_lib_ns_init() { ip netns add "${!netns}" || exit ${KSFT_SKIP} ip -net "${!netns}" link set lo up + ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1 + ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0 + ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0 done } @@ -392,5 +395,6 @@ mptcp_lib_ns_exit() { local netns for netns in "${@}"; do ip netns del "${netns}" + rm -f /tmp/"${netns}".{nstat,out} done } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 5fa5fa8cab71..7dd0e5467d35 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -40,13 +40,6 @@ init() { mptcp_lib_ns_init ns1 ns2 ns_sbox - local netns - for netns in "$ns1" "$ns2" "$ns_sbox";do - ip netns exec $netns sysctl -q net.mptcp.enabled=1 - ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 - done - local i for i in `seq 1 4`; do ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 30ec0ec3d68f..c7c46152f6fd 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -39,7 +39,6 @@ mptcp_lib_check_tools ip trap cleanup EXIT mptcp_lib_ns_init ns1 -ip netns exec $ns1 sysctl -q net.mptcp.enabled=1 check() { diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 2aeebb80da07..5a4b83cdaaa9 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -61,10 +61,6 @@ setup() trap cleanup EXIT mptcp_lib_ns_init ns1 ns2 ns3 - for i in "$ns1" "$ns2" "$ns3";do - ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0 - done ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index e3092696d2de..6d71bf36a1b9 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -128,7 +128,6 @@ trap cleanup EXIT # Create and configure network namespaces for testing mptcp_lib_ns_init ns1 ns2 for i in "$ns1" "$ns2" ;do - ip netns exec "$i" sysctl -q net.mptcp.enabled=1 ip netns exec "$i" sysctl -q net.mptcp.pm_type=1 done -- cgit v1.2.3 From 35bc143a8514ee72b2e9d6b8b385468608b93a53 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 6 Mar 2024 10:42:55 +0100 Subject: selftests: mptcp: add mptcp_lib_events helper To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. This patch unifies "pm_nl_ctl events" related code in userspace_pm.sh and mptcp_join.sh into a helper mptcp_lib_events(). Define it in mptcp_lib.sh and use it in both scripts. Note that mptcp_lib_kill_wait is now call before starting 'events' for mptcp_join.sh as well, but that's fine: each test is started from a new netns, so there will not be any existing pid there, and nothing is done when mptcp_lib_kill_wait is called with 0. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-6-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 ++++------ tools/testing/selftests/net/mptcp/mptcp_lib.sh | 12 ++++++++++++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 14 ++------------ 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2d9cf6f3bbf3..1df2d24979a0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -430,12 +430,8 @@ reset_with_events() { reset "${1}" || return 1 - :> "$evts_ns1" - :> "$evts_ns2" - ip netns exec $ns1 ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & - evts_ns1_pid=$! - ip netns exec $ns2 ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & - evts_ns2_pid=$! + mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid + mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid } reset_with_tcp_filter() @@ -608,7 +604,9 @@ wait_mpj() kill_events_pids() { mptcp_lib_kill_wait $evts_ns1_pid + evts_ns1_pid=0 mptcp_lib_kill_wait $evts_ns2_pid + evts_ns2_pid=0 } pm_nl_set_limits() diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 5f44a4ebd185..5d071b6eb780 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -398,3 +398,15 @@ mptcp_lib_ns_exit() { rm -f /tmp/"${netns}".{nstat,out} done } + +mptcp_lib_events() { + local ns="${1}" + local evts="${2}" + declare -n pid="${3}" + + :>"${evts}" + + mptcp_lib_kill_wait "${pid:-0}" + ip netns exec "${ns}" ./pm_nl_ctl events >> "${evts}" 2>&1 & + pid=$! +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 6d71bf36a1b9..3200d0b96d53 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -178,21 +178,11 @@ make_connection() if [ -z "$client_evts" ]; then client_evts=$(mktemp) fi - :>"$client_evts" - if [ $client_evts_pid -ne 0 ]; then - mptcp_lib_kill_wait $client_evts_pid - fi - ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 & - client_evts_pid=$! + mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid if [ -z "$server_evts" ]; then server_evts=$(mktemp) fi - :>"$server_evts" - if [ $server_evts_pid -ne 0 ]; then - mptcp_lib_kill_wait $server_evts_pid - fi - ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 & - server_evts_pid=$! + mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid sleep 0.5 # Run the server -- cgit v1.2.3 From 97633aa74d93cefabc0943f6b0d4cfdeb39177cf Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 6 Mar 2024 10:42:56 +0100 Subject: selftests: mptcp: diag: fix shellcheck warnings shellcheck recently helped to prevent issues. It is then good to fix the other harmless issues in order to spot "real" ones later. Here, two categories of warnings are now ignored: - SC2317: Command appears to be unreachable. The cleanup() function is invoked indirectly via the EXIT trap. - SC2086: Double quote to prevent globbing and word splitting. This is recommended, but the current usage is correct and there is no need to do all these modifications to be compliant with this rule. For the modifications: - SC2034: ksft_skip appears unused. - SC2046: Quote '$(get_msk_inuse)' to prevent word splitting. - SC2006: Use $(...) notation instead of legacy backticks `...`. Now this script is shellcheck (0.9.0) compliant. We can easily spot new issues. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-7-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index c3b69b64042c..afe862895946 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -1,10 +1,14 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + . "$(dirname "${0}")/mptcp_lib.sh" ns="" -ksft_skip=4 test_cnt=1 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) @@ -24,6 +28,8 @@ flush_pids() done } +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null @@ -174,7 +180,7 @@ chk_msk_inuse() expected=$((expected + listen_nr)) for _ in $(seq 10); do - if [ $(get_msk_inuse) -eq $expected ];then + if [ "$(get_msk_inuse)" -eq $expected ]; then break fi sleep 0.1 @@ -260,7 +266,7 @@ chk_msk_inuse 0 "1->0" chk_msk_cestab 0 "1->0" NR_CLIENTS=100 -for I in `seq 1 $NR_CLIENTS`; do +for I in $(seq 1 $NR_CLIENTS); do echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ @@ -269,7 +275,7 @@ for I in `seq 1 $NR_CLIENTS`; do done mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001)) -for I in `seq 1 $NR_CLIENTS`; do +for I in $(seq 1 $NR_CLIENTS); do echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ -- cgit v1.2.3 From e3aae1098f109f0bd33c971deff1926f4e4441d0 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 6 Mar 2024 10:42:57 +0100 Subject: selftests: mptcp: connect: fix shellcheck warnings shellcheck recently helped to prevent issues. It is then good to fix the other harmless issues in order to spot "real" ones later. Here, two categories of warnings are now ignored: - SC2317: Command appears to be unreachable. The cleanup() function is invoked indirectly via the EXIT trap. - SC2086: Double quote to prevent globbing and word splitting. This is recommended, but the current usage is correct and there is no need to do all these modifications to be compliant with this rule. For the modifications: - SC2034: ksft_skip appears unused. - SC2181: Check exit code directly with e.g. 'if mycmd;', not indirectly with $?. - SC2004: $/${} is unnecessary on arithmetic variables. - SC2155: Declare and assign separately to avoid masking return values. - SC2166: Prefer [ p ] && [ q ] as [ p -a q ] is not well defined. - SC2059: Don't use variables in the printf format string. Use printf '..%s..' "$foo". Now this script is shellcheck (0.9.0) compliant. We can easily spot new issues. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-8-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 76 +++++++++++++--------- 1 file changed, 47 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b53ae64ec08c..0ca2960c9099 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -1,6 +1,11 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + . "$(dirname "${0}")/mptcp_lib.sh" time_start=$(date +%s) @@ -13,7 +18,6 @@ sout="" cin_disconnect="" cin="" cout="" -ksft_skip=4 capture=false timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) @@ -129,6 +133,8 @@ ns4="" TEST_COUNT=0 TEST_GROUP="" +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { rm -f "$cin_disconnect" "$cout_disconnect" @@ -211,8 +217,9 @@ set_ethtool_flags() { local dev="$2" local flags="$3" - ip netns exec $ns ethtool -K $dev $flags 2>/dev/null - [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags" + if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then + echo "INFO: set $ns dev $dev: ethtool -K $flags" + fi } set_random_ethtool_flags() { @@ -307,7 +314,7 @@ do_transfer() local extra_args="$7" local port - port=$((10000+$TEST_COUNT)) + port=$((10000+TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) if [ "$rcvbuf" -gt 0 ]; then @@ -365,12 +372,18 @@ do_transfer() nstat -n fi - local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local stat_synrx_last_l + local stat_ackrx_last_l + local stat_cookietx_last + local stat_cookierx_last + local stat_csum_err_s + local stat_csum_err_c + stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -433,11 +446,16 @@ do_transfer() mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? - local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + local stat_synrx_now_l + local stat_ackrx_now_l + local stat_cookietx_now + local stat_cookierx_now + local stat_ooo_now + stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -446,8 +464,8 @@ do_transfer() cookies=${cookies##*=} if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then - expect_synrx=$((stat_synrx_last_l+$connect_per_transfer)) - expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer)) + expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) + expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) fi if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then @@ -455,7 +473,7 @@ do_transfer() "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then if [ ${stat_ooo_now} -eq 0 ]; then printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 @@ -466,18 +484,20 @@ do_transfer() fi if $checksum; then - local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local csum_err_s + local csum_err_c + csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) if [ $csum_err_s_nr -gt 0 ]; then - printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]" + printf "[ FAIL ]\nserver got %d data checksum error[s]" ${csum_err_s_nr} rets=1 fi local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) if [ $csum_err_c_nr -gt 0 ]; then - printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]" + printf "[ FAIL ]\nclient got %d data checksum error[s]" ${csum_err_c_nr} retc=1 fi fi @@ -645,7 +665,7 @@ run_test_transparent() return fi -ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" + if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" flush ruleset table inet mangle { chain divert { @@ -656,7 +676,7 @@ table inet mangle { } } EOF - if [ $? -ne 0 ]; then + then echo "SKIP: $msg, could not load nft ruleset" mptcp_lib_fail_if_expected_feature "nft rules" mptcp_lib_result_skip "${TEST_GROUP}" @@ -671,8 +691,7 @@ EOF local_addr="0.0.0.0" fi - ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100 - if [ $? -ne 0 ]; then + if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then ip netns exec "$listener_ns" nft flush ruleset echo "SKIP: $msg, ip $r6flag rule failed" mptcp_lib_fail_if_expected_feature "ip rule" @@ -680,8 +699,7 @@ EOF return fi - ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100 - if [ $? -ne 0 ]; then + if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then ip netns exec "$listener_ns" nft flush ruleset ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 echo "SKIP: $msg, ip route add local $local_addr failed" @@ -844,7 +862,7 @@ stop_if_error "Could not even run ping tests" echo -n "INFO: Using loss of $tc_loss " test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " -reorder_delay=$(($tc_delay / 4)) +reorder_delay=$((tc_delay / 4)) if [ -z "${tc_reorder}" ]; then reorder1=$((RANDOM%10)) -- cgit v1.2.3 From 5751c291349deb9c1fa59455df3c9e1be30137a5 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 6 Mar 2024 10:42:58 +0100 Subject: selftests: mptcp: sockopt: fix shellcheck warnings shellcheck recently helped to prevent issues. It is then good to fix the other harmless issues in order to spot "real" ones later. Here, two categories of warnings are now ignored: - SC2317: Command appears to be unreachable. The cleanup() function is invoked indirectly via the EXIT trap. - SC2086: Double quote to prevent globbing and word splitting. This is recommended, but the current usage is correct and there is no need to do all these modifications to be compliant with this rule. For the modifications: - SC2034: ksft_skip appears unused. - SC2006: Use $(...) notation instead of legacy backticks `...`. - SC2145: Argument mixes string and array. Use * or separate argument. Now this script is shellcheck (0.9.0) compliant. We can easily spot new issues. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-9-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 7dd0e5467d35..6ed4aa32222f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -1,6 +1,11 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + . "$(dirname "${0}")/mptcp_lib.sh" ret=0 @@ -8,7 +13,6 @@ sin="" sout="" cin="" cout="" -ksft_skip=4 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) iptables="iptables" @@ -41,7 +45,7 @@ init() mptcp_lib_ns_init ns1 ns2 ns_sbox local i - for i in `seq 1 4`; do + for i in $(seq 1 4); do ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad @@ -68,6 +72,8 @@ init() add_mark_rules $ns2 2 } +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}" @@ -257,12 +263,12 @@ do_tcpinq_test() local lret=$? if [ $lret -ne 0 ];then ret=$lret - echo "FAIL: mptcp_inq $@" 1>&2 + echo "FAIL: mptcp_inq $*" 1>&2 mptcp_lib_result_fail "TCP_INQ: $*" return $lret fi - echo "PASS: TCP_INQ cmsg/ioctl $@" + echo "PASS: TCP_INQ cmsg/ioctl $*" mptcp_lib_result_pass "TCP_INQ: $*" return $lret } -- cgit v1.2.3 From 21781b42f2f35d24b502205a3d8987adcfd05636 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 6 Mar 2024 10:42:59 +0100 Subject: selftests: mptcp: pm netlink: fix shellcheck warnings shellcheck recently helped to prevent issues. It is then good to fix the other harmless issues in order to spot "real" ones later. Here, two categories of warnings are now ignored: - SC2317: Command appears to be unreachable. The cleanup() function is invoked indirectly via the EXIT trap. - SC2086: Double quote to prevent globbing and word splitting. This is recommended, but the current usage is correct and there is no need to do all these modifications to be compliant with this rule. For the modifications: - SC2034: ksft_skip appears unused. - SC2154: optstring is referenced but not assigned. - SC2006: Use $(...) notation instead of legacy backticks `...`. Now this script is shellcheck (0.9.0) compliant. We can easily spot new issues. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-10-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index c7c46152f6fd..427fc5c70b3c 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -1,16 +1,20 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + . "$(dirname "${0}")/mptcp_lib.sh" -ksft_skip=4 ret=0 usage() { echo "Usage: $0 [ -h ]" } - +optstring=h while getopts "$optstring" option;do case "$option" in "h") @@ -27,6 +31,8 @@ done ns1="" err=$(mktemp) +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { rm -f $err @@ -91,14 +97,14 @@ check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment" -for i in `seq 5 9`; do +for i in $(seq 5 9); do ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1 done check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit" check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" ip netns exec $ns1 ./pm_nl_ctl del 9 -for i in `seq 10 255`; do +for i in $(seq 10 255); do ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i ip netns exec $ns1 ./pm_nl_ctl del $i done -- cgit v1.2.3 From 2aebd3579d90e12cadc1bf93c524e2f422cd1c49 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 6 Mar 2024 10:43:00 +0100 Subject: selftests: mptcp: simult flows: fix shellcheck warnings shellcheck recently helped to prevent issues. It is then good to fix the other harmless issues in order to spot "real" ones later. Here, two categories of warnings are now ignored: - SC2317: Command appears to be unreachable. The cleanup() function is invoked indirectly via the EXIT trap. - SC2086: Double quote to prevent globbing and word splitting. This is recommended, but the current usage is correct and there is no need to do all these modifications to be compliant with this rule. For the modifications: - SC2034: ksft_skip appears unused. - SC2004: $/${} is unnecessary on arithmetic variables. Now this script is shellcheck (0.9.0) compliant. We can easily spot new issues. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-11-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/simult_flows.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 5a4b83cdaaa9..365fb3d6ef55 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -1,13 +1,17 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + . "$(dirname "${0}")/mptcp_lib.sh" ns1="" ns2="" ns3="" capture=false -ksft_skip=4 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) test_cnt=1 @@ -28,6 +32,8 @@ usage() { echo -e "\t-d: debug this script" } +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { rm -f "$cout" "$sout" @@ -120,7 +126,7 @@ do_transfer() local sin=$2 local max_time=$3 local port - port=$((10000+$test_cnt)) + port=$((10000+test_cnt)) test_cnt=$((test_cnt+1)) :> "$cout" -- cgit v1.2.3 From c66fb480a3302577f657a5d4f5308312bf1b52f8 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 6 Mar 2024 10:43:01 +0100 Subject: selftests: userspace pm: avoid relaunching pm events 'make_connection' is launched twice: once for IPv4, once for IPv6. But then, the "pm_nl_ctl events" was launched a first time, killed, then relaunched after for no particular reason. We can then move this code, and the generation of the temp file to exchange, to the init part, and remove extra conditions that no longer needed. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240306-upstream-net-next-20240304-selftests-mptcp-shared-code-shellcheck-v2-12-bc79e6e5e6a0@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 29 ++++++++++------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3200d0b96d53..b0cce8f065d8 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -149,17 +149,23 @@ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad ip -net "$ns2" link set ns2eth1 up +file=$(mktemp) +mptcp_lib_make_file "$file" 2 1 + +# Capture netlink events over the two network namespaces running +# the MPTCP client and server +client_evts=$(mktemp) +mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid +server_evts=$(mktemp) +mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid +sleep 0.5 + print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass make_connection() { - if [ -z "$file" ]; then - file=$(mktemp) - fi - mptcp_lib_make_file "$file" 2 1 - local is_v6=$1 local app_port=$app4_port local connect_addr="10.0.1.1" @@ -173,17 +179,8 @@ make_connection() is_v6="v4" fi - # Capture netlink events over the two network namespaces running - # the MPTCP client and server - if [ -z "$client_evts" ]; then - client_evts=$(mktemp) - fi - mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid - if [ -z "$server_evts" ]; then - server_evts=$(mktemp) - fi - mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid - sleep 0.5 + :>"$client_evts" + :>"$server_evts" # Run the server ip netns exec "$ns1" \ -- cgit v1.2.3 From ab63a2387cb906d43b72a8effb611bbaecb2d0cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Mar 2024 11:55:07 -0800 Subject: netdev: add per-queue statistics The ethtool-nl family does a good job exposing various protocol related and IEEE/IETF statistics which used to get dumped under ethtool -S, with creative names. Queue stats don't have a netlink API, yet, and remain a lion's share of ethtool -S output for new drivers. Not only is that bad because the names differ driver to driver but it's also bug-prone. Intuitively drivers try to report only the stats for active queues, but querying ethtool stats involves multiple system calls, and the number of stats is read separately from the stats themselves. Worse still when user space asks for values of the stats, it doesn't inform the kernel how big the buffer is. If number of stats increases in the meantime kernel will overflow user buffer. Add a netlink API for dumping queue stats. Queue information is exposed via the netdev-genl family, so add the stats there. Support per-queue and sum-for-device dumps. Latter will be useful when subsequent patches add more interesting common stats than just bytes and packets. The API does not currently distinguish between HW and SW stats. The expectation is that the source of the stats will either not matter much (good packets) or be obvious (skb alloc errors). Acked-by: Stanislav Fomichev Reviewed-by: Amritha Nambiar Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240306195509.1502746-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 84 +++++++++++++ Documentation/networking/statistics.rst | 15 +++ include/linux/netdevice.h | 3 + include/net/netdev_queues.h | 54 ++++++++ include/uapi/linux/netdev.h | 19 +++ net/core/netdev-genl-gen.c | 12 ++ net/core/netdev-genl-gen.h | 2 + net/core/netdev-genl.c | 213 ++++++++++++++++++++++++++++++++ tools/include/uapi/linux/netdev.h | 19 +++ 9 files changed, 421 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 3addac970680..a1e48c3c84c9 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -74,6 +74,10 @@ definitions: name: queue-type type: enum entries: [ rx, tx ] + - + name: qstats-scope + type: flags + entries: [ queue ] attribute-sets: - @@ -265,6 +269,66 @@ attribute-sets: doc: ID of the NAPI instance which services this queue. type: u32 + - + name: qstats + doc: | + Get device statistics, scoped to a device or a queue. + These statistics extend (and partially duplicate) statistics available + in struct rtnl_link_stats64. + Value of the `scope` attribute determines how statistics are + aggregated. When aggregated for the entire device the statistics + represent the total number of events since last explicit reset of + the device (i.e. not a reconfiguration like changing queue count). + When reported per-queue, however, the statistics may not add + up to the total number of events, will only be reported for currently + active objects, and will likely report the number of events since last + reconfiguration. + attributes: + - + name: ifindex + doc: ifindex of the netdevice to which stats belong. + type: u32 + checks: + min: 1 + - + name: queue-type + doc: Queue type as rx, tx, for queue-id. + type: u32 + enum: queue-type + - + name: queue-id + doc: Queue ID, if stats are scoped to a single queue instance. + type: u32 + - + name: scope + doc: | + What object type should be used to iterate over the stats. + type: uint + enum: qstats-scope + - + name: rx-packets + doc: | + Number of wire packets successfully received and passed to the stack. + For drivers supporting XDP, XDP is considered the first layer + of the stack, so packets consumed by XDP are still counted here. + type: uint + value: 8 # reserve some attr ids in case we need more metadata later + - + name: rx-bytes + doc: Successfully received bytes, see `rx-packets`. + type: uint + - + name: tx-packets + doc: | + Number of wire packets successfully sent. Packet is considered to be + successfully sent once it is in device memory (usually this means + the device has issued a DMA completion for the packet). + type: uint + - + name: tx-bytes + doc: Successfully sent bytes, see `tx-packets`. + type: uint + operations: list: - @@ -405,6 +469,26 @@ operations: attributes: - ifindex reply: *napi-get-op + - + name: qstats-get + doc: | + Get / dump fine grained statistics. Which statistics are reported + depends on the device and the driver, and whether the driver stores + software counters per-queue. + attribute-set: qstats + dump: + request: + attributes: + - scope + reply: + attributes: + - ifindex + - queue-type + - queue-id + - rx-packets + - rx-bytes + - tx-packets + - tx-bytes mcast-groups: list: diff --git a/Documentation/networking/statistics.rst b/Documentation/networking/statistics.rst index 551b3cc29a41..75e017dfa825 100644 --- a/Documentation/networking/statistics.rst +++ b/Documentation/networking/statistics.rst @@ -41,6 +41,15 @@ If `-s` is specified once the detailed errors won't be shown. `ip` supports JSON formatting via the `-j` option. +Queue statistics +~~~~~~~~~~~~~~~~ + +Queue statistics are accessible via the netdev netlink family. + +Currently no widely distributed CLI exists to access those statistics. +Kernel development tools (ynl) can be used to experiment with them, +see `Documentation/userspace-api/netlink/intro-specs.rst`. + Protocol-specific statistics ---------------------------- @@ -147,6 +156,12 @@ Statistics are reported both in the responses to link information requests (`RTM_GETLINK`) and statistic requests (`RTM_GETSTATS`, when `IFLA_STATS_LINK_64` bit is set in the `.filter_mask` of the request). +netdev (netlink) +~~~~~~~~~~~~~~~~ + +`netdev` generic netlink family allows accessing page pool and per queue +statistics. + ethtool ------- diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 416a800d72ba..4230c7f3b959 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1955,6 +1955,7 @@ enum netdev_reg_state { * * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops + * @stat_ops: Optional ops for queue-aware statistics * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size @@ -2335,6 +2336,8 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; + const struct netdev_stat_ops *stat_ops; + /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 8b8ed4e13d74..d633347eeda5 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,6 +4,60 @@ #include +struct netdev_queue_stats_rx { + u64 bytes; + u64 packets; +}; + +struct netdev_queue_stats_tx { + u64 bytes; + u64 packets; +}; + +/** + * struct netdev_stat_ops - netdev ops for fine grained stats + * @get_queue_stats_rx: get stats for a given Rx queue + * @get_queue_stats_tx: get stats for a given Tx queue + * @get_base_stats: get base stats (not belonging to any live instance) + * + * Query stats for a given object. The values of the statistics are undefined + * on entry (specifically they are *not* zero-initialized). Drivers should + * assign values only to the statistics they collect. Statistics which are not + * collected must be left undefined. + * + * Queue objects are not necessarily persistent, and only currently active + * queues are queried by the per-queue callbacks. This means that per-queue + * statistics will not generally add up to the total number of events for + * the device. The @get_base_stats callback allows filling in the delta + * between events for currently live queues and overall device history. + * When the statistics for the entire device are queried, first @get_base_stats + * is issued to collect the delta, and then a series of per-queue callbacks. + * Only statistics which are set in @get_base_stats will be reported + * at the device level, meaning that unlike in queue callbacks, setting + * a statistic to zero in @get_base_stats is a legitimate thing to do. + * This is because @get_base_stats has a second function of designating which + * statistics are in fact correct for the entire device (e.g. when history + * for some of the events is not maintained, and reliable "total" cannot + * be provided). + * + * Device drivers can assume that when collecting total device stats, + * the @get_base_stats and subsequent per-queue calls are performed + * "atomically" (without releasing the rtnl_lock). + * + * Device drivers are encouraged to reset the per-queue statistics when + * number of queues change. This is because the primary use case for + * per-queue statistics is currently to detect traffic imbalance. + */ +struct netdev_stat_ops { + void (*get_queue_stats_rx)(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *stats); + void (*get_queue_stats_tx)(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *stats); + void (*get_base_stats)(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx); +}; + /** * DOC: Lockless queue stopping / waking helpers. * diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 93cb411adf72..639ffa04c172 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -70,6 +70,10 @@ enum netdev_queue_type { NETDEV_QUEUE_TYPE_TX, }; +enum netdev_qstats_scope { + NETDEV_QSTATS_SCOPE_QUEUE = 1, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -132,6 +136,20 @@ enum { NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) }; +enum { + NETDEV_A_QSTATS_IFINDEX = 1, + NETDEV_A_QSTATS_QUEUE_TYPE, + NETDEV_A_QSTATS_QUEUE_ID, + NETDEV_A_QSTATS_SCOPE, + NETDEV_A_QSTATS_RX_PACKETS = 8, + NETDEV_A_QSTATS_RX_BYTES, + NETDEV_A_QSTATS_TX_PACKETS, + NETDEV_A_QSTATS_TX_BYTES, + + __NETDEV_A_QSTATS_MAX, + NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -144,6 +162,7 @@ enum { NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, + NETDEV_CMD_QSTATS_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index be7f2ebd61b2..8d8ace9ef87f 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -68,6 +68,11 @@ static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFIN [NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; +/* NETDEV_CMD_QSTATS_GET - dump */ +static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE + 1] = { + [NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -138,6 +143,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_NAPI_IFINDEX, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = NETDEV_CMD_QSTATS_GET, + .dumpit = netdev_nl_qstats_get_dumpit, + .policy = netdev_qstats_get_nl_policy, + .maxattr = NETDEV_A_QSTATS_SCOPE, + .flags = GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index a47f2bcbe4fa..4db40fd5b4a9 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -28,6 +28,8 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 918b109e0cf4..7fa75e13dc6d 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "netdev-genl-gen.h" @@ -460,6 +461,218 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return err; } +#define NETDEV_STAT_NOT_SET (~0ULL) + +static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size) +{ + const u64 *add = _add; + u64 *sum = _sum; + + while (size) { + if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET) + *sum += *add; + sum++; + add++; + size -= 8; + } +} + +static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value) +{ + if (value == NETDEV_STAT_NOT_SET) + return 0; + return nla_put_uint(rsp, attr_id, value); +} + +static int +netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) +{ + if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes)) + return -EMSGSIZE; + return 0; +} + +static int +netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) +{ + if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes)) + return -EMSGSIZE; + return 0; +} + +static int +netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp, + u32 q_type, int i, const struct genl_info *info) +{ + const struct netdev_stat_ops *ops = netdev->stat_ops; + struct netdev_queue_stats_rx rx; + struct netdev_queue_stats_tx tx; + void *hdr; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) + return -EMSGSIZE; + if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) || + nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) || + nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i)) + goto nla_put_failure; + + switch (q_type) { + case NETDEV_QUEUE_TYPE_RX: + memset(&rx, 0xff, sizeof(rx)); + ops->get_queue_stats_rx(netdev, i, &rx); + if (!memchr_inv(&rx, 0xff, sizeof(rx))) + goto nla_cancel; + if (netdev_nl_stats_write_rx(rsp, &rx)) + goto nla_put_failure; + break; + case NETDEV_QUEUE_TYPE_TX: + memset(&tx, 0xff, sizeof(tx)); + ops->get_queue_stats_tx(netdev, i, &tx); + if (!memchr_inv(&tx, 0xff, sizeof(tx))) + goto nla_cancel; + if (netdev_nl_stats_write_tx(rsp, &tx)) + goto nla_put_failure; + break; + } + + genlmsg_end(rsp, hdr); + return 0; + +nla_cancel: + genlmsg_cancel(rsp, hdr); + return 0; +nla_put_failure: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +} + +static int +netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, + const struct genl_info *info, + struct netdev_nl_dump_ctx *ctx) +{ + const struct netdev_stat_ops *ops = netdev->stat_ops; + int i, err; + + if (!(netdev->flags & IFF_UP)) + return 0; + + i = ctx->rxq_idx; + while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) { + err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX, + i, info); + if (err) + return err; + ctx->rxq_idx = i++; + } + i = ctx->txq_idx; + while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) { + err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX, + i, info); + if (err) + return err; + ctx->txq_idx = i++; + } + + ctx->rxq_idx = 0; + ctx->txq_idx = 0; + return 0; +} + +static int +netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, + const struct genl_info *info) +{ + struct netdev_queue_stats_rx rx_sum, rx; + struct netdev_queue_stats_tx tx_sum, tx; + const struct netdev_stat_ops *ops; + void *hdr; + int i; + + ops = netdev->stat_ops; + /* Netdev can't guarantee any complete counters */ + if (!ops->get_base_stats) + return 0; + + memset(&rx_sum, 0xff, sizeof(rx_sum)); + memset(&tx_sum, 0xff, sizeof(tx_sum)); + + ops->get_base_stats(netdev, &rx_sum, &tx_sum); + + /* The op was there, but nothing reported, don't bother */ + if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && + !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum))) + return 0; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) + return -EMSGSIZE; + if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) + goto nla_put_failure; + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + memset(&rx, 0xff, sizeof(rx)); + if (ops->get_queue_stats_rx) + ops->get_queue_stats_rx(netdev, i, &rx); + netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx)); + } + for (i = 0; i < netdev->real_num_tx_queues; i++) { + memset(&tx, 0xff, sizeof(tx)); + if (ops->get_queue_stats_tx) + ops->get_queue_stats_tx(netdev, i, &tx); + netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx)); + } + + if (netdev_nl_stats_write_rx(rsp, &rx_sum) || + netdev_nl_stats_write_tx(rsp, &tx_sum)) + goto nla_put_failure; + + genlmsg_end(rsp, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +} + +int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); + const struct genl_info *info = genl_info_dump(cb); + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + unsigned int scope; + int err = 0; + + scope = 0; + if (info->attrs[NETDEV_A_QSTATS_SCOPE]) + scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]); + + rtnl_lock(); + for_each_netdev_dump(net, netdev, ctx->ifindex) { + if (!netdev->stat_ops) + continue; + + switch (scope) { + case 0: + err = netdev_nl_stats_by_netdev(netdev, skb, info); + break; + case NETDEV_QSTATS_SCOPE_QUEUE: + err = netdev_nl_stats_by_queue(netdev, skb, info, ctx); + break; + } + if (err < 0) + break; + } + rtnl_unlock(); + + return err; +} + static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 93cb411adf72..639ffa04c172 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -70,6 +70,10 @@ enum netdev_queue_type { NETDEV_QUEUE_TYPE_TX, }; +enum netdev_qstats_scope { + NETDEV_QSTATS_SCOPE_QUEUE = 1, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -132,6 +136,20 @@ enum { NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) }; +enum { + NETDEV_A_QSTATS_IFINDEX = 1, + NETDEV_A_QSTATS_QUEUE_TYPE, + NETDEV_A_QSTATS_QUEUE_ID, + NETDEV_A_QSTATS_SCOPE, + NETDEV_A_QSTATS_RX_PACKETS = 8, + NETDEV_A_QSTATS_RX_BYTES, + NETDEV_A_QSTATS_TX_PACKETS, + NETDEV_A_QSTATS_TX_BYTES, + + __NETDEV_A_QSTATS_MAX, + NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -144,6 +162,7 @@ enum { NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, + NETDEV_CMD_QSTATS_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From 92f8b1f5ca0f157f564e75cef4c63641c172e0f1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Mar 2024 11:55:08 -0800 Subject: netdev: add queue stat for alloc failures Rx alloc failures are commonly counted by drivers. Support reporting those via netdev-genl queue stats. Acked-by: Stanislav Fomichev Reviewed-by: Amritha Nambiar Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240306195509.1502746-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 7 +++++++ include/net/netdev_queues.h | 2 ++ include/uapi/linux/netdev.h | 1 + net/core/netdev-genl.c | 3 ++- tools/include/uapi/linux/netdev.h | 1 + 5 files changed, 13 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index a1e48c3c84c9..76352dbd2be4 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -328,6 +328,13 @@ attribute-sets: name: tx-bytes doc: Successfully sent bytes, see `tx-packets`. type: uint + - + name: rx-alloc-fail + doc: | + Number of times skb or buffer allocation failed on the Rx datapath. + Allocation failure may, or may not result in a packet drop, depending + on driver implementation and whether system recovers quickly. + type: uint operations: list: diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index d633347eeda5..1ec408585373 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,9 +4,11 @@ #include +/* See the netdev.yaml spec for definition of each statistic */ struct netdev_queue_stats_rx { u64 bytes; u64 packets; + u64 alloc_fail; }; struct netdev_queue_stats_tx { diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 639ffa04c172..bb65ee840cda 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -145,6 +145,7 @@ enum { NETDEV_A_QSTATS_RX_BYTES, NETDEV_A_QSTATS_TX_PACKETS, NETDEV_A_QSTATS_TX_BYTES, + NETDEV_A_QSTATS_RX_ALLOC_FAIL, __NETDEV_A_QSTATS_MAX, NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 7fa75e13dc6d..7004b3399c2b 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -488,7 +488,8 @@ static int netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || - netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes)) + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail)) return -EMSGSIZE; return 0; } diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 639ffa04c172..bb65ee840cda 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -145,6 +145,7 @@ enum { NETDEV_A_QSTATS_RX_BYTES, NETDEV_A_QSTATS_TX_PACKETS, NETDEV_A_QSTATS_TX_BYTES, + NETDEV_A_QSTATS_RX_ALLOC_FAIL, __NETDEV_A_QSTATS_MAX, NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) -- cgit v1.2.3 From 6de3b6c75dd931b078ab21f12c598c6177fb3f75 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Mar 2024 08:44:58 +0000 Subject: tools: ynl: Fix spelling mistake "Constructred" -> "Constructed" There is a spelling mistake in an error message. Fix it. Signed-off-by: Colin Ian King Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240308084458.2045266-1-colin.i.king@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index b9e77af5af5f..4b9c091fc86b 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -423,7 +423,7 @@ static int ynl_msg_end(struct ynl_sock *ys, struct nlmsghdr *nlh) } if (nlh->nlmsg_pid == YNL_MSG_OVERFLOW) { yerr(ys, YNL_ERROR_INPUT_TOO_BIG, - "Constructred message longer than internal buffer"); + "Constructed message longer than internal buffer"); return -EMSGSIZE; } -- cgit v1.2.3 From 900b2801bf250affe410193a0d27a2ba9f2db4e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Mar 2024 14:11:22 -0800 Subject: ynl: samples: fix recycling rate calculation Running the page-pool sample on production machines under moderate networking load shows recycling rate higher than 100%: $ page-pool eth0[2] page pools: 14 (zombies: 0) refs: 89088 bytes: 364904448 (refs: 0 bytes: 0) recycling: 100.3% (alloc: 1392:2290247724 recycle: 469289484:1828235386) Note that outstanding refs (89088) == slow alloc * cache size (1392 * 64) which means this machine is recycling page pool pages perfectly, not a single page has been released. The extra 0.3% is because sample ignores allocations from the ptr_ring. Treat those the same as alloc_fast, the ring vs cache alloc is already captured accurately enough by recycling stats. With the fix: $ page-pool eth0[2] page pools: 14 (zombies: 0) refs: 89088 bytes: 364904448 (refs: 0 bytes: 0) recycling: 100.0% (alloc: 1392:2331141604 recycle: 473625579:1857460661) Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/net/ynl/samples/page-pool.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c index 098b5190d0e5..332f281ee5cb 100644 --- a/tools/net/ynl/samples/page-pool.c +++ b/tools/net/ynl/samples/page-pool.c @@ -95,6 +95,8 @@ int main(int argc, char **argv) if (pp->_present.alloc_fast) s->alloc_fast += pp->alloc_fast; + if (pp->_present.alloc_refill) + s->alloc_fast += pp->alloc_refill; if (pp->_present.alloc_slow) s->alloc_slow += pp->alloc_slow; if (pp->_present.recycle_ring) -- cgit v1.2.3 From 365c2b32792e692bad6e3761ad19ac3f8f52c0fe Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Mar 2024 16:51:24 -0800 Subject: selftests/bpf: Add fexit and kretprobe triggering benchmarks We already have kprobe and fentry benchmarks. Let's add kretprobe and fexit ones for completeness. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240309005124.3004446-1-andrii@kernel.org --- tools/testing/selftests/bpf/bench.c | 4 +++ tools/testing/selftests/bpf/benchs/bench_trigger.c | 32 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/trigger_bench.c | 14 ++++++++++ 3 files changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 60df99b6ac72..8e02645afe31 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -495,7 +495,9 @@ extern const struct bench bench_trig_base; extern const struct bench bench_trig_tp; extern const struct bench bench_trig_rawtp; extern const struct bench bench_trig_kprobe; +extern const struct bench bench_trig_kretprobe; extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_fexit; extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; extern const struct bench bench_trig_uprobe_base; @@ -539,7 +541,9 @@ static const struct bench *benchs[] = { &bench_trig_tp, &bench_trig_rawtp, &bench_trig_kprobe, + &bench_trig_kretprobe, &bench_trig_fentry, + &bench_trig_fexit, &bench_trig_fentry_sleep, &bench_trig_fmodret, &bench_trig_uprobe_base, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 064a1ef7a6fb..3f24be9cfcb9 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -85,12 +85,24 @@ static void trigger_kprobe_setup(void) attach_bpf(ctx.skel->progs.bench_trigger_kprobe); } +static void trigger_kretprobe_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); +} + static void trigger_fentry_setup(void) { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fentry); } +static void trigger_fexit_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fexit); +} + static void trigger_fentry_sleep_setup(void) { setup_ctx(); @@ -261,6 +273,16 @@ const struct bench bench_trig_kprobe = { .report_final = hits_drops_report_final, }; +const struct bench bench_trig_kretprobe = { + .name = "trig-kretprobe", + .validate = trigger_validate, + .setup = trigger_kretprobe_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + const struct bench bench_trig_fentry = { .name = "trig-fentry", .validate = trigger_validate, @@ -271,6 +293,16 @@ const struct bench bench_trig_fentry = { .report_final = hits_drops_report_final, }; +const struct bench bench_trig_fexit = { + .name = "trig-fexit", + .validate = trigger_validate, + .setup = trigger_fexit_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + const struct bench bench_trig_fentry_sleep = { .name = "trig-fentry-sleep", .validate = trigger_validate, diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 694e7cec1823..eb94f9c0186f 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -33,6 +33,13 @@ int bench_trigger_kprobe(void *ctx) return 0; } +SEC("kretprobe/" SYS_PREFIX "sys_getpgid") +int bench_trigger_kretprobe(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + SEC("fentry/" SYS_PREFIX "sys_getpgid") int bench_trigger_fentry(void *ctx) { @@ -40,6 +47,13 @@ int bench_trigger_fentry(void *ctx) return 0; } +SEC("fexit/" SYS_PREFIX "sys_getpgid") +int bench_trigger_fexit(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + SEC("fentry.s/" SYS_PREFIX "sys_getpgid") int bench_trigger_fentry_sleep(void *ctx) { -- cgit v1.2.3 From 8d0c314c30c9fe7f755d941f5d65a6e427518048 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 11 Mar 2024 22:07:27 +0800 Subject: tools: ynl-gen: support using pre-defined values in attr checks Support using pre-defined values in checks so we don't need to use hard code number for the string, binary length. e.g. we have a definition like #define TEAM_STRING_MAX_LEN 32 Which defined in yaml like: definitions: - name: string-max-len type: const value: 32 It can be used in the attribute-sets like attribute-sets: - name: attr-option name-prefix: team-attr-option- attributes: - name: name type: string checks: len: string-max-len With this patch it will be converted to [TEAM_ATTR_OPTION_NAME] = { .type = NLA_STRING, .len = TEAM_STRING_MAX_LEN, } Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20240311140727.109562-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/genetlink-c.yaml | 2 +- Documentation/netlink/genetlink-legacy.yaml | 2 +- Documentation/netlink/genetlink.yaml | 2 +- Documentation/netlink/netlink-raw.yaml | 2 +- tools/net/ynl/ynl-gen-c.py | 2 ++ 5 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 3ebd50d78820..24692a9343f0 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -11,7 +11,7 @@ $defs: minimum: 0 len-or-define: type: [ string, integer ] - pattern: ^[0-9A-Za-z_]+( - 1)?$ + pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 1d3fe3637707..b0b7e8bab8a9 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -11,7 +11,7 @@ $defs: minimum: 0 len-or-define: type: [ string, integer ] - pattern: ^[0-9A-Za-z_]+( - 1)?$ + pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index 3283bf458ff1..d7edb8855563 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -11,7 +11,7 @@ $defs: minimum: 0 len-or-define: type: [ string, integer ] - pattern: ^[0-9A-Za-z_]+( - 1)?$ + pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index 40fc8ab1ee44..57490e5c1ddf 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -11,7 +11,7 @@ $defs: minimum: 0 len-or-define: type: [ string, integer ] - pattern: ^[0-9A-Za-z_]+( - 1)?$ + pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 # Schema for specs diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 67bfaff05154..5bb7ca01fe51 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -80,6 +80,8 @@ class Type(SpecAttr): value = self.checks.get(limit, default) if value is None: return value + elif value in self.family.consts: + return c_upper(f"{self.family['name']}-{value}") if not isinstance(value, int): value = limit_to_number(value) return value -- cgit v1.2.3 From a22b042660ca1d695cc225401c72b3fb393acd49 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 8 Mar 2024 13:59:55 +0100 Subject: selftests: forwarding: Add a test for NH group stats Add to lib.sh support for fetching NH stats, and a new library, router_mpath_nh_lib.sh, with the common code for testing NH stats. Use the latter from router_mpath_nh.sh and router_mpath_nh_res.sh. The test works by sending traffic through a NH group, and checking that the reported values correspond to what the link that ultimately receives the traffic reports having seen. Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/2a424c54062a5f1efd13b9ec5b2b0e29c6af2574.1709901020.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 1 + tools/testing/selftests/net/forwarding/lib.sh | 34 ++++++ .../selftests/net/forwarding/router_mpath_nh.sh | 13 +++ .../net/forwarding/router_mpath_nh_lib.sh | 129 +++++++++++++++++++++ .../net/forwarding/router_mpath_nh_res.sh | 13 +++ 5 files changed, 190 insertions(+) create mode 100644 tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index cdefc9a5ec34..535865b3d1d6 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -123,6 +123,7 @@ TEST_FILES := devlink_lib.sh \ mirror_gre_topo_lib.sh \ mirror_lib.sh \ mirror_topo_lib.sh \ + router_mpath_nh_lib.sh \ sch_ets_core.sh \ sch_ets_tests.sh \ sch_tbf_core.sh \ diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index d1bf39eaf2b3..e579c2e0c462 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -900,6 +900,33 @@ hw_stats_get() jq ".[0].stats64.$dir.$stat" } +__nh_stats_get() +{ + local key=$1; shift + local group_id=$1; shift + local member_id=$1; shift + + ip -j -s -s nexthop show id $group_id | + jq --argjson member_id "$member_id" --arg key "$key" \ + '.[].group_stats[] | select(.id == $member_id) | .[$key]' +} + +nh_stats_get() +{ + local group_id=$1; shift + local member_id=$1; shift + + __nh_stats_get packets "$group_id" "$member_id" +} + +nh_stats_get_hw() +{ + local group_id=$1; shift + local member_id=$1; shift + + __nh_stats_get packets_hw "$group_id" "$member_id" +} + humanize() { local speed=$1; shift @@ -2010,3 +2037,10 @@ bail_on_lldpad() fi fi } + +absval() +{ + local v=$1; shift + + echo $((v > 0 ? v : -v)) +} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 982e0d098ea9..3f0f5dc95542 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -7,9 +7,12 @@ ALL_TESTS=" multipath_test ping_ipv4_blackhole ping_ipv6_blackhole + nh_stats_test_v4 + nh_stats_test_v6 " NUM_NETIFS=8 source lib.sh +source router_mpath_nh_lib.sh h1_create() { @@ -325,6 +328,16 @@ ping_ipv6_blackhole() ip -6 nexthop del id 1001 } +nh_stats_test_v4() +{ + __nh_stats_test_v4 mpath +} + +nh_stats_test_v6() +{ + __nh_stats_test_v6 mpath +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh new file mode 100644 index 000000000000..7e7d62161c34 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: GPL-2.0 + +nh_stats_do_test() +{ + local what=$1; shift + local nh1_id=$1; shift + local nh2_id=$1; shift + local group_id=$1; shift + local stats_get=$1; shift + local mz="$@" + + local dp + + RET=0 + + sleep 2 + for ((dp=0; dp < 60000; dp += 10000)); do + local dd + local t0_rp12=$(link_stats_tx_packets_get $rp12) + local t0_rp13=$(link_stats_tx_packets_get $rp13) + local t0_nh1=$($stats_get $group_id $nh1_id) + local t0_nh2=$($stats_get $group_id $nh2_id) + + ip vrf exec vrf-h1 \ + $mz -q -p 64 -d 0 -t udp \ + "sp=1024,dp=$((dp))-$((dp + 10000))" + sleep 2 + + local t1_rp12=$(link_stats_tx_packets_get $rp12) + local t1_rp13=$(link_stats_tx_packets_get $rp13) + local t1_nh1=$($stats_get $group_id $nh1_id) + local t1_nh2=$($stats_get $group_id $nh2_id) + + local d_rp12=$((t1_rp12 - t0_rp12)) + local d_rp13=$((t1_rp13 - t0_rp13)) + local d_nh1=$((t1_nh1 - t0_nh1)) + local d_nh2=$((t1_nh2 - t0_nh2)) + + dd=$(absval $((d_rp12 - d_nh1))) + ((dd < 10)) + check_err $? "Discrepancy between link and $stats_get: d_rp12=$d_rp12 d_nh1=$d_nh1" + + dd=$(absval $((d_rp13 - d_nh2))) + ((dd < 10)) + check_err $? "Discrepancy between link and $stats_get: d_rp13=$d_rp13 d_nh2=$d_nh2" + done + + log_test "NH stats test $what" +} + +nh_stats_test_dispatch_swhw() +{ + local what=$1; shift + local nh1_id=$1; shift + local nh2_id=$1; shift + local group_id=$1; shift + local mz="$@" + + local used + + nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \ + nh_stats_get "${mz[@]}" + + used=$(ip -s -j -d nexthop show id $group_id | + jq '.[].hw_stats.used') + kind=$(ip -j -d link show dev $rp11 | + jq -r '.[].linkinfo.info_kind') + if [[ $used == true ]]; then + nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \ + nh_stats_get_hw "${mz[@]}" + elif [[ $kind == veth ]]; then + log_test_skip "HW stats not offloaded on veth topology" + fi +} + +nh_stats_test_dispatch() +{ + local nhgtype=$1; shift + local what=$1; shift + local nh1_id=$1; shift + local nh2_id=$1; shift + local group_id=$1; shift + local mz="$@" + + local enabled + local kind + + if ! ip nexthop help 2>&1 | grep -q hw_stats; then + log_test_skip "NH stats test: ip doesn't support HW stats" + return + fi + + ip nexthop replace id $group_id group $nh1_id/$nh2_id \ + hw_stats on type $nhgtype + enabled=$(ip -s -j -d nexthop show id $group_id | + jq '.[].hw_stats.enabled') + if [[ $enabled == true ]]; then + nh_stats_test_dispatch_swhw "$what" "$nh1_id" "$nh2_id" \ + "$group_id" "${mz[@]}" + elif [[ $enabled == false ]]; then + check_err 1 "HW stats still disabled after enabling" + log_test "NH stats test" + else + log_test_skip "NH stats test: ip doesn't report hw_stats info" + fi + + ip nexthop replace id $group_id group $nh1_id/$nh2_id \ + hw_stats off type $nhgtype +} + +__nh_stats_test_v4() +{ + local nhgtype=$1; shift + + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + nh_stats_test_dispatch $nhgtype "IPv4" 101 102 103 \ + $MZ $h1 -A 192.0.2.2 -B 198.51.100.2 + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +__nh_stats_test_v6() +{ + local nhgtype=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + nh_stats_test_dispatch $nhgtype "IPv6" 104 105 106 \ + $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2 + sysctl_restore net.ipv6.fib_multipath_hash_policy +} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh index a60ff54723b7..4b483d24ad00 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -5,9 +5,12 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 multipath_test + nh_stats_test_v4 + nh_stats_test_v6 " NUM_NETIFS=8 source lib.sh +source router_mpath_nh_lib.sh h1_create() { @@ -333,6 +336,16 @@ multipath_test() ip nexthop replace id 106 group 104,1/105,1 type resilient } +nh_stats_test_v4() +{ + __nh_stats_test_v4 resilient +} + +nh_stats_test_v6() +{ + __nh_stats_test_v6 resilient +} + setup_prepare() { h1=${NETIFS[p1]} -- cgit v1.2.3 From 6215df11b94556735192b7b488ee98611a9feb12 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:08 +0100 Subject: selftests: mptcp: print all error messages to stdout Some error messages are printed to stderr while the others are printed to 'stdout'. As part of the unification, this patch drop "1>&2" to let all errors messages are printed to 'stdout'. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-1-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 10 +++++----- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 11 ++++++----- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 0ca2960c9099..679e366c8f6b 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -294,7 +294,7 @@ do_ping() ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1 if [ $rc -ne 0 ] ; then - echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 + echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" ret=1 return 1 @@ -470,13 +470,13 @@ do_transfer() if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then printf "[ FAIL ] lower MPC SYN rx (%d) than expected (%d)\n" \ - "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 + "${stat_synrx_now_l}" "${expect_synrx}" retc=1 fi if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then if [ ${stat_ooo_now} -eq 0 ]; then printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ - "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 + "${stat_ackrx_now_l}" "${expect_ackrx}" rets=1 else printf "[ Note ] fallback due to TCP OoO" @@ -721,7 +721,7 @@ EOF ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100 if [ $lret -ne 0 ]; then - echo "FAIL: $msg, mptcp connection error" 1>&2 + echo "FAIL: $msg, mptcp connection error" ret=$lret return 1 fi @@ -810,7 +810,7 @@ log_if_error() local msg="$1" if [ ${ret} -ne 0 ]; then - echo "FAIL: ${msg}" 1>&2 + echo "FAIL: ${msg}" final_ret=${ret} ret=0 diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 6ed4aa32222f..7c70b52e63c6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -103,7 +103,8 @@ check_mark() local v for v in $values; do if [ $v -ne 0 ]; then - echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2 + echo "FAIL: got $tables $values in ns $ns," \ + "not 0 - not all expected packets marked" ret=1 return 1 fi @@ -162,7 +163,7 @@ do_transfer() local rets=$? if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo " client exit code $retc, server $rets" 1>&2 + echo " client exit code $retc, server $rets" echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" @@ -221,7 +222,7 @@ do_mptcp_sockopt_tests() lret=$? if [ $lret -ne 0 ]; then - echo "FAIL: SOL_MPTCP getsockopt" 1>&2 + echo "FAIL: SOL_MPTCP getsockopt" mptcp_lib_result_fail "sockopt v4" ret=$lret return @@ -232,7 +233,7 @@ do_mptcp_sockopt_tests() lret=$? if [ $lret -ne 0 ]; then - echo "FAIL: SOL_MPTCP getsockopt (ipv6)" 1>&2 + echo "FAIL: SOL_MPTCP getsockopt (ipv6)" mptcp_lib_result_fail "sockopt v6" ret=$lret return @@ -263,7 +264,7 @@ do_tcpinq_test() local lret=$? if [ $lret -ne 0 ];then ret=$lret - echo "FAIL: mptcp_inq $*" 1>&2 + echo "FAIL: mptcp_inq $*" mptcp_lib_result_fail "TCP_INQ: $*" return $lret fi -- cgit v1.2.3 From 01ed9838107f7d52a319fbe2267d89a83a4273b5 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:09 +0100 Subject: selftests: mptcp: connect: add dedicated port counter This patch adds a new dedicated counter 'port' instead of TEST_COUNT to increase port numbers in mptcp_connect.sh. This can avoid outputting discontinuous test counters. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-2-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 679e366c8f6b..ab3f52434753 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -33,6 +33,7 @@ do_tcp=0 checksum=false filesize=0 connect_per_transfer=1 +port=$((10000 - 1)) if [ $tc_loss -eq 100 ];then tc_loss=1% @@ -313,8 +314,7 @@ do_transfer() local local_addr="$6" local extra_args="$7" - local port - port=$((10000+TEST_COUNT)) + port=$((port + 1)) TEST_COUNT=$((TEST_COUNT+1)) if [ "$rcvbuf" -gt 0 ]; then @@ -710,7 +710,7 @@ EOF echo "INFO: test $msg" - TEST_COUNT=10000 + port=$((20000 - 1)) local extra_args="-o TRANSPARENT" do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ ${connect_addr} ${local_addr} "${extra_args}" -- cgit v1.2.3 From c9161a0f8ff96f4e1980e2e24a222cd65143edf5 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:10 +0100 Subject: selftests: mptcp: connect: fix misaligned output The first [ OK ] in the output of mptcp_connect.sh misaligns with the others: New MPTCP socket can be blocked via sysctl [ OK ] INFO: validating network environment with pings INFO: Using loss of 0.85% delay 16 ms reorder 95% 70% with delay 4ms on ns1 MPTCP -> ns1 (10.0.1.1:10000 ) MPTCP (duration 184ms) [ OK ] ns1 MPTCP -> ns1 (10.0.1.1:10001 ) TCP (duration 50ms) [ OK ] ns1 TCP -> ns1 (10.0.1.1:10002 ) MPTCP (duration 55ms) [ OK ] This patch aligns them by using 69 chars to display the first two lines, and 50 chars for the other. Since 19 chars are used to display duration time. Also print out a [ OK ] at the end of the 2nd line for consistency. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-3-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index ab3f52434753..ce9342b241cd 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -248,11 +248,17 @@ else set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args" fi +print_larger_title() { + # here we don't have the time, a bit longer for the alignment + printf "%-69s" "${@}" +} + check_mptcp_disabled() { local disabled_ns mptcp_lib_ns_init disabled_ns + print_larger_title "New MPTCP socket can be blocked via sysctl" # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]" @@ -274,7 +280,7 @@ check_mptcp_disabled() return 1 fi - echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]" + echo "[ OK ]" mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl" return 0 } @@ -342,7 +348,7 @@ do_transfer() addr_port=$(printf "%s:%d" ${connect_addr} ${port}) local result_msg result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" - printf "%s\t" "${result_msg}" + printf "%-50s" "${result_msg}" if $capture; then local capuser @@ -835,7 +841,7 @@ check_mptcp_disabled stop_if_error "The kernel configuration is not valid for MPTCP" -echo "INFO: validating network environment with pings" +print_larger_title "Validating network environment with pings" for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns1" $sender 10.0.1.1 do_ping "$ns1" $sender dead:beef:1::1 @@ -857,6 +863,7 @@ done mptcp_lib_result_code "${ret}" "ping tests" stop_if_error "Could not even run ping tests" +echo "[ OK ]" [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms echo -n "INFO: Using loss of $tc_loss " -- cgit v1.2.3 From fd959262c1bb09eac288445ff90696087e0d063f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:11 +0100 Subject: selftests: mptcp: sockopt: print every test result Only total test results are printed out in mptcp_sockopt.sh: PASS: all packets had packet mark set PASS: SOL_MPTCP getsockopt has expected information PASS: TCP_INQ cmsg/ioctl -t tcp PASS: TCP_INQ cmsg/ioctl -6 -t tcp PASS: TCP_INQ cmsg/ioctl -r tcp PASS: TCP_INQ cmsg/ioctl -6 -r tcp PASS: TCP_INQ cmsg/ioctl -r tcp -t tcp They mismatch with the test results: ok 1 - mptcp_sockopt: mark ipv4 ok 2 - mptcp_sockopt: transfer ipv4 ok 3 - mptcp_sockopt: mark ipv6 ok 4 - mptcp_sockopt: transfer ipv6 ok 5 - mptcp_sockopt: sockopt v4 ok 6 - mptcp_sockopt: sockopt v6 ok 7 - mptcp_sockopt: TCP_INQ: -t tcp ok 8 - mptcp_sockopt: TCP_INQ: -6 -t tcp ok 9 - mptcp_sockopt: TCP_INQ: -r tcp ok 10 - mptcp_sockopt: TCP_INQ: -6 -r tcp ok 11 - mptcp_sockopt: TCP_INQ: -r tcp -t tcp 'mptcp_sockopt.sh' now display more detailed results + why (what you had in a former patch from v6, merged here). It no longer displays 'PASS:', because it is duplicated info now that the detailed are displayed: Transfer v4 [ OK ] Mark v4 [ OK ] Transfer v6 [ OK ] Mark v6 [ OK ] SOL_MPTCP sockopt v4 [ OK ] SOL_MPTCP sockopt v6 [ OK ] TCP_INQ cmsg/ioctl -t tcp [ OK ] TCP_INQ cmsg/ioctl -6 -t tcp [ OK ] TCP_INQ cmsg/ioctl -r tcp [ OK ] TCP_INQ cmsg/ioctl -6 -r tcp [ OK ] TCP_INQ cmsg/ioctl -r tcp -t tcp [ OK ] Also fix the TAP output: ok 1 - mptcp_sockopt: transfer ipv4 ok 2 - mptcp_sockopt: mark ipv4 ok 3 - mptcp_sockopt: transfer ipv6 ok 4 - mptcp_sockopt: mark ipv6 ok 5 - mptcp_sockopt: sockopt v4 ok 6 - mptcp_sockopt: sockopt v6 ok 7 - mptcp_sockopt: TCP_INQ: -t tcp ok 8 - mptcp_sockopt: TCP_INQ: -6 -t tcp ok 9 - mptcp_sockopt: TCP_INQ: -r tcp ok 10 - mptcp_sockopt: TCP_INQ: -6 -r tcp ok 11 - mptcp_sockopt: TCP_INQ: -r tcp -t tcp Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-4-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 42 +++++++++++++--------- 1 file changed, 25 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 7c70b52e63c6..17b36c1312f4 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -113,6 +113,11 @@ check_mark() return 0 } +print_title() +{ + printf "%-50s" "${@}" +} + do_transfer() { local listener_ns="$1" @@ -162,8 +167,9 @@ do_transfer() wait $spid local rets=$? + print_title "Transfer ${ip:2}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo " client exit code $retc, server $rets" + echo "[FAIL] client exit code $retc, server $rets" echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" @@ -175,7 +181,14 @@ do_transfer() ret=1 return 1 fi + if ! mptcp_lib_check_transfer $cin $sout "file received by server"; then + rets=1 + else + echo "[ OK ]" + fi + mptcp_lib_result_code "${rets}" "transfer ${ip}" + print_title "Mark ${ip:2}" if [ $local_addr = "::" ];then check_mark $listener_ns 6 || retc=1 check_mark $connector_ns 6 || retc=1 @@ -184,15 +197,13 @@ do_transfer() check_mark $connector_ns 4 || retc=1 fi - mptcp_lib_check_transfer $cin $sout "file received by server" - rets=$? - mptcp_lib_result_code "${retc}" "mark ${ip}" - mptcp_lib_result_code "${rets}" "transfer ${ip}" if [ $retc -eq 0 ] && [ $rets -eq 0 ];then + echo "[ OK ]" return 0 fi + echo "[FAIL]" return 1 } @@ -221,23 +232,27 @@ do_mptcp_sockopt_tests() ip netns exec "$ns_sbox" ./mptcp_sockopt lret=$? + print_title "SOL_MPTCP sockopt v4" if [ $lret -ne 0 ]; then - echo "FAIL: SOL_MPTCP getsockopt" + echo "[FAIL]" mptcp_lib_result_fail "sockopt v4" ret=$lret return fi + echo "[ OK ]" mptcp_lib_result_pass "sockopt v4" ip netns exec "$ns_sbox" ./mptcp_sockopt -6 lret=$? + print_title "SOL_MPTCP sockopt v6" if [ $lret -ne 0 ]; then - echo "FAIL: SOL_MPTCP getsockopt (ipv6)" + echo "[FAIL]" mptcp_lib_result_fail "sockopt v6" ret=$lret return fi + echo "[ OK ]" mptcp_lib_result_pass "sockopt v6" } @@ -260,16 +275,17 @@ run_tests() do_tcpinq_test() { + print_title "TCP_INQ cmsg/ioctl $*" ip netns exec "$ns_sbox" ./mptcp_inq "$@" local lret=$? if [ $lret -ne 0 ];then ret=$lret - echo "FAIL: mptcp_inq $*" + echo "[FAIL]" mptcp_lib_result_fail "TCP_INQ: $*" return $lret fi - echo "PASS: TCP_INQ cmsg/ioctl $*" + echo "[ OK ]" mptcp_lib_result_pass "TCP_INQ: $*" return $lret } @@ -315,15 +331,7 @@ trap cleanup EXIT run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 dead:beef:1::1 -if [ $ret -eq 0 ];then - echo "PASS: all packets had packet mark set" -fi - do_mptcp_sockopt_tests -if [ $ret -eq 0 ];then - echo "PASS: SOL_MPTCP getsockopt has expected information" -fi - do_tcpinq_tests mptcp_lib_result_print_all_tap -- cgit v1.2.3 From 9e6a39ecb9a11603c1c28e5463008f2e0af56085 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:12 +0100 Subject: selftests: mptcp: export TEST_COUNTER variable Variable TEST_COUNT are used in mptcp_connect.sh and mptcp_join.sh as test counters, which are initialized to 0, while variable test_cnt are used in diag.sh and simult_flows.sh, which are initialized to 1. To maintain consistency, this patch renames them all as MPTCP_LIB_TEST_COUNTER, initializes it to 1, and exports it into mptcp_lib.sh. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-5-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 5 ++--- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 3 +-- tools/testing/selftests/net/mptcp/mptcp_join.sh | 15 +++++++-------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 2 ++ tools/testing/selftests/net/mptcp/simult_flows.sh | 5 ++--- 5 files changed, 14 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index afe862895946..b63510ce2327 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -9,7 +9,6 @@ . "$(dirname "${0}")/mptcp_lib.sh" ns="" -test_cnt=1 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) ret=0 @@ -69,7 +68,7 @@ __chk_nr() echo "[ ok ]" mptcp_lib_result_pass "${msg}" fi - test_cnt=$((test_cnt+1)) + MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) } __chk_msk_nr() @@ -127,7 +126,7 @@ wait_msk_nr() echo "[ ok ]" mptcp_lib_result_pass "${msg}" fi - test_cnt=$((test_cnt+1)) + MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) } chk_msk_fallback_nr() diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index ce9342b241cd..915faee77e25 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -131,7 +131,6 @@ ns2="" ns3="" ns4="" -TEST_COUNT=0 TEST_GROUP="" # This function is used in the cleanup trap @@ -321,7 +320,7 @@ do_transfer() local extra_args="$7" port=$((port + 1)) - TEST_COUNT=$((TEST_COUNT+1)) + MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) if [ "$rcvbuf" -gt 0 ]; then extra_args="$extra_args -R $rcvbuf" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 1df2d24979a0..4ef0d5ae9dae 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -48,7 +48,6 @@ declare -A all_tests declare -a only_tests_ids declare -a only_tests_names declare -A failed_tests -TEST_COUNT=0 TEST_NAME="" nr_blank=6 @@ -172,7 +171,7 @@ cleanup() print_title() { - printf "%03u %s\n" "${TEST_COUNT}" "${TEST_NAME}" + printf "%03u %s\n" "${MPTCP_LIB_TEST_COUNTER}" "${TEST_NAME}" } print_check() @@ -233,7 +232,7 @@ skip_test() local i for i in "${only_tests_ids[@]}"; do - if [ "${TEST_COUNT}" -eq "${i}" ]; then + if [ "${MPTCP_LIB_TEST_COUNTER}" -eq "${i}" ]; then return 1 fi done @@ -268,7 +267,7 @@ reset() TEST_NAME="${1}" - TEST_COUNT=$((TEST_COUNT+1)) + MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) if skip_test; then last_test_ignored=1 @@ -462,7 +461,7 @@ fail_test() # just in case a test is marked twice as failed if [ ${last_test_failed} -eq 0 ]; then - failed_tests[${TEST_COUNT}]="${TEST_NAME}" + failed_tests[${MPTCP_LIB_TEST_COUNTER}]="${TEST_NAME}" dump_stats last_test_failed=1 fi @@ -973,7 +972,7 @@ do_transfer() local srv_proto="$4" local connect_addr="$5" - local port=$((10000 + TEST_COUNT - 1)) + local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) local cappid local FAILING_LINKS=${FAILING_LINKS:-""} local fastclose=${fastclose:-""} @@ -991,9 +990,9 @@ do_transfer() capuser="-Z $SUDO_USER" fi - capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}") + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") - echo "Capturing traffic for test $TEST_COUNT into $capfile" + echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & cappid=$! diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 5d071b6eb780..63abf4431993 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -10,6 +10,8 @@ readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 +# shellcheck disable=SC2034 # unused at this moment +MPTCP_LIB_TEST_COUNTER=0 # only if supported (or forced) and not disabled, see no-color.org if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } && diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 365fb3d6ef55..3ad663e38c42 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -14,7 +14,6 @@ ns3="" capture=false timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) -test_cnt=1 ret=0 bail=0 slack=50 @@ -126,8 +125,8 @@ do_transfer() local sin=$2 local max_time=$3 local port - port=$((10000+test_cnt)) - test_cnt=$((test_cnt+1)) + port=$((10000+MPTCP_LIB_TEST_COUNTER)) + MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) :> "$cout" :> "$sout" -- cgit v1.2.3 From 3382bb09701bb73c6ed97dbc690cf0b3384c8b79 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:13 +0100 Subject: selftests: mptcp: add print_title in mptcp_lib This patch adds a new variable MPTCP_LIB_TEST_FORMAT as the test title printing format. Also add a helper mptcp_lib_print_title() to use this format to print the test title with test counters. They are used in mptcp_join.sh first. Each MPTCP selftest is having subtests, and it helps to give them a number to quickly identify them. This can be managed by mptcp_lib.sh, reusing what has been done here. The following commit will use these new helpers in the other tests. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-6-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 13 ++++--------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 10 +++++++++- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 4ef0d5ae9dae..2f34e2b9a1c4 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -48,6 +48,7 @@ declare -A all_tests declare -a only_tests_ids declare -a only_tests_names declare -A failed_tests +MPTCP_LIB_TEST_FORMAT="%03u %s\n" TEST_NAME="" nr_blank=6 @@ -169,11 +170,6 @@ cleanup() cleanup_partial } -print_title() -{ - printf "%03u %s\n" "${MPTCP_LIB_TEST_COUNTER}" "${TEST_NAME}" -} - print_check() { printf "%-${nr_blank}s%-36s" " " "${*}" @@ -232,7 +228,7 @@ skip_test() local i for i in "${only_tests_ids[@]}"; do - if [ "${MPTCP_LIB_TEST_COUNTER}" -eq "${i}" ]; then + if [ "$((MPTCP_LIB_TEST_COUNTER+1))" -eq "${i}" ]; then return 1 fi done @@ -267,14 +263,13 @@ reset() TEST_NAME="${1}" - MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) - if skip_test; then + MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) last_test_ignored=1 return 1 fi - print_title + mptcp_lib_print_title "${TEST_NAME}" if [ "${init}" != "1" ]; then init diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 63abf4431993..eb740a2f7898 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -10,8 +10,8 @@ readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 -# shellcheck disable=SC2034 # unused at this moment MPTCP_LIB_TEST_COUNTER=0 +MPTCP_LIB_TEST_FORMAT="%02u %-50s" # only if supported (or forced) and not disabled, see no-color.org if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } && @@ -412,3 +412,11 @@ mptcp_lib_events() { ip netns exec "${ns}" ./pm_nl_ctl events >> "${evts}" 2>&1 & pid=$! } + +mptcp_lib_print_title() { + : "${MPTCP_LIB_TEST_COUNTER:?}" + : "${MPTCP_LIB_TEST_FORMAT:?}" + + # shellcheck disable=SC2059 # the format is in a variable + printf "${MPTCP_LIB_TEST_FORMAT}" "$((++MPTCP_LIB_TEST_COUNTER))" "${*}" +} -- cgit v1.2.3 From aa7694766f1458a158944b18d3c7b4fbaed08510 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:14 +0100 Subject: selftests: mptcp: print test results with counters This patch adds a new helper mptcp_lib_print_title(), a wrapper of mptcp_lib_inc_test_counter() and mptcp_lib_pr_title_counter(), to print out test counter in each test result and increase the counter. Use this helper to print out test counters for every tests in diag.sh, mptcp_connect.sh, mptcp_sockopt.sh, pm_netlink.sh, simult_flows.sh, and userspace_pm.sh. diag.sh: 01 no msk on netns creation [ ok ] 02 listen match for dport 10000 [ ok ] 03 listen match for sport 10000 [ ok ] 04 listen match for saddr and sport [ ok ] 05 all listen sockets [ ok ] mptcp_connect.sh: 01 New MPTCP socket can be blocked via sysctl [ OK ] 02 Validating network environment with pings [ OK ] INFO: Using loss of 0.85% delay 31 ms reorder .. with delay 7ms on ns3eth4 03 ns1 MPTCP -> ns1 (10.0.1.1:10000 ) MPTCP (duration 69ms) [ OK ] 04 ns1 MPTCP -> ns1 (10.0.1.1:10001 ) TCP (duration 20ms) [ OK ] 05 ns1 TCP -> ns1 (10.0.1.1:10002 ) MPTCP (duration 16ms) [ OK ] mptcp_sockopt.sh: 01 Transfer v4 [ OK ] 02 Mark v4 [ OK ] 03 Transfer v6 [ OK ] 04 Mark v6 [ OK ] 05 SOL_MPTCP sockopt v4 [ OK ] pm_netlink.sh: 01 defaults addr list [ OK ] 02 simple add/get addr [ OK ] 03 dump addrs [ OK ] 04 simple del addr [ OK ] 05 dump addrs after del [ OK ] simult_flows.sh: 01 balanced bwidth 7391 max 8456 [ OK ] 02 balanced bwidth - reverse direction 7403 max 8456 [ OK ] 03 balanced bwidth with unbalanced delay 7429 max 8456 [ OK ] 04 balanced bwidth with unbalanced delay - reverse ... 7485 max 8456 [ OK ] 05 unbalanced bwidth 7549 max 8456 [ OK ] userspace_pm.sh: 01 Created network namespaces ns1, ns2 [ OK ] INFO: Make connections 02 Established IPv4 MPTCP Connection ns2 => ns1 [ OK ] 03 Established IPv6 MPTCP Connection ns2 => ns1 [ OK ] INFO: Announce tests 04 ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token [ OK ] 05 ADD_ADDR id:67 10.0.2.2 (ns2) => ns1, reuse port [ OK ] Having test counters helps to quickly identify issues when looking at a long list of output logs and results. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-7-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 6 ++---- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 6 +++--- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 2 +- tools/testing/selftests/net/mptcp/pm_netlink.sh | 5 +++-- tools/testing/selftests/net/mptcp/simult_flows.sh | 7 ++++--- tools/testing/selftests/net/mptcp/userspace_pm.sh | 4 +++- 6 files changed, 16 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index b63510ce2327..4ffdd415e670 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -54,7 +54,7 @@ __chk_nr() nr=$(eval $command) - printf "%-50s" "$msg" + mptcp_lib_print_title "$msg" if [ "$nr" != "$expected" ]; then if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then echo "[ skip ] Feature probably not supported" @@ -68,7 +68,6 @@ __chk_nr() echo "[ ok ]" mptcp_lib_result_pass "${msg}" fi - MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) } __chk_msk_nr() @@ -113,7 +112,7 @@ wait_msk_nr() sleep 1 done - printf "%-50s" "$msg" + mptcp_lib_print_title "$msg" if [ $i -ge $timeout ]; then echo "[ fail ] timeout while expecting $expected max $max last $nr" mptcp_lib_result_fail "${msg} # timeout" @@ -126,7 +125,6 @@ wait_msk_nr() echo "[ ok ]" mptcp_lib_result_pass "${msg}" fi - MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) } chk_msk_fallback_nr() diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 915faee77e25..bbda37d6514f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -249,7 +249,8 @@ fi print_larger_title() { # here we don't have the time, a bit longer for the alignment - printf "%-69s" "${@}" + MPTCP_LIB_TEST_FORMAT="%02u %-69s" \ + mptcp_lib_print_title "${@}" } check_mptcp_disabled() @@ -320,7 +321,6 @@ do_transfer() local extra_args="$7" port=$((port + 1)) - MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) if [ "$rcvbuf" -gt 0 ]; then extra_args="$extra_args -R $rcvbuf" @@ -347,7 +347,7 @@ do_transfer() addr_port=$(printf "%s:%d" ${connect_addr} ${port}) local result_msg result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" - printf "%-50s" "${result_msg}" + mptcp_lib_print_title "${result_msg}" if $capture; then local capuser diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 17b36c1312f4..19d0ac31b6a9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -115,7 +115,7 @@ check_mark() print_title() { - printf "%-50s" "${@}" + mptcp_lib_print_title "${@}" } do_transfer() diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 427fc5c70b3c..5b9bc25dfef4 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -53,7 +53,7 @@ check() local msg="$3" local rc=0 - printf "%-50s" "$msg" + mptcp_lib_print_title "$msg" mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} if [ ${rc} -eq 2 ]; then mptcp_lib_result_fail "${msg} # error ${rc}" @@ -189,7 +189,8 @@ subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" else for st in fullmesh nofullmesh backup,fullmesh; do st=" (${st})" - printf "%-50s%s\n" "${st}" "[SKIP]" + mptcp_lib_print_title "${st}" + echo "[SKIP]" mptcp_lib_result_skip "${st}" done fi diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 3ad663e38c42..b7549d9364d6 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -14,6 +14,8 @@ ns3="" capture=false timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) +# a bit more space: because we have more to display +MPTCP_LIB_TEST_FORMAT="%02u %-60s" ret=0 bail=0 slack=50 @@ -126,7 +128,6 @@ do_transfer() local max_time=$3 local port port=$((10000+MPTCP_LIB_TEST_COUNTER)) - MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) :> "$cout" :> "$sout" @@ -238,7 +239,7 @@ run_test() # completion (see mptcp_connect): 200ms on each side, add some slack time=$((time + 400 + slack)) - printf "%-60s" "$msg" + mptcp_lib_print_title "$msg" do_transfer $small $large $time lret=$? mptcp_lib_result_code "${lret}" "${msg}" @@ -248,7 +249,7 @@ run_test() fi msg+=" - reverse direction" - printf "%-60s" "${msg}" + mptcp_lib_print_title "${msg}" do_transfer $large $small $time lret=$? mptcp_lib_result_code "${lret}" "${msg}" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index b0cce8f065d8..ca238592baee 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -54,6 +54,8 @@ ns1="" ns2="" ret=0 test_name="" +# a bit more space: because we have more to display +MPTCP_LIB_TEST_FORMAT="%02u %-68s" _printf() { stdbuf -o0 -e0 printf "${@}" @@ -69,7 +71,7 @@ print_test() { test_name="${1}" - _printf "%-68s" "${test_name}" + mptcp_lib_print_title "${test_name}" } print_results() -- cgit v1.2.3 From e7c42bf4d320affe37337aa83ae0347832b3f568 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:15 +0100 Subject: selftests: mptcp: use += operator to append strings This patch uses addition assignment operator (+=) to append strings instead of duplicating the variable name in mptcp_connect.sh and mptcp_join.sh. This can make the statements shorter. Note: in mptcp_connect.sh, add a local variable extra in do_transfer to save the various extra warning logs, using += to append it. And add a new variable tc_info to save various tc info, also using += to append it. This can make the code more readable and prepare for the next commit. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-8-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 53 ++++++++++++---------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 30 ++++++------ 2 files changed, 43 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index bbda37d6514f..78098546ddd3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -323,15 +323,15 @@ do_transfer() port=$((port + 1)) if [ "$rcvbuf" -gt 0 ]; then - extra_args="$extra_args -R $rcvbuf" + extra_args+=" -R $rcvbuf" fi if [ "$sndbuf" -gt 0 ]; then - extra_args="$extra_args -S $sndbuf" + extra_args+=" -S $sndbuf" fi if [ -n "$testmode" ]; then - extra_args="$extra_args -m $testmode" + extra_args+=" -m $testmode" fi if [ -n "$extra_args" ] && $options_log; then @@ -451,6 +451,7 @@ do_transfer() mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? + local extra="" local stat_synrx_now_l local stat_ackrx_now_l local stat_cookietx_now @@ -484,7 +485,7 @@ do_transfer() "${stat_ackrx_now_l}" "${expect_ackrx}" rets=1 else - printf "[ Note ] fallback due to TCP OoO" + extra+=" [ Note ] fallback due to TCP OoO" fi fi @@ -507,39 +508,41 @@ do_transfer() fi fi - if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then - printf "[ OK ]" - mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" - else - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" - fi - if [ $cookies -eq 2 ];then if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then - printf " WARN: CookieSent: did not advance" + extra+=" WARN: CookieSent: did not advance" fi if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then - printf " WARN: CookieRecv: did not advance" + extra+=" WARN: CookieRecv: did not advance" fi else if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then - printf " WARN: CookieSent: changed" + extra+=" WARN: CookieSent: changed" fi if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then - printf " WARN: CookieRecv: changed" + extra+=" WARN: CookieRecv: changed" fi fi if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then - printf " WARN: SYNRX: expect %d, got %d (probably retransmissions)" \ - "${expect_synrx}" "${stat_synrx_now_l}" + extra+=" WARN: SYNRX: expect ${expect_synrx}," + extra+=" got ${stat_synrx_now_l} (probably retransmissions)" fi if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then - printf " WARN: ACKRX: expect %d, got %d (probably retransmissions)" \ - "${expect_ackrx}" "${stat_ackrx_now_l}" + extra+=" WARN: ACKRX: expect ${expect_ackrx}," + extra+=" got ${stat_ackrx_now_l} (probably retransmissions)" + fi + + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then + printf "[ OK ]%s\n" "${extra:1}" + mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + else + if [ -n "${extra}" ]; then + printf "%s\n" "${extra:1}" + fi + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" fi - echo cat "$capout" [ $retc -eq 0 ] && [ $rets -eq 0 ] } @@ -865,8 +868,8 @@ stop_if_error "Could not even run ping tests" echo "[ OK ]" [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms -echo -n "INFO: Using loss of $tc_loss " -test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " +tc_info="loss of $tc_loss " +test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms " reorder_delay=$((tc_delay / 4)) @@ -877,17 +880,17 @@ if [ -z "${tc_reorder}" ]; then if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then tc_reorder="reorder ${reorder1}% ${reorder2}%" - echo -n "$tc_reorder with delay ${reorder_delay}ms " + tc_info+="$tc_reorder with delay ${reorder_delay}ms " fi elif [ "$tc_reorder" = "0" ];then tc_reorder="" elif [ "$reorder_delay" -gt 0 ];then # reordering requires some delay tc_reorder="reorder $tc_reorder" - echo -n "$tc_reorder with delay ${reorder_delay}ms " + tc_info+="$tc_reorder with delay ${reorder_delay}ms " fi -echo "on ns3eth4" +echo "INFO: Using ${tc_info}on ns3eth4" tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2f34e2b9a1c4..d2969eec36d5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -754,18 +754,18 @@ pm_nl_check_endpoint() line="${line% }" # the dump order is: address id flags port dev [ -n "$addr" ] && expected_line="$addr" - expected_line="$expected_line $id" - [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}" - [ -n "$dev" ] && expected_line="$expected_line $dev" - [ -n "$port" ] && expected_line="$expected_line $port" + expected_line+=" $id" + [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}" + [ -n "$dev" ] && expected_line+=" $dev" + [ -n "$port" ] && expected_line+=" $port" else line=$(ip netns exec $ns ./pm_nl_ctl get $_id) # the dump order is: id flags dev address port expected_line="$id" - [ -n "$flags" ] && expected_line="$expected_line $flags" - [ -n "$dev" ] && expected_line="$expected_line $dev" - [ -n "$addr" ] && expected_line="$expected_line $addr" - [ -n "$_port" ] && expected_line="$expected_line $_port" + [ -n "$flags" ] && expected_line+=" $flags" + [ -n "$dev" ] && expected_line+=" $dev" + [ -n "$addr" ] && expected_line+=" $addr" + [ -n "$_port" ] && expected_line+=" $_port" fi if [ "$line" = "$expected_line" ]; then print_ok @@ -1216,7 +1216,7 @@ chk_csum_nr() print_check "sum" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns1" ]; then - extra_msg="$extra_msg ns1=$count" + extra_msg+=" ns1=$count" fi if [ -z "$count" ]; then print_skip @@ -1229,7 +1229,7 @@ chk_csum_nr() print_check "csum" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns2" ]; then - extra_msg="$extra_msg ns2=$count" + extra_msg+=" ns2=$count" fi if [ -z "$count" ]; then print_skip @@ -1273,7 +1273,7 @@ chk_fail_nr() print_check "ftx" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") if [ "$count" != "$fail_tx" ]; then - extra_msg="$extra_msg,tx=$count" + extra_msg+=",tx=$count" fi if [ -z "$count" ]; then print_skip @@ -1287,7 +1287,7 @@ chk_fail_nr() print_check "failrx" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") if [ "$count" != "$fail_rx" ]; then - extra_msg="$extra_msg,rx=$count" + extra_msg+=",rx=$count" fi if [ -z "$count" ]; then print_skip @@ -1322,7 +1322,7 @@ chk_fclose_nr() if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then - extra_msg="$extra_msg,tx=$count" + extra_msg+=",tx=$count" fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx" else print_ok @@ -1333,7 +1333,7 @@ chk_fclose_nr() if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then - extra_msg="$extra_msg,rx=$count" + extra_msg+=",rx=$count" fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx" else print_ok @@ -1702,7 +1702,7 @@ chk_rm_nr() count=$((count + cnt)) if [ "$count" != "$rm_subflow_nr" ]; then suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" - extra_msg="$extra_msg simult" + extra_msg+=" simult" fi if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then -- cgit v1.2.3 From 747ba8783a33fca6bad9b6c74280096ee93be9f2 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:16 +0100 Subject: selftests: mptcp: print test results with colors To unify the output formats of all test scripts, this patch adds four more helpers: mptcp_lib_pr_ok() mptcp_lib_pr_skip() mptcp_lib_pr_fail() mptcp_lib_pr_info() to print out [ OK ], [SKIP], [FAIL] and 'INFO: ' with colors. Use them in all scripts to print the "ok/skip/fail/info' using the same 'format'. Having colors helps to quickly identify issues when looking at a long list of output logs and results. Note that now all print the same keywords, which was not the case before, but it is good to uniform that. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-9-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 12 ++--- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 62 +++++++++++----------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 6 +-- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 36 +++++++++---- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 28 +++++----- tools/testing/selftests/net/mptcp/pm_netlink.sh | 2 +- tools/testing/selftests/net/mptcp/simult_flows.sh | 4 +- tools/testing/selftests/net/mptcp/userspace_pm.sh | 27 +++------- 8 files changed, 90 insertions(+), 87 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 4ffdd415e670..bc97ab33a00e 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -57,15 +57,15 @@ __chk_nr() mptcp_lib_print_title "$msg" if [ "$nr" != "$expected" ]; then if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then - echo "[ skip ] Feature probably not supported" + mptcp_lib_pr_skip "Feature probably not supported" mptcp_lib_result_skip "${msg}" else - echo "[ fail ] expected $expected found $nr" + mptcp_lib_pr_fail "expected $expected found $nr" mptcp_lib_result_fail "${msg}" ret=${KSFT_FAIL} fi else - echo "[ ok ]" + mptcp_lib_pr_ok mptcp_lib_result_pass "${msg}" fi } @@ -114,15 +114,15 @@ wait_msk_nr() mptcp_lib_print_title "$msg" if [ $i -ge $timeout ]; then - echo "[ fail ] timeout while expecting $expected max $max last $nr" + mptcp_lib_pr_fail "timeout while expecting $expected max $max last $nr" mptcp_lib_result_fail "${msg} # timeout" ret=${KSFT_FAIL} elif [ $nr != $expected ]; then - echo "[ fail ] expected $expected found $nr" + mptcp_lib_pr_fail "expected $expected found $nr" mptcp_lib_result_fail "${msg} # unexpected result" ret=${KSFT_FAIL} else - echo "[ ok ]" + mptcp_lib_pr_ok mptcp_lib_result_pass "${msg}" fi } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 78098546ddd3..cb1837f2761a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -218,7 +218,7 @@ set_ethtool_flags() { local flags="$3" if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then - echo "INFO: set $ns dev $dev: ethtool -K $flags" + mptcp_lib_pr_info "set $ns dev $dev: ethtool -K $flags" fi } @@ -261,7 +261,7 @@ check_mptcp_disabled() print_larger_title "New MPTCP socket can be blocked via sysctl" # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then - echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]" + mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default" mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default" ret=1 return 1 @@ -274,13 +274,13 @@ check_mptcp_disabled() mptcp_lib_ns_exit "${disabled_ns}" if [ ${err} -eq 0 ]; then - echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]" + mptcp_lib_pr_fail "New MPTCP socket cannot be blocked via sysctl" mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl" ret=1 return 1 fi - echo "[ OK ]" + mptcp_lib_pr_ok mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl" return 0 } @@ -301,7 +301,7 @@ do_ping() ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1 if [ $rc -ne 0 ] ; then - echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" + mptcp_lib_pr_fail "$listener_ns -> $connect_addr connectivity" ret=1 return 1 @@ -335,7 +335,7 @@ do_transfer() fi if [ -n "$extra_args" ] && $options_log; then - echo "INFO: extra options: $extra_args" + mptcp_lib_pr_info "extra options: $extra_args" fi options_log=false @@ -432,7 +432,7 @@ do_transfer() result_msg+=" # time=${duration}ms" printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo "[ FAIL ] client exit code $retc, server $rets" 1>&2 + mptcp_lib_pr_fail "client exit code $retc, server $rets" echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" cat /tmp/${listener_ns}.out @@ -475,14 +475,14 @@ do_transfer() fi if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then - printf "[ FAIL ] lower MPC SYN rx (%d) than expected (%d)\n" \ - "${stat_synrx_now_l}" "${expect_synrx}" + mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \ + "than expected (${expect_synrx})" retc=1 fi if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then if [ ${stat_ooo_now} -eq 0 ]; then - printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ - "${stat_ackrx_now_l}" "${expect_ackrx}" + mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ + "than expected (${expect_ackrx})" rets=1 else extra+=" [ Note ] fallback due to TCP OoO" @@ -497,13 +497,13 @@ do_transfer() local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) if [ $csum_err_s_nr -gt 0 ]; then - printf "[ FAIL ]\nserver got %d data checksum error[s]" ${csum_err_s_nr} + mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]" rets=1 fi local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) if [ $csum_err_c_nr -gt 0 ]; then - printf "[ FAIL ]\nclient got %d data checksum error[s]" ${csum_err_c_nr} + mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]" retc=1 fi fi @@ -534,11 +534,11 @@ do_transfer() fi if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then - printf "[ OK ]%s\n" "${extra:1}" + mptcp_lib_pr_ok "${extra:1}" mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" else if [ -n "${extra}" ]; then - printf "%s\n" "${extra:1}" + mptcp_lib_print_warn "${extra:1}" fi mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" fi @@ -668,7 +668,7 @@ run_test_transparent() # following function has been exported (T). Not great but better than # checking for a specific kernel version. if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then - echo "INFO: ${msg} not supported by the kernel: SKIP" + mptcp_lib_pr_skip "${msg} not supported by the kernel" mptcp_lib_result_skip "${TEST_GROUP}" return fi @@ -685,7 +685,7 @@ table inet mangle { } EOF then - echo "SKIP: $msg, could not load nft ruleset" + mptcp_lib_pr_skip "$msg, could not load nft ruleset" mptcp_lib_fail_if_expected_feature "nft rules" mptcp_lib_result_skip "${TEST_GROUP}" return @@ -701,7 +701,7 @@ EOF if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then ip netns exec "$listener_ns" nft flush ruleset - echo "SKIP: $msg, ip $r6flag rule failed" + mptcp_lib_pr_skip "$msg, ip $r6flag rule failed" mptcp_lib_fail_if_expected_feature "ip rule" mptcp_lib_result_skip "${TEST_GROUP}" return @@ -710,13 +710,13 @@ EOF if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then ip netns exec "$listener_ns" nft flush ruleset ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 - echo "SKIP: $msg, ip route add local $local_addr failed" + mptcp_lib_pr_skip "$msg, ip route add local $local_addr failed" mptcp_lib_fail_if_expected_feature "ip route" mptcp_lib_result_skip "${TEST_GROUP}" return fi - echo "INFO: test $msg" + mptcp_lib_pr_info "test $msg" port=$((20000 - 1)) local extra_args="-o TRANSPARENT" @@ -729,12 +729,12 @@ EOF ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100 if [ $lret -ne 0 ]; then - echo "FAIL: $msg, mptcp connection error" + mptcp_lib_pr_fail "$msg, mptcp connection error" ret=$lret return 1 fi - echo "PASS: $msg" + mptcp_lib_pr_info "$msg pass" return 0 } @@ -743,7 +743,7 @@ run_tests_peekmode() local peekmode="$1" TEST_GROUP="peek mode: ${peekmode}" - echo "INFO: with peek mode: ${peekmode}" + mptcp_lib_pr_info "with peek mode: ${peekmode}" run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}" run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" } @@ -753,12 +753,12 @@ run_tests_mptfo() TEST_GROUP="MPTFO" if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then - echo "INFO: TFO not supported by the kernel: SKIP" + mptcp_lib_pr_skip "TFO not supported by the kernel" mptcp_lib_result_skip "${TEST_GROUP}" return fi - echo "INFO: with MPTFO start" + mptcp_lib_pr_info "with MPTFO start" ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2 ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1 @@ -770,7 +770,7 @@ run_tests_mptfo() ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0 ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0 - echo "INFO: with MPTFO end" + mptcp_lib_pr_info "with MPTFO end" } run_tests_disconnect() @@ -781,7 +781,7 @@ run_tests_disconnect() TEST_GROUP="full disconnect" if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then - echo "INFO: Full disconnect not supported: SKIP" + mptcp_lib_pr_skip "Full disconnect not supported" mptcp_lib_result_skip "${TEST_GROUP}" return fi @@ -794,7 +794,7 @@ run_tests_disconnect() cin_disconnect="$old_cin" connect_per_transfer=3 - echo "INFO: disconnect" + mptcp_lib_pr_info "disconnect" run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin" run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin" @@ -818,7 +818,7 @@ log_if_error() local msg="$1" if [ ${ret} -ne 0 ]; then - echo "FAIL: ${msg}" + mptcp_lib_pr_fail "${msg}" final_ret=${ret} ret=0 @@ -865,7 +865,7 @@ done mptcp_lib_result_code "${ret}" "ping tests" stop_if_error "Could not even run ping tests" -echo "[ OK ]" +mptcp_lib_pr_ok [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms tc_info="loss of $tc_loss " @@ -890,7 +890,7 @@ elif [ "$reorder_delay" -gt 0 ];then tc_info+="$tc_reorder with delay ${reorder_delay}ms " fi -echo "INFO: Using ${tc_info}on ns3eth4" +mptcp_lib_pr_info "Using ${tc_info}on ns3eth4" tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index d2969eec36d5..9f67b9ba97d8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -185,17 +185,17 @@ print_info() print_ok() { - mptcp_lib_print_ok "[ ok ]${1:+ ${*}}" + mptcp_lib_pr_ok "${@}" } print_fail() { - mptcp_lib_print_err "[fail]${1:+ ${*}}" + mptcp_lib_pr_fail "${@}" } print_skip() { - mptcp_lib_print_warn "[skip]${1:+ ${*}}" + mptcp_lib_pr_skip "${@}" } # [ $1: fail msg ] diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index eb740a2f7898..ea39392c68e7 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -50,6 +50,23 @@ mptcp_lib_print_err() { mptcp_lib_print_color "${MPTCP_LIB_COLOR_RED}${*}" } +# shellcheck disable=SC2120 # parameters are optional +mptcp_lib_pr_ok() { + mptcp_lib_print_ok "[ OK ]${1:+ ${*}}" +} + +mptcp_lib_pr_skip() { + mptcp_lib_print_warn "[SKIP]${1:+ ${*}}" +} + +mptcp_lib_pr_fail() { + mptcp_lib_print_err "[FAIL]${1:+ ${*}}" +} + +mptcp_lib_pr_info() { + mptcp_lib_print_info "INFO: ${*}" +} + # SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all # features using the last version of the kernel and the selftests to make sure # a test is not being skipped by mistake. @@ -80,14 +97,14 @@ mptcp_lib_has_file() { mptcp_lib_check_mptcp() { if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then - echo "SKIP: MPTCP support is not available" + mptcp_lib_pr_skip "MPTCP support is not available" exit ${KSFT_SKIP} fi } mptcp_lib_check_kallsyms() { if ! mptcp_lib_has_file "/proc/kallsyms"; then - echo "SKIP: CONFIG_KALLSYMS is missing" + mptcp_lib_pr_skip "CONFIG_KALLSYMS is missing" exit ${KSFT_SKIP} fi } @@ -294,7 +311,7 @@ mptcp_lib_check_transfer() { local what="${3}" if ! cmp "$in" "$out" > /dev/null 2>&1; then - echo "[ FAIL ] $what does not match (in, out):" + mptcp_lib_pr_fail "$what does not match (in, out):" mptcp_lib_print_file_err "$in" mptcp_lib_print_file_err "$out" @@ -334,13 +351,13 @@ mptcp_lib_check_output() { fi if [ ${cmd_ret} -ne 0 ]; then - mptcp_lib_print_err "[FAIL] command execution '${cmd}' stderr" + mptcp_lib_pr_fail "command execution '${cmd}' stderr" cat "${err}" return 2 elif [ "${out}" = "${expected}" ]; then return 0 else - mptcp_lib_print_err "[FAIL] expected '${expected}' got '${out}'" + mptcp_lib_pr_fail "expected '${expected}' got '${out}'" return 1 fi } @@ -352,29 +369,30 @@ mptcp_lib_check_tools() { case "${tool}" in "ip") if ! ip -Version &> /dev/null; then - mptcp_lib_print_warn "SKIP: Could not run test without ip tool" + mptcp_lib_pr_skip "Could not run test without ip tool" exit ${KSFT_SKIP} fi ;; "ss") if ! ss -h | grep -q MPTCP; then - mptcp_lib_print_warn "SKIP: ss tool does not support MPTCP" + mptcp_lib_pr_skip "ss tool does not support MPTCP" exit ${KSFT_SKIP} fi ;; "iptables"* | "ip6tables"*) if ! "${tool}" -V &> /dev/null; then - mptcp_lib_print_warn "SKIP: Could not run all tests without ${tool}" + mptcp_lib_pr_skip "Could not run all tests without ${tool}" exit ${KSFT_SKIP} fi ;; *) - mptcp_lib_print_err "Internal error: unsupported tool: ${tool}" + mptcp_lib_pr_fail "Internal error: unsupported tool: ${tool}" exit ${KSFT_FAIL} ;; esac done } + mptcp_lib_ns_init() { local sec rndh diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 19d0ac31b6a9..96aa8f71bbb0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -103,8 +103,8 @@ check_mark() local v for v in $values; do if [ $v -ne 0 ]; then - echo "FAIL: got $tables $values in ns $ns," \ - "not 0 - not all expected packets marked" + mptcp_lib_pr_fail "got $tables $values in ns $ns," \ + "not 0 - not all expected packets marked" ret=1 return 1 fi @@ -169,7 +169,7 @@ do_transfer() print_title "Transfer ${ip:2}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo "[FAIL] client exit code $retc, server $rets" + mptcp_lib_pr_fail "client exit code $retc, server $rets" echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" @@ -184,7 +184,7 @@ do_transfer() if ! mptcp_lib_check_transfer $cin $sout "file received by server"; then rets=1 else - echo "[ OK ]" + mptcp_lib_pr_ok fi mptcp_lib_result_code "${rets}" "transfer ${ip}" @@ -200,10 +200,10 @@ do_transfer() mptcp_lib_result_code "${retc}" "mark ${ip}" if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - echo "[ OK ]" + mptcp_lib_pr_ok return 0 fi - echo "[FAIL]" + mptcp_lib_pr_fail return 1 } @@ -224,7 +224,7 @@ do_mptcp_sockopt_tests() local lret=0 if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then - echo "INFO: MPTCP sockopt not supported: SKIP" + mptcp_lib_pr_skip "MPTCP sockopt not supported" mptcp_lib_result_skip "sockopt" return fi @@ -234,12 +234,12 @@ do_mptcp_sockopt_tests() print_title "SOL_MPTCP sockopt v4" if [ $lret -ne 0 ]; then - echo "[FAIL]" + mptcp_lib_pr_fail mptcp_lib_result_fail "sockopt v4" ret=$lret return fi - echo "[ OK ]" + mptcp_lib_pr_ok mptcp_lib_result_pass "sockopt v4" ip netns exec "$ns_sbox" ./mptcp_sockopt -6 @@ -247,12 +247,12 @@ do_mptcp_sockopt_tests() print_title "SOL_MPTCP sockopt v6" if [ $lret -ne 0 ]; then - echo "[FAIL]" + mptcp_lib_pr_fail mptcp_lib_result_fail "sockopt v6" ret=$lret return fi - echo "[ OK ]" + mptcp_lib_pr_ok mptcp_lib_result_pass "sockopt v6" } @@ -280,12 +280,12 @@ do_tcpinq_test() local lret=$? if [ $lret -ne 0 ];then ret=$lret - echo "[FAIL]" + mptcp_lib_pr_fail mptcp_lib_result_fail "TCP_INQ: $*" return $lret fi - echo "[ OK ]" + mptcp_lib_pr_ok mptcp_lib_result_pass "TCP_INQ: $*" return $lret } @@ -295,7 +295,7 @@ do_tcpinq_tests() local lret=0 if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then - echo "INFO: TCP_INQ not supported: SKIP" + mptcp_lib_pr_skip "TCP_INQ not supported" mptcp_lib_result_skip "TCP_INQ" return fi diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 5b9bc25dfef4..69ffff8b076b 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -190,7 +190,7 @@ else for st in fullmesh nofullmesh backup,fullmesh; do st=" (${st})" mptcp_lib_print_title "${st}" - echo "[SKIP]" + mptcp_lib_pr_skip mptcp_lib_result_skip "${st}" done fi diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index b7549d9364d6..e62052c3206d 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -187,12 +187,12 @@ do_transfer() printf "%-16s" " max $max_time " if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then - echo "[ OK ]" + mptcp_lib_pr_ok cat "$capout" return 0 fi - echo " [ fail ]" + mptcp_lib_pr_fail echo "client exit code $retc, server $rets" 1>&2 echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2 ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index ca238592baee..4e29aa9c2529 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -57,13 +57,9 @@ test_name="" # a bit more space: because we have more to display MPTCP_LIB_TEST_FORMAT="%02u %-68s" -_printf() { - stdbuf -o0 -e0 printf "${@}" -} - print_title() { - _printf "INFO: %s\n" "${1}" + mptcp_lib_pr_info "${1}" } # $1: test name @@ -74,33 +70,23 @@ print_test() mptcp_lib_print_title "${test_name}" } -print_results() -{ - _printf "[%s]\n" "${1}" -} - test_pass() { - print_results " OK " + mptcp_lib_pr_ok mptcp_lib_result_pass "${test_name}" } test_skip() { - print_results "SKIP" + mptcp_lib_pr_skip mptcp_lib_result_skip "${test_name}" } # $1: msg test_fail() { - print_results "FAIL" + mptcp_lib_pr_fail "${@}" ret=1 - - if [ -n "${1}" ]; then - _printf "\t%s\n" "${1}" - fi - mptcp_lib_result_fail "${test_name}" } @@ -122,7 +108,7 @@ cleanup() rm -rf $file $client_evts $server_evts - _printf "Done\n" + mptcp_lib_pr_info "Done" } trap cleanup EXIT @@ -256,8 +242,7 @@ check_expected_one() test_fail fi - _printf "\tExpected value for '%s': '%s', got '%s'.\n" \ - "${var}" "${!exp}" "${!var}" + mptcp_lib_print_err "\tExpected value for '${var}': '${!exp}', got '${!var}'." return 1 } -- cgit v1.2.3 From 339c225e2e03a41aa716038b7de2051a930603de Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:17 +0100 Subject: selftests: mptcp: call test_fail without argument This patch modifies test_fail() to call mptcp_lib_pr_fail() only if there are arguments (if [ ${#} -gt 0 ]) in userspace_pm.sh, add arguments "unexpected type: ${type}" when calling test_fail() from test_remove(). Then mptcp_lib_pr_fail() can be used in check_expected_one() instead of test_fail(). The same in mptcp_join.sh, calling fail_test() without argument, and adapt this helper not to call print_fail() in this case. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-10-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 7 +++++-- tools/testing/selftests/net/mptcp/userspace_pm.sh | 12 ++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9f67b9ba97d8..041175ec1304 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -452,7 +452,9 @@ fail_test() { ret=1 - print_fail "${@}" + if [ ${#} -gt 0 ]; then + print_fail "${@}" + fi # just in case a test is marked twice as failed if [ ${last_test_failed} -eq 0 ]; then @@ -2834,7 +2836,8 @@ verify_listener_events() print_ok return 0 fi - fail_test "$e_type:$type $e_family:$family $e_saddr:$saddr $e_sport:$sport" + print_fail "$e_type:$type $e_family:$family $e_saddr:$saddr $e_sport:$sport" + fail_test } add_addr_ports_tests() diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 4e29aa9c2529..bc2f0184b1eb 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -85,7 +85,10 @@ test_skip() # $1: msg test_fail() { - mptcp_lib_pr_fail "${@}" + if [ ${#} -gt 0 ] + then + mptcp_lib_pr_fail "${@}" + fi ret=1 mptcp_lib_result_fail "${test_name}" } @@ -239,7 +242,7 @@ check_expected_one() if [ "${prev_ret}" = "0" ] then - test_fail + mptcp_lib_pr_fail fi mptcp_lib_print_err "\tExpected value for '${var}': '${!exp}', got '${!var}'." @@ -263,6 +266,7 @@ check_expected() return 0 fi + test_fail return 1 } @@ -412,7 +416,7 @@ test_remove() then test_pass else - test_fail + test_fail "unexpected type: ${type}" fi # RM_ADDR using an invalid addr id should result in no action @@ -425,7 +429,7 @@ test_remove() then test_pass else - test_fail + test_fail "unexpected type: ${type}" fi # RM_ADDR from the client to server machine -- cgit v1.2.3 From 663260e146688e1c90ef39234a212ac0c32e448d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:18 +0100 Subject: selftests: mptcp: extract mptcp_lib_check_expected Extract the main part of check_expected() in userspace_pm.sh to a new function mptcp_lib_check_expected() in mptcp_lib.sh. It will be used in both mptcp_john.sh and userspace_pm.sh. check_expected_one() is moved into mptcp_lib.sh too as mptcp_lib_check_expected_one(). Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-11-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 30 +++++++++++++++++++++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 33 ++--------------------- 2 files changed, 32 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index ea39392c68e7..44491f18ed17 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -438,3 +438,33 @@ mptcp_lib_print_title() { # shellcheck disable=SC2059 # the format is in a variable printf "${MPTCP_LIB_TEST_FORMAT}" "$((++MPTCP_LIB_TEST_COUNTER))" "${*}" } + +# $1: var name ; $2: prev ret +mptcp_lib_check_expected_one() { + local var="${1}" + local exp="e_${var}" + local prev_ret="${2}" + + if [ "${!var}" = "${!exp}" ]; then + return 0 + fi + + if [ "${prev_ret}" = "0" ]; then + mptcp_lib_pr_fail + fi + + mptcp_lib_print_err "Expected value for '${var}': '${!exp}', got '${!var}'." + return 1 +} + +# $@: all var names to check +mptcp_lib_check_expected() { + local rc=0 + local var + + for var in "${@}"; do + mptcp_lib_check_expected_one "${var}" "${rc}" || rc=1 + done + + return "${rc}" +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index bc2f0184b1eb..6d0862a1b68d 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -5,7 +5,7 @@ # code but we accept it. #shellcheck disable=SC2086 -# Some variables are used below but indirectly, see check_expected_one() +# Some variables are used below but indirectly, see verify_*_event() #shellcheck disable=SC2034 . "$(dirname "${0}")/mptcp_lib.sh" @@ -228,39 +228,10 @@ make_connection() fi } -# $1: var name ; $2: prev ret -check_expected_one() -{ - local var="${1}" - local exp="e_${var}" - local prev_ret="${2}" - - if [ "${!var}" = "${!exp}" ] - then - return 0 - fi - - if [ "${prev_ret}" = "0" ] - then - mptcp_lib_pr_fail - fi - - mptcp_lib_print_err "\tExpected value for '${var}': '${!exp}', got '${!var}'." - return 1 -} - # $@: all var names to check check_expected() { - local rc=0 - local var - - for var in "${@}" - do - check_expected_one "${var}" "${rc}" || rc=1 - done - - if [ ${rc} -eq 0 ] + if mptcp_lib_check_expected "${@}" then test_pass return 0 -- cgit v1.2.3 From 8ebb44196585d3c9405fba1e409cf2312bca30ac Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:19 +0100 Subject: selftests: mptcp: print_test out of verify_listener_events verify_listener_events() helper will be exported into mptcp_lib.sh as a public function, but print_test() is invoked in it, which is a private function in userspace_pm.sh only. So this patch moves print_test() out of verify_listener_events(). Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-12-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 6d0862a1b68d..e9aea44edee5 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -845,12 +845,6 @@ verify_listener_events() local saddr local sport - if [ $e_type = $LISTENER_CREATED ]; then - print_test "CREATE_LISTENER $e_saddr:$e_sport" - elif [ $e_type = $LISTENER_CLOSED ]; then - print_test "CLOSE_LISTENER $e_saddr:$e_sport" - fi - type=$(mptcp_lib_evts_get_info type $evt $e_type) family=$(mptcp_lib_evts_get_info family $evt $e_type) sport=$(mptcp_lib_evts_get_info sport $evt $e_type) @@ -882,6 +876,7 @@ test_listener() local listener_pid=$! sleep 0.5 + print_test "CREATE_LISTENER 10.0.2.2:$client4_port" verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port # ADD_ADDR from client to server machine reusing the subflow port @@ -898,6 +893,7 @@ test_listener() mptcp_lib_kill_wait $listener_pid sleep 0.5 + print_test "CLOSE_LISTENER 10.0.2.2:$client4_port" verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port } -- cgit v1.2.3 From 7f0782ca1ce9f5dcb163c3f3a626f8b23fa85b3d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:20 +0100 Subject: selftests: mptcp: add mptcp_lib_verify_listener_events To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. The helper verify_listener_events() is defined both in mptcp_join.sh and userspace_pm.sh, export it into mptcp_lib.sh and rename it with mptcp_lib_ prefix. Use this new helper in both scripts. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-13-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 21 +----------------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 26 +++++++++++++++++++++++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 21 +++--------------- 3 files changed, 30 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 041175ec1304..9b538a7071e7 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2794,15 +2794,9 @@ AF_INET6=10 verify_listener_events() { - local evt=$1 local e_type=$2 - local e_family=$3 local e_saddr=$4 local e_sport=$5 - local type - local family - local saddr - local sport local name if [ $e_type = $LISTENER_CREATED ]; then @@ -2820,23 +2814,10 @@ verify_listener_events() return fi - type=$(mptcp_lib_evts_get_info type "$evt" "$e_type") - family=$(mptcp_lib_evts_get_info family "$evt" "$e_type") - sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type") - if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type") - else - saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type") - fi - - if [ $type ] && [ $type = $e_type ] && - [ $family ] && [ $family = $e_family ] && - [ $saddr ] && [ $saddr = $e_saddr ] && - [ $sport ] && [ $sport = $e_sport ]; then + if mptcp_lib_verify_listener_events "${@}"; then print_ok return 0 fi - print_fail "$e_type:$type $e_family:$family $e_saddr:$saddr $e_sport:$sport" fail_test } diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 44491f18ed17..a977a722fb3d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -468,3 +468,29 @@ mptcp_lib_check_expected() { return "${rc}" } + +# shellcheck disable=SC2034 # Some variables are used below but indirectly +mptcp_lib_verify_listener_events() { + local evt=${1} + local e_type=${2} + local e_family=${3} + local e_saddr=${4} + local e_sport=${5} + local type + local family + local saddr + local sport + local rc=0 + + type=$(mptcp_lib_evts_get_info type "${evt}" "${e_type}") + family=$(mptcp_lib_evts_get_info family "${evt}" "${e_type}") + if [ "${family}" ] && [ "${family}" = "${AF_INET6}" ]; then + saddr=$(mptcp_lib_evts_get_info saddr6 "${evt}" "${e_type}") + else + saddr=$(mptcp_lib_evts_get_info saddr4 "${evt}" "${e_type}") + fi + sport=$(mptcp_lib_evts_get_info sport "${evt}" "${e_type}") + + mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}" + return "${rc}" +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index e9aea44edee5..1e0b39e5525c 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -835,26 +835,11 @@ test_prio() verify_listener_events() { - local evt=$1 - local e_type=$2 - local e_family=$3 - local e_saddr=$4 - local e_sport=$5 - local type - local family - local saddr - local sport - - type=$(mptcp_lib_evts_get_info type $evt $e_type) - family=$(mptcp_lib_evts_get_info family $evt $e_type) - sport=$(mptcp_lib_evts_get_info sport $evt $e_type) - if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(mptcp_lib_evts_get_info saddr6 $evt $e_type) + if mptcp_lib_verify_listener_events "${@}"; then + test_pass else - saddr=$(mptcp_lib_evts_get_info saddr4 $evt $e_type) + test_fail fi - - check_expected "type" "family" "saddr" "sport" } test_listener() -- cgit v1.2.3 From 23a0485d1c0491a3044026263cf9a0acd33d30a2 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:21 +0100 Subject: selftests: mptcp: declare event macros in mptcp_lib MPTCP event macros (SUB_ESTABLISHED, LISTENER_CREATED, LISTENER_CLOSED), and the protocol family macros (AF_INET, AF_INET6) are defined in both mptcp_join.sh and userspace_pm.sh. In order not to duplicate code, this patch declares them all in mptcp_lib.sh with MPTCP_LIB_ prefixs. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-14-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 23 +++++++++-------------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 11 +++++++++++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 18 +++++++++--------- 3 files changed, 29 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9b538a7071e7..728575fd86ea 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2785,13 +2785,6 @@ backup_tests() fi } -SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED -LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED -LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED - -AF_INET=2 -AF_INET6=10 - verify_listener_events() { local e_type=$2 @@ -2799,9 +2792,9 @@ verify_listener_events() local e_sport=$5 local name - if [ $e_type = $LISTENER_CREATED ]; then + if [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CREATED ]; then name="LISTENER_CREATED" - elif [ $e_type = $LISTENER_CLOSED ]; then + elif [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CLOSED ]; then name="LISTENER_CLOSED " else name="$e_type" @@ -2856,8 +2849,10 @@ add_addr_ports_tests() chk_add_nr 1 1 1 chk_rm_nr 1 1 invert - verify_listener_events $evts_ns1 $LISTENER_CREATED $AF_INET 10.0.2.1 10100 - verify_listener_events $evts_ns1 $LISTENER_CLOSED $AF_INET 10.0.2.1 10100 + verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CREATED \ + $MPTCP_LIB_AF_INET 10.0.2.1 10100 + verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CLOSED \ + $MPTCP_LIB_AF_INET 10.0.2.1 10100 kill_events_pids fi @@ -3463,11 +3458,11 @@ userspace_tests() userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" userspace_pm_rm_addr $ns1 10 - userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED + userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" \ "id 20 flags signal 10.0.3.1" "after rm_addr 10" userspace_pm_rm_addr $ns1 20 - userspace_pm_rm_sf $ns1 10.0.3.1 $SUB_ESTABLISHED + userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" chk_rm_nr 2 2 invert chk_mptcp_info subflows 0 subflows 0 @@ -3494,7 +3489,7 @@ userspace_tests() "subflow" userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" userspace_pm_rm_addr $ns2 20 - userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED + userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns2}" \ "" \ "after rm_addr 20" diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index a977a722fb3d..d529b4b37af8 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -8,6 +8,17 @@ readonly KSFT_SKIP=4 # shellcheck disable=SC2155 # declare and assign separately readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" +# These variables are used in some selftests, read-only +declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED +declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED +declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED +declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED +declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED + +declare -rx MPTCP_LIB_AF_INET=2 +declare -rx MPTCP_LIB_AF_INET6=10 + MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 MPTCP_LIB_TEST_COUNTER=0 diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 1e0b39e5525c..72dca742280f 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -19,15 +19,15 @@ if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then fi mptcp_lib_check_tools ip -ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED -REMOVED=7 # MPTCP_EVENT_REMOVED -SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED -SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED -LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED -LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED - -AF_INET=2 -AF_INET6=10 +ANNOUNCED=${MPTCP_LIB_EVENT_ANNOUNCED} +REMOVED=${MPTCP_LIB_EVENT_REMOVED} +SUB_ESTABLISHED=${MPTCP_LIB_EVENT_SUB_ESTABLISHED} +SUB_CLOSED=${MPTCP_LIB_EVENT_SUB_CLOSED} +LISTENER_CREATED=${MPTCP_LIB_EVENT_LISTENER_CREATED} +LISTENER_CLOSED=${MPTCP_LIB_EVENT_LISTENER_CLOSED} + +AF_INET=${MPTCP_LIB_AF_INET} +AF_INET6=${MPTCP_LIB_AF_INET6} file="" server_evts="" -- cgit v1.2.3 From 8f7a69a8e7dc719a34f2896d7c9fe4b10bbe71f0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Mar 2024 23:10:22 +0100 Subject: selftests: mptcp: use KSFT_SKIP/KSFT_PASS/KSFT_FAIL This patch uses the public var KSFT_SKIP in mptcp_lib.sh instead of ksft_skip, and drop 'ksft_skip=4' in mptcp_join.sh. Use KSFT_PASS and KSFT_FAIL macros instead of 0 and 1 after 'exit ' and 'ret=' in all scripts: exit 0 -> exit ${KSFT_PASS} exit 1 -> exit ${KSFT_FAIL} ret=0 -> ret=${KSFT_PASS} ret=1 -> ret=${KSFT_FAIL} Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240308-upstream-net-next-20240308-selftests-mptcp-unification-v1-15-4f42c347b653@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 18 +++++++++--------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 13 ++++++------- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 4 ++-- tools/testing/selftests/net/mptcp/pm_netlink.sh | 8 ++++---- tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ++-- tools/testing/selftests/net/mptcp/userspace_pm.sh | 4 ++-- 6 files changed, 25 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index cb1837f2761a..4c4248554826 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -65,14 +65,14 @@ while getopts "$optstring" option;do case "$option" in "h") usage $0 - exit 0 + exit ${KSFT_PASS} ;; "d") if [ $OPTARG -ge 0 ];then tc_delay="$OPTARG" else echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2 - exit 1 + exit ${KSFT_FAIL} fi ;; "e") @@ -96,7 +96,7 @@ while getopts "$optstring" option;do sndbuf="$OPTARG" else echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2 - exit 1 + exit ${KSFT_FAIL} fi ;; "R") @@ -104,7 +104,7 @@ while getopts "$optstring" option;do rcvbuf="$OPTARG" else echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2 - exit 1 + exit ${KSFT_FAIL} fi ;; "m") @@ -121,7 +121,7 @@ while getopts "$optstring" option;do ;; "?") usage $0 - exit 1 + exit ${KSFT_FAIL} ;; esac done @@ -263,7 +263,7 @@ check_mptcp_disabled() if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default" mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default" - ret=1 + ret=${KSFT_FAIL} return 1 fi ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0 @@ -276,7 +276,7 @@ check_mptcp_disabled() if [ ${err} -eq 0 ]; then mptcp_lib_pr_fail "New MPTCP socket cannot be blocked via sysctl" mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl" - ret=1 + ret=${KSFT_FAIL} return 1 fi @@ -302,7 +302,7 @@ do_ping() if [ $rc -ne 0 ] ; then mptcp_lib_pr_fail "$listener_ns -> $connect_addr connectivity" - ret=1 + ret=${KSFT_FAIL} return 1 fi @@ -821,7 +821,7 @@ log_if_error() mptcp_lib_pr_fail "${msg}" final_ret=${ret} - ret=0 + ret=${KSFT_PASS} return ${final_ret} fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 728575fd86ea..5e9211e89825 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -25,7 +25,6 @@ err="" capout="" ns1="" ns2="" -ksft_skip=4 iptables="iptables" ip6tables="ip6tables" timeout_poll=30 @@ -392,15 +391,15 @@ setup_fail_rules() -p tcp \ -m length --length 150:9999 \ -m statistic --mode nth --packet 1 --every 99999 \ - -j MARK --set-mark 42 || return ${ksft_skip} + -j MARK --set-mark 42 || return ${KSFT_SKIP} - tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${ksft_skip} + tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${KSFT_SKIP} tc -n $ns2 filter add dev ns2eth$i egress \ protocol ip prio 1000 \ handle 42 fw \ action pedit munge offset 148 u8 invert \ pipe csum tcp \ - index 100 || return ${ksft_skip} + index 100 || return ${KSFT_SKIP} } reset_with_fail() @@ -414,7 +413,7 @@ reset_with_fail() local rc=0 setup_fail_rules "${@}" || rc=$? - if [ ${rc} -eq ${ksft_skip} ]; then + if [ ${rc} -eq ${KSFT_SKIP} ]; then mark_as_skipped "unable to set the 'fail' rules" return 1 fi @@ -450,7 +449,7 @@ reset_with_tcp_filter() # $1: err msg fail_test() { - ret=1 + ret=${KSFT_FAIL} if [ ${#} -gt 0 ]; then print_fail "${@}" @@ -3632,7 +3631,7 @@ usage() { if [ -n "${1}" ]; then echo "${1}" - ret=1 + ret=${KSFT_FAIL} fi echo "mptcp_join usage:" diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 96aa8f71bbb0..e2d70c18786e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -105,7 +105,7 @@ check_mark() if [ $v -ne 0 ]; then mptcp_lib_pr_fail "got $tables $values in ns $ns," \ "not 0 - not all expected packets marked" - ret=1 + ret=${KSFT_FAIL} return 1 fi done @@ -178,7 +178,7 @@ do_transfer() mptcp_lib_result_fail "transfer ${ip}" - ret=1 + ret=${KSFT_FAIL} return 1 fi if ! mptcp_lib_check_transfer $cin $sout "file received by server"; then diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 69ffff8b076b..6ab8c5d36340 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -19,11 +19,11 @@ while getopts "$optstring" option;do case "$option" in "h") usage $0 - exit 0 + exit ${KSFT_PASS} ;; "?") usage $0 - exit 1 + exit ${KSFT_FAIL} ;; esac done @@ -57,13 +57,13 @@ check() mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} if [ ${rc} -eq 2 ]; then mptcp_lib_result_fail "${msg} # error ${rc}" - ret=1 + ret=${KSFT_FAIL} elif [ ${rc} -eq 0 ]; then mptcp_lib_print_ok "[ OK ]" mptcp_lib_result_pass "${msg}" elif [ ${rc} -eq 1 ]; then mptcp_lib_result_fail "${msg} # different output" - ret=1 + ret=${KSFT_FAIL} fi } diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index e62052c3206d..1b2366220388 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -263,7 +263,7 @@ while getopts "bcdh" option;do case "$option" in "h") usage $0 - exit 0 + exit ${KSFT_PASS} ;; "b") bail=1 @@ -276,7 +276,7 @@ while getopts "bcdh" option;do ;; "?") usage $0 - exit 1 + exit ${KSFT_FAIL} ;; esac done diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 72dca742280f..9e2981f2d7f5 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -89,7 +89,7 @@ test_fail() then mptcp_lib_pr_fail "${@}" fi - ret=1 + ret=${KSFT_FAIL} mptcp_lib_result_fail "${test_name}" } @@ -209,7 +209,7 @@ make_connection() else test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" mptcp_lib_result_print_all_tap - exit 1 + exit ${KSFT_FAIL} fi if [ "$is_v6" = "v6" ] -- cgit v1.2.3 From a0d942960d9b0ba352b2400e1a3b8fb02d911143 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Mar 2024 11:25:55 -0800 Subject: tools: ynl: remove trailing semicolon Commit e8a6c515ff5f ("tools: ynl: allow user to pass enum string instead of scalar value") added a semicolon at the end of a line. Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240308192555.2550253-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 2d7fdd903d9e..5fa7957f6e0f 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -489,7 +489,7 @@ class YnlFamily(SpecFamily): except (ValueError, TypeError) as e: if 'enum' not in attr_spec: raise e - return self._encode_enum(attr_spec, value); + return self._encode_enum(attr_spec, value) def _add_attr(self, space, name, value, search_attrs): try: -- cgit v1.2.3 From ba980f8dff548ec4558ca9c5f20ac6545920debb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Mar 2024 11:03:19 -0800 Subject: netlink: specs: support generating code for genl socket priv The family struct is auto-generated for new families, support use of the sock_priv_* mechanism added in commit a731132424ad ("genetlink: introduce per-sock family private storage"). For example if the family wants to use struct sk_buff as its private struct (unrealistic but just for illustration), it would add to its spec: kernel-family: headers: [ "linux/skbuff.h" ] sock-priv: struct sk_buff ynl-gen-c will declare the appropriate priv size and hook in function prototypes to be implemented by the family. Link: https://lore.kernel.org/r/20240308190319.2523704-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/genetlink-c.yaml | 19 +++++++++++++++++++ Documentation/netlink/genetlink-legacy.yaml | 19 +++++++++++++++++++ Documentation/netlink/genetlink.yaml | 19 +++++++++++++++++++ tools/net/ynl/lib/nlspec.py | 2 ++ tools/net/ynl/ynl-gen-c.py | 10 ++++++++++ 5 files changed, 69 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index de786e2d35bf..4dfd899a1661 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -378,3 +378,22 @@ properties: type: string # End genetlink-c flags: *cmd_flags + + kernel-family: + description: Additional global attributes used for kernel C code generation. + type: object + additionalProperties: False + properties: + headers: + description: | + List of extra headers which should be included in the source + of the generated code. + type: array + items: + type: string + sock-priv: + description: | + Literal name of the type which is used within the kernel + to store the socket state. The type / structure is internal + to the kernel, and is not defined in the spec. + type: string diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index c69de46b189b..b48ad3b1cc32 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -439,3 +439,22 @@ properties: type: string # End genetlink-c flags: *cmd_flags + + kernel-family: + description: Additional global attributes used for kernel C code generation. + type: object + additionalProperties: False + properties: + headers: + description: | + List of extra headers which should be included in the source + of the generated code. + type: array + items: + type: string + sock-priv: + description: | + Literal name of the type which is used within the kernel + to store the socket state. The type / structure is internal + to the kernel, and is not defined in the spec. + type: string diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index d7edb8855563..ebd6ee743fcc 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -328,3 +328,22 @@ properties: The name for the group, used to form the define and the value of the define. type: string flags: *cmd_flags + + kernel-family: + description: Additional global attributes used for kernel C code generation. + type: object + additionalProperties: False + properties: + headers: + description: | + List of extra headers which should be included in the source + of the generated code. + type: array + items: + type: string + sock-priv: + description: | + Literal name of the type which is used within the kernel + to store the socket state. The type / structure is internal + to the kernel, and is not defined in the spec. + type: string diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index fbce52395b3b..6d08ab9e213f 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -418,6 +418,7 @@ class SpecFamily(SpecElement): consts dict of all constants/enums fixed_header string, optional name of family default fixed header struct mcast_groups dict of all multicast groups (index by name) + kernel_family dict of kernel family attributes """ def __init__(self, spec_path, schema_path=None, exclude_ops=None): with open(spec_path, "r") as stream: @@ -461,6 +462,7 @@ class SpecFamily(SpecElement): self.ntfs = collections.OrderedDict() self.consts = collections.OrderedDict() self.mcast_groups = collections.OrderedDict() + self.kernel_family = collections.OrderedDict(self.yaml.get('kernel-family', {})) last_exception = None while len(self._resolution_list) > 0: diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 5bb7ca01fe51..6b7eb2d2aaf1 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2342,6 +2342,10 @@ def print_kernel_family_struct_hdr(family, cw): cw.p(f"extern struct genl_family {family.c_name}_nl_family;") cw.nl() + if 'sock-priv' in family.kernel_family: + cw.p(f'void {family.c_name}_nl_sock_priv_init({family.kernel_family["sock-priv"]} *priv);') + cw.p(f'void {family.c_name}_nl_sock_priv_destroy({family.kernel_family["sock-priv"]} *priv);') + cw.nl() def print_kernel_family_struct_src(family, cw): @@ -2363,6 +2367,11 @@ def print_kernel_family_struct_src(family, cw): if family.mcgrps['list']: cw.p(f'.mcgrps\t\t= {family.c_name}_nl_mcgrps,') cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),') + if 'sock-priv' in family.kernel_family: + cw.p(f'.sock_priv_size\t= sizeof({family.kernel_family["sock-priv"]}),') + # Force cast here, actual helpers take pointer to the real type. + cw.p(f'.sock_priv_init\t= (void *){family.c_name}_nl_sock_priv_init,') + cw.p(f'.sock_priv_destroy = (void *){family.c_name}_nl_sock_priv_destroy,') cw.block_end(';') @@ -2659,6 +2668,7 @@ def main(): cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"') cw.nl() headers = ['uapi/' + parsed.uapi_header] + headers += parsed.kernel_family.get('headers', []) else: cw.p('#include ') cw.p('#include ') -- cgit v1.2.3 From 317460317a02a1af512697e6e964298dedd8a163 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:07:59 -0800 Subject: bpf: Introduce bpf_arena. Introduce bpf_arena, which is a sparse shared memory region between the bpf program and user space. Use cases: 1. User space mmap-s bpf_arena and uses it as a traditional mmap-ed anonymous region, like memcached or any key/value storage. The bpf program implements an in-kernel accelerator. XDP prog can search for a key in bpf_arena and return a value without going to user space. 2. The bpf program builds arbitrary data structures in bpf_arena (hash tables, rb-trees, sparse arrays), while user space consumes it. 3. bpf_arena is a "heap" of memory from the bpf program's point of view. The user space may mmap it, but bpf program will not convert pointers to user base at run-time to improve bpf program speed. Initially, the kernel vm_area and user vma are not populated. User space can fault in pages within the range. While servicing a page fault, bpf_arena logic will insert a new page into the kernel and user vmas. The bpf program can allocate pages from that region via bpf_arena_alloc_pages(). This kernel function will insert pages into the kernel vm_area. The subsequent fault-in from user space will populate that page into the user vma. The BPF_F_SEGV_ON_FAULT flag at arena creation time can be used to prevent fault-in from user space. In such a case, if a page is not allocated by the bpf program and not present in the kernel vm_area, the user process will segfault. This is useful for use cases 2 and 3 above. bpf_arena_alloc_pages() is similar to user space mmap(). It allocates pages either at a specific address within the arena or allocates a range with the maple tree. bpf_arena_free_pages() is analogous to munmap(), which frees pages and removes the range from the kernel vm_area and from user process vmas. bpf_arena can be used as a bpf program "heap" of up to 4GB. The speed of bpf program is more important than ease of sharing with user space. This is use case 3. In such a case, the BPF_F_NO_USER_CONV flag is recommended. It will tell the verifier to treat the rX = bpf_arena_cast_user(rY) instruction as a 32-bit move wX = wY, which will improve bpf prog performance. Otherwise, bpf_arena_cast_user is translated by JIT to conditionally add the upper 32 bits of user vm_start (if the pointer is not NULL) to arena pointers before they are stored into memory. This way, user space sees them as valid 64-bit pointers. Diff https://github.com/llvm/llvm-project/pull/84410 enables LLVM BPF backend generate the bpf_addr_space_cast() instruction to cast pointers between address_space(1) which is reserved for bpf_arena pointers and default address space zero. All arena pointers in a bpf program written in C language are tagged as __attribute__((address_space(1))). Hence, clang provides helpful diagnostics when pointers cross address space. Libbpf and the kernel support only address_space == 1. All other address space identifiers are reserved. rX = bpf_addr_space_cast(rY, /* dst_as */ 1, /* src_as */ 0) tells the verifier that rX->type = PTR_TO_ARENA. Any further operations on PTR_TO_ARENA register have to be in the 32-bit domain. The verifier will mark load/store through PTR_TO_ARENA with PROBE_MEM32. JIT will generate them as kern_vm_start + 32bit_addr memory accesses. The behavior is similar to copy_from_kernel_nofault() except that no address checks are necessary. The address is guaranteed to be in the 4GB range. If the page is not present, the destination register is zeroed on read, and the operation is ignored on write. rX = bpf_addr_space_cast(rY, 0, 1) tells the verifier that rX->type = unknown scalar. If arena->map_flags has BPF_F_NO_USER_CONV set, then the verifier converts such cast instructions to mov32. Otherwise, JIT will emit native code equivalent to: rX = (u32)rY; if (rY) rX |= clear_lo32_bits(arena->user_vm_start); /* replace hi32 bits in rX */ After such conversion, the pointer becomes a valid user pointer within bpf_arena range. The user process can access data structures created in bpf_arena without any additional computations. For example, a linked list built by a bpf program can be walked natively by user space. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Reviewed-by: Barret Rhoden Link: https://lore.kernel.org/bpf/20240308010812.89848-2-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 7 +- include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 10 + kernel/bpf/Makefile | 3 + kernel/bpf/arena.c | 558 +++++++++++++++++++++++++++++++++++++++++ kernel/bpf/core.c | 11 + kernel/bpf/syscall.c | 36 +++ kernel/bpf/verifier.c | 1 + tools/include/uapi/linux/bpf.h | 10 + 9 files changed, 635 insertions(+), 2 deletions(-) create mode 100644 kernel/bpf/arena.c (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 95e07673cdc1..ea6ab6e0eef9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -37,6 +37,7 @@ struct perf_event; struct bpf_prog; struct bpf_prog_aux; struct bpf_map; +struct bpf_arena; struct sock; struct seq_file; struct btf; @@ -528,8 +529,8 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, struct bpf_spin_lock *spin_lock); - - +u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena); +u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -2215,6 +2216,8 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); +int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, + unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG_KMEM void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 94baced5a1ad..9f2a6b83b49e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -132,6 +132,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 85ec7fc799d7..e30d943db8a4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1009,6 +1009,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + BPF_MAP_TYPE_ARENA, __MAX_BPF_MAP_TYPE }; @@ -1396,6 +1397,12 @@ enum { /* BPF token FD is passed in a corresponding command's token_fd field */ BPF_F_TOKEN_FD = (1U << 16), + +/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */ + BPF_F_SEGV_ON_FAULT = (1U << 17), + +/* Do not translate kernel bpf_arena pointers to user pointers */ + BPF_F_NO_USER_CONV = (1U << 18), }; /* Flags for BPF_PROG_QUERY. */ @@ -1467,6 +1474,9 @@ union bpf_attr { * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the * number of hash functions (if 0, the bloom filter will default * to using 5 hash functions). + * + * BPF_MAP_TYPE_ARENA - contains the address where user space + * is going to mmap() the arena. It has to be page aligned. */ __u64 map_extra; diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 4ce95acfcaa7..368c5d86b5b7 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -15,6 +15,9 @@ obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o +ifeq ($(CONFIG_MMU)$(CONFIG_64BIT),yy) +obj-$(CONFIG_BPF_SYSCALL) += arena.o +endif obj-$(CONFIG_BPF_JIT) += dispatcher.o ifeq ($(CONFIG_NET),y) obj-$(CONFIG_BPF_SYSCALL) += devmap.o diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c new file mode 100644 index 000000000000..86571e760dd6 --- /dev/null +++ b/kernel/bpf/arena.c @@ -0,0 +1,558 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include +#include +#include + +/* + * bpf_arena is a sparsely populated shared memory region between bpf program and + * user space process. + * + * For example on x86-64 the values could be: + * user_vm_start 7f7d26200000 // picked by mmap() + * kern_vm_start ffffc90001e69000 // picked by get_vm_area() + * For user space all pointers within the arena are normal 8-byte addresses. + * In this example 7f7d26200000 is the address of the first page (pgoff=0). + * The bpf program will access it as: kern_vm_start + lower_32bit_of_user_ptr + * (u32)7f7d26200000 -> 26200000 + * hence + * ffffc90001e69000 + 26200000 == ffffc90028069000 is "pgoff=0" within 4Gb + * kernel memory region. + * + * BPF JITs generate the following code to access arena: + * mov eax, eax // eax has lower 32-bit of user pointer + * mov word ptr [rax + r12 + off], bx + * where r12 == kern_vm_start and off is s16. + * Hence allocate 4Gb + GUARD_SZ/2 on each side. + * + * Initially kernel vm_area and user vma are not populated. + * User space can fault-in any address which will insert the page + * into kernel and user vma. + * bpf program can allocate a page via bpf_arena_alloc_pages() kfunc + * which will insert it into kernel vm_area. + * The later fault-in from user space will populate that page into user vma. + */ + +/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */ +#define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8) +#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ) + +struct bpf_arena { + struct bpf_map map; + u64 user_vm_start; + u64 user_vm_end; + struct vm_struct *kern_vm; + struct maple_tree mt; + struct list_head vma_list; + struct mutex lock; +}; + +u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena) +{ + return arena ? (u64) (long) arena->kern_vm->addr + GUARD_SZ / 2 : 0; +} + +u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena) +{ + return arena ? arena->user_vm_start : 0; +} + +static long arena_map_peek_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + +static long arena_map_push_elem(struct bpf_map *map, void *value, u64 flags) +{ + return -EOPNOTSUPP; +} + +static long arena_map_pop_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + +static long arena_map_delete_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + +static int arena_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -EOPNOTSUPP; +} + +static long compute_pgoff(struct bpf_arena *arena, long uaddr) +{ + return (u32)(uaddr - (u32)arena->user_vm_start) >> PAGE_SHIFT; +} + +static struct bpf_map *arena_map_alloc(union bpf_attr *attr) +{ + struct vm_struct *kern_vm; + int numa_node = bpf_map_attr_numa_node(attr); + struct bpf_arena *arena; + u64 vm_range; + int err = -ENOMEM; + + if (attr->key_size || attr->value_size || attr->max_entries == 0 || + /* BPF_F_MMAPABLE must be set */ + !(attr->map_flags & BPF_F_MMAPABLE) || + /* No unsupported flags present */ + (attr->map_flags & ~(BPF_F_SEGV_ON_FAULT | BPF_F_MMAPABLE | BPF_F_NO_USER_CONV))) + return ERR_PTR(-EINVAL); + + if (attr->map_extra & ~PAGE_MASK) + /* If non-zero the map_extra is an expected user VMA start address */ + return ERR_PTR(-EINVAL); + + vm_range = (u64)attr->max_entries * PAGE_SIZE; + if (vm_range > (1ull << 32)) + return ERR_PTR(-E2BIG); + + if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32)) + /* user vma must not cross 32-bit boundary */ + return ERR_PTR(-ERANGE); + + kern_vm = get_vm_area(KERN_VM_SZ, VM_SPARSE | VM_USERMAP); + if (!kern_vm) + return ERR_PTR(-ENOMEM); + + arena = bpf_map_area_alloc(sizeof(*arena), numa_node); + if (!arena) + goto err; + + arena->kern_vm = kern_vm; + arena->user_vm_start = attr->map_extra; + if (arena->user_vm_start) + arena->user_vm_end = arena->user_vm_start + vm_range; + + INIT_LIST_HEAD(&arena->vma_list); + bpf_map_init_from_attr(&arena->map, attr); + mt_init_flags(&arena->mt, MT_FLAGS_ALLOC_RANGE); + mutex_init(&arena->lock); + + return &arena->map; +err: + free_vm_area(kern_vm); + return ERR_PTR(err); +} + +static int existing_page_cb(pte_t *ptep, unsigned long addr, void *data) +{ + struct page *page; + pte_t pte; + + pte = ptep_get(ptep); + if (!pte_present(pte)) /* sanity check */ + return 0; + page = pte_page(pte); + /* + * We do not update pte here: + * 1. Nobody should be accessing bpf_arena's range outside of a kernel bug + * 2. TLB flushing is batched or deferred. Even if we clear pte, + * the TLB entries can stick around and continue to permit access to + * the freed page. So it all relies on 1. + */ + __free_page(page); + return 0; +} + +static void arena_map_free(struct bpf_map *map) +{ + struct bpf_arena *arena = container_of(map, struct bpf_arena, map); + + /* + * Check that user vma-s are not around when bpf map is freed. + * mmap() holds vm_file which holds bpf_map refcnt. + * munmap() must have happened on vma followed by arena_vm_close() + * which would clear arena->vma_list. + */ + if (WARN_ON_ONCE(!list_empty(&arena->vma_list))) + return; + + /* + * free_vm_area() calls remove_vm_area() that calls free_unmap_vmap_area(). + * It unmaps everything from vmalloc area and clears pgtables. + * Call apply_to_existing_page_range() first to find populated ptes and + * free those pages. + */ + apply_to_existing_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena), + KERN_VM_SZ - GUARD_SZ, existing_page_cb, NULL); + free_vm_area(arena->kern_vm); + mtree_destroy(&arena->mt); + bpf_map_area_free(arena); +} + +static void *arena_map_lookup_elem(struct bpf_map *map, void *key) +{ + return ERR_PTR(-EINVAL); +} + +static long arena_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) +{ + return -EOPNOTSUPP; +} + +static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf, + const struct btf_type *key_type, const struct btf_type *value_type) +{ + return 0; +} + +static u64 arena_map_mem_usage(const struct bpf_map *map) +{ + return 0; +} + +struct vma_list { + struct vm_area_struct *vma; + struct list_head head; +}; + +static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma) +{ + struct vma_list *vml; + + vml = kmalloc(sizeof(*vml), GFP_KERNEL); + if (!vml) + return -ENOMEM; + vma->vm_private_data = vml; + vml->vma = vma; + list_add(&vml->head, &arena->vma_list); + return 0; +} + +static void arena_vm_close(struct vm_area_struct *vma) +{ + struct bpf_map *map = vma->vm_file->private_data; + struct bpf_arena *arena = container_of(map, struct bpf_arena, map); + struct vma_list *vml; + + guard(mutex)(&arena->lock); + vml = vma->vm_private_data; + list_del(&vml->head); + vma->vm_private_data = NULL; + kfree(vml); +} + +#define MT_ENTRY ((void *)&arena_map_ops) /* unused. has to be valid pointer */ + +static vm_fault_t arena_vm_fault(struct vm_fault *vmf) +{ + struct bpf_map *map = vmf->vma->vm_file->private_data; + struct bpf_arena *arena = container_of(map, struct bpf_arena, map); + struct page *page; + long kbase, kaddr; + int ret; + + kbase = bpf_arena_get_kern_vm_start(arena); + kaddr = kbase + (u32)(vmf->address & PAGE_MASK); + + guard(mutex)(&arena->lock); + page = vmalloc_to_page((void *)kaddr); + if (page) + /* already have a page vmap-ed */ + goto out; + + if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT) + /* User space requested to segfault when page is not allocated by bpf prog */ + return VM_FAULT_SIGSEGV; + + ret = mtree_insert(&arena->mt, vmf->pgoff, MT_ENTRY, GFP_KERNEL); + if (ret) + return VM_FAULT_SIGSEGV; + + /* Account into memcg of the process that created bpf_arena */ + ret = bpf_map_alloc_pages(map, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 1, &page); + if (ret) { + mtree_erase(&arena->mt, vmf->pgoff); + return VM_FAULT_SIGSEGV; + } + + ret = vm_area_map_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE, &page); + if (ret) { + mtree_erase(&arena->mt, vmf->pgoff); + __free_page(page); + return VM_FAULT_SIGSEGV; + } +out: + page_ref_add(page, 1); + vmf->page = page; + return 0; +} + +static const struct vm_operations_struct arena_vm_ops = { + .close = arena_vm_close, + .fault = arena_vm_fault, +}; + +static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct bpf_map *map = filp->private_data; + struct bpf_arena *arena = container_of(map, struct bpf_arena, map); + long ret; + + if (pgoff) + return -EINVAL; + if (len > (1ull << 32)) + return -E2BIG; + + /* if user_vm_start was specified at arena creation time */ + if (arena->user_vm_start) { + if (len > arena->user_vm_end - arena->user_vm_start) + return -E2BIG; + if (len != arena->user_vm_end - arena->user_vm_start) + return -EINVAL; + if (addr != arena->user_vm_start) + return -EINVAL; + } + + ret = current->mm->get_unmapped_area(filp, addr, len * 2, 0, flags); + if (IS_ERR_VALUE(ret)) + return ret; + if ((ret >> 32) == ((ret + len - 1) >> 32)) + return ret; + if (WARN_ON_ONCE(arena->user_vm_start)) + /* checks at map creation time should prevent this */ + return -EFAULT; + return round_up(ret, 1ull << 32); +} + +static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) +{ + struct bpf_arena *arena = container_of(map, struct bpf_arena, map); + + guard(mutex)(&arena->lock); + if (arena->user_vm_start && arena->user_vm_start != vma->vm_start) + /* + * If map_extra was not specified at arena creation time then + * 1st user process can do mmap(NULL, ...) to pick user_vm_start + * 2nd user process must pass the same addr to mmap(addr, MAP_FIXED..); + * or + * specify addr in map_extra and + * use the same addr later with mmap(addr, MAP_FIXED..); + */ + return -EBUSY; + + if (arena->user_vm_end && arena->user_vm_end != vma->vm_end) + /* all user processes must have the same size of mmap-ed region */ + return -EBUSY; + + /* Earlier checks should prevent this */ + if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff)) + return -EFAULT; + + if (remember_vma(arena, vma)) + return -ENOMEM; + + arena->user_vm_start = vma->vm_start; + arena->user_vm_end = vma->vm_end; + /* + * bpf_map_mmap() checks that it's being mmaped as VM_SHARED and + * clears VM_MAYEXEC. Set VM_DONTEXPAND as well to avoid + * potential change of user_vm_start. + */ + vm_flags_set(vma, VM_DONTEXPAND); + vma->vm_ops = &arena_vm_ops; + return 0; +} + +static int arena_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 off) +{ + struct bpf_arena *arena = container_of(map, struct bpf_arena, map); + + if ((u64)off > arena->user_vm_end - arena->user_vm_start) + return -ERANGE; + *imm = (unsigned long)arena->user_vm_start; + return 0; +} + +BTF_ID_LIST_SINGLE(bpf_arena_map_btf_ids, struct, bpf_arena) +const struct bpf_map_ops arena_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc = arena_map_alloc, + .map_free = arena_map_free, + .map_direct_value_addr = arena_map_direct_value_addr, + .map_mmap = arena_map_mmap, + .map_get_unmapped_area = arena_get_unmapped_area, + .map_get_next_key = arena_map_get_next_key, + .map_push_elem = arena_map_push_elem, + .map_peek_elem = arena_map_peek_elem, + .map_pop_elem = arena_map_pop_elem, + .map_lookup_elem = arena_map_lookup_elem, + .map_update_elem = arena_map_update_elem, + .map_delete_elem = arena_map_delete_elem, + .map_check_btf = arena_map_check_btf, + .map_mem_usage = arena_map_mem_usage, + .map_btf_id = &bpf_arena_map_btf_ids[0], +}; + +static u64 clear_lo32(u64 val) +{ + return val & ~(u64)~0U; +} + +/* + * Allocate pages and vmap them into kernel vmalloc area. + * Later the pages will be mmaped into user space vma. + */ +static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt, int node_id) +{ + /* user_vm_end/start are fixed before bpf prog runs */ + long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT; + u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena); + struct page **pages; + long pgoff = 0; + u32 uaddr32; + int ret, i; + + if (page_cnt > page_cnt_max) + return 0; + + if (uaddr) { + if (uaddr & ~PAGE_MASK) + return 0; + pgoff = compute_pgoff(arena, uaddr); + if (pgoff + page_cnt > page_cnt_max) + /* requested address will be outside of user VMA */ + return 0; + } + + /* zeroing is needed, since alloc_pages_bulk_array() only fills in non-zero entries */ + pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return 0; + + guard(mutex)(&arena->lock); + + if (uaddr) + ret = mtree_insert_range(&arena->mt, pgoff, pgoff + page_cnt - 1, + MT_ENTRY, GFP_KERNEL); + else + ret = mtree_alloc_range(&arena->mt, &pgoff, MT_ENTRY, + page_cnt, 0, page_cnt_max - 1, GFP_KERNEL); + if (ret) + goto out_free_pages; + + ret = bpf_map_alloc_pages(&arena->map, GFP_KERNEL | __GFP_ZERO, + node_id, page_cnt, pages); + if (ret) + goto out; + + uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE); + /* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */ + ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32, + kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages); + if (ret) { + for (i = 0; i < page_cnt; i++) + __free_page(pages[i]); + goto out; + } + kvfree(pages); + return clear_lo32(arena->user_vm_start) + uaddr32; +out: + mtree_erase(&arena->mt, pgoff); +out_free_pages: + kvfree(pages); + return 0; +} + +/* + * If page is present in vmalloc area, unmap it from vmalloc area, + * unmap it from all user space vma-s, + * and free it. + */ +static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt) +{ + struct vma_list *vml; + + list_for_each_entry(vml, &arena->vma_list, head) + zap_page_range_single(vml->vma, uaddr, + PAGE_SIZE * page_cnt, NULL); +} + +static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt) +{ + u64 full_uaddr, uaddr_end; + long kaddr, pgoff, i; + struct page *page; + + /* only aligned lower 32-bit are relevant */ + uaddr = (u32)uaddr; + uaddr &= PAGE_MASK; + full_uaddr = clear_lo32(arena->user_vm_start) + uaddr; + uaddr_end = min(arena->user_vm_end, full_uaddr + (page_cnt << PAGE_SHIFT)); + if (full_uaddr >= uaddr_end) + return; + + page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT; + + guard(mutex)(&arena->lock); + + pgoff = compute_pgoff(arena, uaddr); + /* clear range */ + mtree_store_range(&arena->mt, pgoff, pgoff + page_cnt - 1, NULL, GFP_KERNEL); + + if (page_cnt > 1) + /* bulk zap if multiple pages being freed */ + zap_pages(arena, full_uaddr, page_cnt); + + kaddr = bpf_arena_get_kern_vm_start(arena) + uaddr; + for (i = 0; i < page_cnt; i++, kaddr += PAGE_SIZE, full_uaddr += PAGE_SIZE) { + page = vmalloc_to_page((void *)kaddr); + if (!page) + continue; + if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */ + zap_pages(arena, full_uaddr, 1); + vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE); + __free_page(page); + } +} + +__bpf_kfunc_start_defs(); + +__bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt, + int node_id, u64 flags) +{ + struct bpf_map *map = p__map; + struct bpf_arena *arena = container_of(map, struct bpf_arena, map); + + if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt) + return NULL; + + return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id); +} + +__bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt) +{ + struct bpf_map *map = p__map; + struct bpf_arena *arena = container_of(map, struct bpf_arena, map); + + if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign) + return; + arena_free_pages(arena, (long)ptr__ign, page_cnt); +} +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(arena_kfuncs) +BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE) +BTF_KFUNCS_END(arena_kfuncs) + +static const struct btf_kfunc_id_set common_kfunc_set = { + .owner = THIS_MODULE, + .set = &arena_kfuncs, +}; + +static int __init kfunc_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set); +} +late_initcall(kfunc_init); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 134b7979f537..a8ecf69c7754 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2976,6 +2976,17 @@ void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, { } +/* for configs without MMU or 32-bit */ +__weak const struct bpf_map_ops arena_map_ops; +__weak u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena) +{ + return 0; +} +__weak u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena) +{ + return 0; +} + #ifdef CONFIG_BPF_SYSCALL static int __init bpf_global_ma_init(void) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f63f4da4db5e..67923e41a07e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -164,6 +164,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, if (bpf_map_is_offloaded(map)) { return bpf_map_offload_update_elem(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || + map->map_type == BPF_MAP_TYPE_ARENA || map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { return map->ops->map_update_elem(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || @@ -479,6 +480,39 @@ static void bpf_map_release_memcg(struct bpf_map *map) } #endif +int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, + unsigned long nr_pages, struct page **pages) +{ + unsigned long i, j; + struct page *pg; + int ret = 0; +#ifdef CONFIG_MEMCG_KMEM + struct mem_cgroup *memcg, *old_memcg; + + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); +#endif + for (i = 0; i < nr_pages; i++) { + pg = alloc_pages_node(nid, gfp | __GFP_ACCOUNT, 0); + + if (pg) { + pages[i] = pg; + continue; + } + for (j = 0; j < i; j++) + __free_page(pages[j]); + ret = -ENOMEM; + break; + } + +#ifdef CONFIG_MEMCG_KMEM + set_active_memcg(old_memcg); + mem_cgroup_put(memcg); +#endif + return ret; +} + + static int btf_field_cmp(const void *a, const void *b) { const struct btf_field *f1 = a, *f2 = b; @@ -1176,6 +1210,7 @@ static int map_create(union bpf_attr *attr) } if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && + attr->map_type != BPF_MAP_TYPE_ARENA && attr->map_extra != 0) return -EINVAL; @@ -1265,6 +1300,7 @@ static int map_create(union bpf_attr *attr) case BPF_MAP_TYPE_LRU_PERCPU_HASH: case BPF_MAP_TYPE_STRUCT_OPS: case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_ARENA: if (!bpf_token_capable(token, CAP_BPF)) goto put_token; break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bf084c693507..fbcf2e5e635a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18108,6 +18108,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_CGRP_STORAGE: case BPF_MAP_TYPE_QUEUE: case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_ARENA: break; default: verbose(env, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 85ec7fc799d7..e30d943db8a4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1009,6 +1009,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + BPF_MAP_TYPE_ARENA, __MAX_BPF_MAP_TYPE }; @@ -1396,6 +1397,12 @@ enum { /* BPF token FD is passed in a corresponding command's token_fd field */ BPF_F_TOKEN_FD = (1U << 16), + +/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */ + BPF_F_SEGV_ON_FAULT = (1U << 17), + +/* Do not translate kernel bpf_arena pointers to user pointers */ + BPF_F_NO_USER_CONV = (1U << 18), }; /* Flags for BPF_PROG_QUERY. */ @@ -1467,6 +1474,9 @@ union bpf_attr { * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the * number of hash functions (if 0, the bloom filter will default * to using 5 hash functions). + * + * BPF_MAP_TYPE_ARENA - contains the address where user space + * is going to mmap() the arena. It has to be page aligned. */ __u64 map_extra; -- cgit v1.2.3 From 667a86ad9b71d934c444eec193cf3508016f35c5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:00 -0800 Subject: bpf: Disasm support for addr_space_cast instruction. LLVM generates rX = addr_space_cast(rY, dst_addr_space, src_addr_space) instruction when pointers in non-zero address space are used by the bpf program. Recognize this insn in uapi and in bpf disassembler. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20240308010812.89848-3-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 4 ++++ kernel/bpf/disasm.c | 10 ++++++++++ tools/include/uapi/linux/bpf.h | 4 ++++ 3 files changed, 18 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e30d943db8a4..3c42b9f1bada 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1339,6 +1339,10 @@ enum { */ #define BPF_PSEUDO_KFUNC_CALL 2 +enum bpf_addr_space_cast { + BPF_ADDR_SPACE_CAST = 1, +}; + /* flags for BPF_MAP_UPDATE_ELEM command */ enum { BPF_ANY = 0, /* create new element or update existing */ diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 82b2dbdd048f..bd2e2dd04740 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -166,6 +166,12 @@ static bool is_movsx(const struct bpf_insn *insn) (insn->off == 8 || insn->off == 16 || insn->off == 32); } +static bool is_addr_space_cast(const struct bpf_insn *insn) +{ + return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && + insn->off == BPF_ADDR_SPACE_CAST; +} + void print_bpf_insn(const struct bpf_insn_cbs *cbs, const struct bpf_insn *insn, bool allow_ptr_leaks) @@ -184,6 +190,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, insn->code, class == BPF_ALU ? 'w' : 'r', insn->dst_reg, class == BPF_ALU ? 'w' : 'r', insn->dst_reg); + } else if (is_addr_space_cast(insn)) { + verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n", + insn->code, insn->dst_reg, + insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm); } else if (BPF_SRC(insn->code) == BPF_X) { verbose(cbs->private_data, "(%02x) %c%d %s %s%c%d\n", insn->code, class == BPF_ALU ? 'w' : 'r', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e30d943db8a4..3c42b9f1bada 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1339,6 +1339,10 @@ enum { */ #define BPF_PSEUDO_KFUNC_CALL 2 +enum bpf_addr_space_cast { + BPF_ADDR_SPACE_CAST = 1, +}; + /* flags for BPF_MAP_UPDATE_ELEM command */ enum { BPF_ANY = 0, /* create new element or update existing */ -- cgit v1.2.3 From 4d2b56081c32cb33364745da434b88eeaa9d8d8d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:05 -0800 Subject: libbpf: Add __arg_arena to bpf_helpers.h Add __arg_arena to bpf_helpers.h Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240308010812.89848-8-alexei.starovoitov@gmail.com --- tools/lib/bpf/bpf_helpers.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 112b1504e072..cd17f6d0791f 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -193,6 +193,7 @@ enum libbpf_tristate { #define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) #define __arg_nullable __attribute((btf_decl_tag("arg:nullable"))) #define __arg_trusted __attribute((btf_decl_tag("arg:trusted"))) +#define __arg_arena __attribute((btf_decl_tag("arg:arena"))) #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b -- cgit v1.2.3 From 79ff13e99169ddb0e2277e046dbfb112f77dfac5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:06 -0800 Subject: libbpf: Add support for bpf_arena. mmap() bpf_arena right after creation, since the kernel needs to remember the address returned from mmap. This is user_vm_start. LLVM will generate bpf_arena_cast_user() instructions where necessary and JIT will add upper 32-bit of user_vm_start to such pointers. Fix up bpf_map_mmap_sz() to compute mmap size as map->value_size * map->max_entries for arrays and PAGE_SIZE * map->max_entries for arena. Don't set BTF at arena creation time, since it doesn't support it. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240308010812.89848-9-alexei.starovoitov@gmail.com --- tools/lib/bpf/libbpf.c | 47 +++++++++++++++++++++++++++++++++++-------- tools/lib/bpf/libbpf_probes.c | 7 +++++++ 2 files changed, 46 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 567ad367e7aa..34b722071874 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -185,6 +185,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", + [BPF_MAP_TYPE_ARENA] = "arena", }; static const char * const prog_type_name[] = { @@ -1684,7 +1685,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return map; } -static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) +static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) { const long page_sz = sysconf(_SC_PAGE_SIZE); size_t map_sz; @@ -1694,6 +1695,20 @@ static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) return map_sz; } +static size_t bpf_map_mmap_sz(const struct bpf_map *map) +{ + const long page_sz = sysconf(_SC_PAGE_SIZE); + + switch (map->def.type) { + case BPF_MAP_TYPE_ARRAY: + return array_map_mmap_sz(map->def.value_size, map->def.max_entries); + case BPF_MAP_TYPE_ARENA: + return page_sz * map->def.max_entries; + default: + return 0; /* not supported */ + } +} + static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) { void *mmaped; @@ -1847,7 +1862,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", map->name, map->sec_idx, map->sec_offset, def->map_flags); - mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + mmap_sz = bpf_map_mmap_sz(map); map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (map->mmaped == MAP_FAILED) { @@ -5017,6 +5032,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_QUEUE: case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_ARENA: create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -5261,7 +5277,19 @@ retry: if (err < 0) goto err_out; } - + if (map->def.type == BPF_MAP_TYPE_ARENA) { + map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map), + PROT_READ | PROT_WRITE, + map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, + map->fd, 0); + if (map->mmaped == MAP_FAILED) { + err = -errno; + map->mmaped = NULL; + pr_warn("map '%s': failed to mmap arena: %d\n", + map->name, err); + return err; + } + } if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { err = init_map_in_map_slots(obj, map); if (err < 0) @@ -8761,7 +8789,7 @@ static void bpf_map__destroy(struct bpf_map *map) if (map->mmaped) { size_t mmap_sz; - mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + mmap_sz = bpf_map_mmap_sz(map); munmap(map->mmaped, mmap_sz); map->mmaped = NULL; } @@ -9995,11 +10023,14 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size) return libbpf_err(-EBUSY); if (map->mmaped) { - int err; size_t mmap_old_sz, mmap_new_sz; + int err; + + if (map->def.type != BPF_MAP_TYPE_ARRAY) + return -EOPNOTSUPP; - mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); - mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries); + mmap_old_sz = bpf_map_mmap_sz(map); + mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); if (err) { pr_warn("map '%s': failed to resize memory-mapped region: %d\n", @@ -13530,7 +13561,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->map_cnt; i++) { struct bpf_map *map = *s->maps[i].map; - size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + size_t mmap_sz = bpf_map_mmap_sz(map); int prot, map_fd = map->fd; void **mmaped = s->maps[i].mmaped; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index ee9b1dbea9eb..302188122439 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -338,6 +338,13 @@ static int probe_map_create(enum bpf_map_type map_type) key_size = 0; max_entries = 1; break; + case BPF_MAP_TYPE_ARENA: + key_size = 0; + value_size = 0; + max_entries = 1; /* one page */ + opts.map_extra = 0; /* can mmap() at any address */ + opts.map_flags = BPF_F_MMAPABLE; + break; case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PROG_ARRAY: -- cgit v1.2.3 From eed512e8ac64339cfc69da1a6a4b60982cb502ca Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:07 -0800 Subject: bpftool: Recognize arena map type Teach bpftool to recognize arena map type. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240308010812.89848-10-alexei.starovoitov@gmail.com --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +- tools/bpf/bpftool/map.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 3b7ba037af95..9d6a314dfd7a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -55,7 +55,7 @@ MAP COMMANDS | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** -| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** } +| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index f98f7bbea2b1..b89bd792c1d5 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1463,7 +1463,7 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage | bloom_filter | user_ringbuf | cgrp_storage }\n" + " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", -- cgit v1.2.3 From 2e7ba4f8fd1fa879b37db0b738c23ba2af8292ee Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 7 Mar 2024 17:08:08 -0800 Subject: libbpf: Recognize __arena global variables. LLVM automatically places __arena variables into ".arena.1" ELF section. In order to use such global variables bpf program must include definition of arena map in ".maps" section, like: struct { __uint(type, BPF_MAP_TYPE_ARENA); __uint(map_flags, BPF_F_MMAPABLE); __uint(max_entries, 1000); /* number of pages */ __ulong(map_extra, 2ull << 44); /* start of mmap() region */ } arena SEC(".maps"); libbpf recognizes both uses of arena and creates single `struct bpf_map *` instance in libbpf APIs. ".arena.1" ELF section data is used as initial data image, which is exposed through skeleton and bpf_map__initial_value() to the user, if they need to tune it before the load phase. During load phase, this initial image is copied over into mmap()'ed region corresponding to arena, and discarded. Few small checks here and there had to be added to make sure this approach works with bpf_map__initial_value(), mostly due to hard-coded assumption that map->mmaped is set up with mmap() syscall and should be munmap()'ed. For arena, .arena.1 can be (much) smaller than maximum arena size, so this smaller data size has to be tracked separately. Given it is enforced that there is only one arena for entire bpf_object instance, we just keep it in a separate field. This can be generalized if necessary later. All global variables from ".arena.1" section are accessible from user space via skel->arena->name_of_var. For bss/data/rodata the skeleton/libbpf perform the following sequence: 1. addr = mmap(MAP_ANONYMOUS) 2. user space optionally modifies global vars 3. map_fd = bpf_create_map() 4. bpf_update_map_elem(map_fd, addr) // to store values into the kernel 5. mmap(addr, MAP_FIXED, map_fd) after step 5 user spaces see the values it wrote at step 2 at the same addresses arena doesn't support update_map_elem. Hence skeleton/libbpf do: 1. addr = malloc(sizeof SEC ".arena.1") 2. user space optionally modifies global vars 3. map_fd = bpf_create_map(MAP_TYPE_ARENA) 4. real_addr = mmap(map->map_extra, MAP_SHARED | MAP_FIXED, map_fd) 5. memcpy(real_addr, addr) // this will fault-in and allocate pages At the end look and feel of global data vs __arena global data is the same from bpf prog pov. Another complication is: struct { __uint(type, BPF_MAP_TYPE_ARENA); } arena SEC(".maps"); int __arena foo; int bar; ptr1 = &foo; // relocation against ".arena.1" section ptr2 = &arena; // relocation against ".maps" section ptr3 = &bar; // relocation against ".bss" section Fo the kernel ptr1 and ptr2 has point to the same arena's map_fd while ptr3 points to a different global array's map_fd. For the verifier: ptr1->type == unknown_scalar ptr2->type == const_ptr_to_map ptr3->type == ptr_to_map_value After verification, from JIT pov all 3 ptr-s are normal ld_imm64 insns. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240308010812.89848-11-alexei.starovoitov@gmail.com --- tools/bpf/bpftool/gen.c | 13 ++++++ tools/lib/bpf/libbpf.c | 118 +++++++++++++++++++++++++++++++++++++++++++----- tools/lib/bpf/libbpf.h | 2 +- 3 files changed, 120 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index a3d72be347b0..4fa4ade1ce74 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -120,6 +120,12 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; int i, n; + /* recognize hard coded LLVM section name */ + if (strcmp(sec_name, ".arena.1") == 0) { + /* this is the name to use in skeleton */ + snprintf(buf, buf_sz, "arena"); + return true; + } for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) { const char *pfx = pfxs[i]; @@ -250,6 +256,13 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz) { + size_t tmp_sz; + + if (bpf_map__type(map) == BPF_MAP_TYPE_ARENA && bpf_map__initial_value(map, &tmp_sz)) { + snprintf(buf, sz, "arena"); + return true; + } + if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) return false; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 34b722071874..efab29b8935b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -498,6 +498,7 @@ struct bpf_struct_ops { #define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" #define STRUCT_OPS_LINK_SEC ".struct_ops.link" +#define ARENA_SEC ".arena.1" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, @@ -629,6 +630,7 @@ struct elf_state { Elf *elf; Elf64_Ehdr *ehdr; Elf_Data *symbols; + Elf_Data *arena_data; size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct elf_sec_desc *secs; @@ -638,6 +640,7 @@ struct elf_state { int text_shndx; int symbols_shndx; bool has_st_ops; + int arena_data_shndx; }; struct usdt_manager; @@ -697,6 +700,10 @@ struct bpf_object { struct usdt_manager *usdt_man; + struct bpf_map *arena_map; + void *arena_data; + size_t arena_data_sz; + struct kern_feature_cache *feat_cache; char *token_path; int token_fd; @@ -1443,6 +1450,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) elf_end(obj->efile.elf); obj->efile.elf = NULL; obj->efile.symbols = NULL; + obj->efile.arena_data = NULL; zfree(&obj->efile.secs); obj->efile.sec_cnt = 0; @@ -1851,7 +1859,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def->value_size = data_sz; def->max_entries = 1; def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG - ? BPF_F_RDONLY_PROG : 0; + ? BPF_F_RDONLY_PROG : 0; /* failures are fine because of maps like .rodata.str1.1 */ (void) map_fill_btf_type_info(obj, map); @@ -2843,6 +2851,32 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return 0; } +static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, + const char *sec_name, int sec_idx, + void *data, size_t data_sz) +{ + const long page_sz = sysconf(_SC_PAGE_SIZE); + size_t mmap_sz; + + mmap_sz = bpf_map_mmap_sz(obj->arena_map); + if (roundup(data_sz, page_sz) > mmap_sz) { + pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", + sec_name, mmap_sz, data_sz); + return -E2BIG; + } + + obj->arena_data = malloc(data_sz); + if (!obj->arena_data) + return -ENOMEM; + memcpy(obj->arena_data, data, data_sz); + obj->arena_data_sz = data_sz; + + /* make bpf_map__init_value() work for ARENA maps */ + map->mmaped = obj->arena_data; + + return 0; +} + static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, const char *pin_root_path) { @@ -2892,6 +2926,33 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return err; } + for (i = 0; i < obj->nr_maps; i++) { + struct bpf_map *map = &obj->maps[i]; + + if (map->def.type != BPF_MAP_TYPE_ARENA) + continue; + + if (obj->arena_map) { + pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", + map->name, obj->arena_map->name); + return -EINVAL; + } + obj->arena_map = map; + + if (obj->efile.arena_data) { + err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, + obj->efile.arena_data->d_buf, + obj->efile.arena_data->d_size); + if (err) + return err; + } + } + if (obj->efile.arena_data && !obj->arena_map) { + pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", + ARENA_SEC); + return -ENOENT; + } + return 0; } @@ -3771,6 +3832,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->shdr = sh; sec_desc->data = data; obj->efile.has_st_ops = true; + } else if (strcmp(name, ARENA_SEC) == 0) { + obj->efile.arena_data = data; + obj->efile.arena_data_shndx = idx; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -4400,6 +4464,15 @@ static int bpf_program__record_reloc(struct bpf_program *prog, type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); + /* arena data relocation */ + if (shdr_idx == obj->efile.arena_data_shndx) { + reloc_desc->type = RELO_DATA; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = obj->arena_map - obj->maps; + reloc_desc->sym_off = sym->st_value; + return 0; + } + /* generic map reference relocation */ if (type == LIBBPF_MAP_UNSPEC) { if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { @@ -4940,6 +5013,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); return 0; } + err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; @@ -5289,6 +5363,10 @@ retry: map->name, err); return err; } + if (obj->arena_data) { + memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); + zfree(&obj->arena_data); + } } if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { err = init_map_in_map_slots(obj, map); @@ -8786,13 +8864,9 @@ static void bpf_map__destroy(struct bpf_map *map) zfree(&map->init_slots); map->init_slots_sz = 0; - if (map->mmaped) { - size_t mmap_sz; - - mmap_sz = bpf_map_mmap_sz(map); - munmap(map->mmaped, mmap_sz); - map->mmaped = NULL; - } + if (map->mmaped && map->mmaped != map->obj->arena_data) + munmap(map->mmaped, bpf_map_mmap_sz(map)); + map->mmaped = NULL; if (map->st_ops) { zfree(&map->st_ops->data); @@ -8852,6 +8926,8 @@ void bpf_object__close(struct bpf_object *obj) if (obj->token_fd > 0) close(obj->token_fd); + zfree(&obj->arena_data); + free(obj); } @@ -10063,18 +10139,26 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size) { + size_t actual_sz; + if (map->obj->loaded || map->reused) return libbpf_err(-EBUSY); - if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || - size != map->def.value_size) + if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) + return libbpf_err(-EINVAL); + + if (map->def.type == BPF_MAP_TYPE_ARENA) + actual_sz = map->obj->arena_data_sz; + else + actual_sz = map->def.value_size; + if (size != actual_sz) return libbpf_err(-EINVAL); memcpy(map->mmaped, data, size); return 0; } -void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) +void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize) { if (bpf_map__is_struct_ops(map)) { if (psize) @@ -10084,7 +10168,12 @@ void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) if (!map->mmaped) return NULL; - *psize = map->def.value_size; + + if (map->def.type == BPF_MAP_TYPE_ARENA) + *psize = map->obj->arena_data_sz; + else + *psize = map->def.value_size; + return map->mmaped; } @@ -13573,6 +13662,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) continue; } + if (map->def.type == BPF_MAP_TYPE_ARENA) { + *mmaped = map->mmaped; + continue; + } + if (map->def.map_flags & BPF_F_RDONLY_PROG) prot = PROT_READ; else diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5723cbbfcc41..7b510761f545 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1014,7 +1014,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); -LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); +LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize); /** * @brief **bpf_map__is_internal()** tells the caller whether or not the -- cgit v1.2.3 From 204c628730c62de5a0b593008549a9b95aa96b01 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:09 -0800 Subject: bpf: Add helper macro bpf_addr_space_cast() Introduce helper macro bpf_addr_space_cast() that emits: rX = rX instruction with off = BPF_ADDR_SPACE_CAST and encodes dest and src address_space-s into imm32. It's useful with older LLVM that doesn't emit this insn automatically. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20240308010812.89848-12-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/bpf_experimental.h | 43 ++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index bc9a0832ae72..a5b9df38c162 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -343,6 +343,49 @@ l_true: \ asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var)) #endif +/* emit instruction: + * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as + */ +#ifndef bpf_addr_space_cast +#define bpf_addr_space_cast(var, dst_as, src_as)\ + asm volatile(".byte 0xBF; \ + .ifc %[reg], r0; \ + .byte 0x00; \ + .endif; \ + .ifc %[reg], r1; \ + .byte 0x11; \ + .endif; \ + .ifc %[reg], r2; \ + .byte 0x22; \ + .endif; \ + .ifc %[reg], r3; \ + .byte 0x33; \ + .endif; \ + .ifc %[reg], r4; \ + .byte 0x44; \ + .endif; \ + .ifc %[reg], r5; \ + .byte 0x55; \ + .endif; \ + .ifc %[reg], r6; \ + .byte 0x66; \ + .endif; \ + .ifc %[reg], r7; \ + .byte 0x77; \ + .endif; \ + .ifc %[reg], r8; \ + .byte 0x88; \ + .endif; \ + .ifc %[reg], r9; \ + .byte 0x99; \ + .endif; \ + .short %[off]; \ + .long %[as]" \ + : [reg]"+r"(var) \ + : [off]"i"(BPF_ADDR_SPACE_CAST) \ + , [as]"i"((dst_as << 16) | src_as)); +#endif + /* Description * Assert that a conditional expression is true. * Returns -- cgit v1.2.3 From 80a4129fcf20da3c6941411155a9b3b45caa5b8d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:10 -0800 Subject: selftests/bpf: Add unit tests for bpf_arena_alloc/free_pages Add unit tests for bpf_arena_alloc/free_pages() functionality and bpf_arena_common.h with a set of common helpers and macros that is used in this test and the following patches. Also modify test_loader that didn't support running bpf_prog_type_syscall programs. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240308010812.89848-13-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 + tools/testing/selftests/bpf/DENYLIST.s390x | 1 + tools/testing/selftests/bpf/bpf_arena_common.h | 70 ++++++++++ tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + tools/testing/selftests/bpf/progs/verifier_arena.c | 146 +++++++++++++++++++++ tools/testing/selftests/bpf/test_loader.c | 9 +- 6 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/bpf_arena_common.h create mode 100644 tools/testing/selftests/bpf/progs/verifier_arena.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 0445ac38bc07..f9101651747b 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -10,3 +10,4 @@ fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_mu fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95) +verifier_arena # JIT does not support arena diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index cb810a98e78f..aa8a620f3318 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -4,3 +4,4 @@ exceptions # JIT does not support calling kfunc bpf_throw (excepti get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) verifier_iterating_callbacks +verifier_arena # JIT does not support arena diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h new file mode 100644 index 000000000000..bcf195c64a45 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#pragma once + +#ifndef WRITE_ONCE +#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) +#endif + +#ifndef NUMA_NO_NODE +#define NUMA_NO_NODE (-1) +#endif + +#ifndef arena_container_of +#define arena_container_of(ptr, type, member) \ + ({ \ + void __arena *__mptr = (void __arena *)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); \ + }) +#endif + +#ifdef __BPF__ /* when compiled as bpf program */ + +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +/* + * for older kernels try sizeof(struct genradix_node) + * or flexible: + * static inline long __bpf_page_size(void) { + * return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node); + * } + * but generated code is not great. + */ +#endif + +#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM) +#define __arena __attribute__((address_space(1))) +#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ +#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */ +#else +#define __arena +#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1) +#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0) +#endif + +void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt, + int node_id, __u64 flags) __ksym __weak; +void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak; + +#else /* when compiled as user space code */ + +#define __arena +#define __arg_arena +#define cast_kern(ptr) /* nop for user space */ +#define cast_user(ptr) /* nop for user space */ +__weak char arena[1]; + +#ifndef offsetof +#define offsetof(type, member) ((unsigned long)&((type *)0)->member) +#endif + +static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt, + int node_id, __u64 flags) +{ + return NULL; +} +static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) +{ +} + +#endif diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 9c6072a19745..985273832f89 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -4,6 +4,7 @@ #include "cap_helpers.h" #include "verifier_and.skel.h" +#include "verifier_arena.skel.h" #include "verifier_array_access.skel.h" #include "verifier_basic_stack.skel.h" #include "verifier_bitfield_write.skel.h" @@ -118,6 +119,7 @@ static void run_tests_aux(const char *skel_name, #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL) void test_verifier_and(void) { RUN(verifier_and); } +void test_verifier_arena(void) { RUN(verifier_arena); } void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); } void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); } void test_verifier_bounds(void) { RUN(verifier_bounds); } diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c new file mode 100644 index 000000000000..5540b05ff9ee --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include "bpf_misc.h" +#include "bpf_experimental.h" +#include "bpf_arena_common.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/ + __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */ +} arena SEC(".maps"); + +SEC("syscall") +__success __retval(0) +int basic_alloc1(void *ctx) +{ +#if defined(__BPF_FEATURE_ARENA_CAST) + volatile int __arena *page1, *page2, *no_page, *page3; + + page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page1) + return 1; + *page1 = 1; + page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page2) + return 2; + *page2 = 2; + no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (no_page) + return 3; + if (*page1 != 1) + return 4; + if (*page2 != 2) + return 5; + bpf_arena_free_pages(&arena, (void __arena *)page2, 1); + if (*page1 != 1) + return 6; + if (*page2 != 0) /* use-after-free should return 0 */ + return 7; + page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page3) + return 8; + *page3 = 3; + if (page2 != page3) + return 9; + if (*page1 != 1) + return 10; +#endif + return 0; +} + +SEC("syscall") +__success __retval(0) +int basic_alloc2(void *ctx) +{ +#if defined(__BPF_FEATURE_ARENA_CAST) + volatile char __arena *page1, *page2, *page3, *page4; + + page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0); + if (!page1) + return 1; + page2 = page1 + __PAGE_SIZE; + page3 = page1 + __PAGE_SIZE * 2; + page4 = page1 - __PAGE_SIZE; + *page1 = 1; + *page2 = 2; + *page3 = 3; + *page4 = 4; + if (*page1 != 1) + return 1; + if (*page2 != 2) + return 2; + if (*page3 != 0) + return 3; + if (*page4 != 0) + return 4; + bpf_arena_free_pages(&arena, (void __arena *)page1, 2); + if (*page1 != 0) + return 5; + if (*page2 != 0) + return 6; + if (*page3 != 0) + return 7; + if (*page4 != 0) + return 8; +#endif + return 0; +} + +struct bpf_arena___l { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +SEC("syscall") +__success __retval(0) __log_level(2) +int basic_alloc3(void *ctx) +{ + struct bpf_arena___l *ar = (struct bpf_arena___l *)&arena; + volatile char __arena *pages; + + pages = bpf_arena_alloc_pages(&ar->map, NULL, ar->map.max_entries, NUMA_NO_NODE, 0); + if (!pages) + return 1; + return 0; +} + +SEC("iter.s/bpf_map") +__success __log_level(2) +int iter_maps1(struct bpf_iter__bpf_map *ctx) +{ + struct bpf_map *map = ctx->map; + + if (!map) + return 0; + bpf_arena_alloc_pages(map, NULL, map->max_entries, 0, 0); + return 0; +} + +SEC("iter.s/bpf_map") +__failure __msg("expected pointer to STRUCT bpf_map") +int iter_maps2(struct bpf_iter__bpf_map *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + + bpf_arena_alloc_pages((void *)seq, NULL, 1, 0, 0); + return 0; +} + +SEC("iter.s/bpf_map") +__failure __msg("untrusted_ptr_bpf_map") +int iter_maps3(struct bpf_iter__bpf_map *ctx) +{ + struct bpf_map *map = ctx->map; + + if (!map) + return 0; + bpf_arena_alloc_pages(map->inner_map_meta, NULL, map->max_entries, 0, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index ba57601c2a4d..524c38e9cde4 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -501,7 +501,7 @@ static bool is_unpriv_capable_map(struct bpf_map *map) } } -static int do_prog_test_run(int fd_prog, int *retval) +static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts) { __u8 tmp_out[TEST_DATA_LEN << 2] = {}; __u8 tmp_in[TEST_DATA_LEN] = {}; @@ -514,6 +514,10 @@ static int do_prog_test_run(int fd_prog, int *retval) .repeat = 1, ); + if (empty_opts) { + memset(&topts, 0, sizeof(struct bpf_test_run_opts)); + topts.sz = sizeof(struct bpf_test_run_opts); + } err = bpf_prog_test_run_opts(fd_prog, &topts); saved_errno = errno; @@ -649,7 +653,8 @@ void run_subtest(struct test_loader *tester, } } - do_prog_test_run(bpf_program__fd(tprog), &retval); + do_prog_test_run(bpf_program__fd(tprog), &retval, + bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); if (retval != subspec->retval && subspec->retval != POINTER_VALUE) { PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval); goto tobj_cleanup; -- cgit v1.2.3 From 9f2c156f90a422b4897a8c2831076a96a31413d1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:11 -0800 Subject: selftests/bpf: Add bpf_arena_list test. bpf_arena_alloc.h - implements page_frag allocator as a bpf program. bpf_arena_list.h - doubly linked link list as a bpf program. Compiled as a bpf program and as native C code. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240308010812.89848-14-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/bpf_arena_alloc.h | 67 ++++++++++++++++ tools/testing/selftests/bpf/bpf_arena_list.h | 92 ++++++++++++++++++++++ .../testing/selftests/bpf/prog_tests/arena_list.c | 68 ++++++++++++++++ tools/testing/selftests/bpf/progs/arena_list.c | 87 ++++++++++++++++++++ 4 files changed, 314 insertions(+) create mode 100644 tools/testing/selftests/bpf/bpf_arena_alloc.h create mode 100644 tools/testing/selftests/bpf/bpf_arena_list.h create mode 100644 tools/testing/selftests/bpf/prog_tests/arena_list.c create mode 100644 tools/testing/selftests/bpf/progs/arena_list.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_arena_alloc.h b/tools/testing/selftests/bpf/bpf_arena_alloc.h new file mode 100644 index 000000000000..c27678299e0c --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_arena_alloc.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#pragma once +#include "bpf_arena_common.h" + +#ifndef __round_mask +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#endif +#ifndef round_up +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#endif + +#ifdef __BPF__ +#define NR_CPUS (sizeof(struct cpumask) * 8) + +static void __arena * __arena page_frag_cur_page[NR_CPUS]; +static int __arena page_frag_cur_offset[NR_CPUS]; + +/* Simple page_frag allocator */ +static inline void __arena* bpf_alloc(unsigned int size) +{ + __u64 __arena *obj_cnt; + __u32 cpu = bpf_get_smp_processor_id(); + void __arena *page = page_frag_cur_page[cpu]; + int __arena *cur_offset = &page_frag_cur_offset[cpu]; + int offset; + + size = round_up(size, 8); + if (size >= PAGE_SIZE - 8) + return NULL; + if (!page) { +refill: + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page) + return NULL; + cast_kern(page); + page_frag_cur_page[cpu] = page; + *cur_offset = PAGE_SIZE - 8; + obj_cnt = page + PAGE_SIZE - 8; + *obj_cnt = 0; + } else { + cast_kern(page); + obj_cnt = page + PAGE_SIZE - 8; + } + + offset = *cur_offset - size; + if (offset < 0) + goto refill; + + (*obj_cnt)++; + *cur_offset = offset; + return page + offset; +} + +static inline void bpf_free(void __arena *addr) +{ + __u64 __arena *obj_cnt; + + addr = (void __arena *)(((long)addr) & ~(PAGE_SIZE - 1)); + obj_cnt = addr + PAGE_SIZE - 8; + if (--(*obj_cnt) == 0) + bpf_arena_free_pages(&arena, addr, 1); +} +#else +static inline void __arena* bpf_alloc(unsigned int size) { return NULL; } +static inline void bpf_free(void __arena *addr) {} +#endif diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h new file mode 100644 index 000000000000..b99b9f408eff --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_arena_list.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#pragma once +#include "bpf_arena_common.h" + +struct arena_list_node; + +typedef struct arena_list_node __arena arena_list_node_t; + +struct arena_list_node { + arena_list_node_t *next; + arena_list_node_t * __arena *pprev; +}; + +struct arena_list_head { + struct arena_list_node __arena *first; +}; +typedef struct arena_list_head __arena arena_list_head_t; + +#define list_entry(ptr, type, member) arena_container_of(ptr, type, member) + +#define list_entry_safe(ptr, type, member) \ + ({ typeof(*ptr) * ___ptr = (ptr); \ + ___ptr ? ({ cast_kern(___ptr); list_entry(___ptr, type, member); }) : NULL; \ + }) + +#ifndef __BPF__ +static inline void *bpf_iter_num_new(struct bpf_iter_num *it, int i, int j) { return NULL; } +static inline void bpf_iter_num_destroy(struct bpf_iter_num *it) {} +static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; } +#define cond_break ({}) +#endif + +/* Safely walk link list elements. Deletion of elements is allowed. */ +#define list_for_each_entry(pos, head, member) \ + for (void * ___tmp = (pos = list_entry_safe((head)->first, \ + typeof(*(pos)), member), \ + (void *)0); \ + pos && ({ ___tmp = (void *)pos->member.next; 1; }); \ + cond_break, \ + pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member)) + +static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h) +{ + arena_list_node_t *first = h->first, * __arena *tmp; + + cast_user(first); + cast_kern(n); + WRITE_ONCE(n->next, first); + cast_kern(first); + if (first) { + tmp = &n->next; + cast_user(tmp); + WRITE_ONCE(first->pprev, tmp); + } + cast_user(n); + WRITE_ONCE(h->first, n); + + tmp = &h->first; + cast_user(tmp); + cast_kern(n); + WRITE_ONCE(n->pprev, tmp); +} + +static inline void __list_del(arena_list_node_t *n) +{ + arena_list_node_t *next = n->next, *tmp; + arena_list_node_t * __arena *pprev = n->pprev; + + cast_user(next); + cast_kern(pprev); + tmp = *pprev; + cast_kern(tmp); + WRITE_ONCE(tmp, next); + if (next) { + cast_user(pprev); + cast_kern(next); + WRITE_ONCE(next->pprev, pprev); + } +} + +#define POISON_POINTER_DELTA 0 + +#define LIST_POISON1 ((void __arena *) 0x100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void __arena *) 0x122 + POISON_POINTER_DELTA) + +static inline void list_del(arena_list_node_t *n) +{ + __list_del(n); + n->next = LIST_POISON1; + n->pprev = LIST_POISON2; +} diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c new file mode 100644 index 000000000000..e61886debab1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include + +#define PAGE_SIZE 4096 + +#include "bpf_arena_list.h" +#include "arena_list.skel.h" + +struct elem { + struct arena_list_node node; + __u64 value; +}; + +static int list_sum(struct arena_list_head *head) +{ + struct elem __arena *n; + int sum = 0; + + list_for_each_entry(n, head, node) + sum += n->value; + return sum; +} + +static void test_arena_list_add_del(int cnt) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct arena_list *skel; + int expected_sum = (u64)cnt * (cnt - 1) / 2; + int ret, sum; + + skel = arena_list__open_and_load(); + if (!ASSERT_OK_PTR(skel, "arena_list__open_and_load")) + return; + + skel->bss->cnt = cnt; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_add), &opts); + ASSERT_OK(ret, "ret_add"); + ASSERT_OK(opts.retval, "retval"); + if (skel->bss->skip) { + printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__); + test__skip(); + goto out; + } + sum = list_sum(skel->bss->list_head); + ASSERT_EQ(sum, expected_sum, "sum of elems"); + ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems"); + ASSERT_EQ(skel->arena->test_val, cnt + 1, "num of elems"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_del), &opts); + ASSERT_OK(ret, "ret_del"); + sum = list_sum(skel->bss->list_head); + ASSERT_EQ(sum, 0, "sum of list elems after del"); + ASSERT_EQ(skel->bss->list_sum, expected_sum, "sum of list elems computed by prog"); + ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems"); +out: + arena_list__destroy(skel); +} + +void test_arena_list(void) +{ + if (test__start_subtest("arena_list_1")) + test_arena_list_add_del(1); + if (test__start_subtest("arena_list_1000")) + test_arena_list_add_del(1000); +} diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c new file mode 100644 index 000000000000..cd35b8448435 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/arena_list.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include +#include "bpf_experimental.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 100); /* number of pages */ +#ifdef __TARGET_ARCH_arm64 + __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */ +#else + __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */ +#endif +} arena SEC(".maps"); + +#include "bpf_arena_alloc.h" +#include "bpf_arena_list.h" + +struct elem { + struct arena_list_node node; + __u64 value; +}; + +struct arena_list_head __arena *list_head; +int list_sum; +int cnt; +bool skip = false; + +#ifdef __BPF_FEATURE_ARENA_CAST +long __arena arena_sum; +int __arena test_val = 1; +struct arena_list_head __arena global_head; +#else +long arena_sum SEC(".arena.1"); +int test_val SEC(".arena.1"); +#endif + +int zero; + +SEC("syscall") +int arena_list_add(void *ctx) +{ +#ifdef __BPF_FEATURE_ARENA_CAST + __u64 i; + + list_head = &global_head; + + for (i = zero; i < cnt; cond_break, i++) { + struct elem __arena *n = bpf_alloc(sizeof(*n)); + + test_val++; + n->value = i; + arena_sum += i; + list_add_head(&n->node, list_head); + } +#else + skip = true; +#endif + return 0; +} + +SEC("syscall") +int arena_list_del(void *ctx) +{ +#ifdef __BPF_FEATURE_ARENA_CAST + struct elem __arena *n; + int sum = 0; + + arena_sum = 0; + list_for_each_entry(n, list_head, node) { + sum += n->value; + arena_sum += n->value; + list_del(&n->node); + bpf_free(n); + } + list_sum = sum; +#else + skip = true; +#endif + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 8df839ae23b8c581bdac4b6970d029d65a415852 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:12 -0800 Subject: selftests/bpf: Add bpf_arena_htab test. bpf_arena_htab.h - hash table implemented as bpf program Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240308010812.89848-15-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 + tools/testing/selftests/bpf/DENYLIST.s390x | 1 + tools/testing/selftests/bpf/bpf_arena_htab.h | 100 +++++++++++++++++++++ .../testing/selftests/bpf/prog_tests/arena_htab.c | 88 ++++++++++++++++++ tools/testing/selftests/bpf/progs/arena_htab.c | 48 ++++++++++ tools/testing/selftests/bpf/progs/arena_htab_asm.c | 5 ++ 6 files changed, 243 insertions(+) create mode 100644 tools/testing/selftests/bpf/bpf_arena_htab.h create mode 100644 tools/testing/selftests/bpf/prog_tests/arena_htab.c create mode 100644 tools/testing/selftests/bpf/progs/arena_htab.c create mode 100644 tools/testing/selftests/bpf/progs/arena_htab_asm.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index f9101651747b..d8ade15e2789 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -11,3 +11,4 @@ fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_mu fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95) verifier_arena # JIT does not support arena +arena_htab # JIT does not support arena diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index aa8a620f3318..f4a2f66a683d 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -5,3 +5,4 @@ get_stack_raw_tp # user_stack corrupted user stack stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) verifier_iterating_callbacks verifier_arena # JIT does not support arena +arena_htab # JIT does not support arena diff --git a/tools/testing/selftests/bpf/bpf_arena_htab.h b/tools/testing/selftests/bpf/bpf_arena_htab.h new file mode 100644 index 000000000000..acc01a876668 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_arena_htab.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#pragma once +#include +#include "bpf_arena_alloc.h" +#include "bpf_arena_list.h" + +struct htab_bucket { + struct arena_list_head head; +}; +typedef struct htab_bucket __arena htab_bucket_t; + +struct htab { + htab_bucket_t *buckets; + int n_buckets; +}; +typedef struct htab __arena htab_t; + +static inline htab_bucket_t *__select_bucket(htab_t *htab, __u32 hash) +{ + htab_bucket_t *b = htab->buckets; + + cast_kern(b); + return &b[hash & (htab->n_buckets - 1)]; +} + +static inline arena_list_head_t *select_bucket(htab_t *htab, __u32 hash) +{ + return &__select_bucket(htab, hash)->head; +} + +struct hashtab_elem { + int hash; + int key; + int value; + struct arena_list_node hash_node; +}; +typedef struct hashtab_elem __arena hashtab_elem_t; + +static hashtab_elem_t *lookup_elem_raw(arena_list_head_t *head, __u32 hash, int key) +{ + hashtab_elem_t *l; + + list_for_each_entry(l, head, hash_node) + if (l->hash == hash && l->key == key) + return l; + + return NULL; +} + +static int htab_hash(int key) +{ + return key; +} + +__weak int htab_lookup_elem(htab_t *htab __arg_arena, int key) +{ + hashtab_elem_t *l_old; + arena_list_head_t *head; + + cast_kern(htab); + head = select_bucket(htab, key); + l_old = lookup_elem_raw(head, htab_hash(key), key); + if (l_old) + return l_old->value; + return 0; +} + +__weak int htab_update_elem(htab_t *htab __arg_arena, int key, int value) +{ + hashtab_elem_t *l_new = NULL, *l_old; + arena_list_head_t *head; + + cast_kern(htab); + head = select_bucket(htab, key); + l_old = lookup_elem_raw(head, htab_hash(key), key); + + l_new = bpf_alloc(sizeof(*l_new)); + if (!l_new) + return -ENOMEM; + l_new->key = key; + l_new->hash = htab_hash(key); + l_new->value = value; + + list_add_head(&l_new->hash_node, head); + if (l_old) { + list_del(&l_old->hash_node); + bpf_free(l_old); + } + return 0; +} + +void htab_init(htab_t *htab) +{ + void __arena *buckets = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0); + + cast_user(buckets); + htab->buckets = buckets; + htab->n_buckets = 2 * PAGE_SIZE / sizeof(struct htab_bucket); +} diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c new file mode 100644 index 000000000000..0766702de846 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include + +#include "arena_htab_asm.skel.h" +#include "arena_htab.skel.h" + +#define PAGE_SIZE 4096 + +#include "bpf_arena_htab.h" + +static void test_arena_htab_common(struct htab *htab) +{ + int i; + + printf("htab %p buckets %p n_buckets %d\n", htab, htab->buckets, htab->n_buckets); + ASSERT_OK_PTR(htab->buckets, "htab->buckets shouldn't be NULL"); + for (i = 0; htab->buckets && i < 16; i += 4) { + /* + * Walk htab buckets and link lists since all pointers are correct, + * though they were written by bpf program. + */ + int val = htab_lookup_elem(htab, i); + + ASSERT_EQ(i, val, "key == value"); + } +} + +static void test_arena_htab_llvm(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct arena_htab *skel; + struct htab *htab; + size_t arena_sz; + void *area; + int ret; + + skel = arena_htab__open_and_load(); + if (!ASSERT_OK_PTR(skel, "arena_htab__open_and_load")) + return; + + area = bpf_map__initial_value(skel->maps.arena, &arena_sz); + /* fault-in a page with pgoff == 0 as sanity check */ + *(volatile int *)area = 0x55aa; + + /* bpf prog will allocate more pages */ + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_llvm), &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + if (skel->bss->skip) { + printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__); + test__skip(); + goto out; + } + htab = skel->bss->htab_for_user; + test_arena_htab_common(htab); +out: + arena_htab__destroy(skel); +} + +static void test_arena_htab_asm(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct arena_htab_asm *skel; + struct htab *htab; + int ret; + + skel = arena_htab_asm__open_and_load(); + if (!ASSERT_OK_PTR(skel, "arena_htab_asm__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_asm), &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + htab = skel->bss->htab_for_user; + test_arena_htab_common(htab); + arena_htab_asm__destroy(skel); +} + +void test_arena_htab(void) +{ + if (test__start_subtest("arena_htab_llvm")) + test_arena_htab_llvm(); + if (test__start_subtest("arena_htab_asm")) + test_arena_htab_asm(); +} diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c new file mode 100644 index 000000000000..b7bb712cacfd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/arena_htab.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include +#include "bpf_experimental.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 100); /* number of pages */ +} arena SEC(".maps"); + +#include "bpf_arena_htab.h" + +void __arena *htab_for_user; +bool skip = false; + +int zero = 0; + +SEC("syscall") +int arena_htab_llvm(void *ctx) +{ +#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM) + struct htab __arena *htab; + __u64 i; + + htab = bpf_alloc(sizeof(*htab)); + cast_kern(htab); + htab_init(htab); + + /* first run. No old elems in the table */ + for (i = zero; i < 1000; i++) + htab_update_elem(htab, i, i); + + /* should replace all elems with new ones */ + for (i = zero; i < 1000; i++) + htab_update_elem(htab, i, i); + cast_user(htab); + htab_for_user = htab; +#else + skip = true; +#endif + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/arena_htab_asm.c b/tools/testing/selftests/bpf/progs/arena_htab_asm.c new file mode 100644 index 000000000000..6cd70ea12f0d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/arena_htab_asm.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#define BPF_ARENA_FORCE_ASM +#define arena_htab_llvm arena_htab_asm +#include "arena_htab.c" -- cgit v1.2.3 From 379b97bbf02feecae5ce870bc0c67e3d723e30f5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 11 Mar 2024 22:10:23 +0100 Subject: selftests/bpf: Add kprobe multi triggering benchmarks Adding kprobe multi triggering benchmarks. It's useful now to bench new fprobe implementation and might be useful later as well. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240311211023.590321-1-jolsa@kernel.org --- tools/testing/selftests/bpf/bench.c | 4 +++ tools/testing/selftests/bpf/benchs/bench_trigger.c | 32 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/trigger_bench.c | 14 ++++++++++ 3 files changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 8e02645afe31..b2b4c391eb0a 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -496,6 +496,8 @@ extern const struct bench bench_trig_tp; extern const struct bench bench_trig_rawtp; extern const struct bench bench_trig_kprobe; extern const struct bench bench_trig_kretprobe; +extern const struct bench bench_trig_kprobe_multi; +extern const struct bench bench_trig_kretprobe_multi; extern const struct bench bench_trig_fentry; extern const struct bench bench_trig_fexit; extern const struct bench bench_trig_fentry_sleep; @@ -542,6 +544,8 @@ static const struct bench *benchs[] = { &bench_trig_rawtp, &bench_trig_kprobe, &bench_trig_kretprobe, + &bench_trig_kprobe_multi, + &bench_trig_kretprobe_multi, &bench_trig_fentry, &bench_trig_fexit, &bench_trig_fentry_sleep, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 3f24be9cfcb9..ace0d1011a8e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -91,6 +91,18 @@ static void trigger_kretprobe_setup(void) attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); } +static void trigger_kprobe_multi_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi); +} + +static void trigger_kretprobe_multi_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi); +} + static void trigger_fentry_setup(void) { setup_ctx(); @@ -283,6 +295,26 @@ const struct bench bench_trig_kretprobe = { .report_final = hits_drops_report_final, }; +const struct bench bench_trig_kprobe_multi = { + .name = "trig-kprobe-multi", + .validate = trigger_validate, + .setup = trigger_kprobe_multi_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_kretprobe_multi = { + .name = "trig-kretprobe-multi", + .validate = trigger_validate, + .setup = trigger_kretprobe_multi_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + const struct bench bench_trig_fentry = { .name = "trig-fentry", .validate = trigger_validate, diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index eb94f9c0186f..5fda43901033 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -40,6 +40,20 @@ int bench_trigger_kretprobe(void *ctx) return 0; } +SEC("kprobe.multi/" SYS_PREFIX "sys_getpgid") +int bench_trigger_kprobe_multi(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + +SEC("kretprobe.multi/" SYS_PREFIX "sys_getpgid") +int bench_trigger_kretprobe_multi(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + SEC("fentry/" SYS_PREFIX "sys_getpgid") int bench_trigger_fentry(void *ctx) { -- cgit v1.2.3 From d8a21070b6e168d3800c2962a574f16020dd2951 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 11 Mar 2024 18:23:06 +0200 Subject: nexthop: Fix out-of-bounds access during attribute validation Passing a maximum attribute type to nlmsg_parse() that is larger than the size of the passed policy will result in an out-of-bounds access [1] when the attribute type is used as an index into the policy array. Fix by setting the maximum attribute type according to the policy size, as is already done for RTM_NEWNEXTHOP messages. Add a test case that triggers the bug. No regressions in fib nexthops tests: # ./fib_nexthops.sh [...] Tests passed: 236 Tests failed: 0 [1] BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x1e53/0x2940 Read of size 1 at addr ffffffff99ab4d20 by task ip/610 CPU: 3 PID: 610 Comm: ip Not tainted 6.8.0-rc7-custom-gd435d6e3e161 #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 Call Trace: dump_stack_lvl+0x8f/0xe0 print_report+0xcf/0x670 kasan_report+0xd8/0x110 __nla_validate_parse+0x1e53/0x2940 __nla_parse+0x40/0x50 rtm_del_nexthop+0x1bd/0x400 rtnetlink_rcv_msg+0x3cc/0xf20 netlink_rcv_skb+0x170/0x440 netlink_unicast+0x540/0x820 netlink_sendmsg+0x8d3/0xdb0 ____sys_sendmsg+0x31f/0xa60 ___sys_sendmsg+0x13a/0x1e0 __sys_sendmsg+0x11c/0x1f0 do_syscall_64+0xc5/0x1d0 entry_SYSCALL_64_after_hwframe+0x63/0x6b [...] The buggy address belongs to the variable: rtm_nh_policy_del+0x20/0x40 Fixes: 2118f9390d83 ("net: nexthop: Adjust netlink policy parsing for a new attribute") Reported-by: Eric Dumazet Closes: https://lore.kernel.org/netdev/CANn89i+UNcG0PJMW5X7gOMunF38ryMh=L1aeZUKH3kL4UdUqag@mail.gmail.com/ Reported-by: syzbot+65bb09a7208ce3d4a633@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/00000000000088981b06133bc07b@google.com/ Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240311162307.545385-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 29 +++++++++++++++++------------ tools/testing/selftests/net/fib_nexthops.sh | 6 ++++++ 2 files changed, 23 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d1669000ac1c..4c6915bff31e 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3244,8 +3244,8 @@ static int nh_valid_get_del_req(const struct nlmsghdr *nlh, static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_del)]; struct net *net = sock_net(skb->sk); - struct nlattr *tb[NHA_MAX + 1]; struct nl_info nlinfo = { .nlh = nlh, .nl_net = net, @@ -3255,8 +3255,9 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u32 id; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, - rtm_nh_policy_del, extack); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_del) - 1, rtm_nh_policy_del, + extack); if (err < 0) return err; @@ -3277,16 +3278,17 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; struct net *net = sock_net(in_skb->sk); - struct nlattr *tb[NHA_MAX + 1]; struct sk_buff *skb = NULL; struct nexthop *nh; u32 op_flags; int err; u32 id; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, - rtm_nh_policy_get, extack); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_get) - 1, rtm_nh_policy_get, + extack); if (err < 0) return err; @@ -3405,10 +3407,11 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, struct nh_dump_filter *filter, struct netlink_callback *cb) { - struct nlattr *tb[NHA_MAX + 1]; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; int err; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_dump) - 1, rtm_nh_policy_dump, cb->extack); if (err < 0) return err; @@ -3548,10 +3551,11 @@ static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh, struct netlink_callback *cb) { struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)]; - struct nlattr *tb[NHA_MAX + 1]; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)]; int err; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1, rtm_nh_policy_dump_bucket, NULL); if (err < 0) return err; @@ -3716,10 +3720,11 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, u32 *id, u16 *bucket_index, struct netlink_ext_ack *extack) { - struct nlattr *tb[NHA_MAX + 1]; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get_bucket)]; int err; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1, rtm_nh_policy_get_bucket, extack); if (err < 0) return err; diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index d5a281aadbac..ac0b2c6a5761 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -2066,6 +2066,12 @@ basic() run_cmd "$IP nexthop get id 1" log_test $? 2 "Nexthop get on non-existent id" + run_cmd "$IP nexthop del id 1" + log_test $? 2 "Nexthop del with non-existent id" + + run_cmd "$IP nexthop del id 1 group 1/2/3/4/5/6/7/8" + log_test $? 2 "Nexthop del with non-existent id and extra attributes" + # attempt to create nh without a device or gw - fails run_cmd "$IP nexthop add id 1" log_test $? 2 "Nexthop with no device or gateway" -- cgit v1.2.3