diff options
Diffstat (limited to 'tools/testing/selftests')
357 files changed, 21679 insertions, 2155 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9018f45d631d..d9c283503159 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -88,10 +88,10 @@ endif # of the targets gets built. FORCE_TARGETS ?= -# Clear LDFLAGS and MAKEFLAGS if called from main -# Makefile to avoid test build failures when test -# Makefile doesn't have explicit build rules. -ifeq (1,$(MAKELEVEL)) +# Clear LDFLAGS and MAKEFLAGS when implicit rules are missing. This provides +# implicit rules to sub-test Makefiles which avoids build failures in test +# Makefile that don't have explicit build rules. +ifeq (,$(LINK.c)) override LDFLAGS = override MAKEFLAGS = endif @@ -206,6 +206,7 @@ KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH)) # Avoid changing the rest of the logic here and lib.mk. INSTALL_PATH := $(KSFT_INSTALL_PATH) ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh +TEST_LIST := $(INSTALL_PATH)/kselftest-list.txt install: all ifdef INSTALL_PATH @@ -214,6 +215,8 @@ ifdef INSTALL_PATH install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/ + install -m 744 run_kselftest.sh $(INSTALL_PATH)/ + rm -f $(TEST_LIST) @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -222,33 +225,18 @@ ifdef INSTALL_PATH ret=$$((ret * $$?)); \ done; exit $$ret; - @# Ask all targets to emit their test scripts - echo "#!/bin/sh" > $(ALL_SCRIPT) - echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT) - echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT) - echo ". ./kselftest/runner.sh" >> $(ALL_SCRIPT) - echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) - echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT) - echo " logfile=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT) - echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT) - echo "fi" >> $(ALL_SCRIPT) - @# While building run_kselftest.sh skip also non-existent TARGET dirs: + @# Ask all targets to emit their test scripts + @# While building kselftest-list.text skip also non-existent TARGET dirs: @# they could be the result of a build failure and should NOT be @# included in the generated runlist. for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \ - echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ - echo "cd $$TARGET" >> $(ALL_SCRIPT); \ - echo -n "run_many" >> $(ALL_SCRIPT); \ echo -n "Emit Tests for $$TARGET\n"; \ - $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ - echo "" >> $(ALL_SCRIPT); \ - echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ + $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \ + -C $$TARGET emit_tests >> $(TEST_LIST); \ done; - - chmod u+x $(ALL_SCRIPT) else $(error Error: set INSTALL_PATH to use install) endif diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 242635d79035..c9fa141ebdcc 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -417,6 +417,9 @@ int main(int argc, char *argv[]) /* Register SIGSEGV handler */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(20); + /* Buffer by byte tests */ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR), "Check buffer correctness by byte with sync err mode and mmap memory\n"); diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index 97bebdecd29e..43bd94f853ba 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -163,6 +163,9 @@ int main(int argc, char *argv[]) mte_register_signal(SIGSEGV, mte_default_handler); mte_register_signal(SIGBUS, mte_default_handler); + /* Set test plan */ + ksft_set_plan(12); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), "Check child anonymous memory with private mapping, precise mode and mmap memory\n"); evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index bc41ae630c86..3b23c4d61d38 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -140,6 +140,10 @@ int main(int argc, char *argv[]) /* Register signal handlers */ mte_register_signal(SIGBUS, mte_default_handler); mte_register_signal(SIGSEGV, mte_default_handler); + + /* Set test plan */ + ksft_set_plan(4); + /* Enable KSM */ mte_ksm_setup(); diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 33b13b86199b..a04b12c21ac9 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -205,7 +205,11 @@ int main(int argc, char *argv[]) mte_register_signal(SIGBUS, mte_default_handler); mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(22); + mte_enable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index 94d245a0ed56..deaef1f61076 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -170,6 +170,9 @@ int main(int argc, char *argv[]) /* Register SIGSEGV handler */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(4); + evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR), "Check an included tag value with sync mode\n"); evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR), diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 594e98e76880..4bfa80f2a8c3 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -92,9 +92,13 @@ int main(int argc, char *argv[]) err = mte_default_setup(); if (err) return err; + /* Register signal handlers */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(4); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 9a0946ddb705..3ab1200e172f 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -13,9 +13,7 @@ test_verifier_log feature test_sock test_sock_addr -test_sock_fields urandom_read -test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fc946b7ac288..542768f5195b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -33,9 +33,9 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_verifier_log test_dev_cgroup test_tcpbpf_user \ - test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ + test_sock test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ - test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \ + test_netcnt test_tcpnotify_user test_sysctl \ test_progs-no_alu32 \ test_current_pid_tgid_new_ns @@ -68,7 +68,8 @@ TEST_PROGS := test_kmod.sh \ test_tc_edt.sh \ test_xdping.sh \ test_bpftool_build.sh \ - test_bpftool.sh + test_bpftool.sh \ + test_bpftool_metadata.sh \ TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ @@ -133,7 +134,7 @@ $(OUTPUT)/%:%.c $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id + $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1 $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) @@ -176,6 +177,11 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ OUTPUT=$(BUILD_DIR)/bpftool/ \ prefix= DESTDIR=$(SCRATCH_DIR)/ install + $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation + $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ + -C $(BPFTOOLDIR)/Documentation \ + OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \ + prefix= DESTDIR=$(SCRATCH_DIR)/ install $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ @@ -316,7 +322,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ $$(INCLUDE_DIR)/vmlinux.h \ - $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ $(TRUNNER_BPF_LDFLAGS)) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index e885d351595f..ac9eda830187 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -7,6 +7,44 @@ General instructions on running selftests can be found in Additional information about selftest failures are documented here. +profiler[23] test failures with clang/llvm <12.0.0 +================================================== + +With clang/llvm <12.0.0, the profiler[23] test may fail. +The symptom looks like + +.. code-block:: c + + // r9 is a pointer to map_value + // r7 is a scalar + 17: bf 96 00 00 00 00 00 00 r6 = r9 + 18: 0f 76 00 00 00 00 00 00 r6 += r7 + math between map_value pointer and register with unbounded min value is not allowed + + // the instructions below will not be seen in the verifier log + 19: a5 07 01 00 01 01 00 00 if r7 < 257 goto +1 + 20: bf 96 00 00 00 00 00 00 r6 = r9 + // r6 is used here + +The verifier will reject such code with above error. +At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and +the insn 20 undoes map_value addition. It is currently impossible for the +verifier to understand such speculative pointer arithmetic. +Hence + https://reviews.llvm.org/D85570 +addresses it on the compiler side. It was committed on llvm 12. + +The corresponding C code +.. code-block:: c + + for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { + filepart_length = bpf_probe_read_str(payload, ...); + if (filepart_length <= MAX_PATH) { + barrier_var(filepart_length); // workaround + payload += filepart_length; + } + } + bpf_iter test failures with clang/llvm 10.0.0 ============================================= @@ -43,3 +81,24 @@ This is due to a llvm BPF backend bug. The fix https://reviews.llvm.org/D78466 has been pushed to llvm 10.x release branch and will be available in 10.0.1. The fix is available in llvm 11.0.0 trunk. + +BPF CO-RE-based tests and Clang version +======================================= + +A set of selftests use BPF target-specific built-ins, which might require +bleeding-edge Clang versions (Clang 12 nightly at this time). + +Few sub-tests of core_reloc test suit (part of test_progs test runner) require +the following built-ins, listed with corresponding Clang diffs introducing +them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too +old to support them, they shouldn't cause build failures or runtime test +failures: + + - __builtin_btf_type_id() ([0], [1], [2]); + - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]). + + [0] https://reviews.llvm.org/D74572 + [1] https://reviews.llvm.org/D74668 + [2] https://reviews.llvm.org/D85174 + [3] https://reviews.llvm.org/D83878 + [4] https://reviews.llvm.org/D83242 diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 944ad4721c83..332ed2f7b402 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -311,12 +311,12 @@ extern const struct bench bench_rename_kretprobe; extern const struct bench bench_rename_rawtp; extern const struct bench bench_rename_fentry; extern const struct bench bench_rename_fexit; -extern const struct bench bench_rename_fmodret; extern const struct bench bench_trig_base; extern const struct bench bench_trig_tp; extern const struct bench bench_trig_rawtp; extern const struct bench bench_trig_kprobe; extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; @@ -332,12 +332,12 @@ static const struct bench *benchs[] = { &bench_rename_rawtp, &bench_rename_fentry, &bench_rename_fexit, - &bench_rename_fmodret, &bench_trig_base, &bench_trig_tp, &bench_trig_rawtp, &bench_trig_kprobe, &bench_trig_fentry, + &bench_trig_fentry_sleep, &bench_trig_fmodret, &bench_rb_libbpf, &bench_rb_custom, @@ -462,4 +462,3 @@ int main(int argc, char **argv) return 0; } - diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index e74cff40f4fe..a967674098ad 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -106,12 +106,6 @@ static void setup_fexit() attach_bpf(ctx.skel->progs.prog5); } -static void setup_fmodret() -{ - setup_ctx(); - attach_bpf(ctx.skel->progs.prog6); -} - static void *consumer(void *input) { return NULL; @@ -182,14 +176,3 @@ const struct bench bench_rename_fexit = { .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; - -const struct bench bench_rename_fmodret = { - .name = "rename-fmodret", - .validate = validate, - .setup = setup_fmodret, - .producer_thread = producer, - .consumer_thread = consumer, - .measure = measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 49c22832f216..2a0b6c9885a4 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -90,6 +90,12 @@ static void trigger_fentry_setup() attach_bpf(ctx.skel->progs.bench_trigger_fentry); } +static void trigger_fentry_sleep_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); +} + static void trigger_fmodret_setup() { setup_ctx(); @@ -155,6 +161,17 @@ const struct bench bench_trig_fentry = { .report_final = hits_drops_report_final, }; +const struct bench bench_trig_fentry_sleep = { + .name = "trig-fentry-sleep", + .validate = trigger_validate, + .setup = trigger_fentry_sleep_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + const struct bench bench_trig_fmodret = { .name = "trig-fmodret", .validate = trigger_validate, diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 5bf2fe9b1efa..2915664c335d 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -16,6 +16,7 @@ BPF_PROG(name, args) struct sock_common { unsigned char skc_state; + __u16 skc_num; } __attribute__((preserve_access_index)); enum sk_pacing { @@ -45,6 +46,10 @@ struct inet_connection_sock { __u64 icsk_ca_priv[104 / sizeof(__u64)]; } __attribute__((preserve_access_index)); +struct request_sock { + struct sock_common __req_common; +} __attribute__((preserve_access_index)); + struct tcp_sock { struct inet_connection_sock inet_conn; @@ -115,14 +120,6 @@ enum tcp_ca_event { CA_EVENT_ECN_IS_CE = 5, }; -enum tcp_ca_state { - TCP_CA_Open = 0, - TCP_CA_Disorder = 1, - TCP_CA_CWR = 2, - TCP_CA_Recovery = 3, - TCP_CA_Loss = 4 -}; - struct ack_sample { __u32 pkts_acked; __s32 rtt_us; diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index daeaeb518894..7290401ec172 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -23,7 +23,13 @@ static inline int bpf_flow_load(struct bpf_object **obj, if (ret) return ret; - main_prog = bpf_object__find_program_by_title(*obj, section_name); + main_prog = NULL; + bpf_object__for_each_program(prog, *obj) { + if (strcmp(section_name, bpf_program__section_name(prog)) == 0) { + main_prog = prog; + break; + } + } if (!main_prog) return -1; diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index f56655690f9b..12ee40284da0 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -104,6 +104,43 @@ error_close: return -1; } +int fastopen_connect(int server_fd, const char *data, unsigned int data_len, + int timeout_ms) +{ + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + struct sockaddr_in *addr_in; + int fd, ret; + + if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { + log_err("Failed to get server addr"); + return -1; + } + + addr_in = (struct sockaddr_in *)&addr; + fd = socket(addr_in->sin_family, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (settimeo(fd, timeout_ms)) + goto error_close; + + ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr, + addrlen); + if (ret != data_len) { + log_err("sendto(data, %u) != %d\n", data_len, ret); + goto error_close; + } + + return fd; + +error_close: + save_errno_close(fd); + return -1; +} + static int connect_fd_to_addr(int fd, const struct sockaddr_storage *addr, socklen_t addrlen) diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index c3728f6667e4..7205f8afdba1 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -37,6 +37,8 @@ int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int connect_to_fd(int server_fd, int timeout_ms); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); +int fastopen_connect(int server_fd, const char *data, unsigned int data_len, + int timeout_ms); int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index c548aded6585..52414058a627 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -195,13 +195,13 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, @@ -518,7 +518,7 @@ static struct bpf_align_test tests[] = { * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, + {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, }, }, @@ -561,18 +561,18 @@ static struct bpf_align_test tests[] = { /* Adding 14 makes R6 be (4n+2) */ {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, + {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, + {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, + {20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, }, }, }; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 7375d9a6d242..448885b95eed 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -7,6 +7,7 @@ #include "bpf_iter_task.skel.h" #include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" +#include "bpf_iter_task_btf.skel.h" #include "bpf_iter_tcp4.skel.h" #include "bpf_iter_tcp6.skel.h" #include "bpf_iter_udp4.skel.h" @@ -132,20 +133,118 @@ static void test_task_stack(void) bpf_iter_task_stack__destroy(skel); } +static void *do_nothing(void *arg) +{ + pthread_exit(arg); +} + static void test_task_file(void) { struct bpf_iter_task_file *skel; + pthread_t thread_id; + void *ret; skel = bpf_iter_task_file__open_and_load(); if (CHECK(!skel, "bpf_iter_task_file__open_and_load", "skeleton open_and_load failed\n")) return; + skel->bss->tgid = getpid(); + + if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL), + "pthread_create", "pthread_create failed\n")) + goto done; + do_dummy_read(skel->progs.dump_task_file); + if (CHECK(pthread_join(thread_id, &ret) || ret != NULL, + "pthread_join", "pthread_join failed\n")) + goto done; + + CHECK(skel->bss->count != 0, "check_count", + "invalid non pthread file visit count %d\n", skel->bss->count); + +done: bpf_iter_task_file__destroy(skel); } +#define TASKBUFSZ 32768 + +static char taskbuf[TASKBUFSZ]; + +static int do_btf_read(struct bpf_iter_task_btf *skel) +{ + struct bpf_program *prog = skel->progs.dump_task_struct; + struct bpf_iter_task_btf__bss *bss = skel->bss; + int iter_fd = -1, len = 0, bufleft = TASKBUFSZ; + struct bpf_link *link; + char *buf = taskbuf; + int ret = 0; + + link = bpf_program__attach_iter(prog, NULL); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + return ret; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + do { + len = read(iter_fd, buf, bufleft); + if (len > 0) { + buf += len; + bufleft -= len; + } + } while (len > 0); + + if (bss->skip) { + printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); + ret = 1; + test__skip(); + goto free_link; + } + + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto free_link; + + CHECK(strstr(taskbuf, "(struct task_struct)") == NULL, + "check for btf representation of task_struct in iter data", + "struct task_struct not found"); +free_link: + if (iter_fd > 0) + close(iter_fd); + bpf_link__destroy(link); + return ret; +} + +static void test_task_btf(void) +{ + struct bpf_iter_task_btf__bss *bss; + struct bpf_iter_task_btf *skel; + int ret; + + skel = bpf_iter_task_btf__open_and_load(); + if (CHECK(!skel, "bpf_iter_task_btf__open_and_load", + "skeleton open_and_load failed\n")) + return; + + bss = skel->bss; + + ret = do_btf_read(skel); + if (ret) + goto cleanup; + + if (CHECK(bss->tasks == 0, "check if iterated over tasks", + "no task iteration, did BPF program run?\n")) + goto cleanup; + + CHECK(bss->seq_err != 0, "check for unexpected err", + "bpf_seq_printf_btf returned %ld", bss->seq_err); + +cleanup: + bpf_iter_task_btf__destroy(skel); +} + static void test_tcp4(void) { struct bpf_iter_tcp4 *skel; @@ -331,7 +430,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) struct bpf_map_info map_info = {}; struct bpf_iter_test_kern4 *skel; struct bpf_link *link; - __u32 page_size; + __u32 iter_size; char *buf; skel = bpf_iter_test_kern4__open(); @@ -353,19 +452,19 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) "map_creation failed: %s\n", strerror(errno))) goto free_map1; - /* bpf_seq_printf kernel buffer is one page, so one map + /* bpf_seq_printf kernel buffer is 8 pages, so one map * bpf_seq_write will mostly fill it, and the other map * will partially fill and then trigger overflow and need * bpf_seq_read restart. */ - page_size = sysconf(_SC_PAGE_SIZE); + iter_size = sysconf(_SC_PAGE_SIZE) << 3; if (test_e2big_overflow) { - skel->rodata->print_len = (page_size + 8) / 8; - expected_read_len = 2 * (page_size + 8); + skel->rodata->print_len = (iter_size + 8) / 8; + expected_read_len = 2 * (iter_size + 8); } else if (!ret1) { - skel->rodata->print_len = (page_size - 8) / 8; - expected_read_len = 2 * (page_size - 8); + skel->rodata->print_len = (iter_size - 8) / 8; + expected_read_len = 2 * (iter_size - 8); } else { skel->rodata->print_len = 1; expected_read_len = 2 * 8; @@ -936,6 +1035,8 @@ void test_bpf_iter(void) test_task_stack(); if (test__start_subtest("task_file")) test_task_file(); + if (test__start_subtest("task_btf")) + test_task_btf(); if (test__start_subtest("tcp4")) test_tcp4(); if (test__start_subtest("tcp6")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index e9f2f12ba06b..e698ee6bb6c2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -49,6 +49,7 @@ void test_bpf_verif_scale(void) { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, /* full unroll by llvm */ { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, @@ -86,6 +87,9 @@ void test_bpf_verif_scale(void) { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + /* non-inlined subprogs */ + { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index c75fc6447186..93162484c2ca 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -24,40 +24,17 @@ #include "bpf_rlimit.h" #include "bpf_util.h" -#include "test_btf.h" +#include "../test_btf.h" +#include "test_progs.h" #define MAX_INSNS 512 #define MAX_SUBPROGS 16 -static uint32_t pass_cnt; -static uint32_t error_cnt; -static uint32_t skip_cnt; +static int duration = 0; +static bool always_log; -#define CHECK(condition, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \ - fprintf(stderr, format); \ - } \ - __ret; \ -}) - -static int count_result(int err) -{ - if (err) - error_cnt++; - else - pass_cnt++; - - fprintf(stderr, "\n"); - return err; -} - -static int __base_pr(enum libbpf_print_level level __attribute__((unused)), - const char *format, va_list args) -{ - return vfprintf(stderr, format, args); -} +#undef CHECK +#define CHECK(condition, format...) _CHECK(condition, "check", duration, format) #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f @@ -69,21 +46,6 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)), #define MAX_NR_RAW_U32 1024 #define BTF_LOG_BUF_SIZE 65535 -static struct args { - unsigned int raw_test_num; - unsigned int file_test_num; - unsigned int get_info_test_num; - unsigned int info_raw_test_num; - unsigned int dedup_test_num; - bool raw_test; - bool file_test; - bool get_info_test; - bool pprint_test; - bool always_log; - bool info_raw_test; - bool dedup_test; -} args; - static char btf_log_buf[BTF_LOG_BUF_SIZE]; static struct btf_header hdr_tmpl = { @@ -3664,7 +3626,7 @@ done: return raw_btf; } -static int do_test_raw(unsigned int test_num) +static void do_test_raw(unsigned int test_num) { struct btf_raw_test *test = &raw_tests[test_num - 1]; struct bpf_create_map_attr create_attr = {}; @@ -3674,15 +3636,16 @@ static int do_test_raw(unsigned int test_num) void *raw_btf; int err; - fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr); + if (!test__start_subtest(test->descr)) + return; + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, &raw_btf_size, NULL); - if (!raw_btf) - return -1; + return; hdr = raw_btf; @@ -3694,7 +3657,7 @@ static int do_test_raw(unsigned int test_num) *btf_log_buf = '\0'; btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); free(raw_btf); err = ((btf_fd == -1) != test->btf_load_err); @@ -3725,32 +3688,12 @@ static int do_test_raw(unsigned int test_num) map_fd, test->map_create_err); done: - if (!err) - fprintf(stderr, "OK"); - - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); - if (btf_fd != -1) close(btf_fd); if (map_fd != -1) close(map_fd); - - return err; -} - -static int test_raw(void) -{ - unsigned int i; - int err = 0; - - if (args.raw_test_num) - return count_result(do_test_raw(args.raw_test_num)); - - for (i = 1; i <= ARRAY_SIZE(raw_tests); i++) - err |= count_result(do_test_raw(i)); - - return err; } struct btf_get_info_test { @@ -3814,11 +3757,6 @@ const struct btf_get_info_test get_info_tests[] = { }, }; -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64)(unsigned long)ptr; -} - static int test_big_btf_info(unsigned int test_num) { const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; @@ -3851,7 +3789,7 @@ static int test_big_btf_info(unsigned int test_num) btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); if (CHECK(btf_fd == -1, "errno:%d", errno)) { err = -1; goto done; @@ -3892,7 +3830,7 @@ static int test_big_btf_info(unsigned int test_num) fprintf(stderr, "OK"); done: - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); free(raw_btf); @@ -3939,7 +3877,7 @@ static int test_btf_id(unsigned int test_num) btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { err = -1; goto done; @@ -4024,7 +3962,7 @@ static int test_btf_id(unsigned int test_num) fprintf(stderr, "OK"); done: - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); free(raw_btf); @@ -4039,7 +3977,7 @@ done: return err; } -static int do_test_get_info(unsigned int test_num) +static void do_test_get_info(unsigned int test_num) { const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; unsigned int raw_btf_size, user_btf_size, expected_nbytes; @@ -4048,11 +3986,14 @@ static int do_test_get_info(unsigned int test_num) int btf_fd = -1, err, ret; uint32_t info_len; - fprintf(stderr, "BTF GET_INFO test[%u] (%s): ", - test_num, test->descr); + if (!test__start_subtest(test->descr)) + return; - if (test->special_test) - return test->special_test(test_num); + if (test->special_test) { + err = test->special_test(test_num); + if (CHECK(err, "failed: %d\n", err)) + return; + } raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, @@ -4061,7 +4002,7 @@ static int do_test_get_info(unsigned int test_num) &raw_btf_size, NULL); if (!raw_btf) - return -1; + return; *btf_log_buf = '\0'; @@ -4073,7 +4014,7 @@ static int do_test_get_info(unsigned int test_num) btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); if (CHECK(btf_fd == -1, "errno:%d", errno)) { err = -1; goto done; @@ -4114,7 +4055,7 @@ static int do_test_get_info(unsigned int test_num) fprintf(stderr, "OK"); done: - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); free(raw_btf); @@ -4122,22 +4063,6 @@ done: if (btf_fd != -1) close(btf_fd); - - return err; -} - -static int test_get_info(void) -{ - unsigned int i; - int err = 0; - - if (args.get_info_test_num) - return count_result(do_test_get_info(args.get_info_test_num)); - - for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++) - err |= count_result(do_test_get_info(i)); - - return err; } struct btf_file_test { @@ -4151,7 +4076,7 @@ static struct btf_file_test file_tests[] = { { .file = "test_btf_nokv.o", .btf_kv_notfound = true, }, }; -static int do_test_file(unsigned int test_num) +static void do_test_file(unsigned int test_num) { const struct btf_file_test *test = &file_tests[test_num - 1]; const char *expected_fnames[] = {"_dummy_tracepoint", @@ -4169,17 +4094,17 @@ static int do_test_file(unsigned int test_num) struct bpf_map *map; int i, err, prog_fd; - fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, - test->file); + if (!test__start_subtest(test->file)) + return; btf = btf__parse_elf(test->file, &btf_ext); if (IS_ERR(btf)) { if (PTR_ERR(btf) == -ENOENT) { - fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC); - skip_cnt++; - return 0; + printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC); + test__skip(); + return; } - return PTR_ERR(btf); + return; } btf__free(btf); @@ -4188,7 +4113,7 @@ static int do_test_file(unsigned int test_num) obj = bpf_object__open(test->file); if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) - return PTR_ERR(obj); + return; prog = bpf_program__next(NULL, obj); if (CHECK(!prog, "Cannot find bpf_prog")) { @@ -4310,21 +4235,6 @@ skip: done: free(func_info); bpf_object__close(obj); - return err; -} - -static int test_file(void) -{ - unsigned int i; - int err = 0; - - if (args.file_test_num) - return count_result(do_test_file(args.file_test_num)); - - for (i = 1; i <= ARRAY_SIZE(file_tests); i++) - err |= count_result(do_test_file(i)); - - return err; } const char *pprint_enum_str[] = { @@ -4428,7 +4338,7 @@ static struct btf_raw_test pprint_test_template[] = { .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ .value_type_id = 16, /* struct pprint_mapv */ - .max_entries = 128 * 1024, + .max_entries = 128, }, { @@ -4493,7 +4403,7 @@ static struct btf_raw_test pprint_test_template[] = { .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ .value_type_id = 16, /* struct pprint_mapv */ - .max_entries = 128 * 1024, + .max_entries = 128, }, { @@ -4564,7 +4474,7 @@ static struct btf_raw_test pprint_test_template[] = { .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ .value_type_id = 16, /* struct pprint_mapv */ - .max_entries = 128 * 1024, + .max_entries = 128, }, #ifdef __SIZEOF_INT128__ @@ -4591,7 +4501,7 @@ static struct btf_raw_test pprint_test_template[] = { .value_size = sizeof(struct pprint_mapv_int128), .key_type_id = 1, .value_type_id = 4, - .max_entries = 128 * 1024, + .max_entries = 128, .mapv_kind = PPRINT_MAPV_KIND_INT128, }, #endif @@ -4790,7 +4700,7 @@ static int check_line(const char *expected_line, int nexpected_line, } -static int do_test_pprint(int test_num) +static void do_test_pprint(int test_num) { const struct btf_raw_test *test = &pprint_test_template[test_num]; enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; @@ -4809,18 +4719,20 @@ static int do_test_pprint(int test_num) uint8_t *raw_btf; ssize_t nread; - fprintf(stderr, "%s(#%d)......", test->descr, test_num); + if (!test__start_subtest(test->descr)) + return; + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, &raw_btf_size, NULL); if (!raw_btf) - return -1; + return; *btf_log_buf = '\0'; btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); free(raw_btf); if (CHECK(btf_fd == -1, "errno:%d", errno)) { @@ -4971,7 +4883,7 @@ done: free(mapv); if (!err) fprintf(stderr, "OK"); - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); if (btf_fd != -1) close(btf_fd); @@ -4981,14 +4893,11 @@ done: fclose(pin_file); unlink(pin_path); free(line); - - return err; } -static int test_pprint(void) +static void test_pprint(void) { unsigned int i; - int err = 0; /* test various maps with the first test template */ for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) { @@ -4999,7 +4908,7 @@ static int test_pprint(void) pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map; pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map; - err |= count_result(do_test_pprint(0)); + do_test_pprint(0); } /* test rest test templates with the first map */ @@ -5010,10 +4919,8 @@ static int test_pprint(void) pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map; pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map; pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map; - err |= count_result(do_test_pprint(i)); + do_test_pprint(i); } - - return err; } #define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \ @@ -6178,7 +6085,7 @@ done: return err; } -static int do_test_info_raw(unsigned int test_num) +static void do_test_info_raw(unsigned int test_num) { const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; unsigned int raw_btf_size, linfo_str_off, linfo_size; @@ -6187,18 +6094,19 @@ static int do_test_info_raw(unsigned int test_num) const char *ret_next_str; union bpf_attr attr = {}; - fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr); + if (!test__start_subtest(test->descr)) + return; + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, &raw_btf_size, &ret_next_str); - if (!raw_btf) - return -1; + return; *btf_log_buf = '\0'; btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); free(raw_btf); if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { @@ -6206,7 +6114,7 @@ static int do_test_info_raw(unsigned int test_num) goto done; } - if (*btf_log_buf && args.always_log) + if (*btf_log_buf && always_log) fprintf(stderr, "\n%s", btf_log_buf); *btf_log_buf = '\0'; @@ -6261,10 +6169,7 @@ static int do_test_info_raw(unsigned int test_num) goto done; done: - if (!err) - fprintf(stderr, "OK"); - - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); if (btf_fd != -1) @@ -6274,22 +6179,6 @@ done: if (!IS_ERR(patched_linfo)) free(patched_linfo); - - return err; -} - -static int test_info_raw(void) -{ - unsigned int i; - int err = 0; - - if (args.info_raw_test_num) - return count_result(do_test_info_raw(args.info_raw_test_num)); - - for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) - err |= count_result(do_test_info_raw(i)); - - return err; } struct btf_raw_data { @@ -6754,7 +6643,7 @@ static void dump_btf_strings(const char *strs, __u32 len) } } -static int do_test_dedup(unsigned int test_num) +static void do_test_dedup(unsigned int test_num) { const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size; @@ -6769,13 +6658,15 @@ static int do_test_dedup(unsigned int test_num) void *raw_btf; int err = 0, i; - fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr); + if (!test__start_subtest(test->descr)) + return; raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types, test->input.str_sec, test->input.str_sec_size, &raw_btf_size, &ret_test_next_str); if (!raw_btf) - return -1; + return; + test_btf = btf__new((__u8 *)raw_btf, raw_btf_size); free(raw_btf); if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld", @@ -6789,7 +6680,7 @@ static int do_test_dedup(unsigned int test_num) test->expect.str_sec_size, &raw_btf_size, &ret_expect_next_str); if (!raw_btf) - return -1; + return; expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size); free(raw_btf); if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld", @@ -6894,174 +6785,27 @@ static int do_test_dedup(unsigned int test_num) } done: - if (!err) - fprintf(stderr, "OK"); if (!IS_ERR(test_btf)) btf__free(test_btf); if (!IS_ERR(expect_btf)) btf__free(expect_btf); - - return err; } -static int test_dedup(void) +void test_btf(void) { - unsigned int i; - int err = 0; + int i; - if (args.dedup_test_num) - return count_result(do_test_dedup(args.dedup_test_num)); + always_log = env.verbosity > VERBOSE_NONE; + for (i = 1; i <= ARRAY_SIZE(raw_tests); i++) + do_test_raw(i); + for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++) + do_test_get_info(i); + for (i = 1; i <= ARRAY_SIZE(file_tests); i++) + do_test_file(i); + for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) + do_test_info_raw(i); for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++) - err |= count_result(do_test_dedup(i)); - - return err; -} - -static void usage(const char *cmd) -{ - fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" - "\t[-g btf_get_info_test_num (1 - %zu)] |\n" - "\t[-f btf_file_test_num (1 - %zu)] |\n" - "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" - "\t[-p (pretty print test)] |\n" - "\t[-d btf_dedup_test_num (1 - %zu)]]\n", - cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests), - ARRAY_SIZE(dedup_tests)); -} - -static int parse_args(int argc, char **argv) -{ - const char *optstr = "hlpk:f:r:g:d:"; - int opt; - - while ((opt = getopt(argc, argv, optstr)) != -1) { - switch (opt) { - case 'l': - args.always_log = true; - break; - case 'f': - args.file_test_num = atoi(optarg); - args.file_test = true; - break; - case 'r': - args.raw_test_num = atoi(optarg); - args.raw_test = true; - break; - case 'g': - args.get_info_test_num = atoi(optarg); - args.get_info_test = true; - break; - case 'p': - args.pprint_test = true; - break; - case 'k': - args.info_raw_test_num = atoi(optarg); - args.info_raw_test = true; - break; - case 'd': - args.dedup_test_num = atoi(optarg); - args.dedup_test = true; - break; - case 'h': - usage(argv[0]); - exit(0); - default: - usage(argv[0]); - return -1; - } - } - - if (args.raw_test_num && - (args.raw_test_num < 1 || - args.raw_test_num > ARRAY_SIZE(raw_tests))) { - fprintf(stderr, "BTF raw test number must be [1 - %zu]\n", - ARRAY_SIZE(raw_tests)); - return -1; - } - - if (args.file_test_num && - (args.file_test_num < 1 || - args.file_test_num > ARRAY_SIZE(file_tests))) { - fprintf(stderr, "BTF file test number must be [1 - %zu]\n", - ARRAY_SIZE(file_tests)); - return -1; - } - - if (args.get_info_test_num && - (args.get_info_test_num < 1 || - args.get_info_test_num > ARRAY_SIZE(get_info_tests))) { - fprintf(stderr, "BTF get info test number must be [1 - %zu]\n", - ARRAY_SIZE(get_info_tests)); - return -1; - } - - if (args.info_raw_test_num && - (args.info_raw_test_num < 1 || - args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) { - fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n", - ARRAY_SIZE(info_raw_tests)); - return -1; - } - - if (args.dedup_test_num && - (args.dedup_test_num < 1 || - args.dedup_test_num > ARRAY_SIZE(dedup_tests))) { - fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n", - ARRAY_SIZE(dedup_tests)); - return -1; - } - - return 0; -} - -static void print_summary(void) -{ - fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n", - pass_cnt - skip_cnt, skip_cnt, error_cnt); -} - -int main(int argc, char **argv) -{ - int err = 0; - - err = parse_args(argc, argv); - if (err) - return err; - - if (args.always_log) - libbpf_set_print(__base_pr); - - if (args.raw_test) - err |= test_raw(); - - if (args.get_info_test) - err |= test_get_info(); - - if (args.file_test) - err |= test_file(); - - if (args.pprint_test) - err |= test_pprint(); - - if (args.info_raw_test) - err |= test_info_raw(); - - if (args.dedup_test) - err |= test_dedup(); - - if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test || args.info_raw_test || args.dedup_test) - goto done; - - err |= test_raw(); - err |= test_get_info(); - err |= test_file(); - err |= test_info_raw(); - err |= test_dedup(); - -done: - print_summary(); - return err; + do_test_dedup(i); + test_pprint(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 39fb81d9daeb..c60091ee8a21 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -129,6 +129,109 @@ done: return err; } +static char *dump_buf; +static size_t dump_buf_sz; +static FILE *dump_buf_file; + +void test_btf_dump_incremental(void) +{ + struct btf *btf = NULL; + struct btf_dump *d = NULL; + struct btf_dump_opts opts; + int id, err, i; + + dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); + if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) + return; + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "new_empty")) + goto err_out; + opts.ctx = dump_buf_file; + d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); + if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new")) + goto err_out; + + /* First, generate BTF corresponding to the following C code: + * + * enum { VAL = 1 }; + * + * struct s { int x; }; + * + */ + id = btf__add_enum(btf, NULL, 4); + ASSERT_EQ(id, 1, "enum_id"); + err = btf__add_enum_value(btf, "VAL", 1); + ASSERT_OK(err, "enum_val_ok"); + + id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(id, 2, "int_id"); + + id = btf__add_struct(btf, "s", 4); + ASSERT_EQ(id, 3, "struct_id"); + err = btf__add_field(btf, "x", 2, 0, 0); + ASSERT_OK(err, "field_ok"); + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + err = btf_dump__dump_type(d, i); + ASSERT_OK(err, "dump_type_ok"); + } + + fflush(dump_buf_file); + dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ + ASSERT_STREQ(dump_buf, +"enum {\n" +" VAL = 1,\n" +"};\n" +"\n" +"struct s {\n" +" int x;\n" +"};\n\n", "c_dump1"); + + /* Now, after dumping original BTF, append another struct that embeds + * anonymous enum. It also has a name conflict with the first struct: + * + * struct s___2 { + * enum { VAL___2 = 1 } x; + * struct s s; + * }; + * + * This will test that btf_dump'er maintains internal state properly. + * Note that VAL___2 enum value. It's because we've already emitted + * that enum as a global anonymous enum, so btf_dump will ensure that + * enum values don't conflict; + * + */ + fseek(dump_buf_file, 0, SEEK_SET); + + id = btf__add_struct(btf, "s", 4); + ASSERT_EQ(id, 4, "struct_id"); + err = btf__add_field(btf, "x", 1, 0, 0); + ASSERT_OK(err, "field_ok"); + err = btf__add_field(btf, "s", 3, 32, 0); + ASSERT_OK(err, "field_ok"); + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + err = btf_dump__dump_type(d, i); + ASSERT_OK(err, "dump_type_ok"); + } + + fflush(dump_buf_file); + dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ + ASSERT_STREQ(dump_buf, +"struct s___2 {\n" +" enum {\n" +" VAL___2 = 1,\n" +" } x;\n" +" struct s s;\n" +"};\n\n" , "c_dump1"); + +err_out: + fclose(dump_buf_file); + free(dump_buf); + btf_dump__free(d); + btf__free(btf); +} + void test_btf_dump() { int i; @@ -140,4 +243,6 @@ void test_btf_dump() { test_btf_dump_case(i, &btf_dump_test_cases[i]); } + if (test__start_subtest("btf_dump: incremental")) + test_btf_dump_incremental(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c new file mode 100644 index 000000000000..8c52d72c876e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include <string.h> +#include <byteswap.h> +#include <test_progs.h> +#include <bpf/btf.h> + +static int duration = 0; + +void test_btf_endian() { +#if __BYTE_ORDER == __LITTLE_ENDIAN + enum btf_endianness endian = BTF_LITTLE_ENDIAN; +#elif __BYTE_ORDER == __BIG_ENDIAN + enum btf_endianness endian = BTF_BIG_ENDIAN; +#else +#error "Unrecognized __BYTE_ORDER" +#endif + enum btf_endianness swap_endian = 1 - endian; + struct btf *btf = NULL, *swap_btf = NULL; + const void *raw_data, *swap_raw_data; + const struct btf_type *t; + const struct btf_header *hdr; + __u32 raw_sz, swap_raw_sz; + int var_id; + + /* Load BTF in native endianness */ + btf = btf__parse_elf("btf_dump_test_case_syntax.o", NULL); + if (!ASSERT_OK_PTR(btf, "parse_native_btf")) + goto err_out; + + ASSERT_EQ(btf__endianness(btf), endian, "endian"); + btf__set_endianness(btf, swap_endian); + ASSERT_EQ(btf__endianness(btf), swap_endian, "endian"); + + /* Get raw BTF data in non-native endianness... */ + raw_data = btf__get_raw_data(btf, &raw_sz); + if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) + goto err_out; + + /* ...and open it as a new BTF instance */ + swap_btf = btf__new(raw_data, raw_sz); + if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf")) + goto err_out; + + ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); + ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + + swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) + goto err_out; + + /* both raw data should be identical (with non-native endianness) */ + ASSERT_OK(memcmp(raw_data, swap_raw_data, raw_sz), "mem_identical"); + + /* make sure that at least BTF header data is really swapped */ + hdr = swap_raw_data; + ASSERT_EQ(bswap_16(hdr->magic), BTF_MAGIC, "btf_magic_swapped"); + ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes"); + + /* swap it back to native endianness */ + btf__set_endianness(swap_btf, endian); + swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) + goto err_out; + + /* now header should have native BTF_MAGIC */ + hdr = swap_raw_data; + ASSERT_EQ(hdr->magic, BTF_MAGIC, "btf_magic_native"); + ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes"); + + /* now modify original BTF */ + var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1); + CHECK(var_id <= 0, "var_id", "failed %d\n", var_id); + + btf__free(swap_btf); + swap_btf = NULL; + + btf__set_endianness(btf, swap_endian); + raw_data = btf__get_raw_data(btf, &raw_sz); + if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) + goto err_out; + + /* and re-open swapped raw data again */ + swap_btf = btf__new(raw_data, raw_sz); + if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf")) + goto err_out; + + ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); + ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + + /* the type should appear as if it was stored in native endianness */ + t = btf__type_by_id(swap_btf, var_id); + ASSERT_STREQ(btf__str_by_offset(swap_btf, t->name_off), "some_var", "var_name"); + ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage"); + ASSERT_EQ(t->type, 1, "var_type"); + +err_out: + btf__free(btf); + btf__free(swap_btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index 6ccecbd39476..76ebe4c250f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -53,12 +53,12 @@ static int kern_sync_rcu(void) return err; } -void test_btf_map_in_map(void) +static void test_lookup_update(void) { - int err, key = 0, val, i; + int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id; + int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd; struct test_btf_map_in_map *skel; - int outer_arr_fd, outer_hash_fd; - int fd, map1_fd, map2_fd, map1_id, map2_id; + int err, key = 0, val, i, fd; skel = test_btf_map_in_map__open_and_load(); if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) @@ -70,32 +70,45 @@ void test_btf_map_in_map(void) map1_fd = bpf_map__fd(skel->maps.inner_map1); map2_fd = bpf_map__fd(skel->maps.inner_map2); + map3_fd = bpf_map__fd(skel->maps.inner_map3); + map4_fd = bpf_map__fd(skel->maps.inner_map4); + map5_fd = bpf_map__fd(skel->maps.inner_map5); + outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn); outer_arr_fd = bpf_map__fd(skel->maps.outer_arr); outer_hash_fd = bpf_map__fd(skel->maps.outer_hash); - /* inner1 = input, inner2 = input + 1 */ - map1_fd = bpf_map__fd(skel->maps.inner_map1); + /* inner1 = input, inner2 = input + 1, inner3 = input + 2 */ bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0); - map2_fd = bpf_map__fd(skel->maps.inner_map2); bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0); + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0); skel->bss->input = 1; usleep(1); - bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1); bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2); + bpf_map_lookup_elem(map3_fd, &key, &val); + CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3); - /* inner1 = input + 1, inner2 = input */ + /* inner2 = input, inner1 = input + 1, inner4 = input + 2 */ bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0); bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0); + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0); skel->bss->input = 3; usleep(1); - bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4); bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3); + bpf_map_lookup_elem(map4_fd, &key, &val); + CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5); + + /* inner5 = input + 2 */ + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0); + skel->bss->input = 5; + usleep(1); + bpf_map_lookup_elem(map5_fd, &key, &val); + CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7); for (i = 0; i < 5; i++) { val = i % 2 ? map1_fd : map2_fd; @@ -106,7 +119,13 @@ void test_btf_map_in_map(void) } err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0); if (CHECK_FAIL(err)) { - printf("failed to update hash_of_maps on iter #%d\n", i); + printf("failed to update array_of_maps on iter #%d\n", i); + goto cleanup; + } + val = i % 2 ? map4_fd : map5_fd; + err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0); + if (CHECK_FAIL(err)) { + printf("failed to update array_of_maps (dyn) on iter #%d\n", i); goto cleanup; } } @@ -143,3 +162,36 @@ void test_btf_map_in_map(void) cleanup: test_btf_map_in_map__destroy(skel); } + +static void test_diff_size(void) +{ + struct test_btf_map_in_map *skel; + int err, inner_map_fd, zero = 0; + + skel = test_btf_map_in_map__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) + return; + + inner_map_fd = bpf_map__fd(skel->maps.sockarr_sz2); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr), &zero, + &inner_map_fd, 0); + CHECK(err, "outer_sockarr inner map size check", + "cannot use a different size inner_map\n"); + + inner_map_fd = bpf_map__fd(skel->maps.inner_map_sz2); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &zero, + &inner_map_fd, 0); + CHECK(!err, "outer_arr inner map size check", + "incorrectly updated with a different size inner_map\n"); + + test_btf_map_in_map__destroy(skel); +} + +void test_btf_map_in_map(void) +{ + if (test__start_subtest("lookup_update")) + test_lookup_update(); + + if (test__start_subtest("diff_size")) + test_diff_size(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c new file mode 100644 index 000000000000..86ccf37e26b3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#define _GNU_SOURCE +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <sched.h> +#include <linux/compiler.h> +#include <bpf/libbpf.h> + +#include "network_helpers.h" +#include "test_progs.h" +#include "test_btf_skc_cls_ingress.skel.h" + +static struct test_btf_skc_cls_ingress *skel; +struct sockaddr_in6 srv_sa6; +static __u32 duration; + +#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" + +static int write_sysctl(const char *sysctl, const char *value) +{ + int fd, err, len; + + fd = open(sysctl, O_WRONLY); + if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", + sysctl, strerror(errno), errno)) + return -1; + + len = strlen(value); + err = write(fd, value, len); + close(fd); + if (CHECK(err != len, "write sysctl", + "write(%s, %s, %d): err:%d %s (%d)\n", + sysctl, value, len, err, strerror(errno), errno)) + return -1; + + return 0; +} + +static int prepare_netns(void) +{ + if (CHECK(unshare(CLONE_NEWNET), "create netns", + "unshare(CLONE_NEWNET): %s (%d)", + strerror(errno), errno)) + return -1; + + if (CHECK(system("ip link set dev lo up"), + "ip link set dev lo up", "failed\n")) + return -1; + + if (CHECK(system("tc qdisc add dev lo clsact"), + "tc qdisc add dev lo clsact", "failed\n")) + return -1; + + if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE), + "install tc cls-prog at ingress", "failed\n")) + return -1; + + /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the + * bpf_tcp_gen_syncookie() helper. + */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") || + write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") || + write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1")) + return -1; + + return 0; +} + +static void reset_test(void) +{ + memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6)); + skel->bss->listen_tp_sport = 0; + skel->bss->req_sk_sport = 0; + skel->bss->recv_cookie = 0; + skel->bss->gen_cookie = 0; + skel->bss->linum = 0; +} + +static void print_err_line(void) +{ + if (skel->bss->linum) + printf("bpf prog error at line %u\n", skel->bss->linum); +} + +static void test_conn(void) +{ + int listen_fd = -1, cli_fd = -1, err; + socklen_t addrlen = sizeof(srv_sa6); + int srv_port; + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (CHECK_FAIL(listen_fd == -1)) + return; + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, + errno)) + goto done; + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + srv_port = ntohs(srv_sa6.sin6_port); + + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK_FAIL(cli_fd == -1)) + goto done; + + if (CHECK(skel->bss->listen_tp_sport != srv_port || + skel->bss->req_sk_sport != srv_port, + "Unexpected sk src port", + "listen_tp_sport:%u req_sk_sport:%u expected:%u\n", + skel->bss->listen_tp_sport, skel->bss->req_sk_sport, + srv_port)) + goto done; + + if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie, + "Unexpected syncookie states", + "gen_cookie:%u recv_cookie:%u\n", + skel->bss->gen_cookie, skel->bss->recv_cookie)) + goto done; + + CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", + skel->bss->linum); + +done: + if (listen_fd != -1) + close(listen_fd); + if (cli_fd != -1) + close(cli_fd); +} + +static void test_syncookie(void) +{ + int listen_fd = -1, cli_fd = -1, err; + socklen_t addrlen = sizeof(srv_sa6); + int srv_port; + + /* Enforce syncookie mode */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2")) + return; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (CHECK_FAIL(listen_fd == -1)) + return; + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, + errno)) + goto done; + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + srv_port = ntohs(srv_sa6.sin6_port); + + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK_FAIL(cli_fd == -1)) + goto done; + + if (CHECK(skel->bss->listen_tp_sport != srv_port, + "Unexpected tp src port", + "listen_tp_sport:%u expected:%u\n", + skel->bss->listen_tp_sport, srv_port)) + goto done; + + if (CHECK(skel->bss->req_sk_sport, + "Unexpected req_sk src port", + "req_sk_sport:%u expected:0\n", + skel->bss->req_sk_sport)) + goto done; + + if (CHECK(!skel->bss->gen_cookie || + skel->bss->gen_cookie != skel->bss->recv_cookie, + "Unexpected syncookie states", + "gen_cookie:%u recv_cookie:%u\n", + skel->bss->gen_cookie, skel->bss->recv_cookie)) + goto done; + + CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", + skel->bss->linum); + +done: + if (listen_fd != -1) + close(listen_fd); + if (cli_fd != -1) + close(cli_fd); +} + +struct test { + const char *desc; + void (*run)(void); +}; + +#define DEF_TEST(name) { #name, test_##name } +static struct test tests[] = { + DEF_TEST(conn), + DEF_TEST(syncookie), +}; + +void test_btf_skc_cls_ingress(void) +{ + int i, err; + + skel = test_btf_skc_cls_ingress__open_and_load(); + if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n")) + return; + + err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE); + if (CHECK(err, "bpf_program__pin", + "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) { + test_btf_skc_cls_ingress__destroy(skel); + return; + } + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].desc)) + continue; + + if (prepare_netns()) + break; + + tests[i].run(); + + print_err_line(); + reset_test(); + } + + bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE); + test_btf_skc_cls_ingress__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c new file mode 100644 index 000000000000..314e1e7c36df --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include <bpf/btf.h> + +static int duration = 0; + +void test_btf_write() { + const struct btf_var_secinfo *vi; + const struct btf_type *t; + const struct btf_member *m; + const struct btf_enum *v; + const struct btf_param *p; + struct btf *btf; + int id, err, str_off; + + btf = btf__new_empty(); + if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf))) + return; + + str_off = btf__find_str(btf, "int"); + ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off"); + + str_off = btf__add_str(btf, "int"); + ASSERT_EQ(str_off, 1, "int_str_off"); + + str_off = btf__find_str(btf, "int"); + ASSERT_EQ(str_off, 1, "int_str_found_off"); + + /* BTF_KIND_INT */ + id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(id, 1, "int_id"); + + t = btf__type_by_id(btf, 1); + /* should re-use previously added "int" string */ + ASSERT_EQ(t->name_off, str_off, "int_name_off"); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "int", "int_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_INT, "int_kind"); + ASSERT_EQ(t->size, 4, "int_sz"); + ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc"); + ASSERT_EQ(btf_int_bits(t), 32, "int_bits"); + + /* invalid int size */ + id = btf__add_int(btf, "bad sz int", 7, 0); + ASSERT_ERR(id, "int_bad_sz"); + /* invalid encoding */ + id = btf__add_int(btf, "bad enc int", 4, 123); + ASSERT_ERR(id, "int_bad_enc"); + /* NULL name */ + id = btf__add_int(btf, NULL, 4, 0); + ASSERT_ERR(id, "int_bad_null_name"); + /* empty name */ + id = btf__add_int(btf, "", 4, 0); + ASSERT_ERR(id, "int_bad_empty_name"); + + /* PTR/CONST/VOLATILE/RESTRICT */ + id = btf__add_ptr(btf, 1); + ASSERT_EQ(id, 2, "ptr_id"); + t = btf__type_by_id(btf, 2); + ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind"); + ASSERT_EQ(t->type, 1, "ptr_type"); + + id = btf__add_const(btf, 5); /* points forward to restrict */ + ASSERT_EQ(id, 3, "const_id"); + t = btf__type_by_id(btf, 3); + ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind"); + ASSERT_EQ(t->type, 5, "const_type"); + + id = btf__add_volatile(btf, 3); + ASSERT_EQ(id, 4, "volatile_id"); + t = btf__type_by_id(btf, 4); + ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind"); + ASSERT_EQ(t->type, 3, "volatile_type"); + + id = btf__add_restrict(btf, 4); + ASSERT_EQ(id, 5, "restrict_id"); + t = btf__type_by_id(btf, 5); + ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind"); + ASSERT_EQ(t->type, 4, "restrict_type"); + + /* ARRAY */ + id = btf__add_array(btf, 1, 2, 10); /* int *[10] */ + ASSERT_EQ(id, 6, "array_id"); + t = btf__type_by_id(btf, 6); + ASSERT_EQ(btf_kind(t), BTF_KIND_ARRAY, "array_kind"); + ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type"); + ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type"); + ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems"); + + /* STRUCT */ + err = btf__add_field(btf, "field", 1, 0, 0); + ASSERT_ERR(err, "no_struct_field"); + id = btf__add_struct(btf, "s1", 8); + ASSERT_EQ(id, 7, "struct_id"); + err = btf__add_field(btf, "f1", 1, 0, 0); + ASSERT_OK(err, "f1_res"); + err = btf__add_field(btf, "f2", 1, 32, 16); + ASSERT_OK(err, "f2_res"); + + t = btf__type_by_id(btf, 7); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "s1", "struct_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_STRUCT, "struct_kind"); + ASSERT_EQ(btf_vlen(t), 2, "struct_vlen"); + ASSERT_EQ(btf_kflag(t), true, "struct_kflag"); + ASSERT_EQ(t->size, 8, "struct_sz"); + m = btf_members(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name"); + ASSERT_EQ(m->type, 1, "f1_type"); + ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off"); + ASSERT_EQ(btf_member_bitfield_size(t, 0), 0, "f1_bit_sz"); + m = btf_members(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f2", "f2_name"); + ASSERT_EQ(m->type, 1, "f2_type"); + ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off"); + ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz"); + + /* UNION */ + id = btf__add_union(btf, "u1", 8); + ASSERT_EQ(id, 8, "union_id"); + + /* invalid, non-zero offset */ + err = btf__add_field(btf, "field", 1, 1, 0); + ASSERT_ERR(err, "no_struct_field"); + + err = btf__add_field(btf, "f1", 1, 0, 16); + ASSERT_OK(err, "f1_res"); + + t = btf__type_by_id(btf, 8); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "u1", "union_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_UNION, "union_kind"); + ASSERT_EQ(btf_vlen(t), 1, "union_vlen"); + ASSERT_EQ(btf_kflag(t), true, "union_kflag"); + ASSERT_EQ(t->size, 8, "union_sz"); + m = btf_members(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name"); + ASSERT_EQ(m->type, 1, "f1_type"); + ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off"); + ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz"); + + /* ENUM */ + id = btf__add_enum(btf, "e1", 4); + ASSERT_EQ(id, 9, "enum_id"); + err = btf__add_enum_value(btf, "v1", 1); + ASSERT_OK(err, "v1_res"); + err = btf__add_enum_value(btf, "v2", 2); + ASSERT_OK(err, "v2_res"); + + t = btf__type_by_id(btf, 9); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_kind"); + ASSERT_EQ(btf_vlen(t), 2, "enum_vlen"); + ASSERT_EQ(t->size, 4, "enum_sz"); + v = btf_enum(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v1", "v1_name"); + ASSERT_EQ(v->val, 1, "v1_val"); + v = btf_enum(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); + ASSERT_EQ(v->val, 2, "v2_val"); + + /* FWDs */ + id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT); + ASSERT_EQ(id, 10, "struct_fwd_id"); + t = btf__type_by_id(btf, 10); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); + ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag"); + + id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION); + ASSERT_EQ(id, 11, "union_fwd_id"); + t = btf__type_by_id(btf, 11); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); + ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag"); + + id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM); + ASSERT_EQ(id, 12, "enum_fwd_id"); + t = btf__type_by_id(btf, 12); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "enum_fwd", "fwd_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind"); + ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); + ASSERT_EQ(t->size, 4, "enum_fwd_sz"); + + /* TYPEDEF */ + id = btf__add_typedef(btf, "typedef1", 1); + ASSERT_EQ(id, 13, "typedef_fwd_id"); + t = btf__type_by_id(btf, 13); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind"); + ASSERT_EQ(t->type, 1, "typedef_type"); + + /* FUNC & FUNC_PROTO */ + id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15); + ASSERT_EQ(id, 14, "func_id"); + t = btf__type_by_id(btf, 14); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "func1", "func_name"); + ASSERT_EQ(t->type, 15, "func_type"); + ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind"); + ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen"); + + id = btf__add_func_proto(btf, 1); + ASSERT_EQ(id, 15, "func_proto_id"); + err = btf__add_func_param(btf, "p1", 1); + ASSERT_OK(err, "p1_res"); + err = btf__add_func_param(btf, "p2", 2); + ASSERT_OK(err, "p2_res"); + + t = btf__type_by_id(btf, 15); + ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC_PROTO, "func_proto_kind"); + ASSERT_EQ(btf_vlen(t), 2, "func_proto_vlen"); + ASSERT_EQ(t->type, 1, "func_proto_ret_type"); + p = btf_params(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p1", "p1_name"); + ASSERT_EQ(p->type, 1, "p1_type"); + p = btf_params(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name"); + ASSERT_EQ(p->type, 2, "p2_type"); + + /* VAR */ + id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1); + ASSERT_EQ(id, 16, "var_id"); + t = btf__type_by_id(btf, 16); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "var1", "var_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind"); + ASSERT_EQ(t->type, 1, "var_type"); + ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type"); + + /* DATASECT */ + id = btf__add_datasec(btf, "datasec1", 12); + ASSERT_EQ(id, 17, "datasec_id"); + err = btf__add_datasec_var_info(btf, 1, 4, 8); + ASSERT_OK(err, "v1_res"); + + t = btf__type_by_id(btf, 17); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "datasec1", "datasec_name"); + ASSERT_EQ(t->size, 12, "datasec_sz"); + ASSERT_EQ(btf_kind(t), BTF_KIND_DATASEC, "datasec_kind"); + ASSERT_EQ(btf_vlen(t), 1, "datasec_vlen"); + vi = btf_var_secinfos(t) + 0; + ASSERT_EQ(vi->type, 1, "v1_type"); + ASSERT_EQ(vi->offset, 4, "v1_off"); + ASSERT_EQ(vi->size, 8, "v1_sz"); + + btf__free(btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index f259085cca6a..9781d85cb223 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -12,10 +12,13 @@ #include "progs/test_cls_redirect.h" #include "test_cls_redirect.skel.h" +#include "test_cls_redirect_subprogs.skel.h" #define ENCAP_IP INADDR_LOOPBACK #define ENCAP_PORT (1234) +static int duration = 0; + struct addr_port { in_port_t port; union { @@ -361,30 +364,18 @@ static void close_fds(int *fds, int n) close(fds[i]); } -void test_cls_redirect(void) +static void test_cls_redirect_common(struct bpf_program *prog) { - struct test_cls_redirect *skel = NULL; struct bpf_prog_test_run_attr tattr = {}; int families[] = { AF_INET, AF_INET6 }; struct sockaddr_storage ss; struct sockaddr *addr; socklen_t slen; int i, j, err; - int servers[__NR_KIND][ARRAY_SIZE(families)] = {}; int conns[__NR_KIND][ARRAY_SIZE(families)] = {}; struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)]; - skel = test_cls_redirect__open(); - if (CHECK_FAIL(!skel)) - return; - - skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); - skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); - - if (CHECK_FAIL(test_cls_redirect__load(skel))) - goto cleanup; - addr = (struct sockaddr *)&ss; for (i = 0; i < ARRAY_SIZE(families); i++) { slen = prepare_addr(&ss, families[i]); @@ -402,7 +393,7 @@ void test_cls_redirect(void) goto cleanup; } - tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect); + tattr.prog_fd = bpf_program__fd(prog); for (i = 0; i < ARRAY_SIZE(tests); i++) { struct test_cfg *test = &tests[i]; @@ -450,7 +441,58 @@ void test_cls_redirect(void) } cleanup: - test_cls_redirect__destroy(skel); close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0])); close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0])); } + +static void test_cls_redirect_inlined(void) +{ + struct test_cls_redirect *skel; + int err; + + skel = test_cls_redirect__open(); + if (CHECK(!skel, "skel_open", "failed\n")) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + err = test_cls_redirect__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_cls_redirect_common(skel->progs.cls_redirect); + +cleanup: + test_cls_redirect__destroy(skel); +} + +static void test_cls_redirect_subprogs(void) +{ + struct test_cls_redirect_subprogs *skel; + int err; + + skel = test_cls_redirect_subprogs__open(); + if (CHECK(!skel, "skel_open", "failed\n")) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + err = test_cls_redirect_subprogs__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_cls_redirect_common(skel->progs.cls_redirect); + +cleanup: + test_cls_redirect_subprogs__destroy(skel); +} + +void test_cls_redirect(void) +{ + if (test__start_subtest("cls_redirect_inlined")) + test_cls_redirect_inlined(); + if (test__start_subtest("cls_redirect_subprogs")) + test_cls_redirect_subprogs(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c new file mode 100644 index 000000000000..981c251453d9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <bpf/btf.h> + +/* real layout and sizes according to test's (32-bit) BTF + * needs to be defined before skeleton is included */ +struct test_struct___real { + unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */ + unsigned int val2; + unsigned long long val1; + unsigned short val3; + unsigned char val4; + unsigned char _pad; +}; + +#include "test_core_autosize.skel.h" + +static int duration = 0; + +static struct { + unsigned long long ptr_samesized; + unsigned long long val1_samesized; + unsigned long long val2_samesized; + unsigned long long val3_samesized; + unsigned long long val4_samesized; + struct test_struct___real output_samesized; + + unsigned long long ptr_downsized; + unsigned long long val1_downsized; + unsigned long long val2_downsized; + unsigned long long val3_downsized; + unsigned long long val4_downsized; + struct test_struct___real output_downsized; + + unsigned long long ptr_probed; + unsigned long long val1_probed; + unsigned long long val2_probed; + unsigned long long val3_probed; + unsigned long long val4_probed; + + unsigned long long ptr_signed; + unsigned long long val1_signed; + unsigned long long val2_signed; + unsigned long long val3_signed; + unsigned long long val4_signed; + struct test_struct___real output_signed; +} out; + +void test_core_autosize(void) +{ + char btf_file[] = "/tmp/core_autosize.btf.XXXXXX"; + int err, fd = -1, zero = 0; + int char_id, short_id, int_id, long_long_id, void_ptr_id, id; + struct test_core_autosize* skel = NULL; + struct bpf_object_load_attr load_attr = {}; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct btf *btf = NULL; + size_t written; + const void *raw_data; + __u32 raw_sz; + FILE *f = NULL; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + return; + /* Emit the following struct with 32-bit pointer size: + * + * struct test_struct { + * void *ptr; + * unsigned long val2; + * unsigned long long val1; + * unsigned short val3; + * unsigned char val4; + * char: 8; + * }; + * + * This struct is going to be used as the "kernel BTF" for this test. + * It's equivalent memory-layout-wise to test_struct__real above. + */ + + /* force 32-bit pointer size */ + btf__set_pointer_size(btf, 4); + + char_id = btf__add_int(btf, "unsigned char", 1, 0); + ASSERT_EQ(char_id, 1, "char_id"); + short_id = btf__add_int(btf, "unsigned short", 2, 0); + ASSERT_EQ(short_id, 2, "short_id"); + /* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */ + int_id = btf__add_int(btf, "long unsigned int", 4, 0); + ASSERT_EQ(int_id, 3, "int_id"); + long_long_id = btf__add_int(btf, "unsigned long long", 8, 0); + ASSERT_EQ(long_long_id, 4, "long_long_id"); + void_ptr_id = btf__add_ptr(btf, 0); + ASSERT_EQ(void_ptr_id, 5, "void_ptr_id"); + + id = btf__add_struct(btf, "test_struct", 20 /* bytes */); + ASSERT_EQ(id, 6, "struct_id"); + err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0); + err = err ?: btf__add_field(btf, "val2", int_id, 32, 0); + err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0); + err = err ?: btf__add_field(btf, "val3", short_id, 128, 0); + err = err ?: btf__add_field(btf, "val4", char_id, 144, 0); + ASSERT_OK(err, "struct_fields"); + + fd = mkstemp(btf_file); + if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd)) + goto cleanup; + f = fdopen(fd, "w"); + if (!ASSERT_OK_PTR(f, "btf_fdopen")) + goto cleanup; + + raw_data = btf__get_raw_data(btf, &raw_sz); + if (!ASSERT_OK_PTR(raw_data, "raw_data")) + goto cleanup; + written = fwrite(raw_data, 1, raw_sz, f); + if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno)) + goto cleanup; + fflush(f); + fclose(f); + f = NULL; + close(fd); + fd = -1; + + /* open and load BPF program with custom BTF as the kernel BTF */ + skel = test_core_autosize__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + /* disable handle_signed() for now */ + prog = bpf_object__find_program_by_name(skel->obj, "handle_signed"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + bpf_program__set_autoload(prog, false); + + load_attr.obj = skel->obj; + load_attr.target_btf_path = btf_file; + err = bpf_object__load_xattr(&load_attr); + if (!ASSERT_OK(err, "prog_load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_samesize = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_downsize = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_probed"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_probed = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach")) + goto cleanup; + + usleep(1); + + bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss"); + if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) + goto cleanup; + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out); + if (!ASSERT_OK(err, "bss_lookup")) + goto cleanup; + + ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized"); + ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized"); + ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized"); + ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized"); + ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized"); + ASSERT_EQ(out.output_samesized.ptr, 0x01020304, "ptr_samesized"); + ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized"); + ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized"); + ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized"); + ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized"); + + ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized"); + ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized"); + ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized"); + ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized"); + ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized"); + ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized"); + ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized"); + ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized"); + ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized"); + ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized"); + + ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed"); + ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed"); + ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed"); + ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed"); + ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed"); + + test_core_autosize__destroy(skel); + skel = NULL; + + /* now re-load with handle_signed() enabled, it should fail loading */ + skel = test_core_autosize__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + load_attr.obj = skel->obj; + load_attr.target_btf_path = btf_file; + err = bpf_object__load_xattr(&load_attr); + if (!ASSERT_ERR(err, "bad_prog_load")) + goto cleanup; + +cleanup: + if (f) + fclose(f); + if (fd >= 0) + close(fd); + remove(btf_file); + btf__free(btf); + test_core_autosize__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index a54eafc5e4b3..30e40ff4b0d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -3,6 +3,9 @@ #include "progs/core_reloc_types.h" #include <sys/mman.h> #include <sys/syscall.h> +#include <bpf/btf.h> + +static int duration = 0; #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) @@ -177,14 +180,13 @@ .fails = true, \ } -#define EXISTENCE_CASE_COMMON(name) \ +#define FIELD_EXISTS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_existence.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .relaxed_core_relocs = true + .btf_src_file = "btf__core_reloc_" #name ".o" \ -#define EXISTENCE_ERR_CASE(name) { \ - EXISTENCE_CASE_COMMON(name), \ +#define FIELD_EXISTS_ERR_CASE(name) { \ + FIELD_EXISTS_CASE_COMMON(name), \ .fails = true, \ } @@ -253,6 +255,61 @@ .fails = true, \ } +#define TYPE_BASED_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_type_based.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define TYPE_BASED_CASE(name, ...) { \ + TYPE_BASED_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_type_based_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_type_based_output), \ +} + +#define TYPE_BASED_ERR_CASE(name) { \ + TYPE_BASED_CASE_COMMON(name), \ + .fails = true, \ +} + +#define TYPE_ID_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_type_id.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define TYPE_ID_CASE(name, setup_fn) { \ + TYPE_ID_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_type_id_output) {}, \ + .output_len = sizeof(struct core_reloc_type_id_output), \ + .setup = setup_fn, \ +} + +#define TYPE_ID_ERR_CASE(name) { \ + TYPE_ID_CASE_COMMON(name), \ + .fails = true, \ +} + +#define ENUMVAL_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_enumval.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define ENUMVAL_CASE(name, ...) { \ + ENUMVAL_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_enumval_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_enumval_output), \ +} + +#define ENUMVAL_ERR_CASE(name) { \ + ENUMVAL_CASE_COMMON(name), \ + .fails = true, \ +} + +struct core_reloc_test_case; + +typedef int (*setup_test_fn)(struct core_reloc_test_case *test); + struct core_reloc_test_case { const char *case_name; const char *bpf_obj_file; @@ -264,8 +321,136 @@ struct core_reloc_test_case { bool fails; bool relaxed_core_relocs; bool direct_raw_tp; + setup_test_fn setup; }; +static int find_btf_type(const struct btf *btf, const char *name, __u32 kind) +{ + int id; + + id = btf__find_by_name_kind(btf, name, kind); + if (CHECK(id <= 0, "find_type_id", "failed to find '%s', kind %d: %d\n", name, kind, id)) + return -1; + + return id; +} + +static int setup_type_id_case_local(struct core_reloc_test_case *test) +{ + struct core_reloc_type_id_output *exp = (void *)test->output; + struct btf *local_btf = btf__parse(test->bpf_obj_file, NULL); + struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); + const struct btf_type *t; + const char *name; + int i; + + if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) || + CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) { + btf__free(local_btf); + btf__free(targ_btf); + return -EINVAL; + } + + exp->local_anon_struct = -1; + exp->local_anon_union = -1; + exp->local_anon_enum = -1; + exp->local_anon_func_proto_ptr = -1; + exp->local_anon_void_ptr = -1; + exp->local_anon_arr = -1; + + for (i = 1; i <= btf__get_nr_types(local_btf); i++) + { + t = btf__type_by_id(local_btf, i); + /* we are interested only in anonymous types */ + if (t->name_off) + continue; + + if (btf_is_struct(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) && + strcmp(name, "marker_field") == 0) { + exp->local_anon_struct = i; + } else if (btf_is_union(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) && + strcmp(name, "marker_field") == 0) { + exp->local_anon_union = i; + } else if (btf_is_enum(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_enum(t)[0].name_off)) && + strcmp(name, "MARKER_ENUM_VAL") == 0) { + exp->local_anon_enum = i; + } else if (btf_is_ptr(t) && (t = btf__type_by_id(local_btf, t->type))) { + if (btf_is_func_proto(t) && (t = btf__type_by_id(local_btf, t->type)) && + btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) && + strcmp(name, "_Bool") == 0) { + /* ptr -> func_proto -> _Bool */ + exp->local_anon_func_proto_ptr = i; + } else if (btf_is_void(t)) { + /* ptr -> void */ + exp->local_anon_void_ptr = i; + } + } else if (btf_is_array(t) && (t = btf__type_by_id(local_btf, btf_array(t)->type)) && + btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) && + strcmp(name, "_Bool") == 0) { + /* _Bool[] */ + exp->local_anon_arr = i; + } + } + + exp->local_struct = find_btf_type(local_btf, "a_struct", BTF_KIND_STRUCT); + exp->local_union = find_btf_type(local_btf, "a_union", BTF_KIND_UNION); + exp->local_enum = find_btf_type(local_btf, "an_enum", BTF_KIND_ENUM); + exp->local_int = find_btf_type(local_btf, "int", BTF_KIND_INT); + exp->local_struct_typedef = find_btf_type(local_btf, "named_struct_typedef", BTF_KIND_TYPEDEF); + exp->local_func_proto_typedef = find_btf_type(local_btf, "func_proto_typedef", BTF_KIND_TYPEDEF); + exp->local_arr_typedef = find_btf_type(local_btf, "arr_typedef", BTF_KIND_TYPEDEF); + + btf__free(local_btf); + btf__free(targ_btf); + return 0; +} + +static int setup_type_id_case_success(struct core_reloc_test_case *test) { + struct core_reloc_type_id_output *exp = (void *)test->output; + struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); + int err; + + err = setup_type_id_case_local(test); + if (err) + return err; + + targ_btf = btf__parse(test->btf_src_file, NULL); + + exp->targ_struct = find_btf_type(targ_btf, "a_struct", BTF_KIND_STRUCT); + exp->targ_union = find_btf_type(targ_btf, "a_union", BTF_KIND_UNION); + exp->targ_enum = find_btf_type(targ_btf, "an_enum", BTF_KIND_ENUM); + exp->targ_int = find_btf_type(targ_btf, "int", BTF_KIND_INT); + exp->targ_struct_typedef = find_btf_type(targ_btf, "named_struct_typedef", BTF_KIND_TYPEDEF); + exp->targ_func_proto_typedef = find_btf_type(targ_btf, "func_proto_typedef", BTF_KIND_TYPEDEF); + exp->targ_arr_typedef = find_btf_type(targ_btf, "arr_typedef", BTF_KIND_TYPEDEF); + + btf__free(targ_btf); + return 0; +} + +static int setup_type_id_case_failure(struct core_reloc_test_case *test) +{ + struct core_reloc_type_id_output *exp = (void *)test->output; + int err; + + err = setup_type_id_case_local(test); + if (err) + return err; + + exp->targ_struct = 0; + exp->targ_union = 0; + exp->targ_enum = 0; + exp->targ_int = 0; + exp->targ_struct_typedef = 0; + exp->targ_func_proto_typedef = 0; + exp->targ_arr_typedef = 0; + + return 0; +} + static struct core_reloc_test_case test_cases[] = { /* validate we can find kernel image and use its BTF for relocs */ { @@ -364,7 +549,7 @@ static struct core_reloc_test_case test_cases[] = { /* validate field existence checks */ { - EXISTENCE_CASE_COMMON(existence), + FIELD_EXISTS_CASE_COMMON(existence), .input = STRUCT_TO_CHAR_PTR(core_reloc_existence) { .a = 1, .b = 2, @@ -388,7 +573,7 @@ static struct core_reloc_test_case test_cases[] = { .output_len = sizeof(struct core_reloc_existence_output), }, { - EXISTENCE_CASE_COMMON(existence___minimal), + FIELD_EXISTS_CASE_COMMON(existence___minimal), .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) { .a = 42, }, @@ -408,12 +593,12 @@ static struct core_reloc_test_case test_cases[] = { .output_len = sizeof(struct core_reloc_existence_output), }, - EXISTENCE_ERR_CASE(existence__err_int_sz), - EXISTENCE_ERR_CASE(existence__err_int_type), - EXISTENCE_ERR_CASE(existence__err_int_kind), - EXISTENCE_ERR_CASE(existence__err_arr_kind), - EXISTENCE_ERR_CASE(existence__err_arr_value_type), - EXISTENCE_ERR_CASE(existence__err_struct_type), + FIELD_EXISTS_ERR_CASE(existence__err_int_sz), + FIELD_EXISTS_ERR_CASE(existence__err_int_type), + FIELD_EXISTS_ERR_CASE(existence__err_int_kind), + FIELD_EXISTS_ERR_CASE(existence__err_arr_kind), + FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type), + FIELD_EXISTS_ERR_CASE(existence__err_struct_type), /* bitfield relocation checks */ BITFIELDS_CASE(bitfields, { @@ -452,11 +637,117 @@ static struct core_reloc_test_case test_cases[] = { /* size relocation checks */ SIZE_CASE(size), SIZE_CASE(size___diff_sz), + SIZE_ERR_CASE(size___err_ambiguous), + + /* validate type existence and size relocations */ + TYPE_BASED_CASE(type_based, { + .struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + .struct_sz = sizeof(struct a_struct), + .union_sz = sizeof(union a_union), + .enum_sz = sizeof(enum an_enum), + .typedef_named_struct_sz = sizeof(named_struct_typedef), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef), + .typedef_int_sz = sizeof(int_typedef), + .typedef_enum_sz = sizeof(enum_typedef), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef), + .typedef_func_proto_sz = sizeof(func_proto_typedef), + .typedef_arr_sz = sizeof(arr_typedef), + }), + TYPE_BASED_CASE(type_based___all_missing, { + /* all zeros */ + }), + TYPE_BASED_CASE(type_based___diff_sz, { + .struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + .struct_sz = sizeof(struct a_struct___diff_sz), + .union_sz = sizeof(union a_union___diff_sz), + .enum_sz = sizeof(enum an_enum___diff_sz), + .typedef_named_struct_sz = sizeof(named_struct_typedef___diff_sz), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff_sz), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff_sz), + .typedef_int_sz = sizeof(int_typedef___diff_sz), + .typedef_enum_sz = sizeof(enum_typedef___diff_sz), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff_sz), + .typedef_func_proto_sz = sizeof(func_proto_typedef___diff_sz), + .typedef_arr_sz = sizeof(arr_typedef___diff_sz), + }), + TYPE_BASED_CASE(type_based___incompat, { + .enum_exists = 1, + .enum_sz = sizeof(enum an_enum), + }), + TYPE_BASED_CASE(type_based___fn_wrong_args, { + .struct_exists = 1, + .struct_sz = sizeof(struct a_struct), + }), + + /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */ + TYPE_ID_CASE(type_id, setup_type_id_case_success), + TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure), + + /* Enumerator value existence and value relocations */ + ENUMVAL_CASE(enumval, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = true, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = true, + .named_val1 = 1, + .named_val2 = 2, + .anon_val1 = 0x10, + .anon_val2 = 0x20, + }), + ENUMVAL_CASE(enumval___diff, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = true, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = true, + .named_val1 = 101, + .named_val2 = 202, + .anon_val1 = 0x11, + .anon_val2 = 0x22, + }), + ENUMVAL_CASE(enumval___val3_missing, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = false, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = false, + .named_val1 = 111, + .named_val2 = 222, + .anon_val1 = 0x111, + .anon_val2 = 0x222, + }), + ENUMVAL_ERR_CASE(enumval___err_missing), }; struct data { char in[256]; char out[256]; + bool skip; uint64_t my_pid_tgid; }; @@ -472,7 +763,7 @@ void test_core_reloc(void) struct bpf_object_load_attr load_attr = {}; struct core_reloc_test_case *test_case; const char *tp_name, *probe_name; - int err, duration = 0, i, equal; + int err, i, equal; struct bpf_link *link = NULL; struct bpf_map *data_map; struct bpf_program *prog; @@ -488,11 +779,13 @@ void test_core_reloc(void) if (!test__start_subtest(test_case->case_name)) continue; - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .relaxed_core_relocs = test_case->relaxed_core_relocs, - ); + if (test_case->setup) { + err = test_case->setup(test_case); + if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err)) + continue; + } - obj = bpf_object__open_file(test_case->bpf_obj_file, &opts); + obj = bpf_object__open_file(test_case->bpf_obj_file, NULL); if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", test_case->bpf_obj_file, PTR_ERR(obj))) continue; @@ -515,15 +808,10 @@ void test_core_reloc(void) load_attr.log_level = 0; load_attr.target_btf_path = test_case->btf_src_file; err = bpf_object__load_xattr(&load_attr); - if (test_case->fails) { - CHECK(!err, "obj_load_fail", - "should fail to load prog '%s'\n", probe_name); + if (err) { + if (!test_case->fails) + CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err); goto cleanup; - } else { - if (CHECK(err, "obj_load", - "failed to load prog '%s': %d\n", - probe_name, err)) - goto cleanup; } data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); @@ -551,6 +839,16 @@ void test_core_reloc(void) /* trigger test run */ usleep(1); + if (data->skip) { + test__skip(); + goto cleanup; + } + + if (test_case->fails) { + CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name); + goto cleanup; + } + equal = memcmp(data->out, test_case->output, test_case->output_len) == 0; if (CHECK(!equal, "check_result", diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c new file mode 100644 index 000000000000..0a577a248d34 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <test_progs.h> +#include <sys/stat.h> +#include <linux/sched.h> +#include <sys/syscall.h> + +#define MAX_PATH_LEN 128 +#define MAX_FILES 7 + +#include "test_d_path.skel.h" + +static int duration; + +static struct { + __u32 cnt; + char paths[MAX_FILES][MAX_PATH_LEN]; +} src; + +static int set_pathname(int fd, pid_t pid) +{ + char buf[MAX_PATH_LEN]; + + snprintf(buf, MAX_PATH_LEN, "/proc/%d/fd/%d", pid, fd); + return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN); +} + +static int trigger_fstat_events(pid_t pid) +{ + int sockfd = -1, procfd = -1, devfd = -1; + int localfd = -1, indicatorfd = -1; + int pipefd[2] = { -1, -1 }; + struct stat fileStat; + int ret = -1; + + /* unmountable pseudo-filesystems */ + if (CHECK(pipe(pipefd) < 0, "trigger", "pipe failed\n")) + return ret; + /* unmountable pseudo-filesystems */ + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (CHECK(sockfd < 0, "trigger", "socket failed\n")) + goto out_close; + /* mountable pseudo-filesystems */ + procfd = open("/proc/self/comm", O_RDONLY); + if (CHECK(procfd < 0, "trigger", "open /proc/self/comm failed\n")) + goto out_close; + devfd = open("/dev/urandom", O_RDONLY); + if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n")) + goto out_close; + localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY, 0644); + if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n")) + goto out_close; + /* bpf_d_path will return path with (deleted) */ + remove("/tmp/d_path_loadgen.txt"); + indicatorfd = open("/tmp/", O_PATH); + if (CHECK(indicatorfd < 0, "trigger", "open /tmp/ failed\n")) + goto out_close; + + ret = set_pathname(pipefd[0], pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[0]\n")) + goto out_close; + ret = set_pathname(pipefd[1], pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[1]\n")) + goto out_close; + ret = set_pathname(sockfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for socket\n")) + goto out_close; + ret = set_pathname(procfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for proc\n")) + goto out_close; + ret = set_pathname(devfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for dev\n")) + goto out_close; + ret = set_pathname(localfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for file\n")) + goto out_close; + ret = set_pathname(indicatorfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for dir\n")) + goto out_close; + + /* triggers vfs_getattr */ + fstat(pipefd[0], &fileStat); + fstat(pipefd[1], &fileStat); + fstat(sockfd, &fileStat); + fstat(procfd, &fileStat); + fstat(devfd, &fileStat); + fstat(localfd, &fileStat); + fstat(indicatorfd, &fileStat); + +out_close: + /* triggers filp_close */ + close(pipefd[0]); + close(pipefd[1]); + close(sockfd); + close(procfd); + close(devfd); + close(localfd); + close(indicatorfd); + return ret; +} + +void test_d_path(void) +{ + struct test_d_path__bss *bss; + struct test_d_path *skel; + int err; + + skel = test_d_path__open_and_load(); + if (CHECK(!skel, "setup", "d_path skeleton failed\n")) + goto cleanup; + + err = test_d_path__attach(skel); + if (CHECK(err, "setup", "attach failed: %d\n", err)) + goto cleanup; + + bss = skel->bss; + bss->my_pid = getpid(); + + err = trigger_fstat_events(bss->my_pid); + if (err < 0) + goto cleanup; + + if (CHECK(!bss->called_stat, + "stat", + "trampoline for security_inode_getattr was not called\n")) + goto cleanup; + + if (CHECK(!bss->called_close, + "close", + "trampoline for filp_close was not called\n")) + goto cleanup; + + for (int i = 0; i < MAX_FILES; i++) { + CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN), + "check", + "failed to get stat path[%d]: %s vs %s\n", + i, src.paths[i], bss->paths_stat[i]); + CHECK(strncmp(src.paths[i], bss->paths_close[i], MAX_PATH_LEN), + "check", + "failed to get close path[%d]: %s vs %s\n", + i, src.paths[i], bss->paths_close[i]); + /* The d_path helper returns size plus NUL char, hence + 1 */ + CHECK(bss->rets_stat[i] != strlen(bss->paths_stat[i]) + 1, + "check", + "failed to match stat return [%d]: %d vs %zd [%s]\n", + i, bss->rets_stat[i], strlen(bss->paths_stat[i]) + 1, + bss->paths_stat[i]); + CHECK(bss->rets_close[i] != strlen(bss->paths_stat[i]) + 1, + "check", + "failed to match stat return [%d]: %d vs %zd [%s]\n", + i, bss->rets_close[i], strlen(bss->paths_close[i]) + 1, + bss->paths_stat[i]); + } + +cleanup: + test_d_path__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 197d0d217b56..5c0448910426 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -2,36 +2,79 @@ /* Copyright (c) 2019 Facebook */ #include <test_progs.h> #include <network_helpers.h> +#include <bpf/btf.h> + +typedef int (*test_cb)(struct bpf_object *obj); + +static int check_data_map(struct bpf_object *obj, int prog_cnt, bool reset) +{ + struct bpf_map *data_map = NULL, *map; + __u64 *result = NULL; + const int zero = 0; + __u32 duration = 0; + int ret = -1, i; + + result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64)); + if (CHECK(!result, "alloc_memory", "failed to alloc memory")) + return -ENOMEM; + + bpf_object__for_each_map(map, obj) + if (bpf_map__is_internal(map)) { + data_map = map; + break; + } + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto out; + + ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result); + if (CHECK(ret, "get_result", + "failed to get output data: %d\n", ret)) + goto out; + + for (i = 0; i < prog_cnt; i++) { + if (CHECK(result[i] != 1, "result", + "fexit_bpf2bpf result[%d] failed err %llu\n", + i, result[i])) + goto out; + result[i] = 0; + } + if (reset) { + ret = bpf_map_update_elem(bpf_map__fd(data_map), &zero, result, 0); + if (CHECK(ret, "reset_result", "failed to reset result\n")) + goto out; + } + + ret = 0; +out: + free(result); + return ret; +} static void test_fexit_bpf2bpf_common(const char *obj_file, const char *target_obj_file, int prog_cnt, const char **prog_name, - bool run_prog) + bool run_prog, + test_cb cb) { - struct bpf_object *obj = NULL, *pkt_obj; - int err, pkt_fd, i; - struct bpf_link **link = NULL; + struct bpf_object *obj = NULL, *tgt_obj; struct bpf_program **prog = NULL; + struct bpf_link **link = NULL; __u32 duration = 0, retval; - struct bpf_map *data_map; - const int zero = 0; - __u64 *result = NULL; + int err, tgt_fd, i; err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, - &pkt_obj, &pkt_fd); + &tgt_obj, &tgt_fd); if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", target_obj_file, err, errno)) return; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .attach_prog_fd = pkt_fd, + .attach_prog_fd = tgt_fd, ); link = calloc(sizeof(struct bpf_link *), prog_cnt); prog = calloc(sizeof(struct bpf_program *), prog_cnt); - result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64)); - if (CHECK(!link || !prog || !result, "alloc_memory", - "failed to alloc memory")) + if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory")) goto close_prog; obj = bpf_object__open_file(obj_file, &opts); @@ -53,39 +96,33 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, goto close_prog; } - if (!run_prog) - goto close_prog; + if (cb) { + err = cb(obj); + if (err) + goto close_prog; + } - data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss"); - if (CHECK(!data_map, "find_data_map", "data map not found\n")) + if (!run_prog) goto close_prog; - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) + if (check_data_map(obj, prog_cnt, false)) goto close_prog; - for (i = 0; i < prog_cnt; i++) - if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %llu\n", - result[i])) - goto close_prog; - close_prog: for (i = 0; i < prog_cnt; i++) if (!IS_ERR_OR_NULL(link[i])) bpf_link__destroy(link[i]); if (!IS_ERR_OR_NULL(obj)) bpf_object__close(obj); - bpf_object__close(pkt_obj); + bpf_object__close(tgt_obj); free(link); free(prog); - free(result); } static void test_target_no_callees(void) @@ -96,7 +133,7 @@ static void test_target_no_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o", "./test_pkt_md_access.o", ARRAY_SIZE(prog_name), - prog_name, true); + prog_name, true, NULL); } static void test_target_yes_callees(void) @@ -110,7 +147,7 @@ static void test_target_yes_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name, true); + prog_name, true, NULL); } static void test_func_replace(void) @@ -123,11 +160,12 @@ static void test_func_replace(void) "freplace/get_skb_len", "freplace/get_skb_ifindex", "freplace/get_constant", + "freplace/test_pkt_write_access_subprog", }; test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name, true); + prog_name, true, NULL); } static void test_func_replace_verify(void) @@ -138,13 +176,198 @@ static void test_func_replace_verify(void) test_fexit_bpf2bpf_common("./freplace_connect4.o", "./connect4_prog.o", ARRAY_SIZE(prog_name), - prog_name, false); + prog_name, false, NULL); +} + +static int test_second_attach(struct bpf_object *obj) +{ + const char *prog_name = "freplace/get_constant"; + const char *tgt_name = prog_name + 9; /* cut off freplace/ */ + const char *tgt_obj_file = "./test_pkt_access.o"; + struct bpf_program *prog = NULL; + struct bpf_object *tgt_obj; + __u32 duration = 0, retval; + struct bpf_link *link; + int err = 0, tgt_fd; + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name)) + return -ENOENT; + + err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, + &tgt_obj, &tgt_fd); + if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n", + tgt_obj_file, err, errno)) + return err; + + link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name); + if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd)) + goto out; + + err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration)) + goto out; + + err = check_data_map(obj, 1, true); + if (err) + goto out; + +out: + bpf_link__destroy(link); + bpf_object__close(tgt_obj); + return err; +} + +static void test_func_replace_multi(void) +{ + const char *prog_name[] = { + "freplace/get_constant", + }; + test_fexit_bpf2bpf_common("./freplace_get_constant.o", + "./test_pkt_access.o", + ARRAY_SIZE(prog_name), + prog_name, true, test_second_attach); +} + +static void test_fmod_ret_freplace(void) +{ + struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL; + const char *freplace_name = "./freplace_get_constant.o"; + const char *fmod_ret_name = "./fmod_ret_freplace.o"; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + const char *tgt_name = "./test_pkt_access.o"; + struct bpf_link *freplace_link = NULL; + struct bpf_program *prog; + __u32 duration = 0; + int err, pkt_fd; + + err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + /* the target prog should load fine */ + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", + tgt_name, err, errno)) + return; + opts.attach_prog_fd = pkt_fd; + + freplace_obj = bpf_object__open_file(freplace_name, &opts); + if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open", + "failed to open %s: %ld\n", freplace_name, + PTR_ERR(freplace_obj))) + goto out; + + err = bpf_object__load(freplace_obj); + if (CHECK(err, "freplace_obj_load", "err %d\n", err)) + goto out; + + prog = bpf_program__next(NULL, freplace_obj); + freplace_link = bpf_program__attach_trace(prog); + if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n")) + goto out; + + opts.attach_prog_fd = bpf_program__fd(prog); + fmod_obj = bpf_object__open_file(fmod_ret_name, &opts); + if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open", + "failed to open %s: %ld\n", fmod_ret_name, + PTR_ERR(fmod_obj))) + goto out; + + err = bpf_object__load(fmod_obj); + if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n")) + goto out; + +out: + bpf_link__destroy(freplace_link); + bpf_object__close(freplace_obj); + bpf_object__close(fmod_obj); + bpf_object__close(pkt_obj); +} + + +static void test_func_sockmap_update(void) +{ + const char *prog_name[] = { + "freplace/cls_redirect", + }; + test_fexit_bpf2bpf_common("./freplace_cls_redirect.o", + "./test_cls_redirect.o", + ARRAY_SIZE(prog_name), + prog_name, false, NULL); +} + +static void test_obj_load_failure_common(const char *obj_file, + const char *target_obj_file) + +{ + /* + * standalone test that asserts failure to load freplace prog + * because of invalid return code. + */ + struct bpf_object *obj = NULL, *pkt_obj; + int err, pkt_fd; + __u32 duration = 0; + + err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + /* the target prog should load fine */ + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", + target_obj_file, err, errno)) + return; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .attach_prog_fd = pkt_fd, + ); + + obj = bpf_object__open_file(obj_file, &opts); + if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", + "failed to open %s: %ld\n", obj_file, + PTR_ERR(obj))) + goto close_prog; + + /* It should fail to load the program */ + err = bpf_object__load(obj); + if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) + goto close_prog; + +close_prog: + if (!IS_ERR_OR_NULL(obj)) + bpf_object__close(obj); + bpf_object__close(pkt_obj); +} + +static void test_func_replace_return_code(void) +{ + /* test invalid return code in the replaced program */ + test_obj_load_failure_common("./freplace_connect_v4_prog.o", + "./connect4_prog.o"); +} + +static void test_func_map_prog_compatibility(void) +{ + /* test with spin lock map value in the replaced program */ + test_obj_load_failure_common("./freplace_attach_probe.o", + "./test_attach_probe.o"); } void test_fexit_bpf2bpf(void) { - test_target_no_callees(); - test_target_yes_callees(); - test_func_replace(); - test_func_replace_verify(); + if (test__start_subtest("target_no_callees")) + test_target_no_callees(); + if (test__start_subtest("target_yes_callees")) + test_target_yes_callees(); + if (test__start_subtest("func_replace")) + test_func_replace(); + if (test__start_subtest("func_replace_verify")) + test_func_replace_verify(); + if (test__start_subtest("func_sockmap_update")) + test_func_sockmap_update(); + if (test__start_subtest("func_replace_return_code")) + test_func_replace_return_code(); + if (test__start_subtest("func_map_prog_compatibility")) + test_func_map_prog_compatibility(); + if (test__start_subtest("func_replace_multi")) + test_func_replace_multi(); + if (test__start_subtest("fmod_ret_freplace")) + test_fmod_ret_freplace(); } diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c index 3bdaa5a40744..ee46b11f1f9a 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -12,7 +12,8 @@ void test_global_data_init(void) size_t sz; obj = bpf_object__open_file(file, NULL); - if (CHECK_FAIL(!obj)) + err = libbpf_get_error(obj); + if (CHECK_FAIL(err)) return; map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c index e3d6777226a8..b295969b263b 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -7,39 +7,28 @@ static int duration; -static __u64 kallsyms_find(const char *sym) -{ - char type, name[500]; - __u64 addr, res = 0; - FILE *f; - - f = fopen("/proc/kallsyms", "r"); - if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno)) - return 0; - - while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) { - if (strcmp(name, sym) == 0) { - res = addr; - goto out; - } - } - - CHECK(false, "not_found", "symbol %s not found\n", sym); -out: - fclose(f); - return res; -} - void test_ksyms(void) { - __u64 link_fops_addr = kallsyms_find("bpf_link_fops"); const char *btf_path = "/sys/kernel/btf/vmlinux"; struct test_ksyms *skel; struct test_ksyms__data *data; + __u64 link_fops_addr, per_cpu_start_addr; struct stat st; __u64 btf_size; int err; + err = kallsyms_find("bpf_link_fops", &link_fops_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n")) + return; + + err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n")) + return; + if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno)) return; btf_size = st.st_size; @@ -63,8 +52,9 @@ void test_ksyms(void) "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0); CHECK(data->out__btf_size != btf_size, "btf_size", "got %llu, exp %llu\n", data->out__btf_size, btf_size); - CHECK(data->out__per_cpu_start != 0, "__per_cpu_start", - "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0); + CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start", + "got %llu, exp %llu\n", data->out__per_cpu_start, + per_cpu_start_addr); cleanup: test_ksyms__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c new file mode 100644 index 000000000000..b58b775d19f3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "test_ksyms_btf.skel.h" +#include "test_ksyms_btf_null_check.skel.h" + +static int duration; + +static void test_basic(void) +{ + __u64 runqueues_addr, bpf_prog_active_addr; + __u32 this_rq_cpu; + int this_bpf_prog_active; + struct test_ksyms_btf *skel = NULL; + struct test_ksyms_btf__data *data; + int err; + + err = kallsyms_find("runqueues", &runqueues_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n")) + return; + + err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n")) + return; + + skel = test_ksyms_btf__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) + goto cleanup; + + err = test_ksyms_btf__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr", + "got %llu, exp %llu\n", + (unsigned long long)data->out__runqueues_addr, + (unsigned long long)runqueues_addr); + CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr", + "got %llu, exp %llu\n", + (unsigned long long)data->out__bpf_prog_active_addr, + (unsigned long long)bpf_prog_active_addr); + + CHECK(data->out__rq_cpu == -1, "rq_cpu", + "got %u, exp != -1\n", data->out__rq_cpu); + CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active", + "got %d, exp >= 0\n", data->out__bpf_prog_active); + CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu", + "got %u, exp 0\n", data->out__cpu_0_rq_cpu); + + this_rq_cpu = data->out__this_rq_cpu; + CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu", + "got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu); + + this_bpf_prog_active = data->out__this_bpf_prog_active; + CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active", + "got %d, exp %d\n", this_bpf_prog_active, + data->out__bpf_prog_active); + +cleanup: + test_ksyms_btf__destroy(skel); +} + +static void test_null_check(void) +{ + struct test_ksyms_btf_null_check *skel; + + skel = test_ksyms_btf_null_check__open_and_load(); + CHECK(skel, "skel_open", "unexpected load of a prog missing null check\n"); + + test_ksyms_btf_null_check__destroy(skel); +} + +void test_ksyms_btf(void) +{ + int percpu_datasec; + struct btf *btf; + + btf = libbpf_find_kernel_btf(); + if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", + PTR_ERR(btf))) + return; + + percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", + BTF_KIND_DATASEC); + btf__free(btf); + if (percpu_datasec < 0) { + printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n", + __func__); + test__skip(); + return; + } + + if (test__start_subtest("basic")) + test_basic(); + + if (test__start_subtest("null_check")) + test_null_check(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index c2d373e294bb..8073105548ff 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -80,9 +80,8 @@ out: void test_l4lb_all(void) { - const char *file1 = "./test_l4lb.o"; - const char *file2 = "./test_l4lb_noinline.o"; - - test_l4lb(file1); - test_l4lb(file2); + if (test__start_subtest("l4lb_inline")) + test_l4lb("test_l4lb.o"); + if (test__start_subtest("l4lb_noinline")) + test_l4lb("test_l4lb_noinline.o"); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c new file mode 100644 index 000000000000..14a31109dd0e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */ + +#include <test_progs.h> +#include "test_map_init.skel.h" + +#define TEST_VALUE 0x1234 +#define FILL_VALUE 0xdeadbeef + +static int nr_cpus; +static int duration; + +typedef unsigned long long map_key_t; +typedef unsigned long long map_value_t; +typedef struct { + map_value_t v; /* padding */ +} __bpf_percpu_val_align pcpu_map_value_t; + + +static int map_populate(int map_fd, int num) +{ + pcpu_map_value_t value[nr_cpus]; + int i, err; + map_key_t key; + + for (i = 0; i < nr_cpus; i++) + bpf_percpu(value, i) = FILL_VALUE; + + for (key = 1; key <= num; key++) { + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz, + int *map_fd, int populate) +{ + struct test_map_init *skel; + int err; + + skel = test_map_init__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + err = bpf_map__set_type(skel->maps.hashmap1, map_type); + if (!ASSERT_OK(err, "bpf_map__set_type")) + goto error; + + err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto error; + + err = test_map_init__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto error; + + *map_fd = bpf_map__fd(skel->maps.hashmap1); + if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n")) + goto error; + + err = map_populate(*map_fd, populate); + if (!ASSERT_OK(err, "map_populate")) + goto error_map; + + return skel; + +error_map: + close(*map_fd); +error: + test_map_init__destroy(skel); + return NULL; +} + +/* executes bpf program that updates map with key, value */ +static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key, + map_value_t value) +{ + struct test_map_init__bss *bss; + + bss = skel->bss; + + bss->inKey = key; + bss->inValue = value; + bss->inPid = getpid(); + + if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach")) + return -1; + + /* Let tracepoint trigger */ + syscall(__NR_getpgid); + + test_map_init__detach(skel); + + return 0; +} + +static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected) +{ + int i, nzCnt = 0; + map_value_t val; + + for (i = 0; i < nr_cpus; i++) { + val = bpf_percpu(value, i); + if (val) { + if (CHECK(val != expected, "map value", + "unexpected for cpu %d: 0x%llx\n", i, val)) + return -1; + nzCnt++; + } + } + + if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n", + nzCnt)) + return -1; + + return 0; +} + +/* Add key=1 elem with values set for all CPUs + * Delete elem key=1 + * Run bpf prog that inserts new key=1 elem with value=0x1234 + * (bpf prog can only set value for current CPU) + * Lookup Key=1 and check value is as expected for all CPUs: + * value set by bpf prog for one CPU, 0 for all others + */ +static void test_pcpu_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* max 1 elem in map so insertion is forced to reuse freed entry */ + skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* delete element so the entry can be re-used*/ + key = 1; + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "bpf_map_delete_elem")) + goto cleanup; + + /* run bpf prog that inserts new elem, re-using the slot just freed */ + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=1 was re-created by bpf prog */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +/* Add key=1 and key=2 elems with values set for all CPUs + * Run bpf prog that inserts new key=3 elem + * (only for current cpu; other cpus should have initial value = 0) + * Lookup Key=1 and check value is as expected for all CPUs + */ +static void test_pcpu_lru_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* Set up LRU map with 2 elements, values filled for all CPUs. + * With these 2 elements, the LRU map is full + */ + skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* run bpf prog that inserts new key=3 element, re-using LRU slot */ + key = 3; + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=3 replaced one of earlier elements */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +void test_map_init(void) +{ + nr_cpus = bpf_num_possible_cpus(); + if (nr_cpus <= 1) { + printf("%s:SKIP: >1 cpu needed for this test\n", __func__); + test__skip(); + return; + } + + if (test__start_subtest("pcpu_map_init")) + test_pcpu_map_init(); + if (test__start_subtest("pcpu_lru_map_init")) + test_pcpu_lru_map_init(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c new file mode 100644 index 000000000000..2c53eade88e3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/metadata.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <test_progs.h> +#include <cgroup_helpers.h> +#include <network_helpers.h> + +#include "metadata_unused.skel.h" +#include "metadata_used.skel.h" + +static int duration; + +static int prog_holds_map(int prog_fd, int map_fd) +{ + struct bpf_prog_info prog_info = {}; + struct bpf_prog_info map_info = {}; + __u32 prog_info_len; + __u32 map_info_len; + __u32 *map_ids; + int nr_maps; + int ret; + int i; + + map_info_len = sizeof(map_info); + ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); + if (ret) + return -errno; + + prog_info_len = sizeof(prog_info); + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + if (ret) + return -errno; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32)); + if (!map_ids) + return -ENOMEM; + + nr_maps = prog_info.nr_map_ids; + memset(&prog_info, 0, sizeof(prog_info)); + prog_info.nr_map_ids = nr_maps; + prog_info.map_ids = ptr_to_u64(map_ids); + prog_info_len = sizeof(prog_info); + + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + if (ret) { + ret = -errno; + goto free_map_ids; + } + + ret = -ENOENT; + for (i = 0; i < prog_info.nr_map_ids; i++) { + if (map_ids[i] == map_info.id) { + ret = 0; + break; + } + } + +free_map_ids: + free(map_ids); + return ret; +} + +static void test_metadata_unused(void) +{ + struct metadata_unused *obj; + int err; + + obj = metadata_unused__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + err = prog_holds_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata)); + if (CHECK(err, "prog-holds-rodata", "errno: %d", err)) + return; + + /* Assert that we can access the metadata in skel and the values are + * what we expect. + */ + if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "foo", + sizeof(obj->rodata->bpf_metadata_a)), + "bpf_metadata_a", "expected \"foo\", value differ")) + goto close_bpf_object; + if (CHECK(obj->rodata->bpf_metadata_b != 1, "bpf_metadata_b", + "expected 1, got %d", obj->rodata->bpf_metadata_b)) + goto close_bpf_object; + + /* Assert that binding metadata map to prog again succeeds. */ + err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata), NULL); + CHECK(err, "rebind_map", "errno %d, expected 0", errno); + +close_bpf_object: + metadata_unused__destroy(obj); +} + +static void test_metadata_used(void) +{ + struct metadata_used *obj; + int err; + + obj = metadata_used__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + err = prog_holds_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata)); + if (CHECK(err, "prog-holds-rodata", "errno: %d", err)) + return; + + /* Assert that we can access the metadata in skel and the values are + * what we expect. + */ + if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "bar", + sizeof(obj->rodata->bpf_metadata_a)), + "metadata_a", "expected \"bar\", value differ")) + goto close_bpf_object; + if (CHECK(obj->rodata->bpf_metadata_b != 2, "metadata_b", + "expected 2, got %d", obj->rodata->bpf_metadata_b)) + goto close_bpf_object; + + /* Assert that binding metadata map to prog again succeeds. */ + err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata), NULL); + CHECK(err, "rebind_map", "errno %d, expected 0", errno); + +close_bpf_object: + metadata_used__destroy(obj); +} + +void test_metadata(void) +{ + if (test__start_subtest("unused")) + test_metadata_unused(); + + if (test__start_subtest("used")) + test_metadata_used(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c new file mode 100644 index 000000000000..673d38395253 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> +#include <linux/bpf.h> +#include "test_pe_preserve_elems.skel.h" + +static int duration; + +static void test_one_map(struct bpf_map *map, struct bpf_program *prog, + bool has_share_pe) +{ + int err, key = 0, pfd = -1, mfd = bpf_map__fd(map); + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts); + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + + pfd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, + -1 /* cpu 0 */, -1 /* group id */, 0 /* flags */); + if (CHECK(pfd < 0, "perf_event_open", "failed\n")) + return; + + err = bpf_map_update_elem(mfd, &key, &pfd, BPF_ANY); + close(pfd); + if (CHECK(err < 0, "bpf_map_update_elem", "failed\n")) + return; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n")) + return; + if (CHECK(opts.retval != 0, "bpf_perf_event_read_value", + "failed with %d\n", opts.retval)) + return; + + /* closing mfd, prog still holds a reference on map */ + close(mfd); + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n")) + return; + + if (has_share_pe) { + CHECK(opts.retval != 0, "bpf_perf_event_read_value", + "failed with %d\n", opts.retval); + } else { + CHECK(opts.retval != -ENOENT, "bpf_perf_event_read_value", + "should have failed with %d, but got %d\n", -ENOENT, + opts.retval); + } +} + +void test_pe_preserve_elems(void) +{ + struct test_pe_preserve_elems *skel; + + skel = test_pe_preserve_elems__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + test_one_map(skel->maps.array_1, skel->progs.read_array_1, false); + test_one_map(skel->maps.array_2, skel->progs.read_array_2, true); + + test_pe_preserve_elems__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index c33ec180b3f2..ca9f0895ec84 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -7,6 +7,8 @@ #include "test_perf_buffer.skel.h" #include "bpf/libbpf_internal.h" +static int duration; + /* AddressSanitizer sometimes crashes due to data dereference below, due to * this being mmap()'ed memory. Disable instrumentation with * no_sanitize_address attribute @@ -24,13 +26,31 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) CPU_SET(cpu, cpu_seen); } +int trigger_on_cpu(int cpu) +{ + cpu_set_t cpu_set; + int err; + + CPU_ZERO(&cpu_set); + CPU_SET(cpu, &cpu_set); + + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", cpu, err)) + return err; + + usleep(1); + + return 0; +} + void test_perf_buffer(void) { - int err, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; + int err, on_len, nr_on_cpus = 0, nr_cpus, i; struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; - cpu_set_t cpu_set, cpu_seen; + cpu_set_t cpu_seen; struct perf_buffer *pb; + int last_fd = -1, fd; bool *online; nr_cpus = libbpf_num_possible_cpus(); @@ -63,6 +83,9 @@ void test_perf_buffer(void) if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) goto out_close; + CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd", + "bad fd: %d\n", perf_buffer__epoll_fd(pb)); + /* trigger kprobe on every CPU */ CPU_ZERO(&cpu_seen); for (i = 0; i < nr_cpus; i++) { @@ -71,16 +94,8 @@ void test_perf_buffer(void) continue; } - CPU_ZERO(&cpu_set); - CPU_SET(i, &cpu_set); - - err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), - &cpu_set); - if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", - i, err)) + if (trigger_on_cpu(i)) goto out_close; - - usleep(1); } /* read perf buffer */ @@ -92,6 +107,34 @@ void test_perf_buffer(void) "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; + if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt", + "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus)) + goto out_close; + + for (i = 0; i < nr_cpus; i++) { + if (i >= on_len || !online[i]) + continue; + + fd = perf_buffer__buffer_fd(pb, i); + CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd); + last_fd = fd; + + err = perf_buffer__consume_buffer(pb, i); + if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err)) + goto out_close; + + CPU_CLR(i, &cpu_seen); + if (trigger_on_cpu(i)) + goto out_close; + + err = perf_buffer__consume_buffer(pb, i); + if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err)) + goto out_close; + + if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i)) + goto out_close; + } + out_free_pb: perf_buffer__free(pb); out_close: diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c index 041952524c55..fcf54b3a1dd0 100644 --- a/tools/testing/selftests/bpf/prog_tests/pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/pinning.c @@ -37,7 +37,7 @@ void test_pinning(void) struct stat statbuf = {}; struct bpf_object *obj; struct bpf_map *map; - int err; + int err, map_fd; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .pin_root_path = custpath, ); @@ -213,6 +213,53 @@ void test_pinning(void) if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) goto out; + /* remove the custom pin path to re-test it with reuse fd below */ + err = unlink(custpinpath); + if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno)) + goto out; + + err = rmdir(custpath); + if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno)) + goto out; + + bpf_object__close(obj); + + /* test pinning at custom path with reuse fd */ + obj = bpf_object__open_file(file, NULL); + err = libbpf_get_error(obj); + if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32), + sizeof(__u64), 1, 0); + if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd)) + goto out; + + map = bpf_object__find_map_by_name(obj, "pinmap"); + if (CHECK(!map, "find map", "NULL map")) + goto close_map_fd; + + err = bpf_map__reuse_fd(map, map_fd); + if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + err = bpf_map__set_pin_path(map, custpinpath); + if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + err = bpf_object__load(obj); + if (CHECK(err, "custom load", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + /* check that pinmap was pinned at the custom path */ + err = stat(custpinpath, &statbuf); + if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) + goto close_map_fd; + +close_map_fd: + close(map_fd); out: unlink(pinpath); unlink(nopinpath); diff --git a/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c new file mode 100644 index 000000000000..e419298132b5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "test_probe_read_user_str.skel.h" + +static const char str1[] = "mestring"; +static const char str2[] = "mestringalittlebigger"; +static const char str3[] = "mestringblubblubblubblubblub"; + +static int test_one_str(struct test_probe_read_user_str *skel, const char *str, + size_t len) +{ + int err, duration = 0; + char buf[256]; + + /* Ensure bytes after string are ones */ + memset(buf, 1, sizeof(buf)); + memcpy(buf, str, len); + + /* Give prog our userspace pointer */ + skel->bss->user_ptr = buf; + + /* Trigger tracepoint */ + usleep(1); + + /* Did helper fail? */ + if (CHECK(skel->bss->ret < 0, "prog_ret", "prog returned: %ld\n", + skel->bss->ret)) + return 1; + + /* Check that string was copied correctly */ + err = memcmp(skel->bss->buf, str, len); + if (CHECK(err, "memcmp", "prog copied wrong string")) + return 1; + + /* Now check that no extra trailing bytes were copied */ + memset(buf, 0, sizeof(buf)); + err = memcmp(skel->bss->buf + len, buf, sizeof(buf) - len); + if (CHECK(err, "memcmp", "trailing bytes were not stripped")) + return 1; + + return 0; +} + +void test_probe_read_user_str(void) +{ + struct test_probe_read_user_str *skel; + int err, duration = 0; + + skel = test_probe_read_user_str__open_and_load(); + if (CHECK(!skel, "test_probe_read_user_str__open_and_load", + "skeleton open and load failed\n")) + return; + + /* Give pid to bpf prog so it doesn't read from anyone else */ + skel->bss->pid = getpid(); + + err = test_probe_read_user_str__attach(skel); + if (CHECK(err, "test_probe_read_user_str__attach", + "skeleton attach failed: %d\n", err)) + goto out; + + if (test_one_str(skel, str1, sizeof(str1))) + goto out; + if (test_one_str(skel, str2, sizeof(str2))) + goto out; + if (test_one_str(skel, str3, sizeof(str3))) + goto out; + +out: + test_probe_read_user_str__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c new file mode 100644 index 000000000000..c5fb191874ac --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> +#include <linux/bpf.h> +#include "bpf/libbpf_internal.h" +#include "test_raw_tp_test_run.skel.h" + +static int duration; + +void test_raw_tp_test_run(void) +{ + struct bpf_prog_test_run_attr test_attr = {}; + int comm_fd = -1, err, nr_online, i, prog_fd; + __u64 args[2] = {0x1234ULL, 0x5678ULL}; + int expected_retval = 0x1234 + 0x5678; + struct test_raw_tp_test_run *skel; + char buf[] = "new_name"; + bool *online = NULL; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = args, + .ctx_size_in = sizeof(args), + .flags = BPF_F_TEST_RUN_ON_CPU, + ); + + err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online, + &nr_online); + if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err)) + return; + + skel = test_raw_tp_test_run__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + goto cleanup; + + err = test_raw_tp_test_run__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); + if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno)) + goto cleanup; + + err = write(comm_fd, buf, sizeof(buf)); + CHECK(err < 0, "task rename", "err %d", errno); + + CHECK(skel->bss->count == 0, "check_count", "didn't increase\n"); + CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n"); + + prog_fd = bpf_program__fd(skel->progs.rename); + test_attr.prog_fd = prog_fd; + test_attr.ctx_in = args; + test_attr.ctx_size_in = sizeof(__u64); + + err = bpf_prog_test_run_xattr(&test_attr); + CHECK(err == 0, "test_run", "should fail for too small ctx\n"); + + test_attr.ctx_size_in = sizeof(args); + err = bpf_prog_test_run_xattr(&test_attr); + CHECK(err < 0, "test_run", "err %d\n", errno); + CHECK(test_attr.retval != expected_retval, "check_retval", + "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval); + + for (i = 0; i < nr_online; i++) { + if (!online[i]) + continue; + + opts.cpu = i; + opts.retval = 0; + err = bpf_prog_test_run_opts(prog_fd, &opts); + CHECK(err < 0, "test_run_opts", "err %d\n", errno); + CHECK(skel->data->on_cpu != i, "check_on_cpu", + "expect %d got %d\n", i, skel->data->on_cpu); + CHECK(opts.retval != expected_retval, + "check_retval", "expect 0x%x, got 0x%x\n", + expected_retval, opts.retval); + } + + /* invalid cpu ID should fail with ENXIO */ + opts.cpu = 0xffffffff; + err = bpf_prog_test_run_opts(prog_fd, &opts); + CHECK(err != -1 || errno != ENXIO, + "test_run_opts_fail", + "should failed with ENXIO\n"); + + /* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */ + opts.cpu = 1; + opts.flags = 0; + err = bpf_prog_test_run_opts(prog_fd, &opts); + CHECK(err != -1 || errno != EINVAL, + "test_run_opts_fail", + "should failed with EINVAL\n"); + +cleanup: + close(comm_fd); + test_raw_tp_test_run__destroy(skel); + free(online); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index fc0d7f4f02cf..ac1ee10cffd8 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -27,7 +27,7 @@ void test_reference_tracking(void) const char *title; /* Ignore .text sections */ - title = bpf_program__title(prog, false); + title = bpf_program__section_name(prog); if (strstr(title, ".text") != NULL) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index 3b127cab4864..6ace5e9efec1 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -28,6 +28,12 @@ struct symbol test_symbols[] = { { "func", BTF_KIND_FUNC, -1 }, }; +/* Align the .BTF_ids section to 4 bytes */ +asm ( +".pushsection " BTF_IDS_SECTION " ,\"a\"; \n" +".balign 4, 0; \n" +".popsection; \n"); + BTF_ID_LIST(test_list_local) BTF_ID_UNUSED BTF_ID(typedef, S) @@ -47,6 +53,15 @@ BTF_ID(struct, S) BTF_ID(union, U) BTF_ID(func, func) +BTF_SET_START(test_set) +BTF_ID(typedef, S) +BTF_ID(typedef, T) +BTF_ID(typedef, U) +BTF_ID(struct, S) +BTF_ID(union, U) +BTF_ID(func, func) +BTF_SET_END(test_set) + static int __resolve_symbol(struct btf *btf, int type_id) { @@ -116,12 +131,40 @@ int test_resolve_btfids(void) */ for (j = 0; j < ARRAY_SIZE(test_lists); j++) { test_list = test_lists[j]; - for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) { + for (i = 0; i < ARRAY_SIZE(test_symbols); i++) { ret = CHECK(test_list[i] != test_symbols[i].id, "id_check", "wrong ID for %s (%d != %d)\n", test_symbols[i].name, test_list[i], test_symbols[i].id); + if (ret) + return ret; + } + } + + /* Check BTF_SET_START(test_set) IDs */ + for (i = 0; i < test_set.cnt; i++) { + bool found = false; + + for (j = 0; j < ARRAY_SIZE(test_symbols); j++) { + if (test_symbols[j].id != test_set.ids[i]) + continue; + found = true; + break; + } + + ret = CHECK(!found, "id_check", + "ID %d not found in test_symbols\n", + test_set.ids[i]); + if (ret) + break; + + if (i > 0) { + ret = CHECK(test_set.ids[i - 1] > test_set.ids[i], + "sort_check", + "test_set is not sorted\n"); + if (ret) + break; } } diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 47fa04adc147..3a469099f30d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -49,7 +49,7 @@ configure_stack(void) sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf", "direct-action object-file ./test_sk_assign.o", "section classifier/sk_assign_test", - (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : ""); + (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose"); if (CHECK(system(tc_cmd), "BPF load failed;", "run with -vv for more info\n")) return false; @@ -265,9 +265,10 @@ void test_sk_assign(void) TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false), TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true), }; - int server = -1; + __s64 server = -1; int server_map; int self_net; + int i; self_net = open(NS_SELF, O_RDONLY); if (CHECK_FAIL(self_net < 0)) { @@ -286,7 +287,7 @@ void test_sk_assign(void) goto cleanup; } - for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { + for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { struct test_sk_cfg *test = &tests[i]; const struct sockaddr *addr; const int zero = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c new file mode 100644 index 000000000000..686b40f11a45 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <linux/btf.h> +#include "netif_receive_skb.skel.h" + +/* Demonstrate that bpf_snprintf_btf succeeds and that various data types + * are formatted correctly. + */ +void test_snprintf_btf(void) +{ + struct netif_receive_skb *skel; + struct netif_receive_skb__bss *bss; + int err, duration = 0; + + skel = netif_receive_skb__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = netif_receive_skb__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + goto cleanup; + + bss = skel->bss; + + err = netif_receive_skb__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* generate receive event */ + err = system("ping -c 1 127.0.0.1 > /dev/null"); + if (CHECK(err, "system", "ping failed: %d\n", err)) + goto cleanup; + + if (bss->skip) { + printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); + test__skip(); + goto cleanup; + } + + /* + * Make sure netif_receive_skb program was triggered + * and it set expected return values from bpf_trace_printk()s + * and all tests ran. + */ + if (CHECK(bss->ret <= 0, + "bpf_snprintf_btf: got return value", + "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests)) + goto cleanup; + + if (CHECK(bss->ran_subtests == 0, "check if subtests ran", + "no subtests ran, did BPF program run?")) + goto cleanup; + + if (CHECK(bss->num_subtests != bss->ran_subtests, + "check all subtests ran", + "only ran %d of %d tests\n", bss->num_subtests, + bss->ran_subtests)) + goto cleanup; + +cleanup: + netif_receive_skb__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c new file mode 100644 index 000000000000..af87118e748e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <linux/compiler.h> + +#include "network_helpers.h" +#include "cgroup_helpers.h" +#include "test_progs.h" +#include "bpf_rlimit.h" +#include "test_sock_fields.skel.h" + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, +}; + +struct bpf_spinlock_cnt { + struct bpf_spin_lock lock; + __u32 cnt; +}; + +#define PARENT_CGROUP "/test-bpf-sock-fields" +#define CHILD_CGROUP "/test-bpf-sock-fields/child" +#define DATA "Hello BPF!" +#define DATA_LEN sizeof(DATA) + +static struct sockaddr_in6 srv_sa6, cli_sa6; +static int sk_pkt_out_cnt10_fd; +static struct test_sock_fields *skel; +static int sk_pkt_out_cnt_fd; +static __u64 parent_cg_id; +static __u64 child_cg_id; +static int linum_map_fd; +static __u32 duration; + +static __u32 egress_linum_idx = EGRESS_LINUM_IDX; +static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; + +static void print_sk(const struct bpf_sock *sk, const char *prefix) +{ + char src_ip4[24], dst_ip4[24]; + char src_ip6[64], dst_ip6[64]; + + inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); + inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); + inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); + inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); + + printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " + "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " + "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", + prefix, + sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, + sk->mark, sk->priority, + sk->src_ip4, src_ip4, + sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], + src_ip6, sk->src_port, + sk->dst_ip4, dst_ip4, + sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], + dst_ip6, ntohs(sk->dst_port)); +} + +static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix) +{ + printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " + "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " + "rate_delivered:%u rate_interval_us:%u packets_out:%u " + "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " + "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " + "bytes_received:%llu bytes_acked:%llu\n", + prefix, + tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, + tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, + tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, + tp->packets_out, tp->retrans_out, tp->total_retrans, + tp->segs_in, tp->data_segs_in, tp->segs_out, + tp->data_segs_out, tp->lost_out, tp->sacked_out, + tp->bytes_received, tp->bytes_acked); +} + +static void check_result(void) +{ + struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; + struct bpf_sock srv_sk, cli_sk, listen_sk; + __u32 ingress_linum, egress_linum; + int err; + + err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, + &egress_linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d\n", err, errno); + + err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, + &ingress_linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d\n", err, errno); + + memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); + memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp)); + memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk)); + memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp)); + memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk)); + memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp)); + + print_sk(&listen_sk, "listen_sk"); + print_sk(&srv_sk, "srv_sk"); + print_sk(&cli_sk, "cli_sk"); + print_tp(&listen_tp, "listen_tp"); + print_tp(&srv_tp, "srv_tp"); + print_tp(&cli_tp, "cli_tp"); + + CHECK(listen_sk.state != 10 || + listen_sk.family != AF_INET6 || + listen_sk.protocol != IPPROTO_TCP || + memcmp(listen_sk.src_ip6, &in6addr_loopback, + sizeof(listen_sk.src_ip6)) || + listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || + listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || + listen_sk.src_port != ntohs(srv_sa6.sin6_port) || + listen_sk.dst_port, + "listen_sk", + "Unexpected. Check listen_sk output. ingress_linum:%u\n", + ingress_linum); + + CHECK(srv_sk.state == 10 || + !srv_sk.state || + srv_sk.family != AF_INET6 || + srv_sk.protocol != IPPROTO_TCP || + memcmp(srv_sk.src_ip6, &in6addr_loopback, + sizeof(srv_sk.src_ip6)) || + memcmp(srv_sk.dst_ip6, &in6addr_loopback, + sizeof(srv_sk.dst_ip6)) || + srv_sk.src_port != ntohs(srv_sa6.sin6_port) || + srv_sk.dst_port != cli_sa6.sin6_port, + "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n", + egress_linum); + + CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n"); + + CHECK(cli_sk.state == 10 || + !cli_sk.state || + cli_sk.family != AF_INET6 || + cli_sk.protocol != IPPROTO_TCP || + memcmp(cli_sk.src_ip6, &in6addr_loopback, + sizeof(cli_sk.src_ip6)) || + memcmp(cli_sk.dst_ip6, &in6addr_loopback, + sizeof(cli_sk.dst_ip6)) || + cli_sk.src_port != ntohs(cli_sa6.sin6_port) || + cli_sk.dst_port != srv_sa6.sin6_port, + "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n", + egress_linum); + + CHECK(listen_tp.data_segs_out || + listen_tp.data_segs_in || + listen_tp.total_retrans || + listen_tp.bytes_acked, + "listen_tp", + "Unexpected. Check listen_tp output. ingress_linum:%u\n", + ingress_linum); + + CHECK(srv_tp.data_segs_out != 2 || + srv_tp.data_segs_in || + srv_tp.snd_cwnd != 10 || + srv_tp.total_retrans || + srv_tp.bytes_acked < 2 * DATA_LEN, + "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n", + egress_linum); + + CHECK(cli_tp.data_segs_out || + cli_tp.data_segs_in != 2 || + cli_tp.snd_cwnd != 10 || + cli_tp.total_retrans || + cli_tp.bytes_received < 2 * DATA_LEN, + "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n", + egress_linum); + + CHECK(skel->bss->parent_cg_id != parent_cg_id, + "parent_cg_id", "%zu != %zu\n", + (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id); + + CHECK(skel->bss->child_cg_id != child_cg_id, + "child_cg_id", "%zu != %zu\n", + (size_t)skel->bss->child_cg_id, (size_t)child_cg_id); +} + +static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) +{ + struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {}; + int err; + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, + &pkt_out_cnt10); + + /* The bpf prog only counts for fullsock and + * passive connection did not become fullsock until 3WHS + * had been finished, so the bpf prog only counted two data + * packet out. + */ + CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 || + pkt_out_cnt10.cnt < 0xeB9F + 20, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, + &pkt_out_cnt10); + /* Active connection is fullsock from the beginning. + * 1 SYN and 1 ACK during 3WHS + * 2 Acks on data packet. + * + * The bpf_prog initialized it to 0xeB9F. + */ + CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 || + pkt_out_cnt10.cnt < 0xeB9F + 40, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); +} + +static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt) +{ + struct bpf_spinlock_cnt scnt = {}; + int err; + + scnt.cnt = pkt_out_cnt; + err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, + BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", + "err:%d errno:%d\n", err, errno)) + return err; + + err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, + BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", + "err:%d errno:%d\n", err, errno)) + return err; + + return 0; +} + +static void test(void) +{ + int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i; + socklen_t addrlen = sizeof(struct sockaddr_in6); + char buf[DATA_LEN]; + + /* Prepare listen_fd */ + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + /* start_server() has logged the error details */ + if (CHECK_FAIL(listen_fd == -1)) + goto done; + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, + errno)) + goto done; + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK_FAIL(cli_fd == -1)) + goto done; + + err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); + if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n", + err, errno)) + goto done; + + accept_fd = accept(listen_fd, NULL, NULL); + if (CHECK(accept_fd == -1, "accept(listen_fd)", + "accept_fd:%d errno:%d\n", + accept_fd, errno)) + goto done; + + if (init_sk_storage(accept_fd, 0xeB9F)) + goto done; + + for (i = 0; i < 2; i++) { + /* Send some data from accept_fd to cli_fd. + * MSG_EOR to stop kernel from coalescing two pkts. + */ + err = send(accept_fd, DATA, DATA_LEN, MSG_EOR); + if (CHECK(err != DATA_LEN, "send(accept_fd)", + "err:%d errno:%d\n", err, errno)) + goto done; + + err = recv(cli_fd, buf, DATA_LEN, 0); + if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n", + err, errno)) + goto done; + } + + shutdown(cli_fd, SHUT_WR); + err = recv(accept_fd, buf, 1, 0); + if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", + err, errno)) + goto done; + shutdown(accept_fd, SHUT_WR); + err = recv(cli_fd, buf, 1, 0); + if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", + err, errno)) + goto done; + check_sk_pkt_out_cnt(accept_fd, cli_fd); + check_result(); + +done: + if (accept_fd != -1) + close(accept_fd); + if (cli_fd != -1) + close(cli_fd); + if (listen_fd != -1) + close(listen_fd); +} + +void test_sock_fields(void) +{ + struct bpf_link *egress_link = NULL, *ingress_link = NULL; + int parent_cg_fd = -1, child_cg_fd = -1; + + /* Create a cgroup, get fd, and join it */ + parent_cg_fd = test__join_cgroup(PARENT_CGROUP); + if (CHECK_FAIL(parent_cg_fd < 0)) + return; + parent_cg_id = get_cgroup_id(PARENT_CGROUP); + if (CHECK_FAIL(!parent_cg_id)) + goto done; + + child_cg_fd = test__join_cgroup(CHILD_CGROUP); + if (CHECK_FAIL(child_cg_fd < 0)) + goto done; + child_cg_id = get_cgroup_id(CHILD_CGROUP); + if (CHECK_FAIL(!child_cg_id)) + goto done; + + skel = test_sock_fields__open_and_load(); + if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) + goto done; + + egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, + child_cg_fd); + if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n", + PTR_ERR(egress_link))) + goto done; + + ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, + child_cg_fd); + if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n", + PTR_ERR(ingress_link))) + goto done; + + linum_map_fd = bpf_map__fd(skel->maps.linum_map); + sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt); + sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10); + + test(); + +done: + bpf_link__destroy(egress_link); + bpf_link__destroy(ingress_link); + test_sock_fields__destroy(skel); + if (child_cg_fd != -1) + close(child_cg_fd); + if (parent_cg_fd != -1) + close(parent_cg_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 96e7b7f84c65..85f73261fab0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -4,6 +4,9 @@ #include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_update.skel.h" +#include "test_sockmap_invalid_update.skel.h" +#include "bpf_iter_sockmap.skel.h" #define TCP_REPAIR 19 /* TCP sock is under repair right now */ @@ -45,6 +48,37 @@ error: return -1; } +static void compare_cookies(struct bpf_map *src, struct bpf_map *dst) +{ + __u32 i, max_entries = bpf_map__max_entries(src); + int err, duration = 0, src_fd, dst_fd; + + src_fd = bpf_map__fd(src); + dst_fd = bpf_map__fd(dst); + + for (i = 0; i < max_entries; i++) { + __u64 src_cookie, dst_cookie; + + err = bpf_map_lookup_elem(src_fd, &i, &src_cookie); + if (err && errno == ENOENT) { + err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie); + CHECK(!err, "map_lookup_elem(dst)", "element %u not deleted\n", i); + CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n", + strerror(errno)); + continue; + } + if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno))) + continue; + + err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie); + if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno))) + continue; + + CHECK(dst_cookie != src_cookie, "cookie mismatch", + "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i); + } +} + /* Create a map, populate it with one socket, and free the map. */ static void test_sockmap_create_update_free(enum bpf_map_type map_type) { @@ -101,6 +135,151 @@ out: test_skmsg_load_helpers__destroy(skel); } +static void test_sockmap_update(enum bpf_map_type map_type) +{ + struct bpf_prog_test_run_attr tattr; + int err, prog, src, duration = 0; + struct test_sockmap_update *skel; + struct bpf_map *dst_map; + const __u32 zero = 0; + char dummy[14] = {0}; + __s64 sk; + + sk = connected_socket_v4(); + if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n")) + return; + + skel = test_sockmap_update__open_and_load(); + if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) + goto close_sk; + + prog = bpf_program__fd(skel->progs.copy_sock_map); + src = bpf_map__fd(skel->maps.src); + if (map_type == BPF_MAP_TYPE_SOCKMAP) + dst_map = skel->maps.dst_sock_map; + else + dst_map = skel->maps.dst_sock_hash; + + err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST); + if (CHECK(err, "update_elem(src)", "errno=%u\n", errno)) + goto out; + + tattr = (struct bpf_prog_test_run_attr){ + .prog_fd = prog, + .repeat = 1, + .data_in = dummy, + .data_size_in = sizeof(dummy), + }; + + err = bpf_prog_test_run_xattr(&tattr); + if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run", + "errno=%u retval=%u\n", errno, tattr.retval)) + goto out; + + compare_cookies(skel->maps.src, dst_map); + +out: + test_sockmap_update__destroy(skel); +close_sk: + close(sk); +} + +static void test_sockmap_invalid_update(void) +{ + struct test_sockmap_invalid_update *skel; + int duration = 0; + + skel = test_sockmap_invalid_update__open_and_load(); + if (CHECK(skel, "open_and_load", "verifier accepted map_update\n")) + test_sockmap_invalid_update__destroy(skel); +} + +static void test_sockmap_copy(enum bpf_map_type map_type) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + int err, len, src_fd, iter_fd, duration = 0; + union bpf_iter_link_info linfo = {}; + __u32 i, num_sockets, num_elems; + struct bpf_iter_sockmap *skel; + __s64 *sock_fd = NULL; + struct bpf_link *link; + struct bpf_map *src; + char buf[64]; + + skel = bpf_iter_sockmap__open_and_load(); + if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n")) + return; + + if (map_type == BPF_MAP_TYPE_SOCKMAP) { + src = skel->maps.sockmap; + num_elems = bpf_map__max_entries(src); + num_sockets = num_elems - 1; + } else { + src = skel->maps.sockhash; + num_elems = bpf_map__max_entries(src) - 1; + num_sockets = num_elems; + } + + sock_fd = calloc(num_sockets, sizeof(*sock_fd)); + if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n")) + goto out; + + for (i = 0; i < num_sockets; i++) + sock_fd[i] = -1; + + src_fd = bpf_map__fd(src); + + for (i = 0; i < num_sockets; i++) { + sock_fd[i] = connected_socket_v4(); + if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n")) + goto out; + + err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST); + if (CHECK(err, "map_update", "failed: %s\n", strerror(errno))) + goto out; + } + + linfo.map.map_fd = src_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(skel->progs.copy, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n", + skel->bss->elems, num_elems)) + goto close_iter; + + if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n", + skel->bss->socks, num_sockets)) + goto close_iter; + + compare_cookies(src, skel->maps.dst); + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + for (i = 0; sock_fd && i < num_sockets; i++) + if (sock_fd[i] >= 0) + close(sock_fd[i]); + if (sock_fd) + free(sock_fd); + bpf_iter_sockmap__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -111,4 +290,14 @@ void test_sockmap_basic(void) test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg load helpers")) test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap update")) + test_sockmap_update(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash update")) + test_sockmap_update(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap update in unsafe context")) + test_sockmap_invalid_update(); + if (test__start_subtest("sockmap copy")) + test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash copy")) + test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index 29188d6f5c8d..51fac975b316 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -138,7 +138,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, */ buf = 0x40; - if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) { + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (err < 0) { log_err("Failed to call setsockopt(IP_TOS)"); goto detach; } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 5f54c6aec7f0..b25c9c45c148 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -45,9 +45,9 @@ static int getsetsockopt(void) goto err; } - if (*(int *)big_buf != 0x08) { + if (*big_buf != 0x08) { log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08", - *(int *)big_buf); + (int)*big_buf); goto err; } diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c new file mode 100644 index 000000000000..3f3d2ac4dd57 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include <time.h> +#include "test_subprogs.skel.h" +#include "test_subprogs_unused.skel.h" + +static int duration; + +void test_subprogs(void) +{ + struct test_subprogs *skel; + struct test_subprogs_unused *skel2; + int err; + + skel = test_subprogs__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = test_subprogs__attach(skel); + if (CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err)) + goto cleanup; + + usleep(1); + + CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12); + CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17); + CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19); + CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36); + + skel2 = test_subprogs_unused__open_and_load(); + ASSERT_OK_PTR(skel2, "unused_progs_skel"); + test_subprogs_unused__destroy(skel2); + +cleanup: + test_subprogs__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index bb8fe646dd9f..ee27d68d2a1c 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -472,6 +473,329 @@ out: bpf_object__close(obj); } +/* test_tailcall_bpf2bpf_1 purpose is to make sure that tailcalls are working + * correctly in correlation with BPF subprograms + */ +static void test_tailcall_bpf2bpf_1(void) +{ + int err, map_fd, prog_fd, main_fd, i; + struct bpf_map *prog_array; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + + err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + /* nop -> jmp */ + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 0, &retval, &duration); + CHECK(err || retval != 1, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + + /* jmp -> nop, call subprog that will do tailcall */ + i = 1; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 0, &retval, &duration); + CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + /* make sure that subprog can access ctx and entry prog that + * called this subprog can properly return + */ + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 0, &retval, &duration); + CHECK(err || retval != sizeof(pkt_v4) * 2, + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_bpf2bpf_2 checks that the count value of the tail call limit + * enforcement matches with expectations when tailcall is preceded with + * bpf2bpf call. + */ +static void test_tailcall_bpf2bpf_2(void) +{ + int err, map_fd, prog_fd, main_fd, data_fd, i, val; + struct bpf_map *prog_array, *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char buff[128] = {}; + + err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + prog = bpf_object__find_program_by_title(obj, "classifier/0"); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + i = 0; + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); + if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) + return; + + data_fd = bpf_map__fd(data_map); + if (CHECK_FAIL(map_fd < 0)) + return; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n", + err, errno, val); + + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_bpf2bpf_3 checks that non-trivial amount of stack (up to + * 256 bytes) can be used within bpf subprograms that have the tailcalls + * in them + */ +static void test_tailcall_bpf2bpf_3(void) +{ + int err, map_fd, prog_fd, main_fd, i; + struct bpf_map *prog_array; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + + err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 3, + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 1; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4), + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 2, + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved + * across tailcalls combined with bpf2bpf calls. for making sure that tailcall + * counter behaves correctly, bpf program will go through following flow: + * + * entry -> entry_subprog -> tailcall0 -> bpf_func0 -> subprog0 -> + * -> tailcall1 -> bpf_func1 -> subprog1 -> tailcall2 -> bpf_func2 -> + * subprog2 [here bump global counter] --------^ + * + * We go through first two tailcalls and start counting from the subprog2 where + * the loop begins. At the end of the test make sure that the global counter is + * equal to 31, because tailcall counter includes the first two tailcalls + * whereas global counter is incremented only on loop presented on flow above. + */ +static void test_tailcall_bpf2bpf_4(void) +{ + int err, map_fd, prog_fd, main_fd, data_fd, i, val; + struct bpf_map *prog_array, *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + + err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); + if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) + return; + + data_fd = bpf_map__fd(data_map); + if (CHECK_FAIL(map_fd < 0)) + return; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n", + err, errno, val); + +out: + bpf_object__close(obj); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -484,4 +808,12 @@ void test_tailcalls(void) test_tailcall_4(); if (test__start_subtest("tailcall_5")) test_tailcall_5(); + if (test__start_subtest("tailcall_bpf2bpf_1")) + test_tailcall_bpf2bpf_1(); + if (test__start_subtest("tailcall_bpf2bpf_2")) + test_tailcall_bpf2bpf_2(); + if (test__start_subtest("tailcall_bpf2bpf_3")) + test_tailcall_bpf2bpf_3(); + if (test__start_subtest("tailcall_bpf2bpf_4")) + test_tailcall_bpf2bpf_4(); } diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c new file mode 100644 index 000000000000..c85174cdcb77 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -0,0 +1,610 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#define _GNU_SOURCE +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <linux/compiler.h> + +#include "test_progs.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "test_tcp_hdr_options.h" +#include "test_tcp_hdr_options.skel.h" +#include "test_misc_tcp_hdr_options.skel.h" + +#define LO_ADDR6 "::1" +#define CG_NAME "/tcpbpf-hdr-opt-test" + +struct bpf_test_option exp_passive_estab_in; +struct bpf_test_option exp_active_estab_in; +struct bpf_test_option exp_passive_fin_in; +struct bpf_test_option exp_active_fin_in; +struct hdr_stg exp_passive_hdr_stg; +struct hdr_stg exp_active_hdr_stg = { .active = true, }; + +static struct test_misc_tcp_hdr_options *misc_skel; +static struct test_tcp_hdr_options *skel; +static int lport_linum_map_fd; +static int hdr_stg_map_fd; +static __u32 duration; +static int cg_fd; + +struct sk_fds { + int srv_fd; + int passive_fd; + int active_fd; + int passive_lport; + int active_lport; +}; + +static int create_netns(void) +{ + if (CHECK(unshare(CLONE_NEWNET), "create netns", + "unshare(CLONE_NEWNET): %s (%d)", + strerror(errno), errno)) + return -1; + + if (CHECK(system("ip link set dev lo up"), "run ip cmd", + "failed to bring lo link up\n")) + return -1; + + return 0; +} + +static int write_sysctl(const char *sysctl, const char *value) +{ + int fd, err, len; + + fd = open(sysctl, O_WRONLY); + if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", + sysctl, strerror(errno), errno)) + return -1; + + len = strlen(value); + err = write(fd, value, len); + close(fd); + if (CHECK(err != len, "write sysctl", + "write(%s, %s): err:%d %s (%d)\n", + sysctl, value, err, strerror(errno), errno)) + return -1; + + return 0; +} + +static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix) +{ + fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n", + prefix ? : "", hdr_stg->active, hdr_stg->resend_syn, + hdr_stg->syncookie, hdr_stg->fastopen); +} + +static void print_option(const struct bpf_test_option *opt, const char *prefix) +{ + fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n", + prefix ? : "", opt->flags, opt->max_delack_ms, opt->rand); +} + +static void sk_fds_close(struct sk_fds *sk_fds) +{ + close(sk_fds->srv_fd); + close(sk_fds->passive_fd); + close(sk_fds->active_fd); +} + +static int sk_fds_shutdown(struct sk_fds *sk_fds) +{ + int ret, abyte; + + shutdown(sk_fds->active_fd, SHUT_WR); + ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte)); + if (CHECK(ret != 0, "read-after-shutdown(passive_fd):", + "ret:%d %s (%d)\n", + ret, strerror(errno), errno)) + return -1; + + shutdown(sk_fds->passive_fd, SHUT_WR); + ret = read(sk_fds->active_fd, &abyte, sizeof(abyte)); + if (CHECK(ret != 0, "read-after-shutdown(active_fd):", + "ret:%d %s (%d)\n", + ret, strerror(errno), errno)) + return -1; + + return 0; +} + +static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open) +{ + const char fast[] = "FAST!!!"; + struct sockaddr_in6 addr6; + socklen_t len; + + sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); + if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n", + strerror(errno), errno)) + goto error; + + if (fast_open) + sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast, + sizeof(fast), 0); + else + sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0); + + if (CHECK_FAIL(sk_fds->active_fd == -1)) { + close(sk_fds->srv_fd); + goto error; + } + + len = sizeof(addr6); + if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6, + &len), "getsockname(srv_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + sk_fds->passive_lport = ntohs(addr6.sin6_port); + + len = sizeof(addr6); + if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6, + &len), "getsockname(active_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + sk_fds->active_lport = ntohs(addr6.sin6_port); + + sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0); + if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + + if (fast_open) { + char bytes_in[sizeof(fast)]; + int ret; + + ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in)); + if (CHECK(ret != sizeof(fast), "read fastopen syn data", + "expected=%lu actual=%d\n", sizeof(fast), ret)) { + close(sk_fds->passive_fd); + goto error_close; + } + } + + return 0; + +error_close: + close(sk_fds->active_fd); + close(sk_fds->srv_fd); + +error: + memset(sk_fds, -1, sizeof(*sk_fds)); + return -1; +} + +static int check_hdr_opt(const struct bpf_test_option *exp, + const struct bpf_test_option *act, + const char *hdr_desc) +{ + if (CHECK(memcmp(exp, act, sizeof(*exp)), + "expected-vs-actual", "unexpected %s\n", hdr_desc)) { + print_option(exp, "expected: "); + print_option(act, " actual: "); + return -1; + } + + return 0; +} + +static int check_hdr_stg(const struct hdr_stg *exp, int fd, + const char *stg_desc) +{ + struct hdr_stg act; + + if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act), + "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n", + stg_desc, strerror(errno), errno)) + return -1; + + if (CHECK(memcmp(exp, &act, sizeof(*exp)), + "expected-vs-actual", "unexpected %s\n", stg_desc)) { + print_hdr_stg(exp, "expected: "); + print_hdr_stg(&act, " actual: "); + return -1; + } + + return 0; +} + +static int check_error_linum(const struct sk_fds *sk_fds) +{ + unsigned int nr_errors = 0; + struct linum_err linum_err; + int lport; + + lport = sk_fds->passive_lport; + if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) { + fprintf(stderr, + "bpf prog error out at lport:passive(%d), linum:%u err:%d\n", + lport, linum_err.linum, linum_err.err); + nr_errors++; + } + + lport = sk_fds->active_lport; + if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) { + fprintf(stderr, + "bpf prog error out at lport:active(%d), linum:%u err:%d\n", + lport, linum_err.linum, linum_err.err); + nr_errors++; + } + + return nr_errors; +} + +static void check_hdr_and_close_fds(struct sk_fds *sk_fds) +{ + const __u32 expected_inherit_cb_flags = + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_STATE_CB_FLAG; + + if (sk_fds_shutdown(sk_fds)) + goto check_linum; + + if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags, + "Unexpected inherit_cb_flags", "0x%x != 0x%x\n", + skel->bss->inherit_cb_flags, expected_inherit_cb_flags)) + goto check_linum; + + if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd, + "passive_hdr_stg")) + goto check_linum; + + if (check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd, + "active_hdr_stg")) + goto check_linum; + + if (check_hdr_opt(&exp_passive_estab_in, &skel->bss->passive_estab_in, + "passive_estab_in")) + goto check_linum; + + if (check_hdr_opt(&exp_active_estab_in, &skel->bss->active_estab_in, + "active_estab_in")) + goto check_linum; + + if (check_hdr_opt(&exp_passive_fin_in, &skel->bss->passive_fin_in, + "passive_fin_in")) + goto check_linum; + + check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in, + "active_fin_in"); + +check_linum: + CHECK_FAIL(check_error_linum(sk_fds)); + sk_fds_close(sk_fds); +} + +static void prepare_out(void) +{ + skel->bss->active_syn_out = exp_passive_estab_in; + skel->bss->passive_synack_out = exp_active_estab_in; + + skel->bss->active_fin_out = exp_passive_fin_in; + skel->bss->passive_fin_out = exp_active_fin_in; +} + +static void reset_test(void) +{ + size_t optsize = sizeof(struct bpf_test_option); + int lport, err; + + memset(&skel->bss->passive_synack_out, 0, optsize); + memset(&skel->bss->passive_fin_out, 0, optsize); + + memset(&skel->bss->passive_estab_in, 0, optsize); + memset(&skel->bss->passive_fin_in, 0, optsize); + + memset(&skel->bss->active_syn_out, 0, optsize); + memset(&skel->bss->active_fin_out, 0, optsize); + + memset(&skel->bss->active_estab_in, 0, optsize); + memset(&skel->bss->active_fin_in, 0, optsize); + + skel->bss->inherit_cb_flags = 0; + + skel->data->test_kind = TCPOPT_EXP; + skel->data->test_magic = 0xeB9F; + + memset(&exp_passive_estab_in, 0, optsize); + memset(&exp_active_estab_in, 0, optsize); + memset(&exp_passive_fin_in, 0, optsize); + memset(&exp_active_fin_in, 0, optsize); + + memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg)); + memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg)); + exp_active_hdr_stg.active = true; + + err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport); + while (!err) { + bpf_map_delete_elem(lport_linum_map_fd, &lport); + err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport); + } +} + +static void fastopen_estab(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + exp_passive_hdr_stg.fastopen = true; + + prepare_out(); + + /* Allow fastopen without fastopen cookie */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, true)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void syncookie_estab(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS | + OPTION_F_RESEND; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + exp_passive_hdr_stg.syncookie = true; + exp_active_hdr_stg.resend_syn = true, + + prepare_out(); + + /* Clear the RESEND to ensure the bpf prog can learn + * want_cookie and set the RESEND by itself. + */ + skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND; + + /* Enforce syncookie mode */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void fin(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_fin_in.flags = OPTION_F_RAND; + exp_passive_fin_in.rand = 0xfa; + + exp_active_fin_in.flags = OPTION_F_RAND; + exp_active_fin_in.rand = 0xce; + + prepare_out(); + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void __simple_estab(bool exprm) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + prepare_out(); + + if (!exprm) { + skel->data->test_kind = 0xB9; + skel->data->test_magic = 0; + } + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void no_exprm_estab(void) +{ + __simple_estab(false); +} + +static void simple_estab(void) +{ + __simple_estab(true); +} + +static void misc(void) +{ + const char send_msg[] = "MISC!!!"; + char recv_msg[sizeof(send_msg)]; + const unsigned int nr_data = 2; + struct bpf_link *link; + struct sk_fds sk_fds; + int i, ret; + + lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map); + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + for (i = 0; i < nr_data; i++) { + /* MSG_EOR to ensure skb will not be combined */ + ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg), + MSG_EOR); + if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", + ret)) + goto check_linum; + + ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg)); + if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n", + ret)) + goto check_linum; + } + + if (sk_fds_shutdown(&sk_fds)) + goto check_linum; + + CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn", + "expected (1) != actual (%u)\n", + misc_skel->bss->nr_syn); + + CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data", + "expected (%u) != actual (%u)\n", + nr_data, misc_skel->bss->nr_data); + + /* The last ACK may have been delayed, so it is either 1 or 2. */ + CHECK(misc_skel->bss->nr_pure_ack != 1 && + misc_skel->bss->nr_pure_ack != 2, + "unexpected nr_pure_ack", + "expected (1 or 2) != actual (%u)\n", + misc_skel->bss->nr_pure_ack); + + CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin", + "expected (1) != actual (%u)\n", + misc_skel->bss->nr_fin); + +check_linum: + CHECK_FAIL(check_error_linum(&sk_fds)); + sk_fds_close(&sk_fds); + bpf_link__destroy(link); +} + +struct test { + const char *desc; + void (*run)(void); +}; + +#define DEF_TEST(name) { #name, name } +static struct test tests[] = { + DEF_TEST(simple_estab), + DEF_TEST(no_exprm_estab), + DEF_TEST(syncookie_estab), + DEF_TEST(fastopen_estab), + DEF_TEST(fin), + DEF_TEST(misc), +}; + +void test_tcp_hdr_options(void) +{ + int i; + + skel = test_tcp_hdr_options__open_and_load(); + if (CHECK(!skel, "open and load skel", "failed")) + return; + + misc_skel = test_misc_tcp_hdr_options__open_and_load(); + if (CHECK(!misc_skel, "open and load misc test skel", "failed")) + goto skel_destroy; + + cg_fd = test__join_cgroup(CG_NAME); + if (CHECK_FAIL(cg_fd < 0)) + goto skel_destroy; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].desc)) + continue; + + if (create_netns()) + break; + + tests[i].run(); + + reset_test(); + } + + close(cg_fd); +skel_destroy: + test_misc_tcp_hdr_options__destroy(misc_skel); + test_tcp_hdr_options__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c new file mode 100644 index 000000000000..172c999e523c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include <sched.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <test_progs.h> + +#define TDIR "/sys/kernel/debug" + +static int read_iter(char *file) +{ + /* 1024 should be enough to get contiguous 4 "iter" letters at some point */ + char buf[1024]; + int fd, len; + + fd = open(file, 0); + if (fd < 0) + return -1; + while ((len = read(fd, buf, sizeof(buf))) > 0) + if (strstr(buf, "iter")) { + close(fd); + return 0; + } + close(fd); + return -1; +} + +static int fn(void) +{ + int err, duration = 0; + + err = unshare(CLONE_NEWNS); + if (CHECK(err, "unshare", "failed: %d\n", errno)) + goto out; + + err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL); + if (CHECK(err, "mount /", "failed: %d\n", errno)) + goto out; + + err = umount(TDIR); + if (CHECK(err, "umount " TDIR, "failed: %d\n", errno)) + goto out; + + err = mount("none", TDIR, "tmpfs", 0, NULL); + if (CHECK(err, "mount", "mount root failed: %d\n", errno)) + goto out; + + err = mkdir(TDIR "/fs1", 0777); + if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno)) + goto out; + err = mkdir(TDIR "/fs2", 0777); + if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno)) + goto out; + + err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL); + if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno)) + goto out; + err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL); + if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno)) + goto out; + + err = read_iter(TDIR "/fs1/maps.debug"); + if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n")) + goto out; + err = read_iter(TDIR "/fs2/progs.debug"); + if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n")) + goto out; +out: + umount(TDIR "/fs1"); + umount(TDIR "/fs2"); + rmdir(TDIR "/fs1"); + rmdir(TDIR "/fs2"); + umount(TDIR); + exit(err); +} + +void test_test_bpffs(void) +{ + int err, duration = 0, status = 0; + pid_t pid; + + pid = fork(); + if (CHECK(pid == -1, "clone", "clone failed %d", errno)) + return; + if (pid == 0) + fn(); + err = waitpid(pid, &status, 0); + if (CHECK(err == -1 && errno != ECHILD, "waitpid", "failed %d", errno)) + return; + if (CHECK(WEXITSTATUS(status), "bpffs test ", "failed %d", WEXITSTATUS(status))) + return; +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 193002b14d7f..32e4348b714b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -60,6 +60,7 @@ void test_test_global_funcs(void) { "test_global_func5.o" , "expected pointer to ctx, but got PTR" }, { "test_global_func6.o" , "modified ctx ptr R2" }, { "test_global_func7.o" , "foo() doesn't return scalar" }, + { "test_global_func8.o" }, }; libbpf_print_fn_t old_print_fn = NULL; int err, i, duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c new file mode 100644 index 000000000000..91cd6f357246 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2020 Google LLC. + */ + +#include <test_progs.h> +#include <linux/limits.h> + +#include "local_storage.skel.h" +#include "network_helpers.h" + +int create_and_unlink_file(void) +{ + char fname[PATH_MAX] = "/tmp/fileXXXXXX"; + int fd; + + fd = mkstemp(fname); + if (fd < 0) + return fd; + + close(fd); + unlink(fname); + return 0; +} + +void test_test_local_storage(void) +{ + struct local_storage *skel = NULL; + int err, duration = 0, serv_sk = -1; + + skel = local_storage__open_and_load(); + if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) + goto close_prog; + + err = local_storage__attach(skel); + if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) + goto close_prog; + + skel->bss->monitored_pid = getpid(); + + err = create_and_unlink_file(); + if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno)) + goto close_prog; + + CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", + "inode_local_storage not set\n"); + + serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) + goto close_prog; + + CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", + "sk_local_storage not set\n"); + + close(serv_sk); + +close_prog: + local_storage__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index b17eb2045c1d..6ab29226c99b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -10,6 +10,7 @@ #include <unistd.h> #include <malloc.h> #include <stdlib.h> +#include <unistd.h> #include "lsm.skel.h" @@ -55,6 +56,7 @@ void test_test_lsm(void) { struct lsm *skel = NULL; int err, duration = 0; + int buf = 1234; skel = lsm__open_and_load(); if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) @@ -81,6 +83,13 @@ void test_test_lsm(void) CHECK(skel->bss->mprotect_count != 1, "mprotect_count", "mprotect_count = %d\n", skel->bss->mprotect_count); + syscall(__NR_setdomainname, &buf, -2L); + syscall(__NR_setdomainname, 0, -3L); + syscall(__NR_setdomainname, ~0L, -4L); + + CHECK(skel->bss->copy_test != 3, "copy_test", + "copy_test = %d\n", skel->bss->copy_test); + close_prog: lsm__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c index 2702df2b2343..9966685866fd 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -61,10 +61,9 @@ void test_test_overhead(void) const char *raw_tp_name = "raw_tp/task_rename"; const char *fentry_name = "fentry/__set_task_comm"; const char *fexit_name = "fexit/__set_task_comm"; - const char *fmodret_name = "fmod_ret/__set_task_comm"; const char *kprobe_func = "__set_task_comm"; struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog; - struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog; + struct bpf_program *fentry_prog, *fexit_prog; struct bpf_object *obj; struct bpf_link *link; int err, duration = 0; @@ -97,11 +96,6 @@ void test_test_overhead(void) if (CHECK(!fexit_prog, "find_probe", "prog '%s' not found\n", fexit_name)) goto cleanup; - fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name); - if (CHECK(!fmodret_prog, "find_probe", - "prog '%s' not found\n", fmodret_name)) - goto cleanup; - err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) goto cleanup; @@ -148,12 +142,6 @@ void test_test_overhead(void) test_run("fexit"); bpf_link__destroy(link); - /* attach fmod_ret */ - link = bpf_program__attach_trace(fmodret_prog); - if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link))) - goto cleanup; - test_run("fmod_ret"); - bpf_link__destroy(link); cleanup: prctl(PR_SET_NAME, comm, 0L, 0L, 0L); bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c new file mode 100644 index 000000000000..4ca275101ee0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include "progs/profiler.h" +#include "profiler1.skel.h" +#include "profiler2.skel.h" +#include "profiler3.skel.h" + +static int sanity_run(struct bpf_program *prog) +{ + struct bpf_prog_test_run_attr test_attr = {}; + __u64 args[] = {1, 2, 3}; + __u32 duration = 0; + int err, prog_fd; + + prog_fd = bpf_program__fd(prog); + test_attr.prog_fd = prog_fd; + test_attr.ctx_in = args; + test_attr.ctx_size_in = sizeof(args); + err = bpf_prog_test_run_xattr(&test_attr); + if (CHECK(err || test_attr.retval, "test_run", + "err %d errno %d retval %d duration %d\n", + err, errno, test_attr.retval, duration)) + return -1; + return 0; +} + +void test_test_profiler(void) +{ + struct profiler1 *profiler1_skel = NULL; + struct profiler2 *profiler2_skel = NULL; + struct profiler3 *profiler3_skel = NULL; + __u32 duration = 0; + int err; + + profiler1_skel = profiler1__open_and_load(); + if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n")) + goto cleanup; + + err = profiler1__attach(profiler1_skel); + if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; + + profiler2_skel = profiler2__open_and_load(); + if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n")) + goto cleanup; + + err = profiler2__attach(profiler2_skel); + if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; + + profiler3_skel = profiler3__open_and_load(); + if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n")) + goto cleanup; + + err = profiler3__attach(profiler3_skel); + if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; +cleanup: + profiler1__destroy(profiler1_skel); + profiler2__destroy(profiler2_skel); + profiler3__destroy(profiler3_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c new file mode 100644 index 000000000000..924441d4362d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <test_progs.h> +#include <network_helpers.h> +#include <sys/stat.h> +#include <linux/sched.h> +#include <sys/syscall.h> + +#include "test_pkt_md_access.skel.h" +#include "test_trace_ext.skel.h" +#include "test_trace_ext_tracing.skel.h" + +static __u32 duration; + +void test_trace_ext(void) +{ + struct test_pkt_md_access *skel_pkt = NULL; + struct test_trace_ext_tracing *skel_trace = NULL; + struct test_trace_ext_tracing__bss *bss_trace; + struct test_trace_ext *skel_ext = NULL; + struct test_trace_ext__bss *bss_ext; + int err, pkt_fd, ext_fd; + struct bpf_program *prog; + char buf[100]; + __u32 retval; + __u64 len; + + /* open/load/attach test_pkt_md_access */ + skel_pkt = test_pkt_md_access__open_and_load(); + if (CHECK(!skel_pkt, "setup", "classifier/test_pkt_md_access open failed\n")) + goto cleanup; + + err = test_pkt_md_access__attach(skel_pkt); + if (CHECK(err, "setup", "classifier/test_pkt_md_access attach failed: %d\n", err)) + goto cleanup; + + prog = skel_pkt->progs.test_pkt_md_access; + pkt_fd = bpf_program__fd(prog); + + /* open extension */ + skel_ext = test_trace_ext__open(); + if (CHECK(!skel_ext, "setup", "freplace/test_pkt_md_access open failed\n")) + goto cleanup; + + /* set extension's attach target - test_pkt_md_access */ + prog = skel_ext->progs.test_pkt_md_access_new; + bpf_program__set_attach_target(prog, pkt_fd, "test_pkt_md_access"); + + /* load/attach extension */ + err = test_trace_ext__load(skel_ext); + if (CHECK(err, "setup", "freplace/test_pkt_md_access load failed\n")) { + libbpf_strerror(err, buf, sizeof(buf)); + fprintf(stderr, "%s\n", buf); + goto cleanup; + } + + err = test_trace_ext__attach(skel_ext); + if (CHECK(err, "setup", "freplace/test_pkt_md_access attach failed: %d\n", err)) + goto cleanup; + + prog = skel_ext->progs.test_pkt_md_access_new; + ext_fd = bpf_program__fd(prog); + + /* open tracing */ + skel_trace = test_trace_ext_tracing__open(); + if (CHECK(!skel_trace, "setup", "tracing/test_pkt_md_access_new open failed\n")) + goto cleanup; + + /* set tracing's attach target - fentry */ + prog = skel_trace->progs.fentry; + bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new"); + + /* set tracing's attach target - fexit */ + prog = skel_trace->progs.fexit; + bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new"); + + /* load/attach tracing */ + err = test_trace_ext_tracing__load(skel_trace); + if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) { + libbpf_strerror(err, buf, sizeof(buf)); + fprintf(stderr, "%s\n", buf); + goto cleanup; + } + + err = test_trace_ext_tracing__attach(skel_trace); + if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err)) + goto cleanup; + + /* trigger the test */ + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval); + + bss_ext = skel_ext->bss; + bss_trace = skel_trace->bss; + + len = bss_ext->ext_called; + + CHECK(bss_ext->ext_called == 0, + "check", "failed to trigger freplace/test_pkt_md_access\n"); + CHECK(bss_trace->fentry_called != len, + "check", "failed to trigger fentry/test_pkt_md_access_new\n"); + CHECK(bss_trace->fexit_called != len, + "check", "failed to trigger fexit/test_pkt_md_access_new\n"); + +cleanup: + test_trace_ext_tracing__destroy(skel_trace); + test_trace_ext__destroy(skel_ext); + test_pkt_md_access__destroy(skel_pkt); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index f284f72158ef..0281095de266 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include <network_helpers.h> +#include "test_xdp_noinline.skel.h" void test_xdp_noinline(void) { - const char *file = "./test_xdp_noinline.o"; unsigned int nr_cpus = bpf_num_possible_cpus(); + struct test_xdp_noinline *skel; struct vip key = {.protocol = 6}; struct vip_meta { __u32 flags; @@ -24,59 +25,43 @@ void test_xdp_noinline(void) __u8 flags; } real_def = {.dst = MAGIC_VAL}; __u32 ch_key = 11, real_num = 3; - __u32 duration, retval, size; - int err, i, prog_fd, map_fd; + __u32 duration = 0, retval, size; + int err, i; __u64 bytes = 0, pkts = 0; - struct bpf_object *obj; char buf[128]; u32 *magic = (u32 *)buf; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (CHECK_FAIL(err)) + skel = test_xdp_noinline__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "failed\n")) return; - map_fd = bpf_find_map(__func__, obj, "vip_map"); - if (map_fd < 0) - goto out; - bpf_map_update_elem(map_fd, &key, &value, 0); + bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0); + bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0); + bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0); - map_fd = bpf_find_map(__func__, obj, "ch_rings"); - if (map_fd < 0) - goto out; - bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); - - map_fd = bpf_find_map(__func__, obj, "reals"); - if (map_fd < 0) - goto out; - bpf_map_update_elem(map_fd, &real_num, &real_def, 0); - - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4), + NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); CHECK(err || retval != 1 || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6), + NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); CHECK(err || retval != 1 || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); - map_fd = bpf_find_map(__func__, obj, "stats"); - if (map_fd < 0) - goto out; - bpf_map_lookup_elem(map_fd, &stats_key, stats); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats); for (i = 0; i < nr_cpus; i++) { bytes += stats[i].bytes; pkts += stats[i].pkts; } - if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 || - pkts != NUM_ITER * 2)) { - printf("test_xdp_noinline:FAIL:stats %lld %lld\n", - bytes, pkts); - } -out: - bpf_object__close(obj); + CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2, + "stats", "bytes %lld pkts %lld\n", + (unsigned long long)bytes, (unsigned long long)pkts); + test_xdp_noinline__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index ef574087f1e1..6939bfd8690f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -15,6 +15,8 @@ */ #include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/tcp.h> #include "bpf_tcp_helpers.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 3fb4260570b1..4dc1a967776a 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -9,6 +9,8 @@ #include <stddef.h> #include <linux/bpf.h> #include <linux/types.h> +#include <linux/stddef.h> +#include <linux/tcp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_tcp_helpers.h" diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index de6de9221518..5a65f6b51377 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -118,18 +118,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) switch (proto) { case bpf_htons(ETH_P_IP): - bpf_tail_call(skb, &jmp_table, IP); + bpf_tail_call_static(skb, &jmp_table, IP); break; case bpf_htons(ETH_P_IPV6): - bpf_tail_call(skb, &jmp_table, IPV6); + bpf_tail_call_static(skb, &jmp_table, IPV6); break; case bpf_htons(ETH_P_MPLS_MC): case bpf_htons(ETH_P_MPLS_UC): - bpf_tail_call(skb, &jmp_table, MPLS); + bpf_tail_call_static(skb, &jmp_table, MPLS); break; case bpf_htons(ETH_P_8021Q): case bpf_htons(ETH_P_8021AD): - bpf_tail_call(skb, &jmp_table, VLAN); + bpf_tail_call_static(skb, &jmp_table, VLAN); break; default: /* Protocol not supported */ @@ -246,10 +246,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) switch (nexthdr) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: - bpf_tail_call(skb, &jmp_table, IPV6OP); + bpf_tail_call_static(skb, &jmp_table, IPV6OP); break; case IPPROTO_FRAGMENT: - bpf_tail_call(skb, &jmp_table, IPV6FR); + bpf_tail_call_static(skb, &jmp_table, IPV6FR); break; default: return parse_ip_proto(skb, nexthdr); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index c196280df90d..6a1255465fd6 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -13,6 +13,12 @@ #define udp6_sock udp6_sock___not_used #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used +#define bpf_iter__sockmap bpf_iter__sockmap___not_used +#define btf_ptr btf_ptr___not_used +#define BTF_F_COMPACT BTF_F_COMPACT___not_used +#define BTF_F_NONAME BTF_F_NONAME___not_used +#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used +#define BTF_F_ZERO BTF_F_ZERO___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -26,6 +32,12 @@ #undef udp6_sock #undef bpf_iter__bpf_map_elem #undef bpf_iter__bpf_sk_storage_map +#undef bpf_iter__sockmap +#undef btf_ptr +#undef BTF_F_COMPACT +#undef BTF_F_NONAME +#undef BTF_F_PTR_RAW +#undef BTF_F_ZERO struct bpf_iter_meta { struct seq_file *seq; @@ -96,3 +108,23 @@ struct bpf_iter__bpf_sk_storage_map { struct sock *sk; void *value; }; + +struct bpf_iter__sockmap { + struct bpf_iter_meta *meta; + struct bpf_map *map; + void *key; + struct sock *sk; +}; + +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; +}; + +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c new file mode 100644 index 000000000000..f3af0e30cead --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Cloudflare */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <errno.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u64); +} sockmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u64); +} sockhash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u64); +} dst SEC(".maps"); + +__u32 elems = 0; +__u32 socks = 0; + +SEC("iter/sockmap") +int copy(struct bpf_iter__sockmap *ctx) +{ + struct sock *sk = ctx->sk; + __u32 tmp, *key = ctx->key; + int ret; + + if (!key) + return 0; + + elems++; + + /* We need a temporary buffer on the stack, since the verifier doesn't + * let us use the pointer from the context as an argument to the helper. + */ + tmp = *key; + + if (sk) { + socks++; + return bpf_map_update_elem(&dst, &tmp, sk, 0) != 0; + } + + ret = bpf_map_delete_elem(&dst, &tmp); + return ret && ret != -ENOENT; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c new file mode 100644 index 000000000000..a1ddc36f13ec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Oracle and/or its affiliates. */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#include <errno.h> + +char _license[] SEC("license") = "GPL"; + +long tasks = 0; +long seq_err = 0; +bool skip = false; + +SEC("iter/task") +int dump_task_struct(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + static struct btf_ptr ptr = { }; + long ret; + +#if __has_builtin(__builtin_btf_type_id) + ptr.type_id = bpf_core_type_id_kernel(struct task_struct); + ptr.ptr = task; + + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, "Raw BTF task\n"); + + ret = bpf_seq_printf_btf(seq, &ptr, sizeof(ptr), 0); + switch (ret) { + case 0: + tasks++; + break; + case -ERANGE: + /* NULL task or task->fs, don't count it as an error. */ + break; + case -E2BIG: + return 1; + default: + seq_err = ret; + break; + } +#else + skip = true; +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index 8b787baa2654..b2f7c7c5f952 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -6,6 +6,9 @@ char _license[] SEC("license") = "GPL"; +int count = 0; +int tgid = 0; + SEC("iter/task_file") int dump_task_file(struct bpf_iter__task_file *ctx) { @@ -17,8 +20,13 @@ int dump_task_file(struct bpf_iter__task_file *ctx) if (task == (void *)0 || file == (void *)0) return 0; - if (ctx->meta->seq_num == 0) + if (ctx->meta->seq_num == 0) { + count = 0; BPF_SEQ_PRINTF(seq, " tgid gid fd file\n"); + } + + if (tgid == task->tgid && task->tgid != task->pid) + count++; BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd, (long)file->f_op); diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c new file mode 100644 index 000000000000..48e62f3f074f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c new file mode 100644 index 000000000000..53e5e5a76888 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c new file mode 100644 index 000000000000..d024fb2ac06e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___err_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c new file mode 100644 index 000000000000..9de6595d250c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___val3_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c new file mode 100644 index 000000000000..f3e9904df9c2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c @@ -0,0 +1,4 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_size___err_ambiguous1 x, + struct core_reloc_size___err_ambiguous2 y) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c new file mode 100644 index 000000000000..fc3f69e58c71 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c new file mode 100644 index 000000000000..51511648b4ec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___all_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c new file mode 100644 index 000000000000..67db3dceb279 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___diff_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c new file mode 100644 index 000000000000..b357fc65431d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___fn_wrong_args x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c new file mode 100644 index 000000000000..8ddf20d33d9e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___incompat x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c new file mode 100644 index 000000000000..abbe5bddcefd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_id x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c new file mode 100644 index 000000000000..24e7caf4f013 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_id___missing_targets x) {} diff --git a/tools/testing/selftests/bpf/progs/btf_ptr.h b/tools/testing/selftests/bpf/progs/btf_ptr.h new file mode 100644 index 000000000000..c3c9797c67db --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_ptr.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020, Oracle and/or its affiliates. */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define btf_ptr btf_ptr___not_used +#define BTF_F_COMPACT BTF_F_COMPACT___not_used +#define BTF_F_NONAME BTF_F_NONAME___not_used +#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used +#define BTF_F_ZERO BTF_F_ZERO___not_used +#include "vmlinux.h" +#undef btf_ptr +#undef BTF_F_COMPACT +#undef BTF_F_NONAME +#undef BTF_F_PTR_RAW +#undef BTF_F_ZERO + +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; +}; + +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index b1b2773c0b9d..a943d394fd3a 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -23,6 +23,10 @@ #define TCP_CA_NAME_MAX 16 #endif +#ifndef TCP_NOTSENT_LOWAT +#define TCP_NOTSENT_LOWAT 25 +#endif + #ifndef IFNAMSIZ #define IFNAMSIZ 16 #endif @@ -128,6 +132,18 @@ static __inline int set_keepalive(struct bpf_sock_addr *ctx) return 0; } +static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx) +{ + int lowat = 65535; + + if (ctx->type == SOCK_STREAM) { + if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat))) + return 1; + } + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { @@ -148,6 +164,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) if (set_keepalive(ctx)) return 0; + if (set_notsent_lowat(ctx)) + return 0; + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM) diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 69139ed66216..e6e616cb7bc9 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -652,7 +652,7 @@ struct core_reloc_misc_extensible { }; /* - * EXISTENCE + * FIELD EXISTENCE */ struct core_reloc_existence_output { int a_exists; @@ -809,3 +809,353 @@ struct core_reloc_size___diff_sz { void *ptr_field; enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field; }; + +/* Error case of two candidates with the fields (int_field) at the same + * offset, but with differing final relocation values: size 4 vs size 1 + */ +struct core_reloc_size___err_ambiguous1 { + /* int at offset 0 */ + int int_field; + + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE___1 = 123 } enum_field; +}; + +struct core_reloc_size___err_ambiguous2 { + /* char at offset 0 */ + char int_field; + + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE___2 = 123 } enum_field; +}; + +/* + * TYPE EXISTENCE & SIZE + */ +struct core_reloc_type_based_output { + bool struct_exists; + bool union_exists; + bool enum_exists; + bool typedef_named_struct_exists; + bool typedef_anon_struct_exists; + bool typedef_struct_ptr_exists; + bool typedef_int_exists; + bool typedef_enum_exists; + bool typedef_void_ptr_exists; + bool typedef_func_proto_exists; + bool typedef_arr_exists; + + int struct_sz; + int union_sz; + int enum_sz; + int typedef_named_struct_sz; + int typedef_anon_struct_sz; + int typedef_struct_ptr_sz; + int typedef_int_sz; + int typedef_enum_sz; + int typedef_void_ptr_sz; + int typedef_func_proto_sz; + int typedef_arr_sz; +}; + +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +typedef struct a_struct named_struct_typedef; + +typedef struct { int x, y, z; } anon_struct_typedef; + +typedef struct { + int a, b, c; +} *struct_ptr_typedef; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef int int_typedef; + +typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; + +typedef void *void_ptr_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_based { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + anon_struct_typedef f5; + struct_ptr_typedef f6; + int_typedef f7; + enum_typedef f8; + void_ptr_typedef f9; + func_proto_typedef f10; + arr_typedef f11; +}; + +/* no types in target */ +struct core_reloc_type_based___all_missing { +}; + +/* different type sizes, extra modifiers, anon vs named enums, etc */ +struct a_struct___diff_sz { + long x; + int y; + char z; +}; + +union a_union___diff_sz { + char yy; + char zz; +}; + +typedef struct a_struct___diff_sz named_struct_typedef___diff_sz; + +typedef struct { long xx, yy, zzz; } anon_struct_typedef___diff_sz; + +typedef struct { + char aa[1], bb[2], cc[3]; +} *struct_ptr_typedef___diff_sz; + +enum an_enum___diff_sz { + AN_ENUM_VAL1___diff_sz = 0x123412341234, + AN_ENUM_VAL2___diff_sz = 2, +}; + +typedef unsigned long int_typedef___diff_sz; + +typedef enum an_enum___diff_sz enum_typedef___diff_sz; + +typedef const void * const void_ptr_typedef___diff_sz; + +typedef int_typedef___diff_sz (*func_proto_typedef___diff_sz)(char); + +typedef int arr_typedef___diff_sz[2]; + +struct core_reloc_type_based___diff_sz { + struct a_struct___diff_sz f1; + union a_union___diff_sz f2; + enum an_enum___diff_sz f3; + named_struct_typedef___diff_sz f4; + anon_struct_typedef___diff_sz f5; + struct_ptr_typedef___diff_sz f6; + int_typedef___diff_sz f7; + enum_typedef___diff_sz f8; + void_ptr_typedef___diff_sz f9; + func_proto_typedef___diff_sz f10; + arr_typedef___diff_sz f11; +}; + +/* incompatibilities between target and local types */ +union a_struct___incompat { /* union instead of struct */ + int x; +}; + +struct a_union___incompat { /* struct instead of union */ + int y; + int z; +}; + +/* typedef to union, not to struct */ +typedef union a_struct___incompat named_struct_typedef___incompat; + +/* typedef to void pointer, instead of struct */ +typedef void *anon_struct_typedef___incompat; + +/* extra pointer indirection */ +typedef struct { + int a, b, c; +} **struct_ptr_typedef___incompat; + +/* typedef of a struct with int, instead of int */ +typedef struct { int x; } int_typedef___incompat; + +/* typedef to func_proto, instead of enum */ +typedef int (*enum_typedef___incompat)(void); + +/* pointer to char instead of void */ +typedef char *void_ptr_typedef___incompat; + +/* void return type instead of int */ +typedef void (*func_proto_typedef___incompat)(long); + +/* multi-dimensional array instead of a single-dimensional */ +typedef int arr_typedef___incompat[20][2]; + +struct core_reloc_type_based___incompat { + union a_struct___incompat f1; + struct a_union___incompat f2; + /* the only valid one is enum, to check that something still succeeds */ + enum an_enum f3; + named_struct_typedef___incompat f4; + anon_struct_typedef___incompat f5; + struct_ptr_typedef___incompat f6; + int_typedef___incompat f7; + enum_typedef___incompat f8; + void_ptr_typedef___incompat f9; + func_proto_typedef___incompat f10; + arr_typedef___incompat f11; +}; + +/* func_proto with incompatible signature */ +typedef void (*func_proto_typedef___fn_wrong_ret1)(long); +typedef int * (*func_proto_typedef___fn_wrong_ret2)(long); +typedef struct { int x; } int_struct_typedef; +typedef int_struct_typedef (*func_proto_typedef___fn_wrong_ret3)(long); +typedef int (*func_proto_typedef___fn_wrong_arg)(void *); +typedef int (*func_proto_typedef___fn_wrong_arg_cnt1)(long, long); +typedef int (*func_proto_typedef___fn_wrong_arg_cnt2)(void); + +struct core_reloc_type_based___fn_wrong_args { + /* one valid type to make sure relos still work */ + struct a_struct f1; + func_proto_typedef___fn_wrong_ret1 f2; + func_proto_typedef___fn_wrong_ret2 f3; + func_proto_typedef___fn_wrong_ret3 f4; + func_proto_typedef___fn_wrong_arg f5; + func_proto_typedef___fn_wrong_arg_cnt1 f6; + func_proto_typedef___fn_wrong_arg_cnt2 f7; +}; + +/* + * TYPE ID MAPPING (LOCAL AND TARGET) + */ +struct core_reloc_type_id_output { + int local_anon_struct; + int local_anon_union; + int local_anon_enum; + int local_anon_func_proto_ptr; + int local_anon_void_ptr; + int local_anon_arr; + + int local_struct; + int local_union; + int local_enum; + int local_int; + int local_struct_typedef; + int local_func_proto_typedef; + int local_arr_typedef; + + int targ_struct; + int targ_union; + int targ_enum; + int targ_int; + int targ_struct_typedef; + int targ_func_proto_typedef; + int targ_arr_typedef; +}; + +struct core_reloc_type_id { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + func_proto_typedef f5; + arr_typedef f6; +}; + +struct core_reloc_type_id___missing_targets { + /* nothing */ +}; + +/* + * ENUMERATOR VALUE EXISTENCE AND VALUE RELOCATION + */ +struct core_reloc_enumval_output { + bool named_val1_exists; + bool named_val2_exists; + bool named_val3_exists; + bool anon_val1_exists; + bool anon_val2_exists; + bool anon_val3_exists; + + int named_val1; + int named_val2; + int anon_val1; + int anon_val2; +}; + +enum named_enum { + NAMED_ENUM_VAL1 = 1, + NAMED_ENUM_VAL2 = 2, + NAMED_ENUM_VAL3 = 3, +}; + +typedef enum { + ANON_ENUM_VAL1 = 0x10, + ANON_ENUM_VAL2 = 0x20, + ANON_ENUM_VAL3 = 0x30, +} anon_enum; + +struct core_reloc_enumval { + enum named_enum f1; + anon_enum f2; +}; + +/* differing enumerator values */ +enum named_enum___diff { + NAMED_ENUM_VAL1___diff = 101, + NAMED_ENUM_VAL2___diff = 202, + NAMED_ENUM_VAL3___diff = 303, +}; + +typedef enum { + ANON_ENUM_VAL1___diff = 0x11, + ANON_ENUM_VAL2___diff = 0x22, + ANON_ENUM_VAL3___diff = 0x33, +} anon_enum___diff; + +struct core_reloc_enumval___diff { + enum named_enum___diff f1; + anon_enum___diff f2; +}; + +/* missing (optional) third enum value */ +enum named_enum___val3_missing { + NAMED_ENUM_VAL1___val3_missing = 111, + NAMED_ENUM_VAL2___val3_missing = 222, +}; + +typedef enum { + ANON_ENUM_VAL1___val3_missing = 0x111, + ANON_ENUM_VAL2___val3_missing = 0x222, +} anon_enum___val3_missing; + +struct core_reloc_enumval___val3_missing { + enum named_enum___val3_missing f1; + anon_enum___val3_missing f2; +}; + +/* missing (mandatory) second enum value, should fail */ +enum named_enum___err_missing { + NAMED_ENUM_VAL1___err_missing = 1, + NAMED_ENUM_VAL3___err_missing = 3, +}; + +typedef enum { + ANON_ENUM_VAL1___err_missing = 0x111, + ANON_ENUM_VAL3___err_missing = 0x222, +} anon_enum___err_missing; + +struct core_reloc_enumval___err_missing { + enum named_enum___err_missing f1; + anon_enum___err_missing f2; +}; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 98e1efe14549..49a84a3a2306 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <linux/stddef.h> +#include <linux/if_ether.h> #include <linux/ipv6.h> #include <linux/bpf.h> +#include <linux/tcp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #include <bpf/bpf_tracing.h> @@ -151,4 +153,29 @@ int new_get_constant(long val) test_get_constant = 1; return test_get_constant; /* original get_constant() returns val - 122 */ } + +__u64 test_pkt_write_access_subprog = 0; +SEC("freplace/test_pkt_write_access_subprog") +int new_test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off) +{ + + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + struct tcphdr *tcp; + + if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) + return -1; + + tcp = data + off; + if (tcp + 1 > data_end) + return -1; + + /* make modifications to the packet data */ + tcp->check++; + tcp->syn = 0; + + test_pkt_write_access_subprog = 1; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c new file mode 100644 index 000000000000..c8943ccee6c0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +volatile __u64 test_fmod_ret = 0; +SEC("fmod_ret/security_new_get_constant") +int BPF_PROG(fmod_ret_test, long val, int ret) +{ + test_fmod_ret = 1; + return 120; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c new file mode 100644 index 000000000000..bb2a77c5b62b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define VAR_NUM 2 + +struct hmap_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct hmap_elem); +} hash_map SEC(".maps"); + +SEC("freplace/handle_kprobe") +int new_handle_kprobe(struct pt_regs *ctx) +{ + struct hmap_elem zero = {}, *val; + int key = 0; + + val = bpf_map_lookup_elem(&hash_map, &key); + if (!val) + return 1; + /* spin_lock in hash map */ + bpf_spin_lock(&val->lock); + val->var[0] = 99; + bpf_spin_unlock(&val->lock); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c new file mode 100644 index 000000000000..68a5a9db928a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +struct bpf_map_def SEC("maps") sock_map = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +SEC("freplace/cls_redirect") +int freplace_cls_redirect_test(struct __sk_buff *skb) +{ + int ret = 0; + const int zero = 0; + struct bpf_sock *sk; + + sk = bpf_map_lookup_elem(&sock_map, &zero); + if (!sk) + return TC_ACT_SHOT; + + ret = bpf_map_update_elem(&sock_map, &zero, sk, 0); + bpf_sk_release(sk); + + return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c new file mode 100644 index 000000000000..544e5ac90461 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/stddef.h> +#include <linux/ipv6.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <sys/socket.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +SEC("freplace/connect_v4_prog") +int new_connect_v4_prog(struct bpf_sock_addr *ctx) +{ + // return value thats in invalid range + return 255; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_get_constant.c b/tools/testing/selftests/bpf/progs/freplace_get_constant.c new file mode 100644 index 000000000000..705e4b64dfc2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_get_constant.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +volatile __u64 test_get_constant = 0; +SEC("freplace/get_constant") +int security_new_get_constant(long val) +{ + if (val != 123) + return 0; + test_get_constant = 1; + return test_get_constant; /* original get_constant() returns val - 122 */ +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c new file mode 100644 index 000000000000..0758ba229ae0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#define DUMMY_STORAGE_VALUE 0xdeadbeef + +int monitored_pid = 0; +int inode_storage_result = -1; +int sk_storage_result = -1; + +struct dummy_storage { + __u32 value; +}; + +struct { + __uint(type, BPF_MAP_TYPE_INODE_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct dummy_storage); +} inode_storage_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); + __type(key, int); + __type(value, struct dummy_storage); +} sk_storage_map SEC(".maps"); + +/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed. + */ +struct sock {} __attribute__((preserve_access_index)); +struct sockaddr {} __attribute__((preserve_access_index)); +struct socket { + struct sock *sk; +} __attribute__((preserve_access_index)); + +struct inode {} __attribute__((preserve_access_index)); +struct dentry { + struct inode *d_inode; +} __attribute__((preserve_access_index)); +struct file { + struct inode *f_inode; +} __attribute__((preserve_access_index)); + + +SEC("lsm/inode_unlink") +int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + if (storage->value == DUMMY_STORAGE_VALUE) + inode_storage_result = -1; + + inode_storage_result = + bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + + return 0; +} + +SEC("lsm/socket_bind") +int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + if (storage->value == DUMMY_STORAGE_VALUE) + sk_storage_result = -1; + + sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + return 0; +} + +SEC("lsm/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + storage->value = DUMMY_STORAGE_VALUE; + + return 0; +} + +SEC("lsm/file_open") +int BPF_PROG(file_open, struct file *file) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + if (!file->f_inode) + return 0; + + storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + storage->value = DUMMY_STORAGE_VALUE; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index b4598d4bc4f7..ff4d343b94b5 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -9,6 +9,27 @@ #include <bpf/bpf_tracing.h> #include <errno.h> +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} lru_hash SEC(".maps"); + char _license[] SEC("license") = "GPL"; int monitored_pid = 0; @@ -36,13 +57,54 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, return ret; } -SEC("lsm/bprm_committed_creds") +SEC("lsm.s/bprm_committed_creds") int BPF_PROG(test_void_hook, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; + char args[64]; + __u32 key = 0; + __u64 *value; if (monitored_pid == pid) bprm_count++; + bpf_copy_from_user(args, sizeof(args), (void *)bprm->vma->vm_mm->arg_start); + bpf_copy_from_user(args, sizeof(args), (void *)bprm->mm->arg_start); + + value = bpf_map_lookup_elem(&array, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&hash, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&lru_hash, &key); + if (value) + *value = 0; + + return 0; +} +SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */ +int BPF_PROG(test_task_free, struct task_struct *task) +{ + return 0; +} + +int copy_test = 0; + +SEC("fentry.s/__x64_sys_setdomainname") +int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs) +{ + void *ptr = (void *)PT_REGS_PARM1(regs); + int len = PT_REGS_PARM2(regs); + int buf = 0; + long ret; + + ret = bpf_copy_from_user(&buf, sizeof(buf), ptr); + if (len == -2 && ret == 0 && buf == 1234) + copy_test++; + if (len == -3 && ret == -EFAULT) + copy_test++; + if (len == -4 && ret == -EFAULT) + copy_test++; return 0; } diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index 473665cac67e..c325405751e2 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -82,6 +82,14 @@ static inline int check_default(struct bpf_map *indirect, return 1; } +static __noinline int +check_default_noinline(struct bpf_map *indirect, struct bpf_map *direct) +{ + VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32), + MAX_ENTRIES)); + return 1; +} + typedef struct { int counter; } atomic_t; @@ -107,7 +115,7 @@ static inline int check_hash(void) struct bpf_map *map = (struct bpf_map *)&m_hash; int i; - VERIFY(check_default(&hash->map, map)); + VERIFY(check_default_noinline(&hash->map, map)); VERIFY(hash->n_buckets == MAX_ENTRIES); VERIFY(hash->elem_size == 64); @@ -589,7 +597,7 @@ static inline int check_stack(void) return 1; } -struct bpf_sk_storage_map { +struct bpf_local_storage_map { struct bpf_map map; } __attribute__((preserve_access_index)); @@ -602,8 +610,8 @@ struct { static inline int check_sk_storage(void) { - struct bpf_sk_storage_map *sk_storage = - (struct bpf_sk_storage_map *)&m_sk_storage; + struct bpf_local_storage_map *sk_storage = + (struct bpf_local_storage_map *)&m_sk_storage; struct bpf_map *map = (struct bpf_map *)&m_sk_storage; VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0)); diff --git a/tools/testing/selftests/bpf/progs/metadata_unused.c b/tools/testing/selftests/bpf/progs/metadata_unused.c new file mode 100644 index 000000000000..672a0d19f8d0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/metadata_unused.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +volatile const char bpf_metadata_a[] SEC(".rodata") = "foo"; +volatile const int bpf_metadata_b SEC(".rodata") = 1; + +SEC("cgroup_skb/egress") +int prog(struct xdp_md *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/metadata_used.c b/tools/testing/selftests/bpf/progs/metadata_used.c new file mode 100644 index 000000000000..b7198e65383d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/metadata_used.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +volatile const char bpf_metadata_a[] SEC(".rodata") = "bar"; +volatile const int bpf_metadata_b SEC(".rodata") = 2; + +SEC("cgroup_skb/egress") +int prog(struct xdp_md *ctx) +{ + return bpf_metadata_b ? 1 : 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c new file mode 100644 index 000000000000..6b670039ea67 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Oracle and/or its affiliates. */ + +#include "btf_ptr.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#include <errno.h> + +long ret = 0; +int num_subtests = 0; +int ran_subtests = 0; +bool skip = false; + +#define STRSIZE 2048 +#define EXPECTED_STRSIZE 256 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, char[STRSIZE]); +} strdata SEC(".maps"); + +static int __strncmp(const void *m1, const void *m2, size_t len) +{ + const unsigned char *s1 = m1; + const unsigned char *s2 = m2; + int i, delta = 0; + + for (i = 0; i < len; i++) { + delta = s1[i] - s2[i]; + if (delta || s1[i] == 0 || s2[i] == 0) + break; + } + return delta; +} + +#if __has_builtin(__builtin_btf_type_id) +#define TEST_BTF(_str, _type, _flags, _expected, ...) \ + do { \ + static const char _expectedval[EXPECTED_STRSIZE] = \ + _expected; \ + static const char _ptrtype[64] = #_type; \ + __u64 _hflags = _flags | BTF_F_COMPACT; \ + static _type _ptrdata = __VA_ARGS__; \ + static struct btf_ptr _ptr = { }; \ + int _cmp; \ + \ + ++num_subtests; \ + if (ret < 0) \ + break; \ + ++ran_subtests; \ + _ptr.ptr = &_ptrdata; \ + _ptr.type_id = bpf_core_type_id_kernel(_type); \ + if (_ptr.type_id <= 0) { \ + ret = -EINVAL; \ + break; \ + } \ + ret = bpf_snprintf_btf(_str, STRSIZE, \ + &_ptr, sizeof(_ptr), _hflags); \ + if (ret) \ + break; \ + _cmp = __strncmp(_str, _expectedval, EXPECTED_STRSIZE); \ + if (_cmp != 0) { \ + bpf_printk("(%d) got %s", _cmp, _str); \ + bpf_printk("(%d) expected %s", _cmp, \ + _expectedval); \ + ret = -EBADMSG; \ + break; \ + } \ + } while (0) +#endif + +/* Use where expected data string matches its stringified declaration */ +#define TEST_BTF_C(_str, _type, _flags, ...) \ + TEST_BTF(_str, _type, _flags, "(" #_type ")" #__VA_ARGS__, \ + __VA_ARGS__) + +/* TRACE_EVENT(netif_receive_skb, + * TP_PROTO(struct sk_buff *skb), + */ +SEC("tp_btf/netif_receive_skb") +int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb) +{ + static __u64 flags[] = { 0, BTF_F_COMPACT, BTF_F_ZERO, BTF_F_PTR_RAW, + BTF_F_NONAME, BTF_F_COMPACT | BTF_F_ZERO | + BTF_F_PTR_RAW | BTF_F_NONAME }; + static struct btf_ptr p = { }; + __u32 key = 0; + int i, __ret; + char *str; + +#if __has_builtin(__builtin_btf_type_id) + str = bpf_map_lookup_elem(&strdata, &key); + if (!str) + return 0; + + /* Ensure we can write skb string representation */ + p.type_id = bpf_core_type_id_kernel(struct sk_buff); + p.ptr = skb; + for (i = 0; i < ARRAY_SIZE(flags); i++) { + ++num_subtests; + ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0); + if (ret < 0) + bpf_printk("returned %d when writing skb", ret); + ++ran_subtests; + } + + /* Check invalid ptr value */ + p.ptr = 0; + __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0); + if (__ret >= 0) { + bpf_printk("printing NULL should generate error, got (%d)", + __ret); + ret = -ERANGE; + } + + /* Verify type display for various types. */ + + /* simple int */ + TEST_BTF_C(str, int, 0, 1234); + TEST_BTF(str, int, BTF_F_NONAME, "1234", 1234); + /* zero value should be printed at toplevel */ + TEST_BTF(str, int, 0, "(int)0", 0); + TEST_BTF(str, int, BTF_F_NONAME, "0", 0); + TEST_BTF(str, int, BTF_F_ZERO, "(int)0", 0); + TEST_BTF(str, int, BTF_F_NONAME | BTF_F_ZERO, "0", 0); + TEST_BTF_C(str, int, 0, -4567); + TEST_BTF(str, int, BTF_F_NONAME, "-4567", -4567); + + /* simple char */ + TEST_BTF_C(str, char, 0, 100); + TEST_BTF(str, char, BTF_F_NONAME, "100", 100); + /* zero value should be printed at toplevel */ + TEST_BTF(str, char, 0, "(char)0", 0); + TEST_BTF(str, char, BTF_F_NONAME, "0", 0); + TEST_BTF(str, char, BTF_F_ZERO, "(char)0", 0); + TEST_BTF(str, char, BTF_F_NONAME | BTF_F_ZERO, "0", 0); + + /* simple typedef */ + TEST_BTF_C(str, uint64_t, 0, 100); + TEST_BTF(str, u64, BTF_F_NONAME, "1", 1); + /* zero value should be printed at toplevel */ + TEST_BTF(str, u64, 0, "(u64)0", 0); + TEST_BTF(str, u64, BTF_F_NONAME, "0", 0); + TEST_BTF(str, u64, BTF_F_ZERO, "(u64)0", 0); + TEST_BTF(str, u64, BTF_F_NONAME|BTF_F_ZERO, "0", 0); + + /* typedef struct */ + TEST_BTF_C(str, atomic_t, 0, {.counter = (int)1,}); + TEST_BTF(str, atomic_t, BTF_F_NONAME, "{1,}", {.counter = 1,}); + /* typedef with 0 value should be printed at toplevel */ + TEST_BTF(str, atomic_t, 0, "(atomic_t){}", {.counter = 0,}); + TEST_BTF(str, atomic_t, BTF_F_NONAME, "{}", {.counter = 0,}); + TEST_BTF(str, atomic_t, BTF_F_ZERO, "(atomic_t){.counter = (int)0,}", + {.counter = 0,}); + TEST_BTF(str, atomic_t, BTF_F_NONAME|BTF_F_ZERO, + "{0,}", {.counter = 0,}); + + /* enum where enum value does (and does not) exist */ + TEST_BTF_C(str, enum bpf_cmd, 0, BPF_MAP_CREATE); + TEST_BTF(str, enum bpf_cmd, 0, "(enum bpf_cmd)BPF_MAP_CREATE", 0); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO, + "BPF_MAP_CREATE", 0); + + TEST_BTF(str, enum bpf_cmd, BTF_F_ZERO, "(enum bpf_cmd)BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO, + "BPF_MAP_CREATE", BPF_MAP_CREATE); + TEST_BTF_C(str, enum bpf_cmd, 0, 2000); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "2000", 2000); + + /* simple struct */ + TEST_BTF_C(str, struct btf_enum, 0, + {.name_off = (__u32)3,.val = (__s32)-1,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{3,-1,}", + { .name_off = 3, .val = -1,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{-1,}", + { .name_off = 0, .val = -1,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME|BTF_F_ZERO, "{0,-1,}", + { .name_off = 0, .val = -1,}); + /* empty struct should be printed */ + TEST_BTF(str, struct btf_enum, 0, "(struct btf_enum){}", + { .name_off = 0, .val = 0,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{}", + { .name_off = 0, .val = 0,}); + TEST_BTF(str, struct btf_enum, BTF_F_ZERO, + "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}", + { .name_off = 0, .val = 0,}); + + /* struct with pointers */ + TEST_BTF(str, struct list_head, BTF_F_PTR_RAW, + "(struct list_head){.next = (struct list_head *)0x0000000000000001,}", + { .next = (struct list_head *)1 }); + /* NULL pointer should not be displayed */ + TEST_BTF(str, struct list_head, BTF_F_PTR_RAW, + "(struct list_head){}", + { .next = (struct list_head *)0 }); + + /* struct with char array */ + TEST_BTF(str, struct bpf_prog_info, 0, + "(struct bpf_prog_info){.name = (char[])['f','o','o',],}", + { .name = "foo",}); + TEST_BTF(str, struct bpf_prog_info, BTF_F_NONAME, + "{['f','o','o',],}", + {.name = "foo",}); + /* leading null char means do not display string */ + TEST_BTF(str, struct bpf_prog_info, 0, + "(struct bpf_prog_info){}", + {.name = {'\0', 'f', 'o', 'o'}}); + /* handle non-printable characters */ + TEST_BTF(str, struct bpf_prog_info, 0, + "(struct bpf_prog_info){.name = (char[])[1,2,3,],}", + { .name = {1, 2, 3, 0}}); + + /* struct with non-char array */ + TEST_BTF(str, struct __sk_buff, 0, + "(struct __sk_buff){.cb = (__u32[])[1,2,3,4,5,],}", + { .cb = {1, 2, 3, 4, 5,},}); + TEST_BTF(str, struct __sk_buff, BTF_F_NONAME, + "{[1,2,3,4,5,],}", + { .cb = { 1, 2, 3, 4, 5},}); + /* For non-char, arrays, show non-zero values only */ + TEST_BTF(str, struct __sk_buff, 0, + "(struct __sk_buff){.cb = (__u32[])[1,],}", + { .cb = { 0, 0, 1, 0, 0},}); + + /* struct with bitfields */ + TEST_BTF_C(str, struct bpf_insn, 0, + {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,}); + TEST_BTF(str, struct bpf_insn, BTF_F_NONAME, "{1,0x2,0x3,4,5,}", + {.code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4, + .imm = 5,}); +#else + skip = true; +#endif + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h new file mode 100644 index 000000000000..3bac4fdd4bdf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler.h @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#pragma once + +#define TASK_COMM_LEN 16 +#define MAX_ANCESTORS 4 +#define MAX_PATH 256 +#define KILL_TARGET_LEN 64 +#define CTL_MAXNAME 10 +#define MAX_ARGS_LEN 4096 +#define MAX_FILENAME_LEN 512 +#define MAX_ENVIRON_LEN 8192 +#define MAX_PATH_DEPTH 32 +#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH) +#define MAX_CGROUPS_PATH_DEPTH 8 + +#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN + +#define MAX_CGROUP_PAYLOAD_LEN \ + (MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH)) + +#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) + +#define MAX_SYSCTL_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH) + +#define MAX_KILL_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \ + KILL_TARGET_LEN) + +#define MAX_EXEC_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \ + MAX_ARGS_LEN + MAX_ENVIRON_LEN) + +#define MAX_FILEMOD_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \ + MAX_FILEPATH_LENGTH) + +enum data_type { + INVALID_EVENT, + EXEC_EVENT, + FORK_EVENT, + KILL_EVENT, + SYSCTL_EVENT, + FILEMOD_EVENT, + MAX_DATA_TYPE_EVENT +}; + +enum filemod_type { + FMOD_OPEN, + FMOD_LINK, + FMOD_SYMLINK, +}; + +struct ancestors_data_t { + pid_t ancestor_pids[MAX_ANCESTORS]; + uint32_t ancestor_exec_ids[MAX_ANCESTORS]; + uint64_t ancestor_start_times[MAX_ANCESTORS]; + uint32_t num_ancestors; +}; + +struct var_metadata_t { + enum data_type type; + pid_t pid; + uint32_t exec_id; + uid_t uid; + gid_t gid; + uint64_t start_time; + uint32_t cpu_id; + uint64_t bpf_stats_num_perf_events; + uint64_t bpf_stats_start_ktime_ns; + uint8_t comm_length; +}; + +struct cgroup_data_t { + ino_t cgroup_root_inode; + ino_t cgroup_proc_inode; + uint64_t cgroup_root_mtime; + uint64_t cgroup_proc_mtime; + uint16_t cgroup_root_length; + uint16_t cgroup_proc_length; + uint16_t cgroup_full_length; + int cgroup_full_path_root_pos; +}; + +struct var_sysctl_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + struct ancestors_data_t ancestors_info; + uint8_t sysctl_val_length; + uint16_t sysctl_path_length; + char payload[MAX_SYSCTL_PAYLOAD_LEN]; +}; + +struct var_kill_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + struct ancestors_data_t ancestors_info; + pid_t kill_target_pid; + int kill_sig; + uint32_t kill_count; + uint64_t last_kill_time; + uint8_t kill_target_name_length; + uint8_t kill_target_cgroup_proc_length; + char payload[MAX_KILL_PAYLOAD_LEN]; + size_t payload_length; +}; + +struct var_exec_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + pid_t parent_pid; + uint32_t parent_exec_id; + uid_t parent_uid; + uint64_t parent_start_time; + uint16_t bin_path_length; + uint16_t cmdline_length; + uint16_t environment_length; + char payload[MAX_EXEC_PAYLOAD_LEN]; +}; + +struct var_fork_data_t { + struct var_metadata_t meta; + pid_t parent_pid; + uint32_t parent_exec_id; + uint64_t parent_start_time; + char payload[MAX_METADATA_PAYLOAD_LEN]; +}; + +struct var_filemod_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + enum filemod_type fmod_type; + unsigned int dst_flags; + uint32_t src_device_id; + uint32_t dst_device_id; + ino_t src_inode; + ino_t dst_inode; + uint16_t src_filepath_length; + uint16_t dst_filepath_length; + char payload[MAX_FILEMOD_PAYLOAD_LEN]; +}; + +struct profiler_config_struct { + bool fetch_cgroups_from_bpf; + ino_t cgroup_fs_inode; + ino_t cgroup_login_session_inode; + uint64_t kill_signals_mask; + ino_t inode_filter; + uint32_t stale_info_secs; + bool use_variable_buffers; + bool read_environ_from_exec; + bool enable_cgroup_v1_resolver; +}; + +struct bpf_func_stats_data { + uint64_t time_elapsed_ns; + uint64_t num_executions; + uint64_t num_perf_events; +}; + +struct bpf_func_stats_ctx { + uint64_t start_time_ns; + struct bpf_func_stats_data* bpf_func_stats_data_val; +}; + +enum bpf_function_id { + profiler_bpf_proc_sys_write, + profiler_bpf_sched_process_exec, + profiler_bpf_sched_process_exit, + profiler_bpf_sys_enter_kill, + profiler_bpf_do_filp_open_ret, + profiler_bpf_sched_process_fork, + profiler_bpf_vfs_link, + profiler_bpf_vfs_symlink, + profiler_bpf_max_function_id +}; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h new file mode 100644 index 000000000000..30982a7e4d0f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -0,0 +1,974 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#include "profiler.h" + +#ifndef NULL +#define NULL 0 +#endif + +#define O_WRONLY 00000001 +#define O_RDWR 00000002 +#define O_DIRECTORY 00200000 +#define __O_TMPFILE 020000000 +#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +#define MAX_ERRNO 4095 +#define S_IFMT 00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK 0120000 +#define S_IFREG 0100000 +#define S_IFBLK 0060000 +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 +#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK) +#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) +#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR) +#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK) +#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO) +#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK) +#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO + +#define KILL_DATA_ARRAY_SIZE 8 + +struct var_kill_data_arr_t { + struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE]; +}; + +union any_profiler_data_t { + struct var_exec_data_t var_exec; + struct var_kill_data_t var_kill; + struct var_sysctl_data_t var_sysctl; + struct var_filemod_data_t var_filemod; + struct var_fork_data_t var_fork; + struct var_kill_data_arr_t var_kill_data_arr; +}; + +volatile struct profiler_config_struct bpf_config = {}; + +#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf) +#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode) +#define CGROUP_LOGIN_SESSION_INODE \ + (bpf_config.cgroup_login_session_inode) +#define KILL_SIGNALS (bpf_config.kill_signals_mask) +#define STALE_INFO (bpf_config.stale_info_secs) +#define INODE_FILTER (bpf_config.inode_filter) +#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec) +#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver) + +struct kernfs_iattrs___52 { + struct iattr ia_iattr; +}; + +struct kernfs_node___52 { + union /* kernfs_node_id */ { + struct { + u32 ino; + u32 generation; + }; + u64 id; + } id; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, union any_profiler_data_t); +} data_heap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} events SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, KILL_DATA_ARRAY_SIZE); + __type(key, u32); + __type(value, struct var_kill_data_arr_t); +} var_tpid_to_data SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, profiler_bpf_max_function_id); + __type(key, u32); + __type(value, struct bpf_func_stats_data); +} bpf_func_stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, bool); + __uint(max_entries, 16); +} allowed_devices SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, bool); + __uint(max_entries, 1024); +} allowed_file_inodes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, bool); + __uint(max_entries, 1024); +} allowed_directory_inodes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, bool); + __uint(max_entries, 16); +} disallowed_exec_inodes SEC(".maps"); + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#endif + +static INLINE bool IS_ERR(const void* ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static INLINE u32 get_userspace_pid() +{ + return bpf_get_current_pid_tgid() >> 32; +} + +static INLINE bool is_init_process(u32 tgid) +{ + return tgid == 1 || tgid == 0; +} + +static INLINE unsigned long +probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max) +{ + len = len < max ? len : max; + if (len > 1) { + if (bpf_probe_read(dst, len, src)) + return 0; + } else if (len == 1) { + if (bpf_probe_read(dst, 1, src)) + return 0; + } + return len; +} + +static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct, + int spid) +{ +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) + if (arr_struct->array[i].meta.pid == spid) + return i; + return -1; +} + +static INLINE void populate_ancestors(struct task_struct* task, + struct ancestors_data_t* ancestors_data) +{ + struct task_struct* parent = task; + u32 num_ancestors, ppid; + + ancestors_data->num_ancestors = 0; +#ifdef UNROLL +#pragma unroll +#endif + for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) { + parent = BPF_CORE_READ(parent, real_parent); + if (parent == NULL) + break; + ppid = BPF_CORE_READ(parent, tgid); + if (is_init_process(ppid)) + break; + ancestors_data->ancestor_pids[num_ancestors] = ppid; + ancestors_data->ancestor_exec_ids[num_ancestors] = + BPF_CORE_READ(parent, self_exec_id); + ancestors_data->ancestor_start_times[num_ancestors] = + BPF_CORE_READ(parent, start_time); + ancestors_data->num_ancestors = num_ancestors; + } +} + +static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, + struct kernfs_node* cgroup_root_node, + void* payload, + int* root_pos) +{ + void* payload_start = payload; + size_t filepart_length; + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { + filepart_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name)); + if (!cgroup_node) + return payload; + if (cgroup_node == cgroup_root_node) + *root_pos = payload - payload_start; + if (filepart_length <= MAX_PATH) { + barrier_var(filepart_length); + payload += filepart_length; + } + cgroup_node = BPF_CORE_READ(cgroup_node, parent); + } + return payload; +} + +static ino_t get_inode_from_kernfs(struct kernfs_node* node) +{ + struct kernfs_node___52* node52 = (void*)node; + + if (bpf_core_field_exists(node52->id.ino)) { + barrier_var(node52); + return BPF_CORE_READ(node52, id.ino); + } else { + barrier_var(node); + return (u64)BPF_CORE_READ(node, id); + } +} + +extern bool CONFIG_CGROUP_PIDS __kconfig __weak; +enum cgroup_subsys_id___local { + pids_cgrp_id___local = 123, /* value doesn't matter */ +}; + +static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, + struct task_struct* task, + void* payload) +{ + struct kernfs_node* root_kernfs = + BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); + struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); + + if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { + int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, + pids_cgrp_id___local); +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys_state* subsys = + BPF_CORE_READ(task, cgroups, subsys[i]); + if (subsys != NULL) { + int subsys_id = BPF_CORE_READ(subsys, ss, id); + if (subsys_id == cgrp_id) { + proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); + root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); + break; + } + } + } + } + + cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs); + cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs); + + if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) { + cgroup_data->cgroup_root_mtime = + BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec); + cgroup_data->cgroup_proc_mtime = + BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec); + } else { + struct kernfs_iattrs___52* root_iattr = + (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr); + cgroup_data->cgroup_root_mtime = + BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec); + + struct kernfs_iattrs___52* proc_iattr = + (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr); + cgroup_data->cgroup_proc_mtime = + BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec); + } + + cgroup_data->cgroup_root_length = 0; + cgroup_data->cgroup_proc_length = 0; + cgroup_data->cgroup_full_length = 0; + + size_t cgroup_root_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name)); + barrier_var(cgroup_root_length); + if (cgroup_root_length <= MAX_PATH) { + barrier_var(cgroup_root_length); + cgroup_data->cgroup_root_length = cgroup_root_length; + payload += cgroup_root_length; + } + + size_t cgroup_proc_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name)); + barrier_var(cgroup_proc_length); + if (cgroup_proc_length <= MAX_PATH) { + barrier_var(cgroup_proc_length); + cgroup_data->cgroup_proc_length = cgroup_proc_length; + payload += cgroup_proc_length; + } + + if (FETCH_CGROUPS_FROM_BPF) { + cgroup_data->cgroup_full_path_root_pos = -1; + void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload, + &cgroup_data->cgroup_full_path_root_pos); + cgroup_data->cgroup_full_length = payload_end_pos - payload; + payload = payload_end_pos; + } + + return (void*)payload; +} + +static INLINE void* populate_var_metadata(struct var_metadata_t* metadata, + struct task_struct* task, + u32 pid, void* payload) +{ + u64 uid_gid = bpf_get_current_uid_gid(); + + metadata->uid = (u32)uid_gid; + metadata->gid = uid_gid >> 32; + metadata->pid = pid; + metadata->exec_id = BPF_CORE_READ(task, self_exec_id); + metadata->start_time = BPF_CORE_READ(task, start_time); + metadata->comm_length = 0; + + size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); + barrier_var(comm_length); + if (comm_length <= TASK_COMM_LEN) { + barrier_var(comm_length); + metadata->comm_length = comm_length; + payload += comm_length; + } + + return (void*)payload; +} + +static INLINE struct var_kill_data_t* +get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig) +{ + int zero = 0; + struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); + + if (kill_data == NULL) + return NULL; + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload); + payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload); + size_t payload_length = payload - (void*)kill_data->payload; + kill_data->payload_length = payload_length; + populate_ancestors(task, &kill_data->ancestors_info); + kill_data->meta.type = KILL_EVENT; + kill_data->kill_target_pid = tpid; + kill_data->kill_sig = sig; + kill_data->kill_count = 1; + kill_data->last_kill_time = bpf_ktime_get_ns(); + return kill_data; +} + +static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) +{ + if ((KILL_SIGNALS & (1ULL << sig)) == 0) + return 0; + + u32 spid = get_userspace_pid(); + struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); + + if (arr_struct == NULL) { + struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig); + int zero = 0; + + if (kill_data == NULL) + return 0; + arr_struct = bpf_map_lookup_elem(&data_heap, &zero); + if (arr_struct == NULL) + return 0; + bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data); + } else { + int index = get_var_spid_index(arr_struct, spid); + + if (index == -1) { + struct var_kill_data_t* kill_data = + get_var_kill_data(ctx, spid, tpid, sig); + if (kill_data == NULL) + return 0; +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) + if (arr_struct->array[i].meta.pid == 0) { + bpf_probe_read(&arr_struct->array[i], + sizeof(arr_struct->array[i]), kill_data); + bpf_map_update_elem(&var_tpid_to_data, &tpid, + arr_struct, 0); + + return 0; + } + return 0; + } + + struct var_kill_data_t* kill_data = &arr_struct->array[index]; + + u64 delta_sec = + (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000; + + if (delta_sec < STALE_INFO) { + kill_data->kill_count++; + kill_data->last_kill_time = bpf_ktime_get_ns(); + bpf_probe_read(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); + } else { + struct var_kill_data_t* kill_data = + get_var_kill_data(ctx, spid, tpid, sig); + if (kill_data == NULL) + return 0; + bpf_probe_read(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); + } + } + bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0); + return 0; +} + +static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx, + enum bpf_function_id func_id) +{ + int func_id_key = func_id; + + bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns(); + bpf_stat_ctx->bpf_func_stats_data_val = + bpf_map_lookup_elem(&bpf_func_stats, &func_id_key); + if (bpf_stat_ctx->bpf_func_stats_data_val) + bpf_stat_ctx->bpf_func_stats_data_val->num_executions++; +} + +static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx) +{ + if (bpf_stat_ctx->bpf_func_stats_data_val) + bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns += + bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns; +} + +static INLINE void +bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx, + struct var_metadata_t* meta) +{ + if (bpf_stat_ctx->bpf_func_stats_data_val) { + bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++; + meta->bpf_stats_num_perf_events = + bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events; + } + meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns; + meta->cpu_id = bpf_get_smp_processor_id(); +} + +static INLINE size_t +read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) +{ + size_t length = 0; + size_t filepart_length; + struct dentry* parent_dentry; + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_PATH_DEPTH; i++) { + filepart_length = bpf_probe_read_str(payload, MAX_PATH, + BPF_CORE_READ(filp_dentry, d_name.name)); + barrier_var(filepart_length); + if (filepart_length > MAX_PATH) + break; + barrier_var(filepart_length); + payload += filepart_length; + length += filepart_length; + + parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); + if (filp_dentry == parent_dentry) + break; + filp_dentry = parent_dentry; + } + + return length; +} + +static INLINE bool +is_ancestor_in_allowed_inodes(struct dentry* filp_dentry) +{ + struct dentry* parent_dentry; +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_PATH_DEPTH; i++) { + u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino); + bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino); + + if (allowed_dir != NULL) + return true; + parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); + if (filp_dentry == parent_dentry) + break; + filp_dentry = parent_dentry; + } + return false; +} + +static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry, + u32* device_id, + u64* file_ino) +{ + u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev); + *device_id = dev_id; + bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id); + + if (allowed_device == NULL) + return false; + + u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino); + *file_ino = ino; + bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino); + + if (allowed_file == NULL) + if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent))) + return false; + return true; +} + +SEC("kprobe/proc_sys_write") +ssize_t BPF_KPROBE(kprobe__proc_sys_write, + struct file* filp, const char* buf, + size_t count, loff_t* ppos) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write); + + u32 pid = get_userspace_pid(); + int zero = 0; + struct var_sysctl_data_t* sysctl_data = + bpf_map_lookup_elem(&data_heap, &zero); + if (!sysctl_data) + goto out; + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + sysctl_data->meta.type = SYSCTL_EVENT; + void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload); + payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload); + + populate_ancestors(task, &sysctl_data->ancestors_info); + + sysctl_data->sysctl_val_length = 0; + sysctl_data->sysctl_path_length = 0; + + size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf); + barrier_var(sysctl_val_length); + if (sysctl_val_length <= CTL_MAXNAME) { + barrier_var(sysctl_val_length); + sysctl_data->sysctl_val_length = sysctl_val_length; + payload += sysctl_val_length; + } + + size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH, + BPF_CORE_READ(filp, f_path.dentry, d_name.name)); + barrier_var(sysctl_path_length); + if (sysctl_path_length <= MAX_PATH) { + barrier_var(sysctl_path_length); + sysctl_data->sysctl_path_length = sysctl_path_length; + payload += sysctl_path_length; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta); + unsigned long data_len = payload - (void*)sysctl_data; + data_len = data_len > sizeof(struct var_sysctl_data_t) + ? sizeof(struct var_sysctl_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("tracepoint/syscalls/sys_enter_kill") +int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + + bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill); + int pid = ctx->args[0]; + int sig = ctx->args[1]; + int ret = trace_var_sys_kill(ctx, pid, sig); + bpf_stats_exit(&stats_ctx); + return ret; +}; + +SEC("raw_tracepoint/sched_process_exit") +int raw_tracepoint__sched_process_exit(void* ctx) +{ + int zero = 0; + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit); + + u32 tpid = get_userspace_pid(); + + struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); + struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); + + if (arr_struct == NULL || kill_data == NULL) + goto out; + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { + struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; + + if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) { + bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data); + void* payload = kill_data->payload; + size_t offset = kill_data->payload_length; + if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) + return 0; + payload += offset; + + kill_data->kill_target_name_length = 0; + kill_data->kill_target_cgroup_proc_length = 0; + + size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); + barrier_var(comm_length); + if (comm_length <= TASK_COMM_LEN) { + barrier_var(comm_length); + kill_data->kill_target_name_length = comm_length; + payload += comm_length; + } + + size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN, + BPF_CORE_READ(proc_kernfs, name)); + barrier_var(cgroup_proc_length); + if (cgroup_proc_length <= KILL_TARGET_LEN) { + barrier_var(cgroup_proc_length); + kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; + payload += cgroup_proc_length; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta); + unsigned long data_len = (void*)payload - (void*)kill_data; + data_len = data_len > sizeof(struct var_kill_data_t) + ? sizeof(struct var_kill_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len); + } + } + bpf_map_delete_elem(&var_tpid_to_data, &tpid); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("raw_tracepoint/sched_process_exec") +int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec); + + struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2]; + u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino); + + bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode); + if (should_filter_binprm != NULL) + goto out; + + int zero = 0; + struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!proc_exec_data) + goto out; + + if (INODE_FILTER && inode != INODE_FILTER) + return 0; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + proc_exec_data->meta.type = EXEC_EVENT; + proc_exec_data->bin_path_length = 0; + proc_exec_data->cmdline_length = 0; + proc_exec_data->environment_length = 0; + void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid, + proc_exec_data->payload); + payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload); + + struct task_struct* parent_task = BPF_CORE_READ(task, real_parent); + proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid); + proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val); + proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id); + proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time); + + const char* filename = BPF_CORE_READ(bprm, filename); + size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename); + barrier_var(bin_path_length); + if (bin_path_length <= MAX_FILENAME_LEN) { + barrier_var(bin_path_length); + proc_exec_data->bin_path_length = bin_path_length; + payload += bin_path_length; + } + + void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start); + void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end); + unsigned int cmdline_length = probe_read_lim(payload, arg_start, + arg_end - arg_start, MAX_ARGS_LEN); + + if (cmdline_length <= MAX_ARGS_LEN) { + barrier_var(cmdline_length); + proc_exec_data->cmdline_length = cmdline_length; + payload += cmdline_length; + } + + if (READ_ENVIRON_FROM_EXEC) { + void* env_start = (void*)BPF_CORE_READ(task, mm, env_start); + void* env_end = (void*)BPF_CORE_READ(task, mm, env_end); + unsigned long env_len = probe_read_lim(payload, env_start, + env_end - env_start, MAX_ENVIRON_LEN); + if (cmdline_length <= MAX_ENVIRON_LEN) { + proc_exec_data->environment_length = env_len; + payload += env_len; + } + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta); + unsigned long data_len = payload - (void*)proc_exec_data; + data_len = data_len > sizeof(struct var_exec_data_t) + ? sizeof(struct var_exec_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kretprobe/do_filp_open") +int kprobe_ret__do_filp_open(struct pt_regs* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret); + + struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx); + + if (filp == NULL || IS_ERR(filp)) + goto out; + unsigned int flags = BPF_CORE_READ(filp, f_flags); + if ((flags & (O_RDWR | O_WRONLY)) == 0) + goto out; + if ((flags & O_TMPFILE) > 0) + goto out; + struct inode* file_inode = BPF_CORE_READ(filp, f_inode); + umode_t mode = BPF_CORE_READ(file_inode, i_mode); + if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || + S_ISSOCK(mode)) + goto out; + + struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry); + u32 device_id = 0; + u64 file_ino = 0; + if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_OPEN; + filemod_data->dst_flags = flags; + filemod_data->src_inode = 0; + filemod_data->dst_inode = file_ino; + filemod_data->src_device_id = 0; + filemod_data->dst_device_id = device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kprobe/vfs_link") +int BPF_KPROBE(kprobe__vfs_link, + struct dentry* old_dentry, struct inode* dir, + struct dentry* new_dentry, struct inode** delegated_inode) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link); + + u32 src_device_id = 0; + u64 src_file_ino = 0; + u32 dst_device_id = 0; + u64 dst_file_ino = 0; + if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) && + !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_LINK; + filemod_data->dst_flags = 0; + filemod_data->src_inode = src_file_ino; + filemod_data->dst_inode = dst_file_ino; + filemod_data->src_device_id = src_device_id; + filemod_data->dst_device_id = dst_device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->src_filepath_length = len; + } + + len = read_absolute_file_path_from_dentry(new_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kprobe/vfs_symlink") +int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, + const char* oldname) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink); + + u32 dst_device_id = 0; + u64 dst_file_ino = 0; + if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_SYMLINK; + filemod_data->dst_flags = 0; + filemod_data->src_inode = 0; + filemod_data->dst_inode = dst_file_ino; + filemod_data->src_device_id = 0; + filemod_data->dst_device_id = dst_device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->src_filepath_length = len; + } + len = read_absolute_file_path_from_dentry(dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("raw_tracepoint/sched_process_fork") +int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork); + + int zero = 0; + struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!fork_data) + goto out; + + struct task_struct* parent = (struct task_struct*)ctx->args[0]; + struct task_struct* child = (struct task_struct*)ctx->args[1]; + fork_data->meta.type = FORK_EVENT; + + void* payload = populate_var_metadata(&fork_data->meta, child, + BPF_CORE_READ(child, pid), fork_data->payload); + fork_data->parent_pid = BPF_CORE_READ(parent, pid); + fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id); + fork_data->parent_start_time = BPF_CORE_READ(parent, start_time); + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta); + + unsigned long data_len = payload - (void*)fork_data; + data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c new file mode 100644 index 000000000000..4df9088bfc00 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler1.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) +#define UNROLL +#define INLINE __always_inline +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler2.c b/tools/testing/selftests/bpf/progs/profiler2.c new file mode 100644 index 000000000000..0f32a3cbf556 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler2.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) /**/ +/* undef #define UNROLL */ +#define INLINE /**/ +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler3.c b/tools/testing/selftests/bpf/progs/profiler3.c new file mode 100644 index 000000000000..6249fc31ccb0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler3.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) /**/ +#define UNROLL +#define INLINE __noinline +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index cc615b82b56e..2fb7adafb6b6 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -67,7 +67,12 @@ typedef struct { void* co_name; // PyCodeObject.co_name } FrameData; -static __always_inline void *get_thread_state(void *tls_base, PidData *pidData) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void *get_thread_state(void *tls_base, PidData *pidData) { void* thread_state; int key; @@ -155,7 +160,9 @@ struct { } stackmap SEC(".maps"); #ifdef GLOBAL_FUNC -__attribute__((noinline)) +__noinline +#elif defined(SUBPROGS) +static __noinline #else static __always_inline #endif diff --git a/tools/testing/selftests/bpf/progs/pyperf_subprogs.c b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c new file mode 100644 index 000000000000..60e27a7f0cca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define STACK_MAX_LEN 50 +#define SUBPROGS +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index ad61b722a9de..7de534f38c3f 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -266,8 +266,12 @@ struct tls_index { uint64_t offset; }; -static __always_inline void *calc_location(struct strobe_value_loc *loc, - void *tls_base) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void *calc_location(struct strobe_value_loc *loc, void *tls_base) { /* * tls_mode value is: @@ -327,10 +331,15 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc, : NULL; } -static __always_inline void read_int_var(struct strobemeta_cfg *cfg, - size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void read_int_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data) { void *location = calc_location(&cfg->int_locs[idx], tls_base); if (!location) @@ -440,8 +449,13 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, * read_strobe_meta returns NULL, if no metadata was read; otherwise returns * pointer to *right after* payload ends */ -static __always_inline void *read_strobe_meta(struct task_struct *task, - struct strobemeta_payload *data) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void *read_strobe_meta(struct task_struct *task, + struct strobemeta_payload *data) { pid_t pid = bpf_get_current_pid_tgid() >> 32; struct strobe_value_generic value = {0}; diff --git a/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c new file mode 100644 index 000000000000..b6c01f8fc559 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook + +#define STROBE_MAX_INTS 2 +#define STROBE_MAX_STRS 25 +#define STROBE_MAX_MAPS 13 +#define STROBE_MAX_MAP_ENTRIES 20 +#define NO_UNROLL +#define SUBPROGS +#include "strobemeta.h" diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c index 1f407e65ae52..7115bcefbe8a 100644 --- a/tools/testing/selftests/bpf/progs/tailcall1.c +++ b/tools/testing/selftests/bpf/progs/tailcall1.c @@ -26,20 +26,20 @@ int entry(struct __sk_buff *skb) /* Multiple locations to make sure we patch * all of them. */ - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return 3; } diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c index a093e739cf0e..0431e4fe7efd 100644 --- a/tools/testing/selftests/bpf/progs/tailcall2.c +++ b/tools/testing/selftests/bpf/progs/tailcall2.c @@ -13,14 +13,14 @@ struct { SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return 0; } SEC("classifier/1") int bpf_func_1(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return 1; } @@ -33,25 +33,25 @@ int bpf_func_2(struct __sk_buff *skb) SEC("classifier/3") int bpf_func_3(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 4); + bpf_tail_call_static(skb, &jmp_table, 4); return 3; } SEC("classifier/4") int bpf_func_4(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 3); + bpf_tail_call_static(skb, &jmp_table, 3); return 4; } SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); /* Check multi-prog update. */ - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); /* Check tail call limit. */ - bpf_tail_call(skb, &jmp_table, 3); + bpf_tail_call_static(skb, &jmp_table, 3); return 3; } diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c index cabda877cf0a..739dc2a51e74 100644 --- a/tools/testing/selftests/bpf/progs/tailcall3.c +++ b/tools/testing/selftests/bpf/progs/tailcall3.c @@ -16,14 +16,14 @@ SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) { count++; - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 1; } SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c new file mode 100644 index 000000000000..0103f3dd9f02 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +#define TAIL_FUNC(x) \ + SEC("classifier/" #x) \ + int bpf_func_##x(struct __sk_buff *skb) \ + { \ + return x; \ + } +TAIL_FUNC(0) +TAIL_FUNC(1) + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 0); + + return skb->len * 2; +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 1); + + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c new file mode 100644 index 000000000000..7b1c04183824 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_legacy.h" + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + if (load_byte(skb, 0)) + bpf_tail_call_static(skb, &jmp_table, 1); + else + bpf_tail_call_static(skb, &jmp_table, 0); + return 1; +} + +static volatile int count; + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + count++; + return subprog_tail(skb); +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 0); + + return 0; +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c new file mode 100644 index 000000000000..0d5482bea6c9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_legacy.h" + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +__noinline +int subprog_tail2(struct __sk_buff *skb) +{ + volatile char arr[64] = {}; + + if (load_word(skb, 0) || load_half(skb, 0)) + bpf_tail_call_static(skb, &jmp_table, 10); + else + bpf_tail_call_static(skb, &jmp_table, 1); + + return skb->len; +} + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + volatile char arr[64] = {}; + + bpf_tail_call_static(skb, &jmp_table, 0); + + return skb->len * 2; +} + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + volatile char arr[128] = {}; + + return subprog_tail2(skb); +} + +SEC("classifier/1") +int bpf_func_1(struct __sk_buff *skb) +{ + volatile char arr[128] = {}; + + return skb->len * 3; +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + volatile char arr[128] = {}; + + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c new file mode 100644 index 000000000000..9a1b166b7fbe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static volatile int count; + +__noinline +int subprog_tail_2(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 2); + return skb->len * 3; +} + +__noinline +int subprog_tail_1(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 1); + return skb->len * 2; +} + +__noinline +int subprog_tail(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 0); + return skb->len; +} + +SEC("classifier/1") +int bpf_func_1(struct __sk_buff *skb) +{ + return subprog_tail_2(skb); +} + +SEC("classifier/2") +int bpf_func_2(struct __sk_buff *skb) +{ + count++; + return subprog_tail_2(skb); +} + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + return subprog_tail_1(skb); +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c index e5093796be97..c1e0c8c7c55f 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c @@ -11,6 +11,13 @@ struct inner_map { } inner_map1 SEC(".maps"), inner_map2 SEC(".maps"); +struct inner_map_sz2 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} inner_map_sz2 SEC(".maps"); + struct outer_arr { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 3); @@ -34,6 +41,43 @@ struct outer_arr { .values = { (void *)&inner_map1, 0, (void *)&inner_map2 }, }; +struct inner_map_sz3 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 3); + __type(key, int); + __type(value, int); +} inner_map3 SEC(".maps"), + inner_map4 SEC(".maps"); + +struct inner_map_sz4 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 5); + __type(key, int); + __type(value, int); +} inner_map5 SEC(".maps"); + +struct outer_arr_dyn { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 3); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + }); +} outer_arr_dyn SEC(".maps") = { + .values = { + [0] = (void *)&inner_map3, + [1] = (void *)&inner_map4, + [2] = (void *)&inner_map5, + }, +}; + struct outer_hash { __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); __uint(max_entries, 5); @@ -50,6 +94,30 @@ struct outer_hash { }, }; +struct sockarr_sz1 { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sockarr_sz1 SEC(".maps"); + +struct sockarr_sz2 { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} sockarr_sz2 SEC(".maps"); + +struct outer_sockarr_sz1 { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct sockarr_sz1); +} outer_sockarr SEC(".maps") = { + .values = { (void *)&sockarr_sz1 }, +}; + int input = 0; SEC("raw_tp/sys_enter") @@ -70,6 +138,12 @@ int handle__sys_enter(void *ctx) val = input + 1; bpf_map_update_elem(inner_map, &key, &val, 0); + inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key); + if (!inner_map) + return 1; + val = input + 2; + bpf_map_update_elem(inner_map, &key, &val, 0); + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c new file mode 100644 index 000000000000..9a6b85dd52d2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <string.h> +#include <errno.h> +#include <netinet/in.h> +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/if_ether.h> +#include <linux/pkt_cls.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include "bpf_tcp_helpers.h" + +struct sockaddr_in6 srv_sa6 = {}; +__u16 listen_tp_sport = 0; +__u16 req_sk_sport = 0; +__u32 recv_cookie = 0; +__u32 gen_cookie = 0; +__u32 linum = 0; + +#define LOG() ({ if (!linum) linum = __LINE__; }) + +static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th, + struct tcp_sock *tp, + struct __sk_buff *skb) +{ + if (th->syn) { + __s64 mss_cookie; + void *data_end; + + data_end = (void *)(long)(skb->data_end); + + if (th->doff * 4 != 40) { + LOG(); + return; + } + + if ((void *)th + 40 > data_end) { + LOG(); + return; + } + + mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h), + th, 40); + if (mss_cookie < 0) { + if (mss_cookie != -ENOENT) + LOG(); + } else { + gen_cookie = (__u32)mss_cookie; + } + } else if (gen_cookie) { + /* It was in cookie mode */ + int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h), + th, sizeof(*th)); + + if (ret < 0) { + if (ret != -ENOENT) + LOG(); + } else { + recv_cookie = bpf_ntohl(th->ack_seq) - 1; + } + } +} + +static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb) +{ + struct bpf_sock_tuple *tuple; + struct bpf_sock *bpf_skc; + unsigned int tuple_len; + struct tcphdr *th; + void *data_end; + + data_end = (void *)(long)(skb->data_end); + + th = (struct tcphdr *)(ip6h + 1); + if (th + 1 > data_end) + return TC_ACT_OK; + + /* Is it the testing traffic? */ + if (th->dest != srv_sa6.sin6_port) + return TC_ACT_OK; + + tuple_len = sizeof(tuple->ipv6); + tuple = (struct bpf_sock_tuple *)&ip6h->saddr; + if ((void *)tuple + tuple_len > data_end) { + LOG(); + return TC_ACT_OK; + } + + bpf_skc = bpf_skc_lookup_tcp(skb, tuple, tuple_len, + BPF_F_CURRENT_NETNS, 0); + if (!bpf_skc) { + LOG(); + return TC_ACT_OK; + } + + if (bpf_skc->state == BPF_TCP_NEW_SYN_RECV) { + struct request_sock *req_sk; + + req_sk = (struct request_sock *)bpf_skc_to_tcp_request_sock(bpf_skc); + if (!req_sk) { + LOG(); + goto release; + } + + if (bpf_sk_assign(skb, req_sk, 0)) { + LOG(); + goto release; + } + + req_sk_sport = req_sk->__req_common.skc_num; + + bpf_sk_release(req_sk); + return TC_ACT_OK; + } else if (bpf_skc->state == BPF_TCP_LISTEN) { + struct tcp_sock *tp; + + tp = bpf_skc_to_tcp_sock(bpf_skc); + if (!tp) { + LOG(); + goto release; + } + + if (bpf_sk_assign(skb, tp, 0)) { + LOG(); + goto release; + } + + listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num; + + test_syncookie_helper(ip6h, th, tp, skb); + bpf_sk_release(tp); + return TC_ACT_OK; + } + + if (bpf_sk_assign(skb, bpf_skc, 0)) + LOG(); + +release: + bpf_sk_release(bpf_skc); + return TC_ACT_OK; +} + +SEC("classifier/ingress") +int cls_ingress(struct __sk_buff *skb) +{ + struct ipv6hdr *ip6h; + struct ethhdr *eth; + void *data_end; + + data_end = (void *)(long)(skb->data_end); + + eth = (struct ethhdr *)(long)(skb->data); + if (eth + 1 > data_end) + return TC_ACT_OK; + + if (eth->h_proto != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + ip6h = (struct ipv6hdr *)(eth + 1); + if (ip6h + 1 > data_end) + return TC_ACT_OK; + + if (ip6h->nexthdr == IPPROTO_TCP) + return handle_ip6_tcp(ip6h, skb); + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index f0b72e86bee5..c9f8464996ea 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -22,6 +22,12 @@ #include "test_cls_redirect.h" +#ifdef SUBPROGS +#define INLINING __noinline +#else +#define INLINING __always_inline +#endif + #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) @@ -125,7 +131,7 @@ typedef struct buf { uint8_t *const tail; } buf_t; -static size_t buf_off(const buf_t *buf) +static __always_inline size_t buf_off(const buf_t *buf) { /* Clang seems to optimize constructs like * a - b + c @@ -145,7 +151,7 @@ static size_t buf_off(const buf_t *buf) return off; } -static bool buf_copy(buf_t *buf, void *dst, size_t len) +static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len) { if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) { return false; @@ -155,7 +161,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len) return true; } -static bool buf_skip(buf_t *buf, const size_t len) +static __always_inline bool buf_skip(buf_t *buf, const size_t len) { /* Check whether off + len is valid in the non-linear part. */ if (buf_off(buf) + len > buf->skb->len) { @@ -173,7 +179,7 @@ static bool buf_skip(buf_t *buf, const size_t len) * If scratch is not NULL, the function will attempt to load non-linear * data via bpf_skb_load_bytes. On success, scratch is returned. */ -static void *buf_assign(buf_t *buf, const size_t len, void *scratch) +static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch) { if (buf->head + len > buf->tail) { if (scratch == NULL) { @@ -188,7 +194,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch) return ptr; } -static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) +static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) { if (ipv4->ihl <= 5) { return true; @@ -197,13 +203,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) return buf_skip(buf, (ipv4->ihl - 5) * 4); } -static bool ipv4_is_fragment(const struct iphdr *ip) +static INLINING bool ipv4_is_fragment(const struct iphdr *ip) { uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK); return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0; } -static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) +static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) { struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch); if (ipv4 == NULL) { @@ -222,7 +228,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) } /* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */ -static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) +static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) { if (!buf_copy(pkt, ports, sizeof(*ports))) { return false; @@ -237,7 +243,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) return true; } -static uint16_t pkt_checksum_fold(uint32_t csum) +static INLINING uint16_t pkt_checksum_fold(uint32_t csum) { /* The highest reasonable value for an IPv4 header * checksum requires two folds, so we just do that always. @@ -247,7 +253,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum) return (uint16_t)~csum; } -static void pkt_ipv4_checksum(struct iphdr *iph) +static INLINING void pkt_ipv4_checksum(struct iphdr *iph) { iph->check = 0; @@ -268,10 +274,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph) iph->check = pkt_checksum_fold(acc); } -static bool pkt_skip_ipv6_extension_headers(buf_t *pkt, - const struct ipv6hdr *ipv6, - uint8_t *upper_proto, - bool *is_fragment) +static INLINING +bool pkt_skip_ipv6_extension_headers(buf_t *pkt, + const struct ipv6hdr *ipv6, + uint8_t *upper_proto, + bool *is_fragment) { /* We understand five extension headers. * https://tools.ietf.org/html/rfc8200#section-4.1 states that all @@ -336,7 +343,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt, * scratch is allocated on the stack. However, this usage should be safe since * it's the callers stack after all. */ -static inline __attribute__((__always_inline__)) struct ipv6hdr * +static __always_inline struct ipv6hdr * pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto, bool *is_fragment) { @@ -354,20 +361,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto, /* Global metrics, per CPU */ -struct bpf_map_def metrics_map SEC("maps") = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(unsigned int), - .value_size = sizeof(metrics_t), - .max_entries = 1, -}; - -static metrics_t *get_global_metrics(void) +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, unsigned int); + __type(value, metrics_t); +} metrics_map SEC(".maps"); + +static INLINING metrics_t *get_global_metrics(void) { uint64_t key = 0; return bpf_map_lookup_elem(&metrics_map, &key); } -static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) +static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) { const int payload_off = sizeof(*encap) + @@ -388,8 +395,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) return bpf_redirect(skb->ifindex, BPF_F_INGRESS); } -static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, - struct in_addr *next_hop, metrics_t *metrics) +static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, + struct in_addr *next_hop, metrics_t *metrics) { metrics->forwarded_packets_total_gre++; @@ -509,8 +516,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, return bpf_redirect(skb->ifindex, 0); } -static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, - struct in_addr *next_hop, metrics_t *metrics) +static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, + struct in_addr *next_hop, metrics_t *metrics) { /* swap L2 addresses */ /* This assumes that packets are received from a router. @@ -546,7 +553,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, return bpf_redirect(skb->ifindex, 0); } -static ret_t skip_next_hops(buf_t *pkt, int n) +static INLINING ret_t skip_next_hops(buf_t *pkt, int n) { switch (n) { case 1: @@ -566,8 +573,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n) * pkt is positioned just after the variable length GLB header * iff the call is successful. */ -static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, - struct in_addr *next_hop) +static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, + struct in_addr *next_hop) { if (encap->unigue.next_hop > encap->unigue.hop_count) { return TC_ACT_SHOT; @@ -601,8 +608,8 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, * return value, and calling code works while still being "generic" to * IPv4 and IPv6. */ -static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, - uint64_t iphlen, uint16_t sport, uint16_t dport) +static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, + uint64_t iphlen, uint16_t sport, uint16_t dport) { switch (iphlen) { case sizeof(struct iphdr): { @@ -630,9 +637,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, } } -static verdict_t classify_tcp(struct __sk_buff *skb, - struct bpf_sock_tuple *tuple, uint64_t tuplen, - void *iph, struct tcphdr *tcp) +static INLINING verdict_t classify_tcp(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, uint64_t tuplen, + void *iph, struct tcphdr *tcp) { struct bpf_sock *sk = bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); @@ -663,8 +670,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb, return UNKNOWN; } -static verdict_t classify_udp(struct __sk_buff *skb, - struct bpf_sock_tuple *tuple, uint64_t tuplen) +static INLINING verdict_t classify_udp(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, uint64_t tuplen) { struct bpf_sock *sk = bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); @@ -681,9 +688,9 @@ static verdict_t classify_udp(struct __sk_buff *skb, return UNKNOWN; } -static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, - struct bpf_sock_tuple *tuple, uint64_t tuplen, - metrics_t *metrics) +static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, + struct bpf_sock_tuple *tuple, uint64_t tuplen, + metrics_t *metrics) { switch (proto) { case IPPROTO_TCP: @@ -698,7 +705,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, } } -static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) { struct icmphdr icmp; if (!buf_copy(pkt, &icmp, sizeof(icmp))) { @@ -745,7 +752,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) sizeof(tuple.ipv4), metrics); } -static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) { struct icmp6hdr icmp6; if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) { @@ -797,8 +804,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) metrics); } -static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, - metrics_t *metrics) +static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, + metrics_t *metrics) { metrics->l4_protocol_packets_total_tcp++; @@ -819,8 +826,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp); } -static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, - metrics_t *metrics) +static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, + metrics_t *metrics) { metrics->l4_protocol_packets_total_udp++; @@ -837,7 +844,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, return classify_udp(pkt->skb, &tuple, tuplen); } -static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) { metrics->l3_protocol_packets_total_ipv4++; @@ -874,7 +881,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) } } -static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) { metrics->l3_protocol_packets_total_ipv6++; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c new file mode 100644 index 000000000000..eed26b70e3a2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c @@ -0,0 +1,2 @@ +#define SUBPROGS +#include "test_cls_redirect.c" diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c new file mode 100644 index 000000000000..44f5aa2e8956 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +/* fields of exactly the same size */ +struct test_struct___samesize { + void *ptr; + unsigned long long val1; + unsigned int val2; + unsigned short val3; + unsigned char val4; +} __attribute((preserve_access_index)); + +/* unsigned fields that have to be downsized by libbpf */ +struct test_struct___downsize { + void *ptr; + unsigned long val1; + unsigned long val2; + unsigned long val3; + unsigned long val4; + /* total sz: 40 */ +} __attribute__((preserve_access_index)); + +/* fields with signed integers of wrong size, should be rejected */ +struct test_struct___signed { + void *ptr; + long val1; + long val2; + long val3; + long val4; +} __attribute((preserve_access_index)); + +/* real layout and sizes according to test's (32-bit) BTF */ +struct test_struct___real { + unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */ + unsigned int val2; + unsigned long long val1; + unsigned short val3; + unsigned char val4; + unsigned char _pad; + /* total sz: 20 */ +}; + +struct test_struct___real input = { + .ptr = 0x01020304, + .val1 = 0x1020304050607080, + .val2 = 0x0a0b0c0d, + .val3 = 0xfeed, + .val4 = 0xb9, + ._pad = 0xff, /* make sure no accidental zeros are present */ +}; + +unsigned long long ptr_samesized = 0; +unsigned long long val1_samesized = 0; +unsigned long long val2_samesized = 0; +unsigned long long val3_samesized = 0; +unsigned long long val4_samesized = 0; +struct test_struct___real output_samesized = {}; + +unsigned long long ptr_downsized = 0; +unsigned long long val1_downsized = 0; +unsigned long long val2_downsized = 0; +unsigned long long val3_downsized = 0; +unsigned long long val4_downsized = 0; +struct test_struct___real output_downsized = {}; + +unsigned long long ptr_probed = 0; +unsigned long long val1_probed = 0; +unsigned long long val2_probed = 0; +unsigned long long val3_probed = 0; +unsigned long long val4_probed = 0; + +unsigned long long ptr_signed = 0; +unsigned long long val1_signed = 0; +unsigned long long val2_signed = 0; +unsigned long long val3_signed = 0; +unsigned long long val4_signed = 0; +struct test_struct___real output_signed = {}; + +SEC("raw_tp/sys_exit") +int handle_samesize(void *ctx) +{ + struct test_struct___samesize *in = (void *)&input; + struct test_struct___samesize *out = (void *)&output_samesized; + + ptr_samesized = (unsigned long long)in->ptr; + val1_samesized = in->val1; + val2_samesized = in->val2; + val3_samesized = in->val3; + val4_samesized = in->val4; + + out->ptr = in->ptr; + out->val1 = in->val1; + out->val2 = in->val2; + out->val3 = in->val3; + out->val4 = in->val4; + + return 0; +} + +SEC("raw_tp/sys_exit") +int handle_downsize(void *ctx) +{ + struct test_struct___downsize *in = (void *)&input; + struct test_struct___downsize *out = (void *)&output_downsized; + + ptr_downsized = (unsigned long long)in->ptr; + val1_downsized = in->val1; + val2_downsized = in->val2; + val3_downsized = in->val3; + val4_downsized = in->val4; + + out->ptr = in->ptr; + out->val1 = in->val1; + out->val2 = in->val2; + out->val3 = in->val3; + out->val4 = in->val4; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_probed(void *ctx) +{ + struct test_struct___downsize *in = (void *)&input; + __u64 tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr); + ptr_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1); + val1_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2); + val2_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3); + val3_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4); + val4_probed = tmp; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_signed(void *ctx) +{ + struct test_struct___signed *in = (void *)&input; + struct test_struct___signed *out = (void *)&output_signed; + + val2_signed = in->val2; + val3_signed = in->val3; + val4_signed = in->val4; + + out->val2= in->val2; + out->val3= in->val3; + out->val4= in->val4; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c new file mode 100644 index 000000000000..e7ef3dada2bf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +enum named_enum { + NAMED_ENUM_VAL1 = 1, + NAMED_ENUM_VAL2 = 2, + NAMED_ENUM_VAL3 = 3, +}; + +typedef enum { + ANON_ENUM_VAL1 = 0x10, + ANON_ENUM_VAL2 = 0x20, + ANON_ENUM_VAL3 = 0x30, +} anon_enum; + +struct core_reloc_enumval_output { + bool named_val1_exists; + bool named_val2_exists; + bool named_val3_exists; + bool anon_val1_exists; + bool anon_val2_exists; + bool anon_val3_exists; + + int named_val1; + int named_val2; + int anon_val1; + int anon_val2; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_enumval(void *ctx) +{ +#if __has_builtin(__builtin_preserve_enum_value) + struct core_reloc_enumval_output *out = (void *)&data.out; + enum named_enum named = 0; + anon_enum anon = 0; + + out->named_val1_exists = bpf_core_enum_value_exists(named, NAMED_ENUM_VAL1); + out->named_val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2); + out->named_val3_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL3); + + out->anon_val1_exists = bpf_core_enum_value_exists(anon, ANON_ENUM_VAL1); + out->anon_val2_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL2); + out->anon_val3_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL3); + + out->named_val1 = bpf_core_enum_value(named, NAMED_ENUM_VAL1); + out->named_val2 = bpf_core_enum_value(named, NAMED_ENUM_VAL2); + /* NAMED_ENUM_VAL3 value is optional */ + + out->anon_val1 = bpf_core_enum_value(anon, ANON_ENUM_VAL1); + out->anon_val2 = bpf_core_enum_value(anon, ANON_ENUM_VAL2); + /* ANON_ENUM_VAL3 value is optional */ +#else + data.skip = true; +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index aba928fd60d3..145028b52ad8 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <stdint.h> +#include <stdbool.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> @@ -11,6 +12,7 @@ char _license[] SEC("license") = "GPL"; struct { char in[256]; char out[256]; + bool skip; uint64_t my_pid_tgid; } data = {}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c new file mode 100644 index 000000000000..fb60f8195c53 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +typedef struct a_struct named_struct_typedef; + +typedef struct { int x, y, z; } anon_struct_typedef; + +typedef struct { + int a, b, c; +} *struct_ptr_typedef; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef int int_typedef; + +typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; + +typedef void *void_ptr_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_based_output { + bool struct_exists; + bool union_exists; + bool enum_exists; + bool typedef_named_struct_exists; + bool typedef_anon_struct_exists; + bool typedef_struct_ptr_exists; + bool typedef_int_exists; + bool typedef_enum_exists; + bool typedef_void_ptr_exists; + bool typedef_func_proto_exists; + bool typedef_arr_exists; + + int struct_sz; + int union_sz; + int enum_sz; + int typedef_named_struct_sz; + int typedef_anon_struct_sz; + int typedef_struct_ptr_sz; + int typedef_int_sz; + int typedef_enum_sz; + int typedef_void_ptr_sz; + int typedef_func_proto_sz; + int typedef_arr_sz; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_type_based(void *ctx) +{ +#if __has_builtin(__builtin_preserve_type_info) + struct core_reloc_type_based_output *out = (void *)&data.out; + + out->struct_exists = bpf_core_type_exists(struct a_struct); + out->union_exists = bpf_core_type_exists(union a_union); + out->enum_exists = bpf_core_type_exists(enum an_enum); + out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef); + out->typedef_anon_struct_exists = bpf_core_type_exists(anon_struct_typedef); + out->typedef_struct_ptr_exists = bpf_core_type_exists(struct_ptr_typedef); + out->typedef_int_exists = bpf_core_type_exists(int_typedef); + out->typedef_enum_exists = bpf_core_type_exists(enum_typedef); + out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef); + out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef); + out->typedef_arr_exists = bpf_core_type_exists(arr_typedef); + + out->struct_sz = bpf_core_type_size(struct a_struct); + out->union_sz = bpf_core_type_size(union a_union); + out->enum_sz = bpf_core_type_size(enum an_enum); + out->typedef_named_struct_sz = bpf_core_type_size(named_struct_typedef); + out->typedef_anon_struct_sz = bpf_core_type_size(anon_struct_typedef); + out->typedef_struct_ptr_sz = bpf_core_type_size(struct_ptr_typedef); + out->typedef_int_sz = bpf_core_type_size(int_typedef); + out->typedef_enum_sz = bpf_core_type_size(enum_typedef); + out->typedef_void_ptr_sz = bpf_core_type_size(void_ptr_typedef); + out->typedef_func_proto_sz = bpf_core_type_size(func_proto_typedef); + out->typedef_arr_sz = bpf_core_type_size(arr_typedef); +#else + data.skip = true; +#endif + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c new file mode 100644 index 000000000000..22aba3f6e344 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +/* some types are shared with test_core_reloc_type_based.c */ +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef struct a_struct named_struct_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_id_output { + int local_anon_struct; + int local_anon_union; + int local_anon_enum; + int local_anon_func_proto_ptr; + int local_anon_void_ptr; + int local_anon_arr; + + int local_struct; + int local_union; + int local_enum; + int local_int; + int local_struct_typedef; + int local_func_proto_typedef; + int local_arr_typedef; + + int targ_struct; + int targ_union; + int targ_enum; + int targ_int; + int targ_struct_typedef; + int targ_func_proto_typedef; + int targ_arr_typedef; +}; + +/* preserve types even if Clang doesn't support built-in */ +struct a_struct t1 = {}; +union a_union t2 = {}; +enum an_enum t3 = 0; +named_struct_typedef t4 = {}; +func_proto_typedef t5 = 0; +arr_typedef t6 = {}; + +SEC("raw_tracepoint/sys_enter") +int test_core_type_id(void *ctx) +{ + /* We use __builtin_btf_type_id() in this tests, but up until the time + * __builtin_preserve_type_info() was added it contained a bug that + * would make this test fail. The bug was fixed ([0]) with addition of + * __builtin_preserve_type_info(), though, so that's what we are using + * to detect whether this test has to be executed, however strange + * that might look like. + * + * [0] https://reviews.llvm.org/D85174 + */ +#if __has_builtin(__builtin_preserve_type_info) + struct core_reloc_type_id_output *out = (void *)&data.out; + + out->local_anon_struct = bpf_core_type_id_local(struct { int marker_field; }); + out->local_anon_union = bpf_core_type_id_local(union { int marker_field; }); + out->local_anon_enum = bpf_core_type_id_local(enum { MARKER_ENUM_VAL = 123 }); + out->local_anon_func_proto_ptr = bpf_core_type_id_local(_Bool(*)(int)); + out->local_anon_void_ptr = bpf_core_type_id_local(void *); + out->local_anon_arr = bpf_core_type_id_local(_Bool[47]); + + out->local_struct = bpf_core_type_id_local(struct a_struct); + out->local_union = bpf_core_type_id_local(union a_union); + out->local_enum = bpf_core_type_id_local(enum an_enum); + out->local_int = bpf_core_type_id_local(int); + out->local_struct_typedef = bpf_core_type_id_local(named_struct_typedef); + out->local_func_proto_typedef = bpf_core_type_id_local(func_proto_typedef); + out->local_arr_typedef = bpf_core_type_id_local(arr_typedef); + + out->targ_struct = bpf_core_type_id_kernel(struct a_struct); + out->targ_union = bpf_core_type_id_kernel(union a_union); + out->targ_enum = bpf_core_type_id_kernel(enum an_enum); + out->targ_int = bpf_core_type_id_kernel(int); + out->targ_struct_typedef = bpf_core_type_id_kernel(named_struct_typedef); + out->targ_func_proto_typedef = bpf_core_type_id_kernel(func_proto_typedef); + out->targ_arr_typedef = bpf_core_type_id_kernel(arr_typedef); +#else + data.skip = true; +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c new file mode 100644 index 000000000000..84e1f883f97b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_d_path.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define MAX_PATH_LEN 128 +#define MAX_FILES 7 + +pid_t my_pid = 0; +__u32 cnt_stat = 0; +__u32 cnt_close = 0; +char paths_stat[MAX_FILES][MAX_PATH_LEN] = {}; +char paths_close[MAX_FILES][MAX_PATH_LEN] = {}; +int rets_stat[MAX_FILES] = {}; +int rets_close[MAX_FILES] = {}; + +int called_stat = 0; +int called_close = 0; + +SEC("fentry/security_inode_getattr") +int BPF_PROG(prog_stat, struct path *path, struct kstat *stat, + __u32 request_mask, unsigned int query_flags) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + __u32 cnt = cnt_stat; + int ret; + + called_stat = 1; + + if (pid != my_pid) + return 0; + + if (cnt >= MAX_FILES) + return 0; + ret = bpf_d_path(path, paths_stat[cnt], MAX_PATH_LEN); + + rets_stat[cnt] = ret; + cnt_stat++; + return 0; +} + +SEC("fentry/filp_close") +int BPF_PROG(prog_close, struct file *file, void *id) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + __u32 cnt = cnt_close; + int ret; + + called_close = 1; + + if (pid != my_pid) + return 0; + + if (cnt >= MAX_FILES) + return 0; + ret = bpf_d_path(&file->f_path, + paths_close[cnt], MAX_PATH_LEN); + + rets_close[cnt] = ret; + cnt_close++; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c new file mode 100644 index 000000000000..d55a6544b1ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func8.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__noinline int foo(struct __sk_buff *skb) +{ + return bpf_get_prandom_u32(); +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + if (!foo(skb)) + return 0; + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c new file mode 100644 index 000000000000..bb8ea9270f29 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> + +__u64 out__runqueues_addr = -1; +__u64 out__bpf_prog_active_addr = -1; + +__u32 out__rq_cpu = -1; /* percpu struct fields */ +int out__bpf_prog_active = -1; /* percpu int */ + +__u32 out__this_rq_cpu = -1; +int out__this_bpf_prog_active = -1; + +__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */ + +extern const struct rq runqueues __ksym; /* struct type global var. */ +extern const int bpf_prog_active __ksym; /* int type global var. */ + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + struct rq *rq; + int *active; + __u32 cpu; + + out__runqueues_addr = (__u64)&runqueues; + out__bpf_prog_active_addr = (__u64)&bpf_prog_active; + + cpu = bpf_get_smp_processor_id(); + + /* test bpf_per_cpu_ptr() */ + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu); + if (rq) + out__rq_cpu = rq->cpu; + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) + out__bpf_prog_active = *active; + + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); + if (rq) /* should always be valid, but we can't spare the check. */ + out__cpu_0_rq_cpu = rq->cpu; + + /* test bpf_this_cpu_ptr */ + rq = (struct rq *)bpf_this_cpu_ptr(&runqueues); + out__this_rq_cpu = rq->cpu; + active = (int *)bpf_this_cpu_ptr(&bpf_prog_active); + out__this_bpf_prog_active = *active; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c new file mode 100644 index 000000000000..8bc8f7c637bc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> + +extern const struct rq runqueues __ksym; /* struct type global var. */ +extern const int bpf_prog_active __ksym; /* int type global var. */ + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + struct rq *rq; + int *active; + __u32 cpu; + + cpu = bpf_get_smp_processor_id(); + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu); + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) { + /* READ_ONCE */ + *(volatile int *)active; + /* !rq has not been tested, so verifier should reject. */ + *(volatile int *)(&rq->cpu); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index 28351936a438..b9e2753f4f91 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -17,9 +17,7 @@ #include "test_iptunnel_common.h" #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - -static __u32 rol32(__u32 word, unsigned int shift) +static __always_inline __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); } @@ -52,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift) typedef unsigned int u32; -static u32 jhash(const void *key, u32 length, u32 initval) +static __noinline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c; const unsigned char *k = key; @@ -88,7 +86,7 @@ static u32 jhash(const void *key, u32 length, u32 initval) return c; } -static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; b += initval; @@ -97,7 +95,7 @@ static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) return c; } -static u32 jhash_2words(u32 a, u32 b, u32 initval) +static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); } @@ -200,8 +198,7 @@ struct { __type(value, struct ctl_value); } ctl_array SEC(".maps"); -static __u32 get_packet_hash(struct packet_description *pckt, - bool ipv6) +static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6) { if (ipv6) return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), @@ -210,10 +207,10 @@ static __u32 get_packet_hash(struct packet_description *pckt, return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); } -static bool get_packet_dst(struct real_definition **real, - struct packet_description *pckt, - struct vip_meta *vip_info, - bool is_ipv6) +static __noinline bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) { __u32 hash = get_packet_hash(pckt, is_ipv6); __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; @@ -233,8 +230,8 @@ static bool get_packet_dst(struct real_definition **real, return true; } -static int parse_icmpv6(void *data, void *data_end, __u64 off, - struct packet_description *pckt) +static __noinline int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) { struct icmp6hdr *icmp_hdr; struct ipv6hdr *ip6h; @@ -255,8 +252,8 @@ static int parse_icmpv6(void *data, void *data_end, __u64 off, return TC_ACT_UNSPEC; } -static int parse_icmp(void *data, void *data_end, __u64 off, - struct packet_description *pckt) +static __noinline int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) { struct icmphdr *icmp_hdr; struct iphdr *iph; @@ -280,8 +277,8 @@ static int parse_icmp(void *data, void *data_end, __u64 off, return TC_ACT_UNSPEC; } -static bool parse_udp(void *data, __u64 off, void *data_end, - struct packet_description *pckt) +static __noinline bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) { struct udphdr *udp; udp = data + off; @@ -299,8 +296,8 @@ static bool parse_udp(void *data, __u64 off, void *data_end, return true; } -static bool parse_tcp(void *data, __u64 off, void *data_end, - struct packet_description *pckt) +static __noinline bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) { struct tcphdr *tcp; @@ -321,8 +318,8 @@ static bool parse_tcp(void *data, __u64 off, void *data_end, return true; } -static int process_packet(void *data, __u64 off, void *data_end, - bool is_ipv6, struct __sk_buff *skb) +static __noinline int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) { void *pkt_start = (void *)(long)skb->data; struct packet_description pckt = {}; diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c new file mode 100644 index 000000000000..c89d28ead673 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_init.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +__u64 inKey = 0; +__u64 inValue = 0; +__u32 inPid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 2); + __type(key, __u64); + __type(value, __u64); +} hashmap1 SEC(".maps"); + + +SEC("tp/syscalls/sys_enter_getpgid") +int sysenter_getpgid(const void *ctx) +{ + /* Just do it for once, when called from our own test prog. This + * ensures the map value is only updated for a single CPU. + */ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + if (cur_pid == inPid) + bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c new file mode 100644 index 000000000000..6077a025092c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <stddef.h> +#include <errno.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/socket.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#define BPF_PROG_TEST_TCP_HDR_OPTIONS +#include "test_tcp_hdr_options.h" + +__u16 last_addr16_n = __bpf_htons(1); +__u16 active_lport_n = 0; +__u16 active_lport_h = 0; +__u16 passive_lport_n = 0; +__u16 passive_lport_h = 0; + +/* options received at passive side */ +unsigned int nr_pure_ack = 0; +unsigned int nr_data = 0; +unsigned int nr_syn = 0; +unsigned int nr_fin = 0; + +/* Check the header received from the active side */ +static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) +{ + union { + struct tcphdr th; + struct ipv6hdr ip6; + struct tcp_exprm_opt exprm_opt; + struct tcp_opt reg_opt; + __u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */ + } hdr = {}; + __u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; + struct tcphdr *pth; + int ret; + + hdr.reg_opt.kind = 0xB9; + + /* The option is 4 bytes long instead of 2 bytes */ + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags); + if (ret != -ENOSPC) + RET_CG_ERR(ret); + + /* Test searching magic with regular kind */ + hdr.reg_opt.len = 4; + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt), + load_flags); + if (ret != -EINVAL) + RET_CG_ERR(ret); + + hdr.reg_opt.len = 0; + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt), + load_flags); + if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 || + hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce) + RET_CG_ERR(ret); + + /* Test searching experimental option with invalid kind length */ + hdr.exprm_opt.kind = TCPOPT_EXP; + hdr.exprm_opt.len = 5; + hdr.exprm_opt.magic = 0; + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != -EINVAL) + RET_CG_ERR(ret); + + /* Test searching experimental option with 0 magic value */ + hdr.exprm_opt.len = 4; + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != -ENOMSG) + RET_CG_ERR(ret); + + hdr.exprm_opt.magic = __bpf_htons(0xeB9F); + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != 4 || hdr.exprm_opt.len != 4 || + hdr.exprm_opt.kind != TCPOPT_EXP || + hdr.exprm_opt.magic != __bpf_htons(0xeB9F)) + RET_CG_ERR(ret); + + if (!check_syn) + return CG_OK; + + /* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV + * + * Test loading from tp->saved_syn for other sk_state. + */ + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6, + sizeof(hdr.ip6)); + if (ret != -ENOSPC) + RET_CG_ERR(ret); + + if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n || + hdr.ip6.daddr.s6_addr16[7] != last_addr16_n) + RET_CG_ERR(0); + + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr)); + if (ret < 0) + RET_CG_ERR(ret); + + pth = (struct tcphdr *)(&hdr.ip6 + 1); + if (pth->dest != passive_lport_n || pth->source != active_lport_n) + RET_CG_ERR(0); + + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr)); + if (ret < 0) + RET_CG_ERR(ret); + + if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n) + RET_CG_ERR(0); + + return CG_OK; +} + +static int check_active_syn_in(struct bpf_sock_ops *skops) +{ + return __check_active_hdr_in(skops, true); +} + +static int check_active_hdr_in(struct bpf_sock_ops *skops) +{ + struct tcphdr *th; + + if (__check_active_hdr_in(skops, false) == CG_ERR) + return CG_ERR; + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (tcp_hdrlen(th) < skops->skb_len) + nr_data++; + + if (th->fin) + nr_fin++; + + if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len) + nr_pure_ack++; + + return CG_OK; +} + +static int active_opt_len(struct bpf_sock_ops *skops) +{ + int err; + + /* Reserve more than enough to allow the -EEXIST test in + * the write_active_opt(). + */ + err = bpf_reserve_hdr_opt(skops, 12, 0); + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int write_active_opt(struct bpf_sock_ops *skops) +{ + struct tcp_exprm_opt exprm_opt = {}; + struct tcp_opt win_scale_opt = {}; + struct tcp_opt reg_opt = {}; + struct tcphdr *th; + int err, ret; + + exprm_opt.kind = TCPOPT_EXP; + exprm_opt.len = 4; + exprm_opt.magic = __bpf_htons(0xeB9F); + + reg_opt.kind = 0xB9; + reg_opt.len = 4; + reg_opt.data[0] = 0xfa; + reg_opt.data[1] = 0xce; + + win_scale_opt.kind = TCPOPT_WINDOW; + + err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (err) + RET_CG_ERR(err); + + /* Store the same exprm option */ + err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + + err = bpf_store_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0); + if (err) + RET_CG_ERR(err); + err = bpf_store_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + + /* Check the option has been written and can be searched */ + ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP || + exprm_opt.magic != __bpf_htons(0xeB9F)) + RET_CG_ERR(ret); + + reg_opt.len = 0; + ret = bpf_load_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0); + if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 || + reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce) + RET_CG_ERR(ret); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (th->syn) { + active_lport_h = skops->local_port; + active_lport_n = th->source; + + /* Search the win scale option written by kernel + * in the SYN packet. + */ + ret = bpf_load_hdr_opt(skops, &win_scale_opt, + sizeof(win_scale_opt), 0); + if (ret != 3 || win_scale_opt.len != 3 || + win_scale_opt.kind != TCPOPT_WINDOW) + RET_CG_ERR(ret); + + /* Write the win scale option that kernel + * has already written. + */ + err = bpf_store_hdr_opt(skops, &win_scale_opt, + sizeof(win_scale_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_hdr_opt_len(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + /* Check the SYN from bpf_sock_ops_kern->syn_skb */ + return check_active_syn_in(skops); + + /* Passive side should have cleared the write hdr cb by now */ + if (skops->local_port == passive_lport_h) + RET_CG_ERR(0); + + return active_opt_len(skops); +} + +static int handle_write_hdr_opt(struct bpf_sock_ops *skops) +{ + if (skops->local_port == passive_lport_h) + RET_CG_ERR(0); + + return write_active_opt(skops); +} + +static int handle_parse_hdr(struct bpf_sock_ops *skops) +{ + /* Passive side is not writing any non-standard/unknown + * option, so the active side should never be called. + */ + if (skops->local_port == active_lport_h) + RET_CG_ERR(0); + + return check_active_hdr_in(skops); +} + +static int handle_passive_estab(struct bpf_sock_ops *skops) +{ + int err; + + /* No more write hdr cb */ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); + + /* Recheck the SYN but check the tp->saved_syn this time */ + err = check_active_syn_in(skops); + if (err == CG_ERR) + return err; + + nr_syn++; + + /* The ack has header option written by the active side also */ + return check_active_hdr_in(skops); +} + +SEC("sockops/misc_estab") +int misc_estab(struct bpf_sock_ops *skops) +{ + int true_val = 1; + + switch (skops->op) { + case BPF_SOCK_OPS_TCP_LISTEN_CB: + passive_lport_h = skops->local_port; + passive_lport_n = __bpf_htons(passive_lport_h); + bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, + &true_val, sizeof(true_val)); + set_hdr_cb_flags(skops, 0); + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + set_hdr_cb_flags(skops, 0); + break; + case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: + return handle_parse_hdr(skops); + case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + return handle_hdr_opt_len(skops); + case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + return handle_write_hdr_opt(skops); + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + return handle_passive_estab(skops); + } + + return CG_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index 42403d088abc..abb7344b531f 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -39,10 +39,4 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec) return 0; } -SEC("fmod_ret/__set_task_comm") -int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec) -{ - return !tsk; -} - char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c new file mode 100644 index 000000000000..fb22de7c365d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} array_1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(map_flags, BPF_F_PRESERVE_ELEMS); +} array_2 SEC(".maps"); + +SEC("raw_tp/sched_switch") +int BPF_PROG(read_array_1) +{ + struct bpf_perf_event_value val; + + return bpf_perf_event_read_value(&array_1, 0, &val, sizeof(val)); +} + +SEC("raw_tp/task_rename") +int BPF_PROG(read_array_2) +{ + struct bpf_perf_event_value val; + + return bpf_perf_event_read_value(&array_2, 0, &val, sizeof(val)); +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index e72eba4a93d2..852051064507 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -79,6 +79,24 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var) return skb->ifindex * val * var; } +__attribute__ ((noinline)) +int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off) +{ + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + struct tcphdr *tcp = NULL; + + if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) + return -1; + + tcp = data + off; + if (tcp + 1 > data_end) + return -1; + /* make modification to the packet data */ + tcp->check++; + return 0; +} + SEC("classifier/test_pkt_access") int test_pkt_access(struct __sk_buff *skb) { @@ -117,6 +135,8 @@ int test_pkt_access(struct __sk_buff *skb) if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex) return TC_ACT_SHOT; if (tcp) { + if (test_pkt_write_access_subprog(skb, (void *)tcp - data)) + return TC_ACT_SHOT; if (((void *)(tcp) + 20) > data_end || proto != 6) return TC_ACT_SHOT; barrier(); /* to force ordering of checks */ diff --git a/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c new file mode 100644 index 000000000000..3ae398b75dcd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#include <sys/types.h> + +pid_t pid = 0; +long ret = 0; +void *user_ptr = 0; +char buf[256] = {}; + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int on_write(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + ret = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c new file mode 100644 index 000000000000..4c63cc87b9d0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +__u32 count = 0; +__u32 on_cpu = 0xffffffff; + +SEC("raw_tp/task_rename") +int BPF_PROG(rename, struct task_struct *task, char *comm) +{ + + count++; + if ((__u64) task == 0x1234ULL && (__u64) comm == 0x5678ULL) { + on_cpu = bpf_get_smp_processor_id(); + return (long)task + (long)comm; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index bbf8296f4d66..1032b292af5b 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -19,6 +19,17 @@ #define IP6(aaaa, bbbb, cccc, dddd) \ { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) } +/* Macros for least-significant byte and word accesses. */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define LSE_INDEX(index, size) (index) +#else +#define LSE_INDEX(index, size) ((size) - (index) - 1) +#endif +#define LSB(value, index) \ + (((__u8 *)&(value))[LSE_INDEX((index), sizeof(value))]) +#define LSW(value, index) \ + (((__u16 *)&(value))[LSE_INDEX((index), sizeof(value) / 2)]) + #define MAX_SOCKS 32 struct { @@ -369,171 +380,146 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err, family; - __u16 *half; - __u8 *byte; bool v4; v4 = (ctx->family == AF_INET); /* Narrow loads from family field */ - byte = (__u8 *)&ctx->family; - half = (__u16 *)&ctx->family; - if (byte[0] != (v4 ? AF_INET : AF_INET6) || - byte[1] != 0 || byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->family, 0) != (v4 ? AF_INET : AF_INET6) || + LSB(ctx->family, 1) != 0 || LSB(ctx->family, 2) != 0 || LSB(ctx->family, 3) != 0) return SK_DROP; - if (half[0] != (v4 ? AF_INET : AF_INET6)) + if (LSW(ctx->family, 0) != (v4 ? AF_INET : AF_INET6)) return SK_DROP; - byte = (__u8 *)&ctx->protocol; - if (byte[0] != IPPROTO_TCP || - byte[1] != 0 || byte[2] != 0 || byte[3] != 0) + /* Narrow loads from protocol field */ + if (LSB(ctx->protocol, 0) != IPPROTO_TCP || + LSB(ctx->protocol, 1) != 0 || LSB(ctx->protocol, 2) != 0 || LSB(ctx->protocol, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->protocol; - if (half[0] != IPPROTO_TCP) + if (LSW(ctx->protocol, 0) != IPPROTO_TCP) return SK_DROP; /* Narrow loads from remote_port field. Expect non-0 value. */ - byte = (__u8 *)&ctx->remote_port; - if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0) + if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 && + LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0) return SK_DROP; - half = (__u16 *)&ctx->remote_port; - if (half[0] == 0) + if (LSW(ctx->remote_port, 0) == 0) return SK_DROP; /* Narrow loads from local_port field. Expect DST_PORT. */ - byte = (__u8 *)&ctx->local_port; - if (byte[0] != ((DST_PORT >> 0) & 0xff) || - byte[1] != ((DST_PORT >> 8) & 0xff) || - byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) || + LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) || + LSB(ctx->local_port, 2) != 0 || LSB(ctx->local_port, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->local_port; - if (half[0] != DST_PORT) + if (LSW(ctx->local_port, 0) != DST_PORT) return SK_DROP; /* Narrow loads from IPv4 fields */ if (v4) { /* Expect non-0.0.0.0 in remote_ip4 */ - byte = (__u8 *)&ctx->remote_ip4; - if (byte[0] == 0 && byte[1] == 0 && - byte[2] == 0 && byte[3] == 0) + if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 && + LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip4; - if (half[0] == 0 && half[1] == 0) + if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0) return SK_DROP; /* Expect DST_IP4 in local_ip4 */ - byte = (__u8 *)&ctx->local_ip4; - if (byte[0] != ((DST_IP4 >> 0) & 0xff) || - byte[1] != ((DST_IP4 >> 8) & 0xff) || - byte[2] != ((DST_IP4 >> 16) & 0xff) || - byte[3] != ((DST_IP4 >> 24) & 0xff)) + if (LSB(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xff) || + LSB(ctx->local_ip4, 1) != ((DST_IP4 >> 8) & 0xff) || + LSB(ctx->local_ip4, 2) != ((DST_IP4 >> 16) & 0xff) || + LSB(ctx->local_ip4, 3) != ((DST_IP4 >> 24) & 0xff)) return SK_DROP; - half = (__u16 *)&ctx->local_ip4; - if (half[0] != ((DST_IP4 >> 0) & 0xffff) || - half[1] != ((DST_IP4 >> 16) & 0xffff)) + if (LSW(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xffff) || + LSW(ctx->local_ip4, 1) != ((DST_IP4 >> 16) & 0xffff)) return SK_DROP; } else { /* Expect 0.0.0.0 IPs when family != AF_INET */ - byte = (__u8 *)&ctx->remote_ip4; - if (byte[0] != 0 || byte[1] != 0 && - byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->remote_ip4, 0) != 0 || LSB(ctx->remote_ip4, 1) != 0 || + LSB(ctx->remote_ip4, 2) != 0 || LSB(ctx->remote_ip4, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip4; - if (half[0] != 0 || half[1] != 0) + if (LSW(ctx->remote_ip4, 0) != 0 || LSW(ctx->remote_ip4, 1) != 0) return SK_DROP; - byte = (__u8 *)&ctx->local_ip4; - if (byte[0] != 0 || byte[1] != 0 && - byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->local_ip4, 0) != 0 || LSB(ctx->local_ip4, 1) != 0 || + LSB(ctx->local_ip4, 2) != 0 || LSB(ctx->local_ip4, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->local_ip4; - if (half[0] != 0 || half[1] != 0) + if (LSW(ctx->local_ip4, 0) != 0 || LSW(ctx->local_ip4, 1) != 0) return SK_DROP; } /* Narrow loads from IPv6 fields */ if (!v4) { - /* Expenct non-:: IP in remote_ip6 */ - byte = (__u8 *)&ctx->remote_ip6; - if (byte[0] == 0 && byte[1] == 0 && - byte[2] == 0 && byte[3] == 0 && - byte[4] == 0 && byte[5] == 0 && - byte[6] == 0 && byte[7] == 0 && - byte[8] == 0 && byte[9] == 0 && - byte[10] == 0 && byte[11] == 0 && - byte[12] == 0 && byte[13] == 0 && - byte[14] == 0 && byte[15] == 0) + /* Expect non-:: IP in remote_ip6 */ + if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 && + LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 && + LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 && + LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 && + LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 && + LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 && + LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 && + LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip6; - if (half[0] == 0 && half[1] == 0 && - half[2] == 0 && half[3] == 0 && - half[4] == 0 && half[5] == 0 && - half[6] == 0 && half[7] == 0) + if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 && + LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 && + LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 && + LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0) return SK_DROP; - /* Expect DST_IP6 in local_ip6 */ - byte = (__u8 *)&ctx->local_ip6; - if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) || - byte[1] != ((DST_IP6[0] >> 8) & 0xff) || - byte[2] != ((DST_IP6[0] >> 16) & 0xff) || - byte[3] != ((DST_IP6[0] >> 24) & 0xff) || - byte[4] != ((DST_IP6[1] >> 0) & 0xff) || - byte[5] != ((DST_IP6[1] >> 8) & 0xff) || - byte[6] != ((DST_IP6[1] >> 16) & 0xff) || - byte[7] != ((DST_IP6[1] >> 24) & 0xff) || - byte[8] != ((DST_IP6[2] >> 0) & 0xff) || - byte[9] != ((DST_IP6[2] >> 8) & 0xff) || - byte[10] != ((DST_IP6[2] >> 16) & 0xff) || - byte[11] != ((DST_IP6[2] >> 24) & 0xff) || - byte[12] != ((DST_IP6[3] >> 0) & 0xff) || - byte[13] != ((DST_IP6[3] >> 8) & 0xff) || - byte[14] != ((DST_IP6[3] >> 16) & 0xff) || - byte[15] != ((DST_IP6[3] >> 24) & 0xff)) + if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) || + LSB(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 8) & 0xff) || + LSB(ctx->local_ip6[0], 2) != ((DST_IP6[0] >> 16) & 0xff) || + LSB(ctx->local_ip6[0], 3) != ((DST_IP6[0] >> 24) & 0xff) || + LSB(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xff) || + LSB(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 8) & 0xff) || + LSB(ctx->local_ip6[1], 2) != ((DST_IP6[1] >> 16) & 0xff) || + LSB(ctx->local_ip6[1], 3) != ((DST_IP6[1] >> 24) & 0xff) || + LSB(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xff) || + LSB(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 8) & 0xff) || + LSB(ctx->local_ip6[2], 2) != ((DST_IP6[2] >> 16) & 0xff) || + LSB(ctx->local_ip6[2], 3) != ((DST_IP6[2] >> 24) & 0xff) || + LSB(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xff) || + LSB(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 8) & 0xff) || + LSB(ctx->local_ip6[3], 2) != ((DST_IP6[3] >> 16) & 0xff) || + LSB(ctx->local_ip6[3], 3) != ((DST_IP6[3] >> 24) & 0xff)) return SK_DROP; - half = (__u16 *)&ctx->local_ip6; - if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) || - half[1] != ((DST_IP6[0] >> 16) & 0xffff) || - half[2] != ((DST_IP6[1] >> 0) & 0xffff) || - half[3] != ((DST_IP6[1] >> 16) & 0xffff) || - half[4] != ((DST_IP6[2] >> 0) & 0xffff) || - half[5] != ((DST_IP6[2] >> 16) & 0xffff) || - half[6] != ((DST_IP6[3] >> 0) & 0xffff) || - half[7] != ((DST_IP6[3] >> 16) & 0xffff)) + if (LSW(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xffff) || + LSW(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 16) & 0xffff) || + LSW(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xffff) || + LSW(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 16) & 0xffff) || + LSW(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xffff) || + LSW(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 16) & 0xffff) || + LSW(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xffff) || + LSW(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 16) & 0xffff)) return SK_DROP; } else { /* Expect :: IPs when family != AF_INET6 */ - byte = (__u8 *)&ctx->remote_ip6; - if (byte[0] != 0 || byte[1] != 0 || - byte[2] != 0 || byte[3] != 0 || - byte[4] != 0 || byte[5] != 0 || - byte[6] != 0 || byte[7] != 0 || - byte[8] != 0 || byte[9] != 0 || - byte[10] != 0 || byte[11] != 0 || - byte[12] != 0 || byte[13] != 0 || - byte[14] != 0 || byte[15] != 0) + if (LSB(ctx->remote_ip6[0], 0) != 0 || LSB(ctx->remote_ip6[0], 1) != 0 || + LSB(ctx->remote_ip6[0], 2) != 0 || LSB(ctx->remote_ip6[0], 3) != 0 || + LSB(ctx->remote_ip6[1], 0) != 0 || LSB(ctx->remote_ip6[1], 1) != 0 || + LSB(ctx->remote_ip6[1], 2) != 0 || LSB(ctx->remote_ip6[1], 3) != 0 || + LSB(ctx->remote_ip6[2], 0) != 0 || LSB(ctx->remote_ip6[2], 1) != 0 || + LSB(ctx->remote_ip6[2], 2) != 0 || LSB(ctx->remote_ip6[2], 3) != 0 || + LSB(ctx->remote_ip6[3], 0) != 0 || LSB(ctx->remote_ip6[3], 1) != 0 || + LSB(ctx->remote_ip6[3], 2) != 0 || LSB(ctx->remote_ip6[3], 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip6; - if (half[0] != 0 || half[1] != 0 || - half[2] != 0 || half[3] != 0 || - half[4] != 0 || half[5] != 0 || - half[6] != 0 || half[7] != 0) + if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 || + LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 || + LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 || + LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0) return SK_DROP; - byte = (__u8 *)&ctx->local_ip6; - if (byte[0] != 0 || byte[1] != 0 || - byte[2] != 0 || byte[3] != 0 || - byte[4] != 0 || byte[5] != 0 || - byte[6] != 0 || byte[7] != 0 || - byte[8] != 0 || byte[9] != 0 || - byte[10] != 0 || byte[11] != 0 || - byte[12] != 0 || byte[13] != 0 || - byte[14] != 0 || byte[15] != 0) + if (LSB(ctx->local_ip6[0], 0) != 0 || LSB(ctx->local_ip6[0], 1) != 0 || + LSB(ctx->local_ip6[0], 2) != 0 || LSB(ctx->local_ip6[0], 3) != 0 || + LSB(ctx->local_ip6[1], 0) != 0 || LSB(ctx->local_ip6[1], 1) != 0 || + LSB(ctx->local_ip6[1], 2) != 0 || LSB(ctx->local_ip6[1], 3) != 0 || + LSB(ctx->local_ip6[2], 0) != 0 || LSB(ctx->local_ip6[2], 1) != 0 || + LSB(ctx->local_ip6[2], 2) != 0 || LSB(ctx->local_ip6[2], 3) != 0 || + LSB(ctx->local_ip6[3], 0) != 0 || LSB(ctx->local_ip6[3], 1) != 0 || + LSB(ctx->local_ip6[3], 2) != 0 || LSB(ctx->local_ip6[3], 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->local_ip6; - if (half[0] != 0 || half[1] != 0 || - half[2] != 0 || half[3] != 0 || - half[4] != 0 || half[5] != 0 || - half[6] != 0 || half[7] != 0) + if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 || + LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 || + LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 || + LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0) return SK_DROP; } diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 9bcaa37f476a..81b57b9aaaea 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -7,19 +7,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> - -enum bpf_addr_array_idx { - ADDR_SRV_IDX, - ADDR_CLI_IDX, - __NR_BPF_ADDR_ARRAY_IDX, -}; - -enum bpf_result_array_idx { - EGRESS_SRV_IDX, - EGRESS_CLI_IDX, - INGRESS_LISTEN_IDX, - __NR_BPF_RESULT_ARRAY_IDX, -}; +#include "bpf_tcp_helpers.h" enum bpf_linum_array_idx { EGRESS_LINUM_IDX, @@ -29,27 +17,6 @@ enum bpf_linum_array_idx { struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX); - __type(key, __u32); - __type(value, struct sockaddr_in6); -} addr_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); - __type(key, __u32); - __type(value, struct bpf_sock); -} sock_result_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); - __type(key, __u32); - __type(value, struct bpf_tcp_sock); -} tcp_sock_result_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX); __type(key, __u32); __type(value, __u32); @@ -74,6 +41,17 @@ struct { __type(value, struct bpf_spinlock_cnt); } sk_pkt_out_cnt10 SEC(".maps"); +struct bpf_tcp_sock listen_tp = {}; +struct sockaddr_in6 srv_sa6 = {}; +struct bpf_tcp_sock cli_tp = {}; +struct bpf_tcp_sock srv_tp = {}; +struct bpf_sock listen_sk = {}; +struct bpf_sock srv_sk = {}; +struct bpf_sock cli_sk = {}; +__u64 parent_cg_id = 0; +__u64 child_cg_id = 0; +__u64 lsndtime = 0; + static bool is_loopback6(__u32 *a6) { return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); @@ -130,62 +108,86 @@ static void tpcpy(struct bpf_tcp_sock *dst, dst->bytes_acked = src->bytes_acked; } -#define RETURN { \ +/* Always return CG_OK so that no pkt will be filtered out */ +#define CG_OK 1 + +#define RET_LOG() ({ \ linum = __LINE__; \ - bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \ - return 1; \ -} + bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \ + return CG_OK; \ +}) SEC("cgroup_skb/egress") int egress_read_sock_fields(struct __sk_buff *skb) { struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; - __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; - struct sockaddr_in6 *srv_sa6, *cli_sa6; struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; __u32 linum, linum_idx; + struct tcp_sock *ktp; linum_idx = EGRESS_LINUM_IDX; sk = skb->sk; - if (!sk || sk->state == 10) - RETURN; + if (!sk) + RET_LOG(); + /* Not the testing egress traffic or + * TCP_LISTEN (10) socket will be copied at the ingress side. + */ + if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) || + sk->state == 10) + return CG_OK; + + if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) { + /* Server socket */ + sk_ret = &srv_sk; + tp_ret = &srv_tp; + } else if (sk->dst_port == srv_sa6.sin6_port) { + /* Client socket */ + sk_ret = &cli_sk; + tp_ret = &cli_tp; + } else { + /* Not the testing egress traffic */ + return CG_OK; + } + + /* It must be a fullsock for cgroup_skb/egress prog */ sk = bpf_sk_fullsock(sk); - if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || - !is_loopback6(sk->src_ip6)) - RETURN; + if (!sk) + RET_LOG(); + + /* Not the testing egress traffic */ + if (sk->protocol != IPPROTO_TCP) + return CG_OK; tp = bpf_tcp_sock(sk); if (!tp) - RETURN; + RET_LOG(); - srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); - cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx); - if (!srv_sa6 || !cli_sa6) - RETURN; + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); - if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) - result_idx = EGRESS_SRV_IDX; - else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) - result_idx = EGRESS_CLI_IDX; - else - RETURN; + if (sk_ret == &srv_sk) { + ktp = bpf_skc_to_tcp_sock(sk); - sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); - if (!sk_ret || !tp_ret) - RETURN; + if (!ktp) + RET_LOG(); - skcpy(sk_ret, sk); - tpcpy(tp_ret, tp); + lsndtime = ktp->lsndtime; + + child_cg_id = bpf_sk_cgroup_id(ktp); + if (!child_cg_id) + RET_LOG(); + + parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2); + if (!parent_cg_id) + RET_LOG(); - if (result_idx == EGRESS_SRV_IDX) { /* The userspace has created it for srv sk */ - pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); - pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0); + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp, 0, 0); } else { pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, @@ -197,7 +199,7 @@ int egress_read_sock_fields(struct __sk_buff *skb) } if (!pkt_out_cnt || !pkt_out_cnt10) - RETURN; + RET_LOG(); /* Even both cnt and cnt10 have lock defined in their BTF, * intentionally one cnt takes lock while one does not @@ -208,48 +210,44 @@ int egress_read_sock_fields(struct __sk_buff *skb) pkt_out_cnt10->cnt += 10; bpf_spin_unlock(&pkt_out_cnt10->lock); - RETURN; + return CG_OK; } SEC("cgroup_skb/ingress") int ingress_read_sock_fields(struct __sk_buff *skb) { - __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX; - struct bpf_tcp_sock *tp, *tp_ret; - struct bpf_sock *sk, *sk_ret; - struct sockaddr_in6 *srv_sa6; + struct bpf_tcp_sock *tp; __u32 linum, linum_idx; + struct bpf_sock *sk; linum_idx = INGRESS_LINUM_IDX; sk = skb->sk; - if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6)) - RETURN; + if (!sk) + RET_LOG(); - srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); - if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port)) - RETURN; + /* Not the testing ingress traffic to the server */ + if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) || + sk->src_port != bpf_ntohs(srv_sa6.sin6_port)) + return CG_OK; - if (sk->state != 10 && sk->state != 12) - RETURN; + /* Only interested in TCP_LISTEN */ + if (sk->state != 10) + return CG_OK; - sk = bpf_get_listener_sock(sk); + /* It must be a fullsock for cgroup_skb/ingress prog */ + sk = bpf_sk_fullsock(sk); if (!sk) - RETURN; + RET_LOG(); tp = bpf_tcp_sock(sk); if (!tp) - RETURN; - - sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); - if (!sk_ret || !tp_ret) - RETURN; + RET_LOG(); - skcpy(sk_ret, sk); - tpcpy(tp_ret, tp); + skcpy(&listen_sk, sk); + tpcpy(&listen_tp, tp); - RETURN; + return CG_OK; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c new file mode 100644 index 000000000000..02a59e220cbc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} map SEC(".maps"); + +SEC("sockops") +int bpf_sockmap(struct bpf_sock_ops *skops) +{ + __u32 key = 0; + + if (skops->sk) + bpf_map_update_elem(&map, &key, skops->sk, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 3dca4c2e2418..1858435de7aa 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -131,39 +131,55 @@ int bpf_prog2(struct __sk_buff *skb) } -SEC("sk_skb3") -int bpf_prog3(struct __sk_buff *skb) +static inline void bpf_write_pass(struct __sk_buff *skb, int offset) { - const int one = 1; - int err, *f, ret = SK_PASS; + int err = bpf_skb_pull_data(skb, 6 + offset); void *data_end; char *c; - err = bpf_skb_pull_data(skb, 19); if (err) - goto tls_out; + return; c = (char *)(long)skb->data; data_end = (void *)(long)skb->data_end; - if (c + 18 < data_end) - memcpy(&c[13], "PASS", 4); + if (c + 5 + offset < data_end) + memcpy(c + offset, "PASS", 4); +} + +SEC("sk_skb3") +int bpf_prog3(struct __sk_buff *skb) +{ + int err, *f, ret = SK_PASS; + const int one = 1; + f = bpf_map_lookup_elem(&sock_skb_opts, &one); if (f && *f) { __u64 flags = 0; ret = 0; flags = *f; + + err = bpf_skb_adjust_room(skb, -13, 0, 0); + if (err) + return SK_DROP; + err = bpf_skb_adjust_room(skb, 4, 0, 0); + if (err) + return SK_DROP; + bpf_write_pass(skb, 0); #ifdef SOCKMAP return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags); #else return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags); #endif } - f = bpf_map_lookup_elem(&sock_skb_opts, &one); if (f && *f) ret = SK_DROP; + err = bpf_skb_adjust_room(skb, 4, 0, 0); + if (err) + return SK_DROP; + bpf_write_pass(skb, 13); tls_out: return ret; } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c new file mode 100644 index 000000000000..9d0c9f28cab2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} src SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} dst_sock_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} dst_sock_hash SEC(".maps"); + +SEC("classifier/copy_sock_map") +int copy_sock_map(void *ctx) +{ + struct bpf_sock *sk; + bool failed = false; + __u32 key = 0; + + sk = bpf_map_lookup_elem(&src, &key); + if (!sk) + return SK_DROP; + + if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0)) + failed = true; + + if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0)) + failed = true; + + bpf_sk_release(sk); + return failed ? SK_DROP : SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c new file mode 100644 index 000000000000..d3c5673c0218 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_subprogs.c @@ -0,0 +1,103 @@ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +const char LICENSE[] SEC("license") = "GPL"; + +__noinline int sub1(int x) +{ + return x + 1; +} + +static __noinline int sub5(int v); + +__noinline int sub2(int y) +{ + return sub5(y + 2); +} + +static __noinline int sub3(int z) +{ + return z + 3 + sub1(4); +} + +static __noinline int sub4(int w) +{ + return w + sub3(5) + sub1(6); +} + +/* sub5() is an identitify function, just to test weirder functions layout and + * call patterns + */ +static __noinline int sub5(int v) +{ + return sub1(v) - 1; /* compensates sub1()'s + 1 */ +} + +/* unfortunately verifier rejects `struct task_struct *t` as an unkown pointer + * type, so we need to accept pointer as integer and then cast it inside the + * function + */ +__noinline int get_task_tgid(uintptr_t t) +{ + /* this ensures that CO-RE relocs work in multi-subprogs .text */ + return BPF_CORE_READ((struct task_struct *)(void *)t, tgid); +} + +int res1 = 0; +int res2 = 0; +int res3 = 0; +int res4 = 0; + +SEC("raw_tp/sys_enter") +int prog1(void *ctx) +{ + /* perform some CO-RE relocations to ensure they work with multi-prog + * sections correctly + */ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res1 = sub1(1) + sub3(2); /* (1 + 1) + (2 + 3 + (4 + 1)) = 12 */ + return 0; +} + +SEC("raw_tp/sys_exit") +int prog2(void *ctx) +{ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res2 = sub2(3) + sub3(4); /* (3 + 2) + (4 + 3 + (4 + 1)) = 17 */ + return 0; +} + +/* prog3 has the same section name as prog1 */ +SEC("raw_tp/sys_enter") +int prog3(void *ctx) +{ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */ + return 0; +} + +/* prog4 has the same section name as prog2 */ +SEC("raw_tp/sys_exit") +int prog4(void *ctx) +{ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res4 = sub4(7) + sub1(8); /* (7 + (5 + 3 + (4 + 1)) + (6 + 1)) + (8 + 1) = 36 */ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c new file mode 100644 index 000000000000..bc49e050d342 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c @@ -0,0 +1,21 @@ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +const char LICENSE[] SEC("license") = "GPL"; + +__attribute__((unused)) __noinline int unused1(int x) +{ + return x + 1; +} + +static __attribute__((unused)) __noinline int unused2(int x) +{ + return x + 2; +} + +SEC("raw_tp/sys_enter") +int main_prog(void *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c index 458b0d69133e..553a282d816a 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -18,11 +18,11 @@ #define MAX_ULONG_STR_LEN 7 #define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN) +const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string"; static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { - volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c index b2e6f9b0894d..2b64bc563a12 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -18,11 +18,11 @@ #define MAX_ULONG_STR_LEN 7 #define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN) +const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop"; static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx) { - volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index 50525235380e..5489823c83fc 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -19,11 +19,11 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif +const char tcp_mem_name[] = "net/ipv4/tcp_mem"; static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { - char tcp_mem_name[] = "net/ipv4/tcp_mem"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c new file mode 100644 index 000000000000..b985ac4e7a81 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <stdint.h> +#include <stdbool.h> + +#include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#ifndef ctx_ptr +# define ctx_ptr(field) (void *)(long)(field) +#endif + +#define ip4_src 0xac100164 /* 172.16.1.100 */ +#define ip4_dst 0xac100264 /* 172.16.2.100 */ + +#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } +#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } + +#ifndef v6_equal +# define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ + a.s6_addr32[1] == b.s6_addr32[1] && \ + a.s6_addr32[2] == b.s6_addr32[2] && \ + a.s6_addr32[3] == b.s6_addr32[3]) +#endif + +enum { + dev_src, + dev_dst, +}; + +struct bpf_map_def SEC("maps") ifindex_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, + __be32 addr) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct iphdr *ip4h; + + if (data + sizeof(struct ethhdr) > data_end) + return false; + + ip4h = (struct iphdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip4h + 1) > data_end) + return false; + + return ip4h->daddr == addr; +} + +static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, + struct in6_addr addr) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct ipv6hdr *ip6h; + + if (data + sizeof(struct ethhdr) > data_end) + return false; + + ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip6h + 1) > data_end) + return false; + + return v6_equal(ip6h->daddr, addr); +} + +static __always_inline int get_dev_ifindex(int which) +{ + int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); + + return ifindex ? *ifindex : 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + __u32 *raw = data; + + if (data + sizeof(struct ethhdr) > data_end) + return TC_ACT_SHOT; + + return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; +} + +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + __u8 zero[ETH_ALEN * 2]; + bool redirect = false; + + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src)); + break; + case __bpf_constant_htons(ETH_P_IPV6): + redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src); + break; + } + + if (!redirect) + return TC_ACT_OK; + + __builtin_memset(&zero, 0, sizeof(zero)); + if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) + return TC_ACT_SHOT; + + return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + __u8 zero[ETH_ALEN * 2]; + bool redirect = false; + + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst)); + break; + case __bpf_constant_htons(ETH_P_IPV6): + redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst); + break; + } + + if (!redirect) + return TC_ACT_OK; + + __builtin_memset(&zero, 0, sizeof(zero)); + if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) + return TC_ACT_SHOT; + + return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c new file mode 100644 index 000000000000..d82ed3457030 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> + +#include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#ifndef ctx_ptr +# define ctx_ptr(field) (void *)(long)(field) +#endif + +#define AF_INET 2 +#define AF_INET6 10 + +static __always_inline int fill_fib_params_v4(struct __sk_buff *skb, + struct bpf_fib_lookup *fib_params) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct iphdr *ip4h; + + if (data + sizeof(struct ethhdr) > data_end) + return -1; + + ip4h = (struct iphdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip4h + 1) > data_end) + return -1; + + fib_params->family = AF_INET; + fib_params->tos = ip4h->tos; + fib_params->l4_protocol = ip4h->protocol; + fib_params->sport = 0; + fib_params->dport = 0; + fib_params->tot_len = bpf_ntohs(ip4h->tot_len); + fib_params->ipv4_src = ip4h->saddr; + fib_params->ipv4_dst = ip4h->daddr; + + return 0; +} + +static __always_inline int fill_fib_params_v6(struct __sk_buff *skb, + struct bpf_fib_lookup *fib_params) +{ + struct in6_addr *src = (struct in6_addr *)fib_params->ipv6_src; + struct in6_addr *dst = (struct in6_addr *)fib_params->ipv6_dst; + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct ipv6hdr *ip6h; + + if (data + sizeof(struct ethhdr) > data_end) + return -1; + + ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip6h + 1) > data_end) + return -1; + + fib_params->family = AF_INET6; + fib_params->flowinfo = 0; + fib_params->l4_protocol = ip6h->nexthdr; + fib_params->sport = 0; + fib_params->dport = 0; + fib_params->tot_len = bpf_ntohs(ip6h->payload_len); + *src = ip6h->saddr; + *dst = ip6h->daddr; + + return 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + __u32 *raw = data; + + if (data + sizeof(struct ethhdr) > data_end) + return TC_ACT_SHOT; + + return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; +} + +static __always_inline int tc_redir(struct __sk_buff *skb) +{ + struct bpf_fib_lookup fib_params = { .ifindex = skb->ingress_ifindex }; + __u8 zero[ETH_ALEN * 2]; + int ret = -1; + + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + ret = fill_fib_params_v4(skb, &fib_params); + break; + case __bpf_constant_htons(ETH_P_IPV6): + ret = fill_fib_params_v6(skb, &fib_params); + break; + } + + if (ret) + return TC_ACT_OK; + + ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), 0); + if (ret == BPF_FIB_LKUP_RET_NOT_FWDED || ret < 0) + return TC_ACT_OK; + + __builtin_memset(&zero, 0, sizeof(zero)); + if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) + return TC_ACT_SHOT; + + if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) { + struct bpf_redir_neigh nh_params = {}; + + nh_params.nh_family = fib_params.family; + __builtin_memcpy(&nh_params.ipv6_nh, &fib_params.ipv6_dst, + sizeof(nh_params.ipv6_nh)); + + return bpf_redirect_neigh(fib_params.ifindex, &nh_params, + sizeof(nh_params), 0); + + } else if (ret == BPF_FIB_LKUP_RET_SUCCESS) { + void *data_end = ctx_ptr(skb->data_end); + struct ethhdr *eth = ctx_ptr(skb->data); + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); + __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN); + + return bpf_redirect(fib_params.ifindex, 0); + } + + return TC_ACT_SHOT; +} + +/* these are identical, but keep them separate for compatibility with the + * section names expected by test_tc_redirect.sh + */ +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + return tc_redir(skb); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + return tc_redir(skb); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c new file mode 100644 index 000000000000..fc84a7685aa2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <stdbool.h> + +#include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/pkt_cls.h> + +#include <bpf/bpf_helpers.h> + +enum { + dev_src, + dev_dst, +}; + +struct bpf_map_def SEC("maps") ifindex_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +static __always_inline int get_dev_ifindex(int which) +{ + int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); + + return ifindex ? *ifindex : 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +{ + return TC_ACT_SHOT; +} + +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + return bpf_redirect_peer(get_dev_ifindex(dev_src), 0); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c new file mode 100644 index 000000000000..678bd0fad29e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <stddef.h> +#include <errno.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <linux/tcp.h> +#include <linux/socket.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#define BPF_PROG_TEST_TCP_HDR_OPTIONS +#include "test_tcp_hdr_options.h" + +#ifndef sizeof_field +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +__u8 test_kind = TCPOPT_EXP; +__u16 test_magic = 0xeB9F; +__u32 inherit_cb_flags = 0; + +struct bpf_test_option passive_synack_out = {}; +struct bpf_test_option passive_fin_out = {}; + +struct bpf_test_option passive_estab_in = {}; +struct bpf_test_option passive_fin_in = {}; + +struct bpf_test_option active_syn_out = {}; +struct bpf_test_option active_fin_out = {}; + +struct bpf_test_option active_estab_in = {}; +struct bpf_test_option active_fin_in = {}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct hdr_stg); +} hdr_stg_map SEC(".maps"); + +static bool skops_want_cookie(const struct bpf_sock_ops *skops) +{ + return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE; +} + +static bool skops_current_mss(const struct bpf_sock_ops *skops) +{ + return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS; +} + +static __u8 option_total_len(__u8 flags) +{ + __u8 i, len = 1; /* +1 for flags */ + + if (!flags) + return 0; + + /* RESEND bit does not use a byte */ + for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++) + len += !!TEST_OPTION_FLAGS(flags, i); + + if (test_kind == TCPOPT_EXP) + return len + TCP_BPF_EXPOPT_BASE_LEN; + else + return len + 2; /* +1 kind, +1 kind-len */ +} + +static void write_test_option(const struct bpf_test_option *test_opt, + __u8 *data) +{ + __u8 offset = 0; + + data[offset++] = test_opt->flags; + if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS)) + data[offset++] = test_opt->max_delack_ms; + + if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND)) + data[offset++] = test_opt->rand; +} + +static int store_option(struct bpf_sock_ops *skops, + const struct bpf_test_option *test_opt) +{ + union { + struct tcp_exprm_opt exprm; + struct tcp_opt regular; + } write_opt; + int err; + + if (test_kind == TCPOPT_EXP) { + write_opt.exprm.kind = TCPOPT_EXP; + write_opt.exprm.len = option_total_len(test_opt->flags); + write_opt.exprm.magic = __bpf_htons(test_magic); + write_opt.exprm.data32 = 0; + write_test_option(test_opt, write_opt.exprm.data); + err = bpf_store_hdr_opt(skops, &write_opt.exprm, + sizeof(write_opt.exprm), 0); + } else { + write_opt.regular.kind = test_kind; + write_opt.regular.len = option_total_len(test_opt->flags); + write_opt.regular.data32 = 0; + write_test_option(test_opt, write_opt.regular.data); + err = bpf_store_hdr_opt(skops, &write_opt.regular, + sizeof(write_opt.regular), 0); + } + + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int parse_test_option(struct bpf_test_option *opt, const __u8 *start) +{ + opt->flags = *start++; + + if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS)) + opt->max_delack_ms = *start++; + + if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND)) + opt->rand = *start++; + + return 0; +} + +static int load_option(struct bpf_sock_ops *skops, + struct bpf_test_option *test_opt, bool from_syn) +{ + union { + struct tcp_exprm_opt exprm; + struct tcp_opt regular; + } search_opt; + int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; + + if (test_kind == TCPOPT_EXP) { + search_opt.exprm.kind = TCPOPT_EXP; + search_opt.exprm.len = 4; + search_opt.exprm.magic = __bpf_htons(test_magic); + search_opt.exprm.data32 = 0; + ret = bpf_load_hdr_opt(skops, &search_opt.exprm, + sizeof(search_opt.exprm), load_flags); + if (ret < 0) + return ret; + return parse_test_option(test_opt, search_opt.exprm.data); + } else { + search_opt.regular.kind = test_kind; + search_opt.regular.len = 0; + search_opt.regular.data32 = 0; + ret = bpf_load_hdr_opt(skops, &search_opt.regular, + sizeof(search_opt.regular), load_flags); + if (ret < 0) + return ret; + return parse_test_option(test_opt, search_opt.regular.data); + } +} + +static int synack_opt_len(struct bpf_sock_ops *skops) +{ + struct bpf_test_option test_opt = {}; + __u8 optlen; + int err; + + if (!passive_synack_out.flags) + return CG_OK; + + err = load_option(skops, &test_opt, true); + + /* bpf_test_option is not found */ + if (err == -ENOMSG) + return CG_OK; + + if (err) + RET_CG_ERR(err); + + optlen = option_total_len(passive_synack_out.flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_synack_opt(struct bpf_sock_ops *skops) +{ + struct bpf_test_option opt; + + if (!passive_synack_out.flags) + /* We should not even be called since no header + * space has been reserved. + */ + RET_CG_ERR(0); + + opt = passive_synack_out; + if (skops_want_cookie(skops)) + SET_OPTION_FLAGS(opt.flags, OPTION_RESEND); + + return store_option(skops, &opt); +} + +static int syn_opt_len(struct bpf_sock_ops *skops) +{ + __u8 optlen; + int err; + + if (!active_syn_out.flags) + return CG_OK; + + optlen = option_total_len(active_syn_out.flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_syn_opt(struct bpf_sock_ops *skops) +{ + if (!active_syn_out.flags) + RET_CG_ERR(0); + + return store_option(skops, &active_syn_out); +} + +static int fin_opt_len(struct bpf_sock_ops *skops) +{ + struct bpf_test_option *opt; + struct hdr_stg *hdr_stg; + __u8 optlen; + int err; + + if (!skops->sk) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->active) + opt = &active_fin_out; + else + opt = &passive_fin_out; + + optlen = option_total_len(opt->flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_fin_opt(struct bpf_sock_ops *skops) +{ + struct bpf_test_option *opt; + struct hdr_stg *hdr_stg; + + if (!skops->sk) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->active) + opt = &active_fin_out; + else + opt = &passive_fin_out; + + if (!opt->flags) + RET_CG_ERR(0); + + return store_option(skops, opt); +} + +static int resend_in_ack(struct bpf_sock_ops *skops) +{ + struct hdr_stg *hdr_stg; + + if (!skops->sk) + return -1; + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + return -1; + + return !!hdr_stg->resend_syn; +} + +static int nodata_opt_len(struct bpf_sock_ops *skops) +{ + int resend; + + resend = resend_in_ack(skops); + if (resend < 0) + RET_CG_ERR(0); + + if (resend) + return syn_opt_len(skops); + + return CG_OK; +} + +static int write_nodata_opt(struct bpf_sock_ops *skops) +{ + int resend; + + resend = resend_in_ack(skops); + if (resend < 0) + RET_CG_ERR(0); + + if (resend) + return write_syn_opt(skops); + + return CG_OK; +} + +static int data_opt_len(struct bpf_sock_ops *skops) +{ + /* Same as the nodata version. Mostly to show + * an example usage on skops->skb_len. + */ + return nodata_opt_len(skops); +} + +static int write_data_opt(struct bpf_sock_ops *skops) +{ + return write_nodata_opt(skops); +} + +static int current_mss_opt_len(struct bpf_sock_ops *skops) +{ + /* Reserve maximum that may be needed */ + int err; + + err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0); + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int handle_hdr_opt_len(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + return synack_opt_len(skops); + + if (tcp_flags & TCPHDR_SYN) + return syn_opt_len(skops); + + if (tcp_flags & TCPHDR_FIN) + return fin_opt_len(skops); + + if (skops_current_mss(skops)) + /* The kernel is calculating the MSS */ + return current_mss_opt_len(skops); + + if (skops->skb_len) + return data_opt_len(skops); + + return nodata_opt_len(skops); +} + +static int handle_write_hdr_opt(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + struct tcphdr *th; + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + return write_synack_opt(skops); + + if (tcp_flags & TCPHDR_SYN) + return write_syn_opt(skops); + + if (tcp_flags & TCPHDR_FIN) + return write_fin_opt(skops); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (skops->skb_len > tcp_hdrlen(th)) + return write_data_opt(skops); + + return write_nodata_opt(skops); +} + +static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms) +{ + __u32 max_delack_us = max_delack_ms * 1000; + + return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX, + &max_delack_us, sizeof(max_delack_us)); +} + +static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms) +{ + __u32 min_rto_us = peer_max_delack_ms * 1000; + + return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us, + sizeof(min_rto_us)); +} + +static int handle_active_estab(struct bpf_sock_ops *skops) +{ + struct hdr_stg init_stg = { + .active = true, + }; + int err; + + err = load_option(skops, &active_estab_in, false); + if (err && err != -ENOMSG) + RET_CG_ERR(err); + + init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags, + OPTION_RESEND); + if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk, + &init_stg, + BPF_SK_STORAGE_GET_F_CREATE)) + RET_CG_ERR(0); + + if (init_stg.resend_syn) + /* Don't clear the write_hdr cb now because + * the ACK may get lost and retransmit may + * be needed. + * + * PARSE_ALL_HDR cb flag is set to learn if this + * resend_syn option has received by the peer. + * + * The header option will be resent until a valid + * packet is received at handle_parse_hdr() + * and all hdr cb flags will be cleared in + * handle_parse_hdr(). + */ + set_parse_all_hdr_cb_flags(skops); + else if (!active_fin_out.flags) + /* No options will be written from now */ + clear_hdr_cb_flags(skops); + + if (active_syn_out.max_delack_ms) { + err = set_delack_max(skops, active_syn_out.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + if (active_estab_in.max_delack_ms) { + err = set_rto_min(skops, active_estab_in.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_passive_estab(struct bpf_sock_ops *skops) +{ + struct hdr_stg init_stg = {}; + struct tcphdr *th; + int err; + + inherit_cb_flags = skops->bpf_sock_ops_cb_flags; + + err = load_option(skops, &passive_estab_in, true); + if (err == -ENOENT) { + /* saved_syn is not found. It was in syncookie mode. + * We have asked the active side to resend the options + * in ACK, so try to find the bpf_test_option from ACK now. + */ + err = load_option(skops, &passive_estab_in, false); + init_stg.syncookie = true; + } + + /* ENOMSG: The bpf_test_option is not found which is fine. + * Bail out now for all other errors. + */ + if (err && err != -ENOMSG) + RET_CG_ERR(err); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (th->syn) { + /* Fastopen */ + + /* Cannot clear cb_flags to stop write_hdr cb. + * synack is not sent yet for fast open. + * Even it was, the synack may need to be retransmitted. + * + * PARSE_ALL_HDR cb flag is set to learn + * if synack has reached the peer. + * All cb_flags will be cleared in handle_parse_hdr(). + */ + set_parse_all_hdr_cb_flags(skops); + init_stg.fastopen = true; + } else if (!passive_fin_out.flags) { + /* No options will be written from now */ + clear_hdr_cb_flags(skops); + } + + if (!skops->sk || + !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg, + BPF_SK_STORAGE_GET_F_CREATE)) + RET_CG_ERR(0); + + if (passive_synack_out.max_delack_ms) { + err = set_delack_max(skops, passive_synack_out.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + if (passive_estab_in.max_delack_ms) { + err = set_rto_min(skops, passive_estab_in.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_parse_hdr(struct bpf_sock_ops *skops) +{ + struct hdr_stg *hdr_stg; + struct tcphdr *th; + + if (!skops->sk) + RET_CG_ERR(0); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->resend_syn || hdr_stg->fastopen) + /* The PARSE_ALL_HDR cb flag was turned on + * to ensure that the previously written + * options have reached the peer. + * Those previously written option includes: + * - Active side: resend_syn in ACK during syncookie + * or + * - Passive side: SYNACK during fastopen + * + * A valid packet has been received here after + * the 3WHS, so the PARSE_ALL_HDR cb flag + * can be cleared now. + */ + clear_parse_all_hdr_cb_flags(skops); + + if (hdr_stg->resend_syn && !active_fin_out.flags) + /* Active side resent the syn option in ACK + * because the server was in syncookie mode. + * A valid packet has been received, so + * clear header cb flags if there is no + * more option to send. + */ + clear_hdr_cb_flags(skops); + + if (hdr_stg->fastopen && !passive_fin_out.flags) + /* Passive side was in fastopen. + * A valid packet has been received, so + * the SYNACK has reached the peer. + * Clear header cb flags if there is no more + * option to send. + */ + clear_hdr_cb_flags(skops); + + if (th->fin) { + struct bpf_test_option *fin_opt; + int err; + + if (hdr_stg->active) + fin_opt = &active_fin_in; + else + fin_opt = &passive_fin_in; + + err = load_option(skops, fin_opt, false); + if (err && err != -ENOMSG) + RET_CG_ERR(err); + } + + return CG_OK; +} + +SEC("sockops/estab") +int estab(struct bpf_sock_ops *skops) +{ + int true_val = 1; + + switch (skops->op) { + case BPF_SOCK_OPS_TCP_LISTEN_CB: + bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, + &true_val, sizeof(true_val)); + set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + set_hdr_cb_flags(skops, 0); + break; + case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: + return handle_parse_hdr(skops); + case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + return handle_hdr_opt_len(skops); + case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + return handle_write_hdr_opt(skops); + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + return handle_passive_estab(skops); + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + return handle_active_estab(skops); + } + + return CG_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext.c b/tools/testing/selftests/bpf/progs/test_trace_ext.c new file mode 100644 index 000000000000..d19a634d0e78 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_trace_ext.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_tracing.h> + +__u64 ext_called = 0; + +SEC("freplace/test_pkt_md_access") +int test_pkt_md_access_new(struct __sk_buff *skb) +{ + ext_called = skb->len; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c new file mode 100644 index 000000000000..52f3baf98f20 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +__u64 fentry_called = 0; + +SEC("fentry/test_pkt_md_access_new") +int BPF_PROG(fentry, struct sk_buff *skb) +{ + fentry_called = skb->len; + return 0; +} + +__u64 fexit_called = 0; + +SEC("fexit/test_pkt_md_access_new") +int BPF_PROG(fexit, struct sk_buff *skb) +{ + fexit_called = skb->len; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 29fa09d6a6c6..e9dfa0313d1b 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep") int handle__tp(struct trace_event_raw_sys_enter *args) { struct __kernel_timespec *ts; + long tv_nsec; if (args->id != __NR_nanosleep) return 0; ts = (void *)args->args[0]; - if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || + tv_nsec != MY_TV_NSEC) return 0; tp_called = true; @@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter") int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id) { struct __kernel_timespec *ts; + long tv_nsec; if (id != __NR_nanosleep) return 0; ts = (void *)PT_REGS_PARM1_CORE(regs); - if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || + tv_nsec != MY_TV_NSEC) return 0; raw_tp_called = true; @@ -51,12 +55,14 @@ SEC("tp_btf/sys_enter") int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) { struct __kernel_timespec *ts; + long tv_nsec; if (id != __NR_nanosleep) return 0; ts = (void *)PT_REGS_PARM1_CORE(regs); - if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || + tv_nsec != MY_TV_NSEC) return 0; tp_btf_called = true; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 8beecec166d9..3a67921f62b5 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -16,7 +16,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -static __u32 rol32(__u32 word, unsigned int shift) +static __always_inline __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); } @@ -49,7 +49,7 @@ static __u32 rol32(__u32 word, unsigned int shift) typedef unsigned int u32; -static __attribute__ ((noinline)) +static __noinline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c; @@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval) return c; } -__attribute__ ((noinline)) +__noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; @@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) return c; } -__attribute__ ((noinline)) +__noinline u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); @@ -213,7 +213,7 @@ struct eth_hdr { unsigned short eth_proto; }; -static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) +static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp) { __u64 off = sizeof(struct eth_hdr); if (is_ipv6) { @@ -797,8 +797,8 @@ out: return XDP_DROP; } -__attribute__ ((section("xdp-test"), used)) -int balancer_ingress(struct xdp_md *ctx) +SEC("xdp-test-v4") +int balancer_ingress_v4(struct xdp_md *ctx) { void *data = (void *)(long)ctx->data; void *data_end = (void *)(long)ctx->data_end; @@ -812,11 +812,27 @@ int balancer_ingress(struct xdp_md *ctx) eth_proto = bpf_ntohs(eth->eth_proto); if (eth_proto == ETH_P_IP) return process_packet(data, nh_off, data_end, 0, ctx); - else if (eth_proto == ETH_P_IPV6) + else + return XDP_DROP; +} + +SEC("xdp-test-v6") +int balancer_ingress_v6(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return XDP_DROP; + eth_proto = bpf_ntohs(eth->eth_proto); + if (eth_proto == ETH_P_IPV6) return process_packet(data, nh_off, data_end, 1, ctx); else return XDP_DROP; } -char _license[] __attribute__ ((section("license"), used)) = "GPL"; -int _version __attribute__ ((section("version"), used)) = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 8b36b6640e7e..9a4d09590b3d 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -39,6 +39,13 @@ int bench_trigger_fentry(void *ctx) return 0; } +SEC("fentry.s/__x64_sys_getpgid") +int bench_trigger_fentry_sleep(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + SEC("fmod_ret/__x64_sys_getpgid") int bench_trigger_fmodret(void *ctx) { diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh index ac349a5cea7e..2db3c60e1e61 100755 --- a/tools/testing/selftests/bpf/test_bpftool_build.sh +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -85,6 +85,23 @@ make_with_tmpdir() { echo } +make_doc_and_clean() { + echo -e "\$PWD: $PWD" + echo -e "command: make -s $* doc >/dev/null" + RST2MAN_OPTS="--exit-status=1" make $J -s $* doc + if [ $? -ne 0 ] ; then + ERROR=1 + printf "FAILURE: Errors or warnings when building documentation\n" + fi + ( + if [ $# -ge 1 ] ; then + cd ${@: -1} + fi + make -s doc-clean + ) + echo +} + echo "Trying to build bpftool" echo -e "... through kbuild\n" @@ -145,3 +162,7 @@ make_and_clean make_with_tmpdir OUTPUT make_with_tmpdir O + +echo -e "Checking documentation build\n" +# From tools/bpf/bpftool +make_doc_and_clean diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh new file mode 100755 index 000000000000..1bf81b49457a --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh @@ -0,0 +1,82 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +TESTNAME=bpftool_metadata +BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) +BPF_DIR=$BPF_FS/test_$TESTNAME + +_cleanup() +{ + set +e + rm -rf $BPF_DIR 2> /dev/null +} + +cleanup_skip() +{ + echo "selftests: $TESTNAME [SKIP]" + _cleanup + + exit $ksft_skip +} + +cleanup() +{ + if [ "$?" = 0 ]; then + echo "selftests: $TESTNAME [PASS]" + else + echo "selftests: $TESTNAME [FAILED]" + fi + _cleanup +} + +if [ $(id -u) -ne 0 ]; then + echo "selftests: $TESTNAME [SKIP] Need root privileges" + exit $ksft_skip +fi + +if [ -z "$BPF_FS" ]; then + echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted" + exit $ksft_skip +fi + +if ! bpftool version > /dev/null 2>&1; then + echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool" + exit $ksft_skip +fi + +set -e + +trap cleanup_skip EXIT + +mkdir $BPF_DIR + +trap cleanup EXIT + +bpftool prog load metadata_unused.o $BPF_DIR/unused + +METADATA_PLAIN="$(bpftool prog)" +echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null +echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null + +bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null + +bpftool map | grep 'metadata.rodata' > /dev/null + +rm $BPF_DIR/unused + +bpftool prog load metadata_used.o $BPF_DIR/used + +METADATA_PLAIN="$(bpftool prog)" +echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null +echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null + +bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null + +bpftool map | grep 'metadata.rodata' > /dev/null + +rm $BPF_DIR/used + +exit 0 diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c index ed253f252cd0..ec53b1ef90d2 100644 --- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c +++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c @@ -156,4 +156,5 @@ cleanup: bpf_object__close(obj); } } + return 0; } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index dbb820dde138..238f5f61189e 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -130,6 +130,69 @@ extern int test__join_cgroup(const char *path); #define CHECK_ATTR(condition, tag, format...) \ _CHECK(condition, tag, tattr.duration, format) +#define ASSERT_EQ(actual, expected, name) ({ \ + static int duration = 0; \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act == ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld != expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + +#define ASSERT_STREQ(actual, expected, name) ({ \ + static int duration = 0; \ + const char *___act = actual; \ + const char *___exp = expected; \ + bool ___ok = strcmp(___act, ___exp) == 0; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual '%s' != expected '%s'\n", \ + (name), ___act, ___exp); \ + ___ok; \ +}) + +#define ASSERT_OK(res, name) ({ \ + static int duration = 0; \ + long long ___res = (res); \ + bool ___ok = ___res == 0; \ + CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_ERR(res, name) ({ \ + static int duration = 0; \ + long long ___res = (res); \ + bool ___ok = ___res < 0; \ + CHECK(!___ok, (name), "unexpected success: %lld\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_NULL(ptr, name) ({ \ + static int duration = 0; \ + const void *___res = (ptr); \ + bool ___ok = !___res; \ + CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_OK_PTR(ptr, name) ({ \ + static int duration = 0; \ + const void *___res = (ptr); \ + bool ___ok = !IS_ERR_OR_NULL(___res); \ + CHECK(!___ok, (name), \ + "unexpected error: %ld\n", PTR_ERR(___res)); \ + ___ok; \ +}) + +#define ASSERT_ERR_PTR(ptr, name) ({ \ + static int duration = 0; \ + const void *___res = (ptr); \ + bool ___ok = IS_ERR(___res) \ + CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ + ___ok; \ +}) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c deleted file mode 100644 index 6c9f269c396d..000000000000 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ /dev/null @@ -1,482 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019 Facebook */ - -#include <sys/socket.h> -#include <sys/epoll.h> -#include <netinet/in.h> -#include <arpa/inet.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "cgroup_helpers.h" -#include "bpf_rlimit.h" - -enum bpf_addr_array_idx { - ADDR_SRV_IDX, - ADDR_CLI_IDX, - __NR_BPF_ADDR_ARRAY_IDX, -}; - -enum bpf_result_array_idx { - EGRESS_SRV_IDX, - EGRESS_CLI_IDX, - INGRESS_LISTEN_IDX, - __NR_BPF_RESULT_ARRAY_IDX, -}; - -enum bpf_linum_array_idx { - EGRESS_LINUM_IDX, - INGRESS_LINUM_IDX, - __NR_BPF_LINUM_ARRAY_IDX, -}; - -struct bpf_spinlock_cnt { - struct bpf_spin_lock lock; - __u32 cnt; -}; - -#define CHECK(condition, tag, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ - printf(format); \ - printf("\n"); \ - exit(-1); \ - } \ -}) - -#define TEST_CGROUP "/test-bpf-sock-fields" -#define DATA "Hello BPF!" -#define DATA_LEN sizeof(DATA) - -static struct sockaddr_in6 srv_sa6, cli_sa6; -static int sk_pkt_out_cnt10_fd; -static int sk_pkt_out_cnt_fd; -static int linum_map_fd; -static int addr_map_fd; -static int tp_map_fd; -static int sk_map_fd; - -static __u32 addr_srv_idx = ADDR_SRV_IDX; -static __u32 addr_cli_idx = ADDR_CLI_IDX; - -static __u32 egress_srv_idx = EGRESS_SRV_IDX; -static __u32 egress_cli_idx = EGRESS_CLI_IDX; -static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX; - -static __u32 egress_linum_idx = EGRESS_LINUM_IDX; -static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; - -static void init_loopback6(struct sockaddr_in6 *sa6) -{ - memset(sa6, 0, sizeof(*sa6)); - sa6->sin6_family = AF_INET6; - sa6->sin6_addr = in6addr_loopback; -} - -static void print_sk(const struct bpf_sock *sk) -{ - char src_ip4[24], dst_ip4[24]; - char src_ip6[64], dst_ip6[64]; - - inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); - inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); - inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); - inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); - - printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " - "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " - "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", - sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, - sk->mark, sk->priority, - sk->src_ip4, src_ip4, - sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], - src_ip6, sk->src_port, - sk->dst_ip4, dst_ip4, - sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], - dst_ip6, ntohs(sk->dst_port)); -} - -static void print_tp(const struct bpf_tcp_sock *tp) -{ - printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " - "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " - "rate_delivered:%u rate_interval_us:%u packets_out:%u " - "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " - "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " - "bytes_received:%llu bytes_acked:%llu\n", - tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, - tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, - tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, - tp->packets_out, tp->retrans_out, tp->total_retrans, - tp->segs_in, tp->data_segs_in, tp->segs_out, - tp->data_segs_out, tp->lost_out, tp->sacked_out, - tp->bytes_received, tp->bytes_acked); -} - -static void check_result(void) -{ - struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; - struct bpf_sock srv_sk, cli_sk, listen_sk; - __u32 ingress_linum, egress_linum; - int err; - - err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, - &egress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, - &ingress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)", - "err:%d errno:%d", err, errno); - - printf("listen_sk: "); - print_sk(&listen_sk); - printf("\n"); - - printf("srv_sk: "); - print_sk(&srv_sk); - printf("\n"); - - printf("cli_sk: "); - print_sk(&cli_sk); - printf("\n"); - - printf("listen_tp: "); - print_tp(&listen_tp); - printf("\n"); - - printf("srv_tp: "); - print_tp(&srv_tp); - printf("\n"); - - printf("cli_tp: "); - print_tp(&cli_tp); - printf("\n"); - - CHECK(listen_sk.state != 10 || - listen_sk.family != AF_INET6 || - listen_sk.protocol != IPPROTO_TCP || - memcmp(listen_sk.src_ip6, &in6addr_loopback, - sizeof(listen_sk.src_ip6)) || - listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || - listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || - listen_sk.src_port != ntohs(srv_sa6.sin6_port) || - listen_sk.dst_port, - "Unexpected listen_sk", - "Check listen_sk output. ingress_linum:%u", - ingress_linum); - - CHECK(srv_sk.state == 10 || - !srv_sk.state || - srv_sk.family != AF_INET6 || - srv_sk.protocol != IPPROTO_TCP || - memcmp(srv_sk.src_ip6, &in6addr_loopback, - sizeof(srv_sk.src_ip6)) || - memcmp(srv_sk.dst_ip6, &in6addr_loopback, - sizeof(srv_sk.dst_ip6)) || - srv_sk.src_port != ntohs(srv_sa6.sin6_port) || - srv_sk.dst_port != cli_sa6.sin6_port, - "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u", - egress_linum); - - CHECK(cli_sk.state == 10 || - !cli_sk.state || - cli_sk.family != AF_INET6 || - cli_sk.protocol != IPPROTO_TCP || - memcmp(cli_sk.src_ip6, &in6addr_loopback, - sizeof(cli_sk.src_ip6)) || - memcmp(cli_sk.dst_ip6, &in6addr_loopback, - sizeof(cli_sk.dst_ip6)) || - cli_sk.src_port != ntohs(cli_sa6.sin6_port) || - cli_sk.dst_port != srv_sa6.sin6_port, - "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u", - egress_linum); - - CHECK(listen_tp.data_segs_out || - listen_tp.data_segs_in || - listen_tp.total_retrans || - listen_tp.bytes_acked, - "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", - ingress_linum); - - CHECK(srv_tp.data_segs_out != 2 || - srv_tp.data_segs_in || - srv_tp.snd_cwnd != 10 || - srv_tp.total_retrans || - srv_tp.bytes_acked != 2 * DATA_LEN, - "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", - egress_linum); - - CHECK(cli_tp.data_segs_out || - cli_tp.data_segs_in != 2 || - cli_tp.snd_cwnd != 10 || - cli_tp.total_retrans || - cli_tp.bytes_received != 2 * DATA_LEN, - "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", - egress_linum); -} - -static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) -{ - struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {}; - int err; - - pkt_out_cnt.cnt = ~0; - pkt_out_cnt10.cnt = ~0; - err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); - if (!err) - err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, - &pkt_out_cnt10); - - /* The bpf prog only counts for fullsock and - * passive conneciton did not become fullsock until 3WHS - * had been finished. - * The bpf prog only counted two data packet out but we - * specially init accept_fd's pkt_out_cnt by 2 in - * init_sk_storage(). Hence, 4 here. - */ - CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, - "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", - "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", - err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); - - pkt_out_cnt.cnt = ~0; - pkt_out_cnt10.cnt = ~0; - err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); - if (!err) - err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, - &pkt_out_cnt10); - /* Active connection is fullsock from the beginning. - * 1 SYN and 1 ACK during 3WHS - * 2 Acks on data packet. - * - * The bpf_prog initialized it to 0xeB9F. - */ - CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || - pkt_out_cnt10.cnt != 0xeB9F + 40, - "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", - "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", - err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); -} - -static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) -{ - struct bpf_spinlock_cnt scnt = {}; - int err; - - scnt.cnt = pkt_out_cnt; - err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, - BPF_NOEXIST); - CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", - "err:%d errno:%d", err, errno); - - scnt.cnt *= 10; - err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, - BPF_NOEXIST); - CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", - "err:%d errno:%d", err, errno); -} - -static void test(void) -{ - int listen_fd, cli_fd, accept_fd, epfd, err; - struct epoll_event ev; - socklen_t addrlen; - int i; - - addrlen = sizeof(struct sockaddr_in6); - ev.events = EPOLLIN; - - epfd = epoll_create(1); - CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno); - - /* Prepare listen_fd */ - listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d", - listen_fd, errno); - - init_loopback6(&srv_sa6); - err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); - CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno); - - err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); - CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno); - - err = listen(listen_fd, 1); - CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno); - - /* Prepare cli_fd */ - cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno); - - init_loopback6(&cli_sa6); - err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); - CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno); - - err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); - CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d", - err, errno); - - /* Update addr_map with srv_sa6 and cli_sa6 */ - err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0); - CHECK(err, "map_update", "err:%d errno:%d", err, errno); - - err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0); - CHECK(err, "map_update", "err:%d errno:%d", err, errno); - - /* Connect from cli_sa6 to srv_sa6 */ - err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); - printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n", - ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); - CHECK(err && errno != EINPROGRESS, - "connect(cli_fd)", "err:%d errno:%d", err, errno); - - ev.data.fd = listen_fd; - err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d", - err, errno); - - /* Accept the connection */ - /* Have some timeout in accept(listen_fd). Just in case. */ - err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != listen_fd, - "epoll_wait(listen_fd)", - "err:%d errno:%d ev.data.fd:%d listen_fd:%d", - err, errno, ev.data.fd, listen_fd); - - accept_fd = accept(listen_fd, NULL, NULL); - CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d", - accept_fd, errno); - close(listen_fd); - - ev.data.fd = cli_fd; - err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", - err, errno); - - init_sk_storage(accept_fd, 2); - - for (i = 0; i < 2; i++) { - /* Send some data from accept_fd to cli_fd */ - err = send(accept_fd, DATA, DATA_LEN, 0); - CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", - err, errno); - - /* Have some timeout in recv(cli_fd). Just in case. */ - err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != cli_fd, - "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", - err, errno, ev.data.fd, cli_fd); - - err = recv(cli_fd, NULL, 0, MSG_TRUNC); - CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); - } - - check_sk_pkt_out_cnt(accept_fd, cli_fd); - - close(epfd); - close(accept_fd); - close(cli_fd); - - check_result(); -} - -int main(int argc, char **argv) -{ - struct bpf_prog_load_attr attr = { - .file = "test_sock_fields_kern.o", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .prog_flags = BPF_F_TEST_RND_HI32, - }; - int cgroup_fd, egress_fd, ingress_fd, err; - struct bpf_program *ingress_prog; - struct bpf_object *obj; - struct bpf_map *map; - - /* Create a cgroup, get fd, and join it */ - cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); - CHECK(cgroup_fd < 0, "cgroup_setup_and_join()", - "cgroup_fd:%d errno:%d", cgroup_fd, errno); - atexit(cleanup_cgroup_environment); - - err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); - CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); - - ingress_prog = bpf_object__find_program_by_title(obj, - "cgroup_skb/ingress"); - CHECK(!ingress_prog, - "bpf_object__find_program_by_title(cgroup_skb/ingress)", - "not found"); - ingress_fd = bpf_program__fd(ingress_prog); - - err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); - CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", - "err:%d errno%d", err, errno); - - err = bpf_prog_attach(ingress_fd, cgroup_fd, - BPF_CGROUP_INET_INGRESS, 0); - CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)", - "err:%d errno%d", err, errno); - close(cgroup_fd); - - map = bpf_object__find_map_by_name(obj, "addr_map"); - CHECK(!map, "cannot find addr_map", "(null)"); - addr_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sock_result_map"); - CHECK(!map, "cannot find sock_result_map", "(null)"); - sk_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map"); - CHECK(!map, "cannot find tcp_sock_result_map", "(null)"); - tp_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "linum_map"); - CHECK(!map, "cannot find linum_map", "(null)"); - linum_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); - CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); - sk_pkt_out_cnt_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); - CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); - sk_pkt_out_cnt10_fd = bpf_map__fd(map); - - test(); - - bpf_object__close(obj); - cleanup_cgroup_environment(); - - printf("PASS\n"); - - return 0; -} diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index 154a8fd2a48d..ca7ca87e91aa 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -151,7 +151,7 @@ static int run_test(int cgfd) } bpf_object__for_each_program(prog, pobj) { - prog_name = bpf_program__title(prog, /*needs_copy*/ false); + prog_name = bpf_program__section_name(prog); if (libbpf_attach_type_by_name(prog_name, &attach_type)) goto err; diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 9b6fb00dc7a0..0fa1e421c3d7 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -86,6 +86,7 @@ int txmsg_ktls_skb_redir; int ktls; int peek_flag; int skb_use_parser; +int txmsg_omit_skb_parser; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -111,6 +112,7 @@ static const struct option long_options[] = { {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, {"peek", no_argument, &peek_flag, 1 }, + {"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1}, {"whitelist", required_argument, NULL, 'n' }, {"blacklist", required_argument, NULL, 'b' }, {0, 0, NULL, 0 } @@ -175,6 +177,7 @@ static void test_reset(void) txmsg_apply = txmsg_cork = 0; txmsg_ingress = txmsg_redir_skb = 0; txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; + txmsg_omit_skb_parser = 0; skb_use_parser = 0; } @@ -518,28 +521,13 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) if (i == 0 && txmsg_ktls_skb) { if (msg->msg_iov[i].iov_len < 4) return -EIO; - if (txmsg_ktls_skb_redir) { - if (memcmp(&d[13], "PASS", 4) != 0) { - fprintf(stderr, - "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]); - return -EIO; - } - d[13] = 0; - d[14] = 1; - d[15] = 2; - d[16] = 3; - j = 13; - } else if (txmsg_ktls_skb) { - if (memcmp(d, "PASS", 4) != 0) { - fprintf(stderr, - "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]); - return -EIO; - } - d[0] = 0; - d[1] = 1; - d[2] = 2; - d[3] = 3; + if (memcmp(d, "PASS", 4) != 0) { + fprintf(stderr, + "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", + i, 0, d[0], d[1], d[2], d[3]); + return -EIO; } + j = 4; /* advance index past PASS header */ } for (; j < msg->msg_iov[i].iov_len && size; j++) { @@ -927,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) goto run; /* Attach programs to sockmap */ - err = bpf_prog_attach(prog_fd[0], map_fd[0], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { - fprintf(stderr, - "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[0], err, strerror(errno)); - return err; + if (!txmsg_omit_skb_parser) { + err = bpf_prog_attach(prog_fd[0], map_fd[0], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[0], err, strerror(errno)); + return err; + } } err = bpf_prog_attach(prog_fd[1], map_fd[0], @@ -946,13 +936,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) /* Attach programs to TLS sockmap */ if (txmsg_ktls_skb) { - err = bpf_prog_attach(prog_fd[0], map_fd[8], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { - fprintf(stderr, - "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[8], err, strerror(errno)); - return err; + if (!txmsg_omit_skb_parser) { + err = bpf_prog_attach(prog_fd[0], map_fd[8], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[8], err, strerror(errno)); + return err; + } } err = bpf_prog_attach(prog_fd[2], map_fd[8], @@ -1480,12 +1472,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt) txmsg_ktls_skb_drop = 0; txmsg_ktls_skb_redir = 1; test_exec(cgrp, opt); + txmsg_ktls_skb_redir = 0; + + /* Tests that omit skb_parser */ + txmsg_omit_skb_parser = 1; + ktls = 0; + txmsg_ktls_skb = 0; + test_exec(cgrp, opt); + + txmsg_ktls_skb_drop = 1; + test_exec(cgrp, opt); + txmsg_ktls_skb_drop = 0; + + txmsg_ktls_skb_redir = 1; + test_exec(cgrp, opt); + + ktls = 1; + test_exec(cgrp, opt); + txmsg_omit_skb_parser = 0; opt->data_test = data; ktls = k; } - /* Test cork with hung data. This tests poor usage patterns where * cork can leave data on the ring if user program is buggy and * doesn't flush them somehow. They do take some time however diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh new file mode 100755 index 000000000000..8868aa1ca902 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_redirect.sh @@ -0,0 +1,216 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link +# between src and dst. The netns fwd has veth links to each src and dst. The +# client is in src and server in dst. The test installs a TC BPF program to each +# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the +# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace +# switch from ingress side; it also installs a checker prog on the egress side +# to drop unexpected traffic. + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + echo "FAIL" + exit 1 +fi + +# check that needed tools are present +command -v nc >/dev/null 2>&1 || \ + { echo >&2 "nc is not available"; exit 1; } +command -v dd >/dev/null 2>&1 || \ + { echo >&2 "dd is not available"; exit 1; } +command -v timeout >/dev/null 2>&1 || \ + { echo >&2 "timeout is not available"; exit 1; } +command -v ping >/dev/null 2>&1 || \ + { echo >&2 "ping is not available"; exit 1; } +if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi +command -v perl >/dev/null 2>&1 || \ + { echo >&2 "perl is not available"; exit 1; } +command -v jq >/dev/null 2>&1 || \ + { echo >&2 "jq is not available"; exit 1; } +command -v bpftool >/dev/null 2>&1 || \ + { echo >&2 "bpftool is not available"; exit 1; } + +readonly GREEN='\033[0;92m' +readonly RED='\033[0;31m' +readonly NC='\033[0m' # No Color + +readonly PING_ARG="-c 3 -w 10 -q" + +readonly TIMEOUT=10 + +readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" +readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" +readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" + +readonly IP4_SRC="172.16.1.100" +readonly IP4_DST="172.16.2.100" + +readonly IP6_SRC="::1:dead:beef:cafe" +readonly IP6_DST="::2:dead:beef:cafe" + +readonly IP4_SLL="169.254.0.1" +readonly IP4_DLL="169.254.0.2" +readonly IP4_NET="169.254.0.0" + +netns_cleanup() +{ + ip netns del ${NS_SRC} + ip netns del ${NS_FWD} + ip netns del ${NS_DST} +} + +netns_setup() +{ + ip netns add "${NS_SRC}" + ip netns add "${NS_FWD}" + ip netns add "${NS_DST}" + + ip link add veth_src type veth peer name veth_src_fwd + ip link add veth_dst type veth peer name veth_dst_fwd + + ip link set veth_src netns ${NS_SRC} + ip link set veth_src_fwd netns ${NS_FWD} + + ip link set veth_dst netns ${NS_DST} + ip link set veth_dst_fwd netns ${NS_FWD} + + ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src + ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst + + # The fwd netns automatically get a v6 LL address / routes, but also + # needs v4 one in order to start ARP probing. IP4_NET route is added + # to the endpoints so that the ARP processing will reply. + + ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd + ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd + + ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad + ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad + + ip -netns ${NS_SRC} link set dev veth_src up + ip -netns ${NS_FWD} link set dev veth_src_fwd up + + ip -netns ${NS_DST} link set dev veth_dst up + ip -netns ${NS_FWD} link set dev veth_dst_fwd up + + ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global + ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global + ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global + + ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global + ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global + + ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global + ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global + ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global + + ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global + ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global + + fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) + fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) + + ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src + ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst + + ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src + ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst +} + +netns_test_connectivity() +{ + set +e + + ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" + ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" + + TEST="TCPv4 connectivity test" + ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="TCPv6 connectivity test" + ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="ICMPv4 connectivity test" + ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="ICMPv6 connectivity test" + ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST} + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + set -e +} + +hex_mem_str() +{ + perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 +} + +netns_setup_bpf() +{ + local obj=$1 + local use_forwarding=${2:-0} + + ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress + + ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress + + if [ "$use_forwarding" -eq "1" ]; then + # bpf_fib_lookup() checks if forwarding is enabled + ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1 + ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1 + ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1 + return 0 + fi + + veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) + veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) + + progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]') + for prog in $progs; do + map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]') + if [ ! -z "$map" ]; then + bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src) + bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst) + fi + done +} + +trap netns_cleanup EXIT +set -e + +netns_setup +netns_setup_bpf test_tc_neigh.o +netns_test_connectivity +netns_cleanup +netns_setup +netns_setup_bpf test_tc_neigh_fib.o 1 +netns_test_connectivity +netns_cleanup +netns_setup +netns_setup_bpf test_tc_peer.o +netns_test_connectivity diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h new file mode 100644 index 000000000000..6118e3ab61fc --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ + +#ifndef _TEST_TCP_HDR_OPTIONS_H +#define _TEST_TCP_HDR_OPTIONS_H + +struct bpf_test_option { + __u8 flags; + __u8 max_delack_ms; + __u8 rand; +} __attribute__((packed)); + +enum { + OPTION_RESEND, + OPTION_MAX_DELACK_MS, + OPTION_RAND, + __NR_OPTION_FLAGS, +}; + +#define OPTION_F_RESEND (1 << OPTION_RESEND) +#define OPTION_F_MAX_DELACK_MS (1 << OPTION_MAX_DELACK_MS) +#define OPTION_F_RAND (1 << OPTION_RAND) +#define OPTION_MASK ((1 << __NR_OPTION_FLAGS) - 1) + +#define TEST_OPTION_FLAGS(flags, option) (1 & ((flags) >> (option))) +#define SET_OPTION_FLAGS(flags, option) ((flags) |= (1 << (option))) + +/* Store in bpf_sk_storage */ +struct hdr_stg { + bool active; + bool resend_syn; /* active side only */ + bool syncookie; /* passive side only */ + bool fastopen; /* passive side only */ +}; + +struct linum_err { + unsigned int linum; + int err; +}; + +#define TCPHDR_FIN 0x01 +#define TCPHDR_SYN 0x02 +#define TCPHDR_RST 0x04 +#define TCPHDR_PSH 0x08 +#define TCPHDR_ACK 0x10 +#define TCPHDR_URG 0x20 +#define TCPHDR_ECE 0x40 +#define TCPHDR_CWR 0x80 +#define TCPHDR_SYNACK (TCPHDR_SYN | TCPHDR_ACK) + +#define TCPOPT_EOL 0 +#define TCPOPT_NOP 1 +#define TCPOPT_WINDOW 3 +#define TCPOPT_EXP 254 + +#define TCP_BPF_EXPOPT_BASE_LEN 4 +#define MAX_TCP_HDR_LEN 60 +#define MAX_TCP_OPTION_SPACE 40 + +#ifdef BPF_PROG_TEST_TCP_HDR_OPTIONS + +#define CG_OK 1 +#define CG_ERR 0 + +#ifndef SOL_TCP +#define SOL_TCP 6 +#endif + +struct tcp_exprm_opt { + __u8 kind; + __u8 len; + __u16 magic; + union { + __u8 data[4]; + __u32 data32; + }; +} __attribute__((packed)); + +struct tcp_opt { + __u8 kind; + __u8 len; + union { + __u8 data[4]; + __u32 data32; + }; +} __attribute__((packed)); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __type(key, int); + __type(value, struct linum_err); +} lport_linum_map SEC(".maps"); + +static inline unsigned int tcp_hdrlen(const struct tcphdr *th) +{ + return th->doff << 2; +} + +static inline __u8 skops_tcp_flags(const struct bpf_sock_ops *skops) +{ + return skops->skb_tcp_flags; +} + +static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~(BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)); +} + +static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags | + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG | + extra); +} +static inline void +clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); +} + +static inline void +set_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags | + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); +} + +#define RET_CG_ERR(__err) ({ \ + struct linum_err __linum_err; \ + int __lport; \ + \ + __linum_err.linum = __LINE__; \ + __linum_err.err = __err; \ + __lport = skops->local_port; \ + bpf_map_update_elem(&lport_linum_map, &__lport, &__linum_err, BPF_NOEXIST); \ + clear_hdr_cb_flags(skops); \ + clear_parse_all_hdr_cb_flags(skops); \ + return CG_ERR; \ +}) + +#endif /* BPF_PROG_TEST_TCP_HDR_OPTIONS */ + +#endif /* _TEST_TCP_HDR_OPTIONS_H */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 78a6bae56ea6..9be395d9dc64 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -114,6 +114,7 @@ struct bpf_test { bpf_testdata_struct_t retvals[MAX_TEST_RUNS]; }; enum bpf_attach_type expected_attach_type; + const char *kfunc; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -984,8 +985,24 @@ static void do_test_single(struct bpf_test *test, bool unpriv, attr.log_level = 4; attr.prog_flags = pflags; + if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { + attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, + attr.expected_attach_type); + if (attr.attach_btf_id < 0) { + printf("FAIL\nFailed to find BTF ID for '%s'!\n", + test->kfunc); + (*errors)++; + return; + } + } + fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); - if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) { + + /* BPF_PROG_TYPE_TRACING requires more setup and + * bpf_probe_prog_type won't give correct answer + */ + if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING && + !bpf_probe_prog_type(prog_type, 0)) { printf("SKIP (unsupported program type %d)\n", prog_type); skips++; goto close_fds; diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 4d0e913bbb22..1bbd1d9830c8 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -90,6 +90,33 @@ long ksym_get_addr(const char *name) return 0; } +/* open kallsyms and read symbol addresses on the fly. Without caching all symbols, + * this is faster than load + find. + */ +int kallsyms_find(const char *sym, unsigned long long *addr) +{ + char type, name[500]; + unsigned long long value; + int err = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (!f) + return -EINVAL; + + while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { + if (strcmp(name, sym) == 0) { + *addr = value; + goto out; + } + } + err = -ENOENT; + +out: + fclose(f); + return err; +} + void read_trace_pipe(void) { int trace_fd; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 25ef597dd03f..f62fdef9e589 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -12,6 +12,10 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); + +/* open kallsyms and find addresses on the fly, faster than load + search. */ +int kallsyms_find(const char *sym, unsigned long long *addr); + void read_trace_pipe(void); #endif diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c index d781bc86e100..ca8fdb1b3f01 100644 --- a/tools/testing/selftests/bpf/verifier/and.c +++ b/tools/testing/selftests/bpf/verifier/and.c @@ -48,3 +48,19 @@ .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "check known subreg with unknown reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFFFF1234), + /* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */ + BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 512), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0 +}, diff --git a/tools/testing/selftests/bpf/verifier/basic.c b/tools/testing/selftests/bpf/verifier/basic.c index b8d18642653a..de84f0d57082 100644 --- a/tools/testing/selftests/bpf/verifier/basic.c +++ b/tools/testing/selftests/bpf/verifier/basic.c @@ -2,7 +2,7 @@ "empty prog", .insns = { }, - .errstr = "unknown opcode 00", + .errstr = "last insn is not an exit or jmp", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 4d6645f2874c..dac40de3f868 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -557,3 +557,149 @@ .result = ACCEPT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "bounds check for reg = 0, reg xor 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg32 = 0, reg32 xor 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 1), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg = 2, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg = any, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + .errstr_unpriv = "invalid access to map value", +}, +{ + "bounds check for reg32 = any, reg32 xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + .errstr_unpriv = "invalid access to map value", +}, +{ + "bounds check for reg > 0, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_1, 0, 3), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg32 > 0, reg32 xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JLE, BPF_REG_1, 0, 3), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP32_IMM(BPF_JGE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 94258c6b5235..c4f5d909e58a 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -647,13 +647,14 @@ .result = REJECT, }, { - "calls: ld_abs with changing ctx data in callee", + "calls: subprog call with ld_abs in main prog", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LD_ABS(BPF_B, 0), BPF_LD_ABS(BPF_H, 0), BPF_LD_ABS(BPF_W, 0), BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), BPF_LD_ABS(BPF_B, 0), @@ -666,8 +667,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", - .result = REJECT, + .result = ACCEPT, }, { "calls: two calls with bad fallthrough", diff --git a/tools/testing/selftests/bpf/verifier/d_path.c b/tools/testing/selftests/bpf/verifier/d_path.c new file mode 100644 index 000000000000..b988396379a7 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/d_path.c @@ -0,0 +1,37 @@ +{ + "d_path accept", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_LD_IMM64(BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "dentry_open", +}, +{ + "d_path reject", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_LD_IMM64(BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "helper call is not allowed in probe", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "d_path", +}, diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index 2c5fbe7bcd27..ae72536603fe 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -529,7 +529,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .errstr = "invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)", .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c index 3856dba733e9..f9297900cea6 100644 --- a/tools/testing/selftests/bpf/verifier/ld_imm64.c +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -51,14 +51,6 @@ .result = REJECT, }, { - "test5 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, -}, -{ "test6 ld_imm64", .insns = { BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index b52209db8250..637f9293bda8 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -60,3 +60,35 @@ .result = ACCEPT, .retval = 1, }, +{ + "bpf_map_ptr: r = 0, map_ptr = map_ptr + r", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 4 }, + .result = ACCEPT, +}, +{ + "bpf_map_ptr: r = 0, r = r + map_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 4 }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 056e0273bf12..006b5bd99c08 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -854,3 +854,50 @@ .errstr = "Unreleased reference", .result = REJECT, }, +{ + "reference tracking: bpf_sk_release(btf_tcp_sock)", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", +}, +{ + "reference tracking: use ptr from bpf_skc_to_tcp_sock() after release", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", +}, diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c new file mode 100644 index 000000000000..4ad7e05de706 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/regalloc.c @@ -0,0 +1,269 @@ +{ + "regalloc basic", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=48 size=1", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc src_reg mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc src_reg negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc and spill", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7), + /* r0 has upper bound that should propagate into r2 */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */ + BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2), + /* r3 has lower and upper bounds */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc and spill negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7), + /* r0 has upper bound that should propagate into r2 */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */ + BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2), + /* r3 has lower and upper bounds */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=48 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc three regs", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc after call", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc, spill, JEQ", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), /* spill r0 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0), + /* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 20, 0), + /* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 with map_value */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), /* skip ldx if map_value == NULL */ + /* Buggy verifier will think that r3 == 20 here */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), /* read from map_value */ + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index b1aac2641498..ce13ece08d51 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -631,3 +631,28 @@ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT, .result = ACCEPT, }, +{ + "mark null check on return value of bpf_skc_to helpers", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_request_sock), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", +}, diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 55bd387ce7ec..52d3f0364bda 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -145,7 +145,7 @@ TEST(clone3_cap_checkpoint_restore) test_clone3_supported(); EXPECT_EQ(getuid(), 0) - XFAIL(return, "Skipping all tests as non-root\n"); + SKIP(return, "Skipping all tests as non-root"); memset(&set_tid, 0, sizeof(set_tid)); diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index c99b98b0d461..575b391ddc78 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -44,7 +44,7 @@ TEST(close_range) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -52,7 +52,7 @@ TEST(close_range) EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { if (errno == ENOSYS) - XFAIL(return, "close_range() syscall not supported"); + SKIP(return, "close_range() syscall not supported"); } EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); @@ -108,7 +108,7 @@ TEST(close_range_unshare) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -197,7 +197,7 @@ TEST(close_range_unshare_capped) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 47edf099a17e..508a702f0021 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -207,7 +207,7 @@ __rate_test() RET=0 - devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16 + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512 devlink trap group set $DEVLINK_DEV group l3_drops policer $id # Send packets at highest possible rate and make sure they are dropped @@ -220,8 +220,8 @@ __rate_test() rate=$(trap_rate_get) pct=$((100 * (rate - 1000) / 1000)) - ((-5 <= pct && pct <= 5)) - check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%" + ((-10 <= pct && pct <= 10)) + check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%" log_info "Expected rate 1000 pps, measured rate $rate pps" drop_rate=$(policer_drop_rate_get $id) @@ -288,35 +288,12 @@ __burst_test() RET=0 - devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32 + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512 devlink trap group set $DEVLINK_DEV group l3_drops policer $id - # Send a burst of 64 packets and make sure that about 32 are received - # and the rest are dropped by the policer - log_info "=== Tx burst size: 64, Policer burst size: 32 pps ===" - - t0_rx=$(devlink_trap_rx_packets_get blackhole_route) - t0_drop=$(devlink_trap_policer_rx_dropped_get $id) - - start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64 - - t1_rx=$(devlink_trap_rx_packets_get blackhole_route) - t1_drop=$(devlink_trap_policer_rx_dropped_get $id) - - rx=$((t1_rx - t0_rx)) - pct=$((100 * (rx - 32) / 32)) - ((-20 <= pct && pct <= 20)) - check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%" - log_info "Expected burst size of 32 packets, measured burst size of $rx packets" - - drop=$((t1_drop - t0_drop)) - (( drop > 0 )) - check_err $? "Expected non-zero policer drops, got 0" - log_info "Measured policer drops of $drop packets" - # Send a burst of 16 packets and make sure that 16 are received # and that none are dropped by the policer - log_info "=== Tx burst size: 16, Policer burst size: 32 pps ===" + log_info "=== Tx burst size: 16, Policer burst size: 512 ===" t0_rx=$(devlink_trap_rx_packets_get blackhole_route) t0_drop=$(devlink_trap_policer_rx_dropped_get $id) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index 6d1790b5de7a..e9f8718af979 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -147,17 +147,26 @@ switch_create() # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. + devlink_pool_size_thtype_save 0 devlink_pool_size_thtype_set 0 dynamic 10000000 + devlink_pool_size_thtype_save 4 devlink_pool_size_thtype_set 4 dynamic 10000000 + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 6 + devlink_tc_bind_pool_th_save $swp1 1 ingress devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6 + devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 6 + devlink_tc_bind_pool_th_save $swp2 2 ingress devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6 + devlink_tc_bind_pool_th_save $swp3 1 egress devlink_tc_bind_pool_th_set $swp3 1 egress 4 7 + devlink_tc_bind_pool_th_save $swp3 2 egress devlink_tc_bind_pool_th_set $swp3 2 egress 4 7 + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 7 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh new file mode 100755 index 000000000000..27de3d9ed08e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh @@ -0,0 +1,379 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + test_defaults + test_dcb_ets + test_mtu + test_pfc + test_int_buf + test_tc_priomap + test_tc_mtu + test_tc_sizes + test_tc_int_buf +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +swp=$NETIF_NO_CABLE + +cleanup() +{ + pre_cleanup +} + +get_prio_pg() +{ + __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | + grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2- +} + +get_prio_pfc() +{ + __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | + grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2- +} + +get_prio_tc() +{ + __mlnx_qos -i $swp | sed -n '/^tc/,$p' | + awk '/^tc/ { TC = $2 } + /priority:/ { PRIO[$2]=TC } + END { + for (i in PRIO) + printf("%d ", PRIO[i]) + }' +} + +get_buf_size() +{ + local idx=$1; shift + + __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1)) +} + +get_tot_size() +{ + __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//' +} + +check_prio_pg() +{ + local expect=$1; shift + + local current=$(get_prio_pg) + test "$current" = "$expect" + check_err $? "prio2buffer is '$current', expected '$expect'" +} + +check_prio_pfc() +{ + local expect=$1; shift + + local current=$(get_prio_pfc) + test "$current" = "$expect" + check_err $? "prio PFC is '$current', expected '$expect'" +} + +check_prio_tc() +{ + local expect=$1; shift + + local current=$(get_prio_tc) + test "$current" = "$expect" + check_err $? "prio_tc is '$current', expected '$expect'" +} + +__check_buf_size() +{ + local idx=$1; shift + local expr=$1; shift + local what=$1; shift + + local current=$(get_buf_size $idx) + ((current $expr)) + check_err $? "${what}buffer $idx size is '$current', expected '$expr'" + echo $current +} + +check_buf_size() +{ + __check_buf_size "$@" > /dev/null +} + +test_defaults() +{ + RET=0 + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + log_test "Default headroom configuration" +} + +test_dcb_ets() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null + + check_prio_pg "0 2 4 6 1 3 5 7 " + check_prio_tc "0 2 4 6 1 3 5 7 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null + check_fail $? "prio2buffer accepted in DCB mode" + + log_test "Configuring headroom through ETS" +} + +test_mtu() +{ + local what=$1; shift + local buf0size_2 + local buf0size + + RET=0 + buf0size=$(__check_buf_size 0 "> 0") + + mtu_set $swp 3000 + buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ") + mtu_restore $swp + + mtu_set $swp 6000 + check_buf_size 0 "> $buf0size_2" "MTU 6000: " + mtu_restore $swp + + check_buf_size 0 "== $buf0size" + + log_test "${what}MTU impacts buffer size" +} + +test_tc_mtu() +{ + # In TC mode, MTU still impacts the threshold below which a buffer is + # not permitted to go. + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_mtu "TC: " + tc qdisc delete dev $swp root +} + +test_pfc() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null + + local buf0size=$(get_buf_size 0) + local buf1size=$(get_buf_size 1) + local buf2size=$(get_buf_size 2) + local buf3size=$(get_buf_size 3) + check_buf_size 0 "> 0" + check_buf_size 1 "> 0" + check_buf_size 2 "> 0" + check_buf_size 3 "> 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "Buffer size sans PFC" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null + + check_prio_pg "0 0 0 0 0 1 2 3 " + check_prio_pfc "0 0 0 0 0 1 1 1 " + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf2size" + check_buf_size 3 "> $buf3size" + + local buf1size=$(get_buf_size 1) + check_buf_size 2 "== $buf1size" + check_buf_size 3 "== $buf1size" + + log_test "PFC: Cable length 0" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null + + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf1size" + check_buf_size 3 "> $buf1size" + + log_test "PFC: Cable length 1000" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_buf_size 0 "> 0" + check_buf_size 1 "== 0" + check_buf_size 2 "== 0" + check_buf_size 3 "== 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "PFC: Restore defaults" +} + +test_tc_priomap() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null + check_prio_pg "0 1 2 3 4 5 6 7 " + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + check_prio_pg "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null + check_prio_pg "1 3 5 7 0 2 4 6 " + + tc qdisc delete dev $swp root + check_prio_pg "0 1 2 3 4 5 6 7 " + + # Clean up. + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null + tc qdisc delete dev $swp root + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + log_test "TC: priomap" +} + +test_tc_sizes() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + RET=0 + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + check_fail $? "buffer_size should fail before qdisc is added" + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_err $? "buffer_size should pass after qdisc is added" + check_buf_size 0 "== $size" "set size: " + + mtu_set $swp 6000 + check_buf_size 0 "== $size" "set MTU: " + mtu_restore $swp + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + + # After replacing the qdisc for the same kind, buffer_size still has to + # work. + tc qdisc replace dev $swp root handle 1: bfifo limit 1M + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_buf_size 0 "== $size" "post replace, set size: " + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + + # Likewise after replacing for a different kind. + tc qdisc replace dev $swp root handle 2: prio bands 8 + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_buf_size 0 "== $size" "post replace different kind, set size: " + + tc qdisc delete dev $swp root + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + check_fail $? "buffer_size should fail after qdisc is deleted" + + log_test "TC: buffer size" +} + +test_int_buf() +{ + local what=$1; shift + + RET=0 + + local buf0size=$(get_buf_size 0) + local tot_size=$(get_tot_size) + + # Size of internal buffer and buffer 9. + local dsize=$((tot_size - buf0size)) + + tc qdisc add dev $swp clsact + tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp + + local buf0size_2=$(get_buf_size 0) + local tot_size_2=$(get_tot_size) + local dsize_2=$((tot_size_2 - buf0size_2)) + + # Egress SPAN should have added to the "invisible" buffer configuration. + ((dsize_2 > dsize)) + check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'" + + mtu_set $swp 3000 + + local buf0size_3=$(get_buf_size 0) + local tot_size_3=$(get_tot_size) + local dsize_3=$((tot_size_3 - buf0size_3)) + + # MTU change might change buffer 0, which will show at total, but the + # hidden buffers should stay the same size. + ((dsize_3 == dsize_2)) + check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'" + + mtu_restore $swp + tc qdisc del dev $swp clsact + + # After SPAN removal, hidden buffers should be back to the original sizes. + local buf0size_4=$(get_buf_size 0) + local tot_size_4=$(get_tot_size) + local dsize_4=$((tot_size_4 - buf0size_4)) + ((dsize_4 == dsize)) + check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'" + + log_test "${what}internal buffer size" +} + +test_tc_int_buf() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_int_buf "TC: " + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + test_int_buf "TC+buffsize: " + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + tc qdisc delete dev $swp root +} + +trap cleanup EXIT + +bail_on_lldpad +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index faa51012cdac..0bf76f13c030 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -82,3 +82,17 @@ bail_on_lldpad() fi fi } + +__mlnx_qos() +{ + local err + + mlnx_qos "$@" 2>/dev/null + err=$? + + if ((err)); then + echo "Error ($err) in mlnx_qos $@" >/dev/stderr + fi + + return $err +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index b025daea062d..8f164c80e215 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -145,12 +145,17 @@ switch_create() # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 5 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 + devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 5 + devlink_tc_bind_pool_th_save $swp2 1 ingress devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 12 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh new file mode 100755 index 000000000000..4d900bc1f76c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -0,0 +1,403 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority +# of 1. This stream is consistently prioritized as priority 1, is put to PG +# buffer 1, and scheduled at TC 1. +# +# - the stream first ingresses through $swp1, where it is forwarded to $swp3 +# +# - then it ingresses through $swp4. Here it is put to a lossless buffer and put +# to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is +# shaped, and thus the PFC pool eventually fills, therefore the headroom +# fills, and $swp3 is paused. +# +# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at +# a pool ("overflow pool"). The overflow pool needs to be large enough to +# contain the whole burst. +# +# - eventually the PFC pool gets some traffic out, headroom therefore gets some +# traffic to the pool, and $swp3 is unpaused again. This way the traffic is +# gradually forwarded from the overflow pool, through the PFC pool, out of +# $swp2, and eventually to $h2. +# +# - if PFC works, all lossless flow packets that ingress through $swp1 should +# also be seen ingressing $h2. If it doesn't, there will be drops due to +# discrepancy between the speeds of $swp1 and $h2. +# +# - it should all play out relatively quickly, so that SLL and HLL will not +# cause drops. +# +# +-----------------------+ +# | H1 | +# | + $h1.111 | +# | | 192.0.2.33/28 | +# | | | +# | + $h1 | +# +---|-------------------+ +--------------------+ +# | | | +# +---|----------------------|--------------------|---------------------------+ +# | + $swp1 $swp3 + + $swp4 | +# | | iPOOL1 iPOOL0 | | iPOOL2 | +# | | ePOOL4 ePOOL5 | | ePOOL4 | +# | | 1Gbps | | 1Gbps | +# | | PFC:enabled=1 | | PFC:enabled=1 | +# | +-|----------------------|-+ +-|------------------------+ | +# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | | +# | | | | | | +# | | BR1 | | BR2 | | +# | | | | | | +# | | | | + $swp2.111 | | +# | +--------------------------+ +---------|----------------+ | +# | | | +# | iPOOL0: 500KB dynamic | | +# | iPOOL1: 10MB static | | +# | iPOOL2: 1MB static + $swp2 | +# | ePOOL4: 500KB dynamic | iPOOL0 | +# | ePOOL5: 10MB static | ePOOL6 | +# | ePOOL6: "infinite" static | 200Mbps shaper | +# +-------------------------------------------------------|-------------------+ +# | +# +---|-------------------+ +# | + $h2 H2 | +# | | | +# | + $h2.111 | +# | 192.0.2.34/28 | +# +-----------------------+ +# +# iPOOL0+ePOOL4 is a helper pool for control traffic etc. +# iPOOL1+ePOOL5 are overflow pools. +# iPOOL2+ePOOL6 are PFC pools. + +ALL_TESTS=" + ping_ipv4 + test_qos_pfc +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +_1KB=1000 +_100KB=$((100 * _1KB)) +_500KB=$((500 * _1KB)) +_1MB=$((1000 * _1KB)) +_10MB=$((10 * _1MB)) + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 + + vlan_create $h1 111 v$h1 192.0.2.33/28 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 111 v$h2 192.0.2.34/28 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + mtu_restore $h2 + simple_if_fini $h2 +} + +switch_create() +{ + # pools + # ----- + + devlink_pool_size_thtype_save 0 + devlink_pool_size_thtype_save 4 + devlink_pool_size_thtype_save 1 + devlink_pool_size_thtype_save 5 + devlink_pool_size_thtype_save 2 + devlink_pool_size_thtype_save 6 + + devlink_port_pool_th_save $swp1 1 + devlink_port_pool_th_save $swp2 6 + devlink_port_pool_th_save $swp3 5 + devlink_port_pool_th_save $swp4 2 + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_save $swp2 1 egress + devlink_tc_bind_pool_th_save $swp3 1 egress + devlink_tc_bind_pool_th_save $swp4 1 ingress + + # Control traffic pools. Just reduce the size. Keep them dynamic so that + # we don't need to change all the uninteresting quotas. + devlink_pool_size_thtype_set 0 dynamic $_500KB + devlink_pool_size_thtype_set 4 dynamic $_500KB + + # Overflow pools. + devlink_pool_size_thtype_set 1 static $_10MB + devlink_pool_size_thtype_set 5 static $_10MB + + # PFC pools. As per the writ, the size of egress PFC pool should be + # infinice, but actually it just needs to be large enough to not matter + # in practice, so reuse the 10MB limit. + devlink_pool_size_thtype_set 2 static $_1MB + devlink_pool_size_thtype_set 6 static $_10MB + + # $swp1 + # ----- + + ip link set dev $swp1 up + mtu_set $swp1 10000 + vlan_create $swp1 111 + ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp1 1 $_10MB + devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB + + # Configure qdisc so that we can configure PG and therefore pool + # assignment. + tc qdisc replace dev $swp1 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + + # $swp2 + # ----- + + ip link set dev $swp2 up + mtu_set $swp2 10000 + vlan_create $swp2 111 + ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp2 6 $_10MB + devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped. + tc qdisc replace dev $swp2 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + tc qdisc replace dev $swp2 parent 1:7 handle 17: \ + tbf rate 200Mbit burst 131072 limit 1M + + # $swp3 + # ----- + + ip link set dev $swp3 up + mtu_set $swp3 10000 + vlan_create $swp3 111 + ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp3 5 $_10MB + devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6) + tc qdisc replace dev $swp3 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + + # Need to enable PFC so that PAUSE takes effect. Therefore need to put + # the lossless prio into a buffer of its own. Don't bother with buffer + # sizes though, there is not going to be any pressure in the "backward" + # direction. + __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null + + # $swp4 + # ----- + + ip link set dev $swp4 up + mtu_set $swp4 10000 + vlan_create $swp4 111 + ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp4 2 $_1MB + devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB + + # Configure qdisc so that we can hand-tune headroom. + tc qdisc replace dev $swp4 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null + # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which + # is (-2*MTU) about 80K of delay provision. + __mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + + # bridges + # ------- + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br1 + ip link set dev $swp3.111 master br1 + ip link set dev br1 up + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev $swp2.111 master br2 + ip link set dev $swp4.111 master br2 + ip link set dev br2 up +} + +switch_destroy() +{ + # Do this first so that we can reset the limits to values that are only + # valid for the original static / dynamic setting. + devlink_pool_size_thtype_restore 6 + devlink_pool_size_thtype_restore 5 + devlink_pool_size_thtype_restore 4 + devlink_pool_size_thtype_restore 2 + devlink_pool_size_thtype_restore 1 + devlink_pool_size_thtype_restore 0 + + # bridges + # ------- + + ip link set dev br2 down + ip link set dev $swp4.111 nomaster + ip link set dev $swp2.111 nomaster + ip link del dev br2 + + ip link set dev br1 down + ip link set dev $swp3.111 nomaster + ip link set dev $swp1.111 nomaster + ip link del dev br1 + + # $swp4 + # ----- + + __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp4 root + + devlink_tc_bind_pool_th_restore $swp4 1 ingress + devlink_port_pool_th_restore $swp4 2 + + vlan_destroy $swp4 111 + mtu_restore $swp4 + ip link set dev $swp4 down + + # $swp3 + # ----- + + __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp3 root + + devlink_tc_bind_pool_th_restore $swp3 1 egress + devlink_port_pool_th_restore $swp3 5 + + vlan_destroy $swp3 111 + mtu_restore $swp3 + ip link set dev $swp3 down + + # $swp2 + # ----- + + tc qdisc del dev $swp2 parent 1:7 + tc qdisc del dev $swp2 root + + devlink_tc_bind_pool_th_restore $swp2 1 egress + devlink_port_pool_th_restore $swp2 6 + + vlan_destroy $swp2 111 + mtu_restore $swp2 + ip link set dev $swp2 down + + # $swp1 + # ----- + + __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp1 root + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 1 + + vlan_destroy $swp1 111 + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + swp4=${NETIFS[p6]} + + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 +} + +test_qos_pfc() +{ + RET=0 + + # 10M pool, each packet is 8K of payload + headers + local pkts=$((_10MB / 8050)) + local size=$((pkts * 8050)) + local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + $MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \ + -a own -b $h2mac -c $pkts -t udp -q + sleep 2 + + local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + local din=$((in1 - in0)) + local dout=$((out1 - out0)) + + local pct_in=$((din * 100 / size)) + + ((pct_in > 95 && pct_in < 105)) + check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%" + + ((dout == din)) + check_err $? "$((din - dout)) bytes out of $din ingressed got lost" + + log_test "PFC" +} + +trap cleanup EXIT + +bail_on_lldpad +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh index 94c37124a840..af64bc9ea8ab 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -27,11 +27,17 @@ switch_create() # amount of traffic that is admitted to the shared buffers. This makes # sure that there is always enough traffic of all types to select from # for the DWRR process. + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 12 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12 + devlink_port_pool_th_save $swp2 4 devlink_port_pool_th_set $swp2 4 12 + devlink_tc_bind_pool_th_save $swp2 7 egress devlink_tc_bind_pool_th_set $swp2 7 egress 4 5 + devlink_tc_bind_pool_th_save $swp2 6 egress devlink_tc_bind_pool_th_set $swp2 6 egress 4 5 + devlink_tc_bind_pool_th_save $swp2 5 egress devlink_tc_bind_pool_th_set $swp2 5 egress 4 5 # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 517297a14ecf..b0cb1aaffdda 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -208,6 +208,7 @@ switch_create() ip link set dev br2_11 up local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1) + devlink_port_pool_th_save $swp3 8 devlink_port_pool_th_set $swp3 8 $size } diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh index 4b96561c462f..3e3e06ea5703 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh @@ -24,6 +24,13 @@ tc_police_switch_destroy() simple_if_fini $swp1 } +tc_police_addr() +{ + local num=$1; shift + + printf "2001:db8:1::%x" $num +} + tc_police_rules_create() { local count=$1; shift @@ -34,8 +41,9 @@ tc_police_rules_create() for ((i = 0; i < count; ++i)); do cat >> $TC_POLICE_BATCH_FILE <<-EOF filter add dev $swp1 ingress \ - prot ip \ - flower skip_sw \ + prot ipv6 \ + pref 1000 \ + flower skip_sw dst_ip $(tc_police_addr $i) \ action police rate 10mbit burst 100k \ conform-exceed drop/ok EOF diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index de4b32fc4223..40909c254365 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -23,6 +23,27 @@ fw_flash_test() devlink dev flash $DL_HANDLE file dummy check_err $? "Failed to flash with status updates on" + devlink dev flash $DL_HANDLE file dummy component fw.mgmt + check_err $? "Failed to flash with component attribute" + + devlink dev flash $DL_HANDLE file dummy overwrite settings + check_fail $? "Flash with overwrite settings should be rejected" + + echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask + check_err $? "Failed to change allowed overwrite mask" + + devlink dev flash $DL_HANDLE file dummy overwrite settings + check_err $? "Failed to flash with settings overwrite enabled" + + devlink dev flash $DL_HANDLE file dummy overwrite identifiers + check_fail $? "Flash with overwrite settings should be identifiers" + + echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask + check_err $? "Failed to change allowed overwrite mask" + + devlink dev flash $DL_HANDLE file dummy overwrite identifiers overwrite settings + check_err $? "Failed to flash with settings and identifiers overwrite enabled" + echo "n"> $DEBUGFS_DIR/fw_update_status check_err $? "Failed to disable status updates" diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh new file mode 100755 index 000000000000..25c896b9e2eb --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0 +NSIM_NETDEV= +num_passes=0 +num_errors=0 + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_nsim +} + +trap cleanup EXIT + +function get_netdev_name { + local -n old=$1 + + new=$(ls /sys/class/net) + + for netdev in $new; do + for check in $old; do + [ $netdev == $check ] && break + done + + if [ $netdev != $check ]; then + echo $netdev + break + fi + done +} + +function check { + local code=$1 + local str=$2 + local exp_str=$3 + + if [ $code -ne 0 ]; then + ((num_errors++)) + return + fi + + if [ "$str" != "$exp_str" ]; then + echo -e "Expected: '$exp_str', got '$str'" + ((num_errors++)) + return + fi + + ((num_passes++)) +} + +# Bail if ethtool is too old +if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then + echo "SKIP: No --include-statistics support in ethtool" + exit 4 +fi + +# Make a netdevsim +old_netdevs=$(ls /sys/class/net) + +modprobe netdevsim +echo $NSIM_ID > /sys/bus/netdevsim/new_device + +NSIM_NETDEV=`get_netdev_name old_netdevs` + +set -o pipefail + +echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx +echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx + +s=$(ethtool --json -a $NSIM_NETDEV | jq '.[].statistics') +check $? "$s" "null" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics') +check $? "$s" "{}" + +echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length') +check $? "$s" "1" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames') +check $? "$s" "2" + +echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length') +check $? "$s" "2" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.rx_pause_frames') +check $? "$s" "1" +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames') +check $? "$s" "2" + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index ba1d53b9f815..1b08e042cf94 100644..100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -7,6 +7,7 @@ NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID NSIM_NETDEV= HAS_ETHTOOL= +STATIC_ENTRIES= EXIT_STATUS=0 num_cases=0 num_errors=0 @@ -193,6 +194,21 @@ function check_tables { sleep 0.02 ((retries--)) done + + if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then + fail=0 + for i in "${!STATIC_ENTRIES[@]}"; do + pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}` + cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected") + if [ $cnt -ne 1 ]; then + err_cnt "ethtool static entry: $pfx - $msg" + echo " check_table: ethtool does not contain '$pp_expected'" + ethtool --show-tunnels $NSIM_NETDEV + fail=1 + fi + done + [ $fail == 0 ] && pass_cnt + fi } function print_table { @@ -775,6 +791,157 @@ for port in 0 1; do exp1=( 0 0 0 0 ) done +cleanup_nsim + +# shared port tables +pfx="table sharing" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 0 > $NSIM_DEV_DFS/udp_ports_open_only +echo 1 > $NSIM_DEV_DFS/udp_ports_sleep +echo 1 > $NSIM_DEV_DFS/udp_ports_shared + +old_netdevs=$(ls /sys/class/net) +echo 1 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` +old_netdevs=$(ls /sys/class/net) +echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV2=`get_netdev_name old_netdevs` + +msg="VxLAN v4 devices" +exp0=( `mke 4789 1` 0 0 0 ) +exp1=( 0 0 0 0 ) +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV2 + +msg="VxLAN v4 devices go down" +exp0=( 0 0 0 0 ) +ifconfig vxlan1 down +ifconfig vxlan0 down +check_tables + +for ifc in vxlan0 vxlan1; do + ifconfig $ifc up +done + +msg="VxLAN v6 device" +exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) +new_vxlan vxlanC 4790 $NSIM_NETDEV 6 + +msg="Geneve device" +exp1=( `mke 6081 2` 0 0 0 ) +new_geneve gnv0 6081 + +msg="NIC device goes down" +ifconfig $NSIM_NETDEV down +check_tables + +msg="NIC device goes up again" +ifconfig $NSIM_NETDEV up +check_tables + +for i in `seq 2`; do + msg="turn feature off - 1, rep $i" + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature off - 2, rep $i" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature on - 1, rep $i" + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + exp1=( `mke 6081 2` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="turn feature on - 2, rep $i" + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on + check_tables +done + +msg="tunnels destroyed 1" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +overflow_table0 "overflow NIC table" + +msg="re-add a port" + +echo 2 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/new_port +check_tables + +msg="replace VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) +del_dev vxlan1 + +msg="vacate VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) +del_dev vxlan2 + +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset +check_tables + +msg="tunnels destroyed 2" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +echo 1 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/del_port + +cleanup_nsim + +# Static IANA port +pfx="static IANA vxlan" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan +STATIC_ENTRIES=( `mke 4789 1` ) + +port=1 +old_netdevs=$(ls /sys/class/net) +echo $port > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` + +msg="check empty" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="add on static port" +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV + +msg="add on different port" +exp0=( `mke 4790 1` 0 0 0 ) +new_vxlan vxlan2 4790 $NSIM_NETDEV + +cleanup_tuns + +msg="tunnels destroyed" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="different type" +new_geneve gnv0 4789 + +cleanup_tuns +cleanup_nsim + +# END + modprobe -r netdevsim if [ $num_errors -eq 0 ]; then diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh new file mode 100755 index 000000000000..beee0d5646a6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -0,0 +1,316 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2020 NXP Semiconductors + +WAIT_TIME=1 +NUM_NETIFS=4 +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +require_command tcpdump + +# +# +---------------------------------------------+ +# | DUT ports Generator ports | +# | +--------+ +--------+ +--------+ +--------+ | +# | | | | | | | | | | +# | | eth0 | | eth1 | | eth2 | | eth3 | | +# | | | | | | | | | | +# +-+--------+-+--------+-+--------+-+--------+-+ +# | | | | +# | | | | +# | +-----------+ | +# | | +# +--------------------------------+ + +eth0=${NETIFS[p1]} +eth1=${NETIFS[p2]} +eth2=${NETIFS[p3]} +eth3=${NETIFS[p4]} + +eth0_mac="de:ad:be:ef:00:00" +eth1_mac="de:ad:be:ef:00:01" +eth2_mac="de:ad:be:ef:00:02" +eth3_mac="de:ad:be:ef:00:03" + +# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number +# used by the kernel driver. The numbers are: +# VCAP IS1 lookup 0: 10000 +# VCAP IS1 lookup 1: 11000 +# VCAP IS1 lookup 2: 12000 +# VCAP IS2 lookup 0 policy 0: 20000 +# VCAP IS2 lookup 0 policy 1: 20001 +# VCAP IS2 lookup 0 policy 255: 20255 +# VCAP IS2 lookup 1 policy 0: 21000 +# VCAP IS2 lookup 1 policy 1: 21001 +# VCAP IS2 lookup 1 policy 255: 21255 +IS1() +{ + local lookup=$1 + + echo $((10000 + 1000 * lookup)) +} + +IS2() +{ + local lookup=$1 + local pag=$2 + + echo $((20000 + 1000 * lookup + pag)) +} + +ES0() +{ + echo 0 +} + +# The Ocelot switches have a fixed ingress pipeline composed of: +# +# +----------------------------------------------+ +-----------------------------------------+ +# | VCAP IS1 | | VCAP IS2 | +# | | | | +# | +----------+ +----------+ +----------+ | | +----------+ +----------+ | +# | | Lookup 0 | | Lookup 1 | | Lookup 2 | | --+------> PAG 0: | Lookup 0 | -> | Lookup 1 | | +# | +----------+ -> +----------+ -> +----------+ | | | +----------+ +----------+ | +# | |key&action| |key&action| |key&action| | | | |key&action| |key&action| | +# | |key&action| |key&action| |key&action| | | | | .. | | .. | | +# | | .. | | .. | | .. | | | | +----------+ +----------+ | +# | +----------+ +----------+ +----------+ | | | | +# | selects PAG | | | +----------+ +----------+ | +# +----------------------------------------------+ +------> PAG 1: | Lookup 0 | -> | Lookup 1 | | +# | | +----------+ +----------+ | +# | | |key&action| |key&action| | +# | | | .. | | .. | | +# | | +----------+ +----------+ | +# | | ... | +# | | | +# | | +----------+ +----------+ | +# +----> PAG 254: | Lookup 0 | -> | Lookup 1 | | +# | | +----------+ +----------+ | +# | | |key&action| |key&action| | +# | | | .. | | .. | | +# | | +----------+ +----------+ | +# | | | +# | | +----------+ +----------+ | +# +----> PAG 255: | Lookup 0 | -> | Lookup 1 | | +# | +----------+ +----------+ | +# | |key&action| |key&action| | +# | | .. | | .. | | +# | +----------+ +----------+ | +# +-----------------------------------------+ +# +# Both the VCAP IS1 (Ingress Stage 1) and IS2 (Ingress Stage 2) are indexed +# (looked up) multiple times: IS1 3 times, and IS2 2 times. Each filter +# (key and action pair) can be configured to only match during the first, or +# second, etc, lookup. +# +# During one TCAM lookup, the filter processing stops at the first entry that +# matches, then the pipeline jumps to the next lookup. +# The driver maps each individual lookup of each individual ingress TCAM to a +# separate chain number. For correct rule offloading, it is mandatory that each +# filter installed in one TCAM is terminated by a non-optional GOTO action to +# the next lookup from the fixed pipeline. +# +# A chain can only be used if there is a GOTO action correctly set up from the +# prior lookup in the processing pipeline. Setting up all chains is not +# mandatory. + +# NOTE: VCAP IS1 currently uses only S1_NORMAL half keys and VCAP IS2 +# dynamically chooses between MAC_ETYPE, ARP, IP4_TCP_UDP, IP4_OTHER, which are +# all half keys as well. + +create_tcam_skeleton() +{ + local eth=$1 + + tc qdisc add dev $eth clsact + + # VCAP IS1 is the Ingress Classification TCAM and can offload the + # following actions: + # - skbedit priority + # - vlan pop + # - vlan modify + # - goto (only in lookup 2, the last IS1 lookup) + tc filter add dev $eth ingress chain 0 pref 49152 flower \ + skip_sw action goto chain $(IS1 0) + tc filter add dev $eth ingress chain $(IS1 0) pref 49152 \ + flower skip_sw action goto chain $(IS1 1) + tc filter add dev $eth ingress chain $(IS1 1) pref 49152 \ + flower skip_sw action goto chain $(IS1 2) + tc filter add dev $eth ingress chain $(IS1 2) pref 49152 \ + flower skip_sw action goto chain $(IS2 0 0) + + # VCAP IS2 is the Security Enforcement ingress TCAM and can offload the + # following actions: + # - trap + # - drop + # - police + # The two VCAP IS2 lookups can be segmented into up to 256 groups of + # rules, called Policies. A Policy is selected through the Policy + # Association Group (PAG) action of VCAP IS1 (which is the + # GOTO offload). + tc filter add dev $eth ingress chain $(IS2 0 0) pref 49152 \ + flower skip_sw action goto chain $(IS2 1 0) +} + +setup_prepare() +{ + create_tcam_skeleton $eth0 + + ip link add br0 type bridge + ip link set $eth0 master br0 + ip link set $eth1 master br0 + ip link set br0 up + + ip link add link $eth3 name $eth3.100 type vlan id 100 + ip link set $eth3.100 up + + ip link add link $eth3 name $eth3.200 type vlan id 200 + ip link set $eth3.200 up + + tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \ + protocol 802.1Q flower skip_sw vlan_id 100 \ + action vlan pop \ + action goto chain $(IS1 2) + + tc filter add dev $eth0 egress chain $(ES0) pref 1 \ + flower skip_sw indev $eth1 \ + action vlan push protocol 802.1Q id 100 + + tc filter add dev $eth0 ingress chain $(IS1 0) pref 2 \ + protocol ipv4 flower skip_sw src_ip 10.1.1.2 \ + action skbedit priority 7 \ + action goto chain $(IS1 1) + + tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \ + protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \ + action police rate 50mbit burst 64k \ + action goto chain $(IS2 1 0) +} + +cleanup() +{ + ip link del $eth3.200 + ip link del $eth3.100 + tc qdisc del dev $eth0 clsact + ip link del br0 +} + +test_vlan_pop() +{ + printf "Testing VLAN pop.. " + + tcpdump_start $eth2 + + # Work around Mausezahn VLAN builder bug + # (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using + # an 8021q upper + $MZ $eth3.100 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup +} + +test_vlan_push() +{ + printf "Testing VLAN push.. " + + tcpdump_start $eth3.100 + + $MZ $eth2 -q -c 1 -p 64 -a $eth2_mac -b $eth3_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup +} + +test_vlan_modify() +{ + printf "Testing VLAN modification.. " + + ip link set br0 type bridge vlan_filtering 1 + bridge vlan add dev $eth0 vid 200 + bridge vlan add dev $eth0 vid 300 + bridge vlan add dev $eth1 vid 300 + + tc filter add dev $eth0 ingress chain $(IS1 2) pref 3 \ + protocol 802.1Q flower skip_sw vlan_id 200 \ + action vlan modify id 300 \ + action goto chain $(IS2 0 0) + + tcpdump_start $eth2 + + $MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup + + tc filter del dev $eth0 ingress chain $(IS1 2) pref 3 + + bridge vlan del dev $eth0 vid 200 + bridge vlan del dev $eth0 vid 300 + bridge vlan del dev $eth1 vid 300 + ip link set br0 type bridge vlan_filtering 0 +} + +test_skbedit_priority() +{ + local num_pkts=100 + + printf "Testing frame prioritization.. " + + before=$(ethtool_stats_get $eth0 'rx_green_prio_7') + + $MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2 + + after=$(ethtool_stats_get $eth0 'rx_green_prio_7') + + if [ $((after - before)) = $num_pkts ]; then + echo "OK" + else + echo "FAIL" + fi +} + +trap cleanup EXIT + +ALL_TESTS=" + test_vlan_pop + test_vlan_push + test_vlan_modify + test_skbedit_priority +" + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index 344a99c6da1b..9e2f00343f15 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -7,6 +7,7 @@ execveat.moved execveat.path.ephemeral execveat.ephemeral execveat.denatured +/load_address_* /recursion-depth xxxxxxxx* pipe diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 0a13b110c1e6..cf69b2fcce59 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE TEST_PROGS := binfmt_script non-regular -TEST_GEN_PROGS := execveat +TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile @@ -27,4 +27,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ - +$(OUTPUT)/load_address_4096: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@ +$(OUTPUT)/load_address_2097152: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@ +$(OUTPUT)/load_address_16777216: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@ diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c new file mode 100644 index 000000000000..d487c2f6a615 --- /dev/null +++ b/tools/testing/selftests/exec/load_address.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <link.h> +#include <stdio.h> +#include <stdlib.h> + +struct Statistics { + unsigned long long load_address; + unsigned long long alignment; +}; + +int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) +{ + struct Statistics *stats = (struct Statistics *) data; + int i; + + if (info->dlpi_name != NULL && info->dlpi_name[0] != '\0') { + // Ignore headers from other than the executable. + return 2; + } + + stats->load_address = (unsigned long long) info->dlpi_addr; + stats->alignment = 0; + + for (i = 0; i < info->dlpi_phnum; i++) { + if (info->dlpi_phdr[i].p_type != PT_LOAD) + continue; + + if (info->dlpi_phdr[i].p_align > stats->alignment) + stats->alignment = info->dlpi_phdr[i].p_align; + } + + return 1; // Terminate dl_iterate_phdr. +} + +int main(int argc, char **argv) +{ + struct Statistics extracted; + unsigned long long misalign; + int ret; + + ret = dl_iterate_phdr(ExtractStatistics, &extracted); + if (ret != 1) { + fprintf(stderr, "FAILED\n"); + return 1; + } + + if (extracted.alignment == 0) { + fprintf(stderr, "No alignment found\n"); + return 1; + } else if (extracted.alignment & (extracted.alignment - 1)) { + fprintf(stderr, "Alignment is not a power of 2\n"); + return 1; + } + + misalign = extracted.load_address & (extracted.alignment - 1); + if (misalign) { + printf("alignment = %llu, load_address = %llu\n", + extracted.alignment, extracted.load_address); + fprintf(stderr, "FAILED\n"); + return 1; + } + + fprintf(stderr, "PASS\n"); + return 0; +} diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 1d27f52c61e6..477cbb042f5b 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -74,7 +74,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); EXPECT_EQ(ret, 0) { if (errno == ENODEV) - XFAIL(goto out, "binderfs missing"); + SKIP(goto out, "binderfs missing"); TH_LOG("%s - Failed to mount binderfs", strerror(errno)); goto rmdir; } @@ -475,10 +475,10 @@ TEST(binderfs_stress) TEST(binderfs_test_privileged) { if (geteuid() != 0) - XFAIL(return, "Tests are not run as root. Skipping privileged tests"); + SKIP(return, "Tests are not run as root. Skipping privileged tests"); if (__do_binderfs_test(_metadata)) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); } TEST(binderfs_test_unprivileged) @@ -511,7 +511,7 @@ TEST(binderfs_test_unprivileged) ret = wait_for_pid(pid); if (ret) { if (ret == 2) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); ASSERT_EQ(ret, 0) { TH_LOG("wait_for_pid() failed"); } diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index d979ff14775a..8f82f99f7748 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3282,4 +3282,99 @@ TEST(epoll60) close(ctx.epfd); } +struct epoll61_ctx { + int epfd; + int evfd; +}; + +static void *epoll61_write_eventfd(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + int64_t l = 1; + + usleep(10950); + write(ctx->evfd, &l, sizeof(l)); + return NULL; +} + +static void *epoll61_epoll_with_timeout(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + int n; + + n = epoll_wait(ctx->epfd, events, 1, 11); + /* + * If epoll returned the eventfd, write on the eventfd to wake up the + * blocking poller. + */ + if (n == 1) { + int64_t l = 1; + + write(ctx->evfd, &l, sizeof(l)); + } + return NULL; +} + +static void *epoll61_blocking_epoll(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + + epoll_wait(ctx->epfd, events, 1, -1); + return NULL; +} + +TEST(epoll61) +{ + struct epoll61_ctx ctx; + struct epoll_event ev; + int i, r; + + ctx.epfd = epoll_create1(0); + ASSERT_GE(ctx.epfd, 0); + ctx.evfd = eventfd(0, EFD_NONBLOCK); + ASSERT_GE(ctx.evfd, 0); + + ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP; + ev.data.ptr = NULL; + r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev); + ASSERT_EQ(r, 0); + + /* + * We are testing a race. Repeat the test case 1000 times to make it + * more likely to fail in case of a bug. + */ + for (i = 0; i < 1000; i++) { + pthread_t threads[3]; + int n; + + /* + * Start 3 threads: + * Thread 1 sleeps for 10.9ms and writes to the evenfd. + * Thread 2 calls epoll with a timeout of 11ms. + * Thread 3 calls epoll with a timeout of -1. + * + * The eventfd write by Thread 1 should either wakeup Thread 2 + * or Thread 3. If it wakes up Thread 2, Thread 2 writes on the + * eventfd to wake up Thread 3. + * + * If no events are missed, all three threads should eventually + * be joinable. + */ + ASSERT_EQ(pthread_create(&threads[0], NULL, + epoll61_write_eventfd, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[1], NULL, + epoll61_epoll_with_timeout, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[2], NULL, + epoll61_blocking_epoll, &ctx), 0); + + for (n = 0; n < ARRAY_SIZE(threads); ++n) + ASSERT_EQ(pthread_join(threads[n], NULL), 0); + } + + close(ctx.epfd); + close(ctx.evfd); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/firmware/.gitignore b/tools/testing/selftests/firmware/.gitignore new file mode 100644 index 000000000000..62abc92a94c4 --- /dev/null +++ b/tools/testing/selftests/firmware/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +fw_namespace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index 3bcd4c3624ee..b4da41d126d5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK echo "p:myevent1 $PLACE" >> dynamic_events echo "r:myevent2 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index 438961971b7e..3a0e2885fff5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index a8603bd23e0d..d3e138e8377f 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc index acb17ce543d2..80541964b927 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_notrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 9f0a9687c773..2f7211254529 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 98305d76bd04..191d116b7883 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -4,9 +4,9 @@ # requires: set_ftrace_filter # flags: instance -echo kernel_clone:stacktrace >> set_ftrace_filter +echo $FUNCTION_FORK:stacktrace >> set_ftrace_filter -grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter +grep -q "$FUNCTION_FORK:stacktrace:unlimited" set_ftrace_filter (echo "forked"; sleep 1) diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index c5dec55b7d95..a6fac927ee82 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -133,6 +133,13 @@ yield() { ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1 } +# The fork function in the kernel was renamed from "_do_fork" to +# "kernel_fork". As older tests should still work with older kernels +# as well as newer kernels, check which version of fork is used on this +# kernel so that the tests can use the fork function for the running kernel. +FUNCTION_FORK=`(if grep '\bkernel_clone\b' /proc/kallsyms > /dev/null; then + echo kernel_clone; else echo '_do_fork'; fi)` + # Since probe event command may include backslash, explicitly use printf "%s" # to NOT interpret it. ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index 9737cd0578a7..2428a3ed78c9 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - adding and removing # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index f9a40af76888..010a8b1d6c1d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - busy event check # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit_fail # this must fail diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index eb543d3cfe5f..a96a1dc7014f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -3,13 +3,13 @@ # description: Kprobe dynamic event with arguments # requires: kprobe_events -echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events +echo "p:testprobe $FUNCTION_FORK \$stack \$stack0 +0(\$stack)" > kprobe_events grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' test -d events/kprobes/testprobe echo 1 > events/kprobes/testprobe/enable ( echo "forked") -grep testprobe trace | grep 'kernel_clone' | \ +grep testprobe trace | grep "$FUNCTION_FORK" | \ grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$' echo 0 > events/kprobes/testprobe/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc index 4e5b63be51c9..a053ee2e7d77 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -5,7 +5,7 @@ grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old -echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events +echo "p:testprobe $FUNCTION_FORK comm=\$comm " > kprobe_events grep testprobe kprobe_events | grep -q 'comm=$comm' test -d events/kprobes/testprobe diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index a1d70588ab21..84285a6f60b0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -30,13 +30,13 @@ esac : "Test get argument (1)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc index bd25dd0ba0d0..717130ed4feb 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then fi : "Test get basic types symbol argument" -echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events -echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events +echo "p:testprobe_u $FUNCTION_FORK arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events +echo "p:testprobe_s $FUNCTION_FORK arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events if grep -q "x8/16/32/64" README; then - echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events + echo "p:testprobe_x $FUNCTION_FORK arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events fi -echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events +echo "p:testprobe_bf $FUNCTION_FORK arg1=@linux_proc_banner:b8@4/32" >> kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable @@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace grep "testprobe_bf:.* arg1=.*" trace : "Test get string symbol argument" -echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events +echo "p:testprobe_str $FUNCTION_FORK arg1=@linux_proc_banner:string" > kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 91fcce1c241c..25b7708eb559 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "x8/16/32/64":README gen_event() { # Bitsize - echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" + echo "p:testprobe $FUNCTION_FORK \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" } check_types() { # s-type u-type x-type bf-type width diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc index a30a9c07290d..d25d01a19778 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported :;: "user-memory access syntax and ustring working on user memory";: echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \ > kprobe_events +echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \ + >> kprobe_events grep myevent kprobe_events | \ grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string' echo 1 > events/kprobes/myevent/enable +echo 1 > events/kprobes/myevent2/enable echo > /dev/null echo 0 > events/kprobes/myevent/enable +echo 0 > events/kprobes/myevent2/enable grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"' diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 0d179094191f..5556292601a4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -5,29 +5,29 @@ # prepare echo nop > current_tracer -echo kernel_clone > set_ftrace_filter -echo 'p:testprobe kernel_clone' > kprobe_events +echo $FUNCTION_FORK > set_ftrace_filter +echo "p:testprobe $FUNCTION_FORK" > kprobe_events # kprobe on / ftrace off echo 1 > events/kprobes/testprobe/enable echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe off / ftrace on echo 0 > events/kprobes/testprobe/enable echo > trace ( echo "forked") ! grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo 1 > events/kprobes/testprobe/enable @@ -35,11 +35,11 @@ echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace off echo nop > current_tracer echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 45d90b6c763d..f0d5b7777ed7 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "Create/append/":README # Choose 2 symbols for target -SYM1=kernel_clone +SYM1=$FUNCTION_FORK SYM2=do_exit EVENT_NAME=kprobes/testevent diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index c02ea50d63ea..fa928b431555 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -86,15 +86,21 @@ esac # multiprobe errors if grep -q "Create/append/" README && grep -q "imm-value" README; then -echo 'p:kprobes/testevent kernel_clone' > kprobe_events +echo "p:kprobes/testevent $FUNCTION_FORK" > kprobe_events check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE # Explicitly use printf "%s" to not interpret \1 -printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events -check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE +printf "%s" "p:kprobes/testevent $FUNCTION_FORK abcd=\\1" > kprobe_events +check_error "p:kprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE +check_error "^p:kprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE +fi + +# %return suffix errors +if grep -q "place (kretprobe): .*%return.*" README; then +check_error 'p vfs_read^%hoge' # BAD_ADDR_SUFFIX +check_error 'p ^vfs_read+10%return' # BAD_RETPROBE fi exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 7ae492c204a4..197cc2afd404 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -4,14 +4,14 @@ # requires: kprobe_events # Add new kretprobe event -echo 'r:testprobe2 kernel_clone $retval' > kprobe_events +echo "r:testprobe2 $FUNCTION_FORK \$retval" > kprobe_events grep testprobe2 kprobe_events | grep -q 'arg1=\$retval' test -d events/kprobes/testprobe2 echo 1 > events/kprobes/testprobe2/enable ( echo "forked") -cat trace | grep testprobe2 | grep -q '<- kernel_clone' +cat trace | grep testprobe2 | grep -q "<- $FUNCTION_FORK" echo 0 > events/kprobes/testprobe2/enable echo '-:testprobe2' >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc new file mode 100644 index 000000000000..f07bd15cc033 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc @@ -0,0 +1,21 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kretprobe %%return suffix test +# requires: kprobe_events '<symbol>[+<offset>]%return':README + +# Test for kretprobe by "r" +echo 'r:myprobeaccept vfs_read' > kprobe_events +RESULT1=`cat kprobe_events` + +# Test for kretprobe by "%return" +echo 'p:myprobeaccept vfs_read%return' > kprobe_events +RESULT2=`cat kprobe_events` + +if [ "$RESULT1" != "$RESULT2" ]; then + echo "Error: %return suffix didn't make a return probe." + echo "r-command: $RESULT1" + echo "%return: $RESULT2" + exit_fail +fi + +echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index c4093fc1a773..98166fa3eb91 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -4,7 +4,7 @@ # requires: kprobe_events ! grep -q 'myevent' kprobe_profile -echo p:myevent kernel_clone > kprobe_events +echo "p:myevent $FUNCTION_FORK" > kprobe_events grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile echo 1 > events/kprobes/myevent/enable ( echo "forked" ) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc index 7b5b60c3c5a2..f5e3f9e4a01f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc @@ -17,4 +17,10 @@ check_error 'p /bin/sh:10(10)^a' # BAD_REFCNT_SUFFIX check_error 'p /bin/sh:10 ^@+ab' # BAD_FILE_OFFS check_error 'p /bin/sh:10 ^@symbol' # SYM_ON_UPROBE +# %return suffix error +if grep -q "place (uprobe): .*%return.*" README; then +check_error 'p /bin/sh:10^%hoge' # BAD_ADDR_SUFFIX +check_error 'p /bin/sh:10(10)^%return' # BAD_REFCNT_SUFFIX +fi + exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc index 7449a4b8f1f9..9098f1e7433f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -25,12 +25,12 @@ echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger -echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events -echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger -echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger +echo 'waking_plus_wakeup_latency u64 lat; pid_t pid' >> synthetic_events +echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking_plus_wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger +echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking_plus_wakeup_latency/trigger ping $LOCALHOST -c 3 -if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then +if ! grep -q "pid:" events/synthetic/waking_plus_wakeup_latency/hist; then fail "Failed to create combined histogram" fi diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc new file mode 100644 index 000000000000..3d65c856eca3 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test inter-event histogram trigger trace action with dynamic string param +# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README + +fail() { #msg + echo $1 + exit_fail +} + +echo "Test create synthetic event" + +echo 'ping_test_latency u64 lat; char filename[]' > synthetic_events +if [ ! -d events/synthetic/ping_test_latency ]; then + fail "Failed to create ping_test_latency synthetic event" +fi + +echo "Test create histogram for synthetic event using trace action and dynamic strings" +echo "Test histogram dynamic string variables,simple expression support and trace action" + +echo 'hist:key=pid:filenamevar=filename:ts0=common_timestamp.usecs' > events/sched/sched_process_exec/trigger +echo 'hist:key=pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_process_exec).ping_test_latency($lat,$filenamevar) if comm == "ping"' > events/sched/sched_process_exit/trigger +echo 'hist:keys=filename,lat:sort=filename,lat' > events/synthetic/ping_test_latency/trigger + +ping $LOCALHOST -c 5 + +if ! grep -q "ping" events/synthetic/ping_test_latency/hist; then + fail "Failed to create dynamic string trace action inter-event histogram" +fi + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc new file mode 100644 index 000000000000..ada594fe16cb --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc @@ -0,0 +1,19 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test synthetic_events syntax parser errors +# requires: synthetic_events error_log + +check_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events' +} + +check_error 'myevent ^chr arg' # INVALID_TYPE +check_error 'myevent ^char str[];; int v' # INVALID_TYPE +check_error 'myevent char ^str]; int v' # INVALID_NAME +check_error 'myevent char ^str;[]' # INVALID_NAME +check_error 'myevent ^char str[; int v' # INVALID_TYPE +check_error '^mye;vent char str[]' # BAD_NAME +check_error 'myevent char str[]; ^int' # INVALID_FIELD +check_error '^myevent' # INCOMPLETE_CMD + +exit 0 diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index f19804df244c..edce85420d19 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -126,7 +126,7 @@ snprintf(_metadata->results->reason, \ sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \ if (TH_LOG_ENABLED) { \ - fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ + fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ _metadata->results->reason); \ } \ _metadata->passed = 1; \ @@ -432,7 +432,7 @@ */ /** - * ASSERT_EQ(expected, seen) + * ASSERT_EQ() * * @expected: expected value * @seen: measured value @@ -443,7 +443,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 1) /** - * ASSERT_NE(expected, seen) + * ASSERT_NE() * * @expected: expected value * @seen: measured value @@ -454,7 +454,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 1) /** - * ASSERT_LT(expected, seen) + * ASSERT_LT() * * @expected: expected value * @seen: measured value @@ -465,7 +465,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 1) /** - * ASSERT_LE(expected, seen) + * ASSERT_LE() * * @expected: expected value * @seen: measured value @@ -476,7 +476,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 1) /** - * ASSERT_GT(expected, seen) + * ASSERT_GT() * * @expected: expected value * @seen: measured value @@ -487,7 +487,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 1) /** - * ASSERT_GE(expected, seen) + * ASSERT_GE() * * @expected: expected value * @seen: measured value @@ -498,7 +498,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 1) /** - * ASSERT_NULL(seen) + * ASSERT_NULL() * * @seen: measured value * @@ -508,7 +508,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 1) /** - * ASSERT_TRUE(seen) + * ASSERT_TRUE() * * @seen: measured value * @@ -518,7 +518,7 @@ __EXPECT(0, "0", seen, #seen, !=, 1) /** - * ASSERT_FALSE(seen) + * ASSERT_FALSE() * * @seen: measured value * @@ -528,7 +528,7 @@ __EXPECT(0, "0", seen, #seen, ==, 1) /** - * ASSERT_STREQ(expected, seen) + * ASSERT_STREQ() * * @expected: expected value * @seen: measured value @@ -539,7 +539,7 @@ __EXPECT_STR(expected, seen, ==, 1) /** - * ASSERT_STRNE(expected, seen) + * ASSERT_STRNE() * * @expected: expected value * @seen: measured value @@ -550,7 +550,7 @@ __EXPECT_STR(expected, seen, !=, 1) /** - * EXPECT_EQ(expected, seen) + * EXPECT_EQ() * * @expected: expected value * @seen: measured value @@ -561,7 +561,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 0) /** - * EXPECT_NE(expected, seen) + * EXPECT_NE() * * @expected: expected value * @seen: measured value @@ -572,7 +572,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 0) /** - * EXPECT_LT(expected, seen) + * EXPECT_LT() * * @expected: expected value * @seen: measured value @@ -583,7 +583,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 0) /** - * EXPECT_LE(expected, seen) + * EXPECT_LE() * * @expected: expected value * @seen: measured value @@ -594,7 +594,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 0) /** - * EXPECT_GT(expected, seen) + * EXPECT_GT() * * @expected: expected value * @seen: measured value @@ -605,7 +605,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 0) /** - * EXPECT_GE(expected, seen) + * EXPECT_GE() * * @expected: expected value * @seen: measured value @@ -616,7 +616,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 0) /** - * EXPECT_NULL(seen) + * EXPECT_NULL() * * @seen: measured value * @@ -626,7 +626,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 0) /** - * EXPECT_TRUE(seen) + * EXPECT_TRUE() * * @seen: measured value * @@ -636,7 +636,7 @@ __EXPECT(0, "0", seen, #seen, !=, 0) /** - * EXPECT_FALSE(seen) + * EXPECT_FALSE() * * @seen: measured value * @@ -646,7 +646,7 @@ __EXPECT(0, "0", seen, #seen, ==, 0) /** - * EXPECT_STREQ(expected, seen) + * EXPECT_STREQ() * * @expected: expected value * @seen: measured value @@ -657,7 +657,7 @@ __EXPECT_STR(expected, seen, ==, 0) /** - * EXPECT_STRNE(expected, seen) + * EXPECT_STRNE() * * @expected: expected value * @seen: measured value diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 452787152748..7a2c242b7152 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,19 +1,24 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/get-reg-list +/aarch64/get-reg-list-sve /s390x/memop /s390x/resets /s390x/sync_regs_test /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test +/x86_64/kvm_pv_test /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test /x86_64/set_sregs_test /x86_64/smm_test /x86_64/state_test +/x86_64/user_msr_test /x86_64/vmx_preemption_timer_test /x86_64/svm_vmcall_test /x86_64/sync_regs_test +/x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test /x86_64/vmx_set_nested_state_test @@ -22,6 +27,7 @@ /clear_dirty_log_test /demand_paging_test /dirty_log_test +/dirty_log_perf_test /kvm_create_max_vcpus /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4a166588d99f..3d14ef77755e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -34,13 +34,14 @@ ifeq ($(ARCH),s390) endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c -LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c +LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test @@ -49,20 +50,24 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs -TEST_GEN_PROGS_x86_64 += clear_dirty_log_test +TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test +TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time -TEST_GEN_PROGS_aarch64 += clear_dirty_log_test +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus @@ -108,14 +113,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option) include ../lib.mk STATIC_LIBS := $(OUTPUT)/libkvm.a -LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) -EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.* +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.* + +x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) -$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) +$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS) $(AR) crs $@ $^ x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c new file mode 100644 index 000000000000..efba76682b4b --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define REG_LIST_SVE +#include "get-reg-list.c" diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c new file mode 100644 index 000000000000..33218a395d9f --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -0,0 +1,841 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * When attempting to migrate from a host with an older kernel to a host + * with a newer kernel we allow the newer kernel on the destination to + * list new registers with get-reg-list. We assume they'll be unused, at + * least until the guest reboots, and so they're relatively harmless. + * However, if the destination host with the newer kernel is missing + * registers which the source host with the older kernel has, then that's + * a regression in get-reg-list. This test checks for that regression by + * checking the current list against a blessed list. We should never have + * missing registers, but if new ones appear then they can probably be + * added to the blessed list. A completely new blessed list can be created + * by running the test with the --list command line argument. + * + * Note, the blessed list should be created from the oldest possible + * kernel. We can't go older than v4.15, though, because that's the first + * release to expose the ID system registers in KVM_GET_REG_LIST, see + * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features + * from guests"). Also, one must use the --core-reg-fixup command line + * option when running on an older kernel that doesn't include df205b5c6328 + * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST") + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +#ifdef REG_LIST_SVE +#define reg_list_sve() (true) +#else +#define reg_list_sve() (false) +#endif + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) + +#define for_each_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) + +#define for_each_missing_reg(i) \ + for ((i) = 0; (i) < blessed_n; ++(i)) \ + if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) + +#define for_each_new_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) \ + if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + + +static struct kvm_reg_list *reg_list; + +static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; +static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; +static __u64 *blessed_reg, blessed_n; + +static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +{ + int i; + + for (i = 0; i < nr_regs; ++i) + if (reg == regs[i]) + return true; + return false; +} + +static const char *str_with_index(const char *template, __u64 index) +{ + char *str, *p; + int n; + + str = strdup(template); + p = strstr(str, "##"); + n = sprintf(p, "%lld", index); + strcat(p + n, strstr(template, "##") + 2); + + return (const char *)str; +} + +#define CORE_REGS_XX_NR_WORDS 2 +#define CORE_SPSR_XX_NR_WORDS 2 +#define CORE_FPREGS_XX_NR_WORDS 4 + +static const char *core_id_to_str(__u64 id) +{ + __u64 core_off = id & ~REG_MASK, idx; + + /* + * core_off is the offset into struct kvm_regs + */ + switch (core_off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; + TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); + case KVM_REG_ARM_CORE_REG(regs.sp): + return "KVM_REG_ARM_CORE_REG(regs.sp)"; + case KVM_REG_ARM_CORE_REG(regs.pc): + return "KVM_REG_ARM_CORE_REG(regs.pc)"; + case KVM_REG_ARM_CORE_REG(regs.pstate): + return "KVM_REG_ARM_CORE_REG(regs.pstate)"; + case KVM_REG_ARM_CORE_REG(sp_el1): + return "KVM_REG_ARM_CORE_REG(sp_el1)"; + case KVM_REG_ARM_CORE_REG(elr_el1): + return "KVM_REG_ARM_CORE_REG(elr_el1)"; + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; + TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; + TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; + } + + TEST_FAIL("Unknown core reg id: 0x%llx", id); + return NULL; +} + +static const char *sve_id_to_str(__u64 id) +{ + __u64 sve_off, n, i; + + if (id == KVM_REG_ARM64_SVE_VLS) + return "KVM_REG_ARM64_SVE_VLS"; + + sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); + i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); + + TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id); + + switch (sve_off) { + case KVM_REG_ARM64_SVE_ZREG_BASE ... + KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), + "Unexpected bits set in SVE ZREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); + case KVM_REG_ARM64_SVE_PREG_BASE ... + KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), + "Unexpected bits set in SVE PREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); + case KVM_REG_ARM64_SVE_FFR_BASE: + TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), + "Unexpected bits set in SVE FFR id: 0x%llx", id); + return "KVM_REG_ARM64_SVE_FFR(0)"; + } + + return NULL; +} + +static void print_reg(__u64 id) +{ + unsigned op0, op1, crn, crm, op2; + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, + "KVM_REG_ARM64 missing in reg id: 0x%llx", id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U8: + reg_size = "KVM_REG_SIZE_U8"; + break; + case KVM_REG_SIZE_U16: + reg_size = "KVM_REG_SIZE_U16"; + break; + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; + case KVM_REG_SIZE_U512: + reg_size = "KVM_REG_SIZE_U512"; + break; + case KVM_REG_SIZE_U1024: + reg_size = "KVM_REG_SIZE_U1024"; + break; + case KVM_REG_SIZE_U2048: + reg_size = "KVM_REG_SIZE_U2048"; + break; + default: + TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + } + + switch (id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id)); + break; + case KVM_REG_ARM_DEMUX: + TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), + "Unexpected bits set in DEMUX reg id: 0x%llx", id); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", + reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); + break; + case KVM_REG_ARM64_SYSREG: + op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT; + op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT; + crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT; + crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; + op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; + TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), + "Unexpected bits set in SYSREG reg id: 0x%llx", id); + printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); + break; + case KVM_REG_ARM_FW: + TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), + "Unexpected bits set in FW reg id: 0x%llx", id); + printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); + break; + case KVM_REG_ARM64_SVE: + if (reg_list_sve()) + printf("\t%s,\n", sve_id_to_str(id)); + else + TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + break; + default: + TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + } +} + +/* + * Older kernels listed each 32-bit word of CORE registers separately. + * For 64 and 128-bit registers we need to ignore the extra words. We + * also need to fixup the sizes, because the older kernels stated all + * registers were 64-bit, even when they weren't. + */ +static void core_reg_fixup(void) +{ + struct kvm_reg_list *tmp; + __u64 id, core_off; + int i; + + tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64)); + + for (i = 0; i < reg_list->n; ++i) { + id = reg_list->reg[i]; + + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) { + tmp->reg[tmp->n++] = id; + continue; + } + + core_off = id & ~REG_MASK; + + switch (core_off) { + case 0x52: case 0xd2: case 0xd6: + /* + * These offsets are pointing at padding. + * We need to ignore them too. + */ + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + if (core_off & 3) + continue; + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U128; + tmp->reg[tmp->n++] = id; + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U32; + tmp->reg[tmp->n++] = id; + continue; + default: + if (core_off & 1) + continue; + tmp->reg[tmp->n++] = id; + break; + } + } + + free(reg_list); + reg_list = tmp; +} + +static void prepare_vcpu_init(struct kvm_vcpu_init *init) +{ + if (reg_list_sve()) + init->features[0] |= 1 << KVM_ARM_VCPU_SVE; +} + +static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + int feature; + + if (reg_list_sve()) { + feature = KVM_ARM_VCPU_SVE; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + } +} + +static void check_supported(void) +{ + if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) { + fprintf(stderr, "SVE not available, skipping tests\n"); + exit(KSFT_SKIP); + } +} + +int main(int ac, char **av) +{ + struct kvm_vcpu_init init = { .target = -1, }; + int new_regs = 0, missing_regs = 0, i; + int failed_get = 0, failed_set = 0, failed_reject = 0; + bool print_list = false, fixup_core_regs = false; + struct kvm_vm *vm; + __u64 *vec_regs; + + check_supported(); + + for (i = 1; i < ac; ++i) { + if (strcmp(av[i], "--core-reg-fixup") == 0) + fixup_core_regs = true; + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else + fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); + } + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + prepare_vcpu_init(&init); + aarch64_vcpu_add_default(vm, 0, &init, NULL); + finalize_vcpu(vm, 0); + + reg_list = vcpu_get_reg_list(vm, 0); + + if (fixup_core_regs) + core_reg_fixup(); + + if (print_list) { + putchar('\n'); + for_each_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + return 0; + } + + /* + * We only test that we can get the register and then write back the + * same value. Some registers may allow other values to be written + * back, but others only allow some bits to be changed, and at least + * for ID registers set will fail if the value does not exactly match + * what was returned by get. If registers that allow other values to + * be written need to have the other values tested, then we should + * create a new set of tests for those in a new independent test + * executable. + */ + for_each_reg(i) { + uint8_t addr[2048 / 8]; + struct kvm_one_reg reg = { + .id = reg_list->reg[i], + .addr = (__u64)&addr, + }; + int ret; + + ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); + if (ret) { + puts("Failed to get "); + print_reg(reg.id); + putchar('\n'); + ++failed_get; + } + + /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ + if (find_reg(rejects_set, rejects_set_n, reg.id)) { + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret != -1 || errno != EPERM) { + printf("Failed to reject (ret=%d, errno=%d) ", ret, errno); + print_reg(reg.id); + putchar('\n'); + ++failed_reject; + } + continue; + } + + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret) { + puts("Failed to set "); + print_reg(reg.id); + putchar('\n'); + ++failed_set; + } + } + + if (reg_list_sve()) { + blessed_n = base_regs_n + sve_regs_n; + vec_regs = sve_regs; + } else { + blessed_n = base_regs_n + vregs_n; + vec_regs = vregs; + } + + blessed_reg = calloc(blessed_n, sizeof(__u64)); + for (i = 0; i < base_regs_n; ++i) + blessed_reg[i] = base_regs[i]; + for (i = 0; i < blessed_n - base_regs_n; ++i) + blessed_reg[base_regs_n + i] = vec_regs[i]; + + for_each_new_reg(i) + ++new_regs; + + for_each_missing_reg(i) + ++missing_regs; + + if (new_regs || missing_regs) { + printf("Number blessed registers: %5lld\n", blessed_n); + printf("Number registers: %5lld\n", reg_list->n); + } + + if (new_regs) { + printf("\nThere are %d new registers.\n" + "Consider adding them to the blessed reg " + "list with the following lines:\n\n", new_regs); + for_each_new_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + } + + if (missing_regs) { + printf("\nThere are %d missing registers.\n" + "The following lines are missing registers:\n\n", missing_regs); + for_each_missing_reg(i) + print_reg(blessed_reg[i]); + putchar('\n'); + } + + TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, + "There are %d missing registers; " + "%d registers failed get; %d registers failed set; %d registers failed reject", + missing_regs, failed_get, failed_set, failed_reject); + + return 0; +} + +/* + * The current blessed list was primed with the output of kernel version + * v4.15 with --core-reg-fixup and then later updated with new registers. + */ +static __u64 base_regs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), + KVM_REG_ARM_FW_REG(0), + KVM_REG_ARM_FW_REG(1), + KVM_REG_ARM_FW_REG(2), + ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 0, 2), + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */ + ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */ + ARM64_SYS_REG(2, 0, 0, 0, 4), + ARM64_SYS_REG(2, 0, 0, 0, 5), + ARM64_SYS_REG(2, 0, 0, 0, 6), + ARM64_SYS_REG(2, 0, 0, 0, 7), + ARM64_SYS_REG(2, 0, 0, 1, 4), + ARM64_SYS_REG(2, 0, 0, 1, 5), + ARM64_SYS_REG(2, 0, 0, 1, 6), + ARM64_SYS_REG(2, 0, 0, 1, 7), + ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 4), + ARM64_SYS_REG(2, 0, 0, 2, 5), + ARM64_SYS_REG(2, 0, 0, 2, 6), + ARM64_SYS_REG(2, 0, 0, 2, 7), + ARM64_SYS_REG(2, 0, 0, 3, 4), + ARM64_SYS_REG(2, 0, 0, 3, 5), + ARM64_SYS_REG(2, 0, 0, 3, 6), + ARM64_SYS_REG(2, 0, 0, 3, 7), + ARM64_SYS_REG(2, 0, 0, 4, 4), + ARM64_SYS_REG(2, 0, 0, 4, 5), + ARM64_SYS_REG(2, 0, 0, 4, 6), + ARM64_SYS_REG(2, 0, 0, 4, 7), + ARM64_SYS_REG(2, 0, 0, 5, 4), + ARM64_SYS_REG(2, 0, 0, 5, 5), + ARM64_SYS_REG(2, 0, 0, 5, 6), + ARM64_SYS_REG(2, 0, 0, 5, 7), + ARM64_SYS_REG(2, 0, 0, 6, 4), + ARM64_SYS_REG(2, 0, 0, 6, 5), + ARM64_SYS_REG(2, 0, 0, 6, 6), + ARM64_SYS_REG(2, 0, 0, 6, 7), + ARM64_SYS_REG(2, 0, 0, 7, 4), + ARM64_SYS_REG(2, 0, 0, 7, 5), + ARM64_SYS_REG(2, 0, 0, 7, 6), + ARM64_SYS_REG(2, 0, 0, 7, 7), + ARM64_SYS_REG(2, 0, 0, 8, 4), + ARM64_SYS_REG(2, 0, 0, 8, 5), + ARM64_SYS_REG(2, 0, 0, 8, 6), + ARM64_SYS_REG(2, 0, 0, 8, 7), + ARM64_SYS_REG(2, 0, 0, 9, 4), + ARM64_SYS_REG(2, 0, 0, 9, 5), + ARM64_SYS_REG(2, 0, 0, 9, 6), + ARM64_SYS_REG(2, 0, 0, 9, 7), + ARM64_SYS_REG(2, 0, 0, 10, 4), + ARM64_SYS_REG(2, 0, 0, 10, 5), + ARM64_SYS_REG(2, 0, 0, 10, 6), + ARM64_SYS_REG(2, 0, 0, 10, 7), + ARM64_SYS_REG(2, 0, 0, 11, 4), + ARM64_SYS_REG(2, 0, 0, 11, 5), + ARM64_SYS_REG(2, 0, 0, 11, 6), + ARM64_SYS_REG(2, 0, 0, 11, 7), + ARM64_SYS_REG(2, 0, 0, 12, 4), + ARM64_SYS_REG(2, 0, 0, 12, 5), + ARM64_SYS_REG(2, 0, 0, 12, 6), + ARM64_SYS_REG(2, 0, 0, 12, 7), + ARM64_SYS_REG(2, 0, 0, 13, 4), + ARM64_SYS_REG(2, 0, 0, 13, 5), + ARM64_SYS_REG(2, 0, 0, 13, 6), + ARM64_SYS_REG(2, 0, 0, 13, 7), + ARM64_SYS_REG(2, 0, 0, 14, 4), + ARM64_SYS_REG(2, 0, 0, 14, 5), + ARM64_SYS_REG(2, 0, 0, 14, 6), + ARM64_SYS_REG(2, 0, 0, 14, 7), + ARM64_SYS_REG(2, 0, 0, 15, 4), + ARM64_SYS_REG(2, 0, 0, 15, 5), + ARM64_SYS_REG(2, 0, 0, 15, 6), + ARM64_SYS_REG(2, 0, 0, 15, 7), + ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ + ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 3), + ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 7), + ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 2), + ARM64_SYS_REG(3, 0, 0, 4, 3), + ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 5), + ARM64_SYS_REG(3, 0, 0, 4, 6), + ARM64_SYS_REG(3, 0, 0, 4, 7), + ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 2), + ARM64_SYS_REG(3, 0, 0, 5, 3), + ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 6), + ARM64_SYS_REG(3, 0, 0, 5, 7), + ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 2), + ARM64_SYS_REG(3, 0, 0, 6, 3), + ARM64_SYS_REG(3, 0, 0, 6, 4), + ARM64_SYS_REG(3, 0, 0, 6, 5), + ARM64_SYS_REG(3, 0, 0, 6, 6), + ARM64_SYS_REG(3, 0, 0, 6, 7), + ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), + ARM64_SYS_REG(3, 0, 0, 7, 4), + ARM64_SYS_REG(3, 0, 0, 7, 5), + ARM64_SYS_REG(3, 0, 0, 7, 6), + ARM64_SYS_REG(3, 0, 0, 7, 7), + ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */ + ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ + ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ + ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ + ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 3, 14, 8, 0), + ARM64_SYS_REG(3, 3, 14, 8, 1), + ARM64_SYS_REG(3, 3, 14, 8, 2), + ARM64_SYS_REG(3, 3, 14, 8, 3), + ARM64_SYS_REG(3, 3, 14, 8, 4), + ARM64_SYS_REG(3, 3, 14, 8, 5), + ARM64_SYS_REG(3, 3, 14, 8, 6), + ARM64_SYS_REG(3, 3, 14, 8, 7), + ARM64_SYS_REG(3, 3, 14, 9, 0), + ARM64_SYS_REG(3, 3, 14, 9, 1), + ARM64_SYS_REG(3, 3, 14, 9, 2), + ARM64_SYS_REG(3, 3, 14, 9, 3), + ARM64_SYS_REG(3, 3, 14, 9, 4), + ARM64_SYS_REG(3, 3, 14, 9, 5), + ARM64_SYS_REG(3, 3, 14, 9, 6), + ARM64_SYS_REG(3, 3, 14, 9, 7), + ARM64_SYS_REG(3, 3, 14, 10, 0), + ARM64_SYS_REG(3, 3, 14, 10, 1), + ARM64_SYS_REG(3, 3, 14, 10, 2), + ARM64_SYS_REG(3, 3, 14, 10, 3), + ARM64_SYS_REG(3, 3, 14, 10, 4), + ARM64_SYS_REG(3, 3, 14, 10, 5), + ARM64_SYS_REG(3, 3, 14, 10, 6), + ARM64_SYS_REG(3, 3, 14, 10, 7), + ARM64_SYS_REG(3, 3, 14, 11, 0), + ARM64_SYS_REG(3, 3, 14, 11, 1), + ARM64_SYS_REG(3, 3, 14, 11, 2), + ARM64_SYS_REG(3, 3, 14, 11, 3), + ARM64_SYS_REG(3, 3, 14, 11, 4), + ARM64_SYS_REG(3, 3, 14, 11, 5), + ARM64_SYS_REG(3, 3, 14, 11, 6), + ARM64_SYS_REG(3, 3, 14, 12, 0), + ARM64_SYS_REG(3, 3, 14, 12, 1), + ARM64_SYS_REG(3, 3, 14, 12, 2), + ARM64_SYS_REG(3, 3, 14, 12, 3), + ARM64_SYS_REG(3, 3, 14, 12, 4), + ARM64_SYS_REG(3, 3, 14, 12, 5), + ARM64_SYS_REG(3, 3, 14, 12, 6), + ARM64_SYS_REG(3, 3, 14, 12, 7), + ARM64_SYS_REG(3, 3, 14, 13, 0), + ARM64_SYS_REG(3, 3, 14, 13, 1), + ARM64_SYS_REG(3, 3, 14, 13, 2), + ARM64_SYS_REG(3, 3, 14, 13, 3), + ARM64_SYS_REG(3, 3, 14, 13, 4), + ARM64_SYS_REG(3, 3, 14, 13, 5), + ARM64_SYS_REG(3, 3, 14, 13, 6), + ARM64_SYS_REG(3, 3, 14, 13, 7), + ARM64_SYS_REG(3, 3, 14, 14, 0), + ARM64_SYS_REG(3, 3, 14, 14, 1), + ARM64_SYS_REG(3, 3, 14, 14, 2), + ARM64_SYS_REG(3, 3, 14, 14, 3), + ARM64_SYS_REG(3, 3, 14, 14, 4), + ARM64_SYS_REG(3, 3, 14, 14, 5), + ARM64_SYS_REG(3, 3, 14, 14, 6), + ARM64_SYS_REG(3, 3, 14, 14, 7), + ARM64_SYS_REG(3, 3, 14, 15, 0), + ARM64_SYS_REG(3, 3, 14, 15, 1), + ARM64_SYS_REG(3, 3, 14, 15, 2), + ARM64_SYS_REG(3, 3, 14, 15, 3), + ARM64_SYS_REG(3, 3, 14, 15, 4), + ARM64_SYS_REG(3, 3, 14, 15, 5), + ARM64_SYS_REG(3, 3, 14, 15, 6), + ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ + ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, +}; +static __u64 base_regs_n = ARRAY_SIZE(base_regs); + +static __u64 vregs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), +}; +static __u64 vregs_n = ARRAY_SIZE(vregs); + +static __u64 sve_regs[] = { + KVM_REG_ARM64_SVE_VLS, + KVM_REG_ARM64_SVE_ZREG(0, 0), + KVM_REG_ARM64_SVE_ZREG(1, 0), + KVM_REG_ARM64_SVE_ZREG(2, 0), + KVM_REG_ARM64_SVE_ZREG(3, 0), + KVM_REG_ARM64_SVE_ZREG(4, 0), + KVM_REG_ARM64_SVE_ZREG(5, 0), + KVM_REG_ARM64_SVE_ZREG(6, 0), + KVM_REG_ARM64_SVE_ZREG(7, 0), + KVM_REG_ARM64_SVE_ZREG(8, 0), + KVM_REG_ARM64_SVE_ZREG(9, 0), + KVM_REG_ARM64_SVE_ZREG(10, 0), + KVM_REG_ARM64_SVE_ZREG(11, 0), + KVM_REG_ARM64_SVE_ZREG(12, 0), + KVM_REG_ARM64_SVE_ZREG(13, 0), + KVM_REG_ARM64_SVE_ZREG(14, 0), + KVM_REG_ARM64_SVE_ZREG(15, 0), + KVM_REG_ARM64_SVE_ZREG(16, 0), + KVM_REG_ARM64_SVE_ZREG(17, 0), + KVM_REG_ARM64_SVE_ZREG(18, 0), + KVM_REG_ARM64_SVE_ZREG(19, 0), + KVM_REG_ARM64_SVE_ZREG(20, 0), + KVM_REG_ARM64_SVE_ZREG(21, 0), + KVM_REG_ARM64_SVE_ZREG(22, 0), + KVM_REG_ARM64_SVE_ZREG(23, 0), + KVM_REG_ARM64_SVE_ZREG(24, 0), + KVM_REG_ARM64_SVE_ZREG(25, 0), + KVM_REG_ARM64_SVE_ZREG(26, 0), + KVM_REG_ARM64_SVE_ZREG(27, 0), + KVM_REG_ARM64_SVE_ZREG(28, 0), + KVM_REG_ARM64_SVE_ZREG(29, 0), + KVM_REG_ARM64_SVE_ZREG(30, 0), + KVM_REG_ARM64_SVE_ZREG(31, 0), + KVM_REG_ARM64_SVE_PREG(0, 0), + KVM_REG_ARM64_SVE_PREG(1, 0), + KVM_REG_ARM64_SVE_PREG(2, 0), + KVM_REG_ARM64_SVE_PREG(3, 0), + KVM_REG_ARM64_SVE_PREG(4, 0), + KVM_REG_ARM64_SVE_PREG(5, 0), + KVM_REG_ARM64_SVE_PREG(6, 0), + KVM_REG_ARM64_SVE_PREG(7, 0), + KVM_REG_ARM64_SVE_PREG(8, 0), + KVM_REG_ARM64_SVE_PREG(9, 0), + KVM_REG_ARM64_SVE_PREG(10, 0), + KVM_REG_ARM64_SVE_PREG(11, 0), + KVM_REG_ARM64_SVE_PREG(12, 0), + KVM_REG_ARM64_SVE_PREG(13, 0), + KVM_REG_ARM64_SVE_PREG(14, 0), + KVM_REG_ARM64_SVE_PREG(15, 0), + KVM_REG_ARM64_SVE_FFR(0), + ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ +}; +static __u64 sve_regs_n = ARRAY_SIZE(sve_regs); + +static __u64 rejects_set[] = { +#ifdef REG_LIST_SVE + KVM_REG_ARM64_SVE_VLS, +#endif +}; +static __u64 rejects_set_n = ARRAY_SIZE(rejects_set); diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c deleted file mode 100644 index 11672ec6f74e..000000000000 --- a/tools/testing/selftests/kvm/clear_dirty_log_test.c +++ /dev/null @@ -1,6 +0,0 @@ -#define USE_CLEAR_DIRTY_LOG -#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) -#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) -#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ - KVM_DIRTY_LOG_INITIALLY_SET) -#include "dirty_log_test.c" diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 360cd3ea4cd6..3d96a7bfaff3 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -21,20 +21,12 @@ #include <linux/bitops.h> #include <linux/userfaultfd.h> -#include "test_util.h" -#include "kvm_util.h" +#include "perf_test_util.h" #include "processor.h" +#include "test_util.h" #ifdef __NR_userfaultfd -/* The memory slot index demand page */ -#define TEST_MEM_SLOT_INDEX 1 - -/* Default guest test virtual memory offset */ -#define DEFAULT_GUEST_TEST_MEM 0xc0000000 - -#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ - #ifdef PRINT_PER_PAGE_UPDATES #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) #else @@ -47,77 +39,17 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif -#define MAX_VCPUS 512 - -/* - * Guest/Host shared variables. Ensure addr_gva2hva() and/or - * sync_global_to/from_guest() are used when accessing from - * the host. READ/WRITE_ONCE() should also be used with anything - * that may change. - */ -static uint64_t host_page_size; -static uint64_t guest_page_size; - static char *guest_data_prototype; -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; - -struct vcpu_args { - uint64_t gva; - uint64_t pages; - - /* Only used by the host userspace part of the vCPU thread */ - int vcpu_id; - struct kvm_vm *vm; -}; - -static struct vcpu_args vcpu_args[MAX_VCPUS]; - -/* - * Continuously write to the first 8 bytes of each page in the demand paging - * memory region. - */ -static void guest_code(uint32_t vcpu_id) -{ - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id); - - gva = vcpu_args[vcpu_id].gva; - pages = vcpu_args[vcpu_id].pages; - - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * guest_page_size); - - addr &= ~(host_page_size - 1); - *(uint64_t *)addr = 0x0123456789ABCDEF; - } - - GUEST_SYNC(1); -} - static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *args = (struct vcpu_args *)data; - struct kvm_vm *vm = args->vm; - int vcpu_id = args->vcpu_id; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); @@ -133,52 +65,18 @@ static void *vcpu_worker(void *data) exit_reason_str(run->exit_reason)); } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); return NULL; } -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; - - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - vm = _vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - return vm; -} - static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; struct timespec start; - struct timespec end; + struct timespec ts_diff; struct uffdio_copy copy; int r; @@ -186,7 +84,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) copy.src = (uint64_t)guest_data_prototype; copy.dst = addr; - copy.len = host_page_size; + copy.len = perf_test_args.host_page_size; copy.mode = 0; clock_gettime(CLOCK_MONOTONIC, &start); @@ -198,12 +96,12 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) return r; } - clock_gettime(CLOCK_MONOTONIC, &end); + ts_diff = timespec_diff_now(start); PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, - timespec_to_ns(timespec_sub(end, start))); + timespec_to_ns(ts_diff)); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", - host_page_size, addr, tid); + perf_test_args.host_page_size, addr, tid); return 0; } @@ -223,7 +121,8 @@ static void *uffd_handler_thread_fn(void *arg) int pipefd = uffd_args->pipefd; useconds_t delay = uffd_args->delay; int64_t pages = 0; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; clock_gettime(CLOCK_MONOTONIC, &start); while (!quit_uffd_thread) { @@ -292,8 +191,7 @@ static void *uffd_handler_thread_fn(void *arg) pages++; } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", pages, ts_diff.tv_sec, ts_diff.tv_nsec, pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); @@ -351,99 +249,54 @@ static int setup_demand_paging(struct kvm_vm *vm, } static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, int vcpus, - uint64_t vcpu_memory_bytes) + useconds_t uffd_delay) { pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; int *pipefds = NULL; struct kvm_vm *vm; - uint64_t guest_num_pages; int vcpu_id; int r; - vm = create_vm(mode, vcpus, vcpu_memory_bytes); - - guest_page_size = vm_get_page_size(vm); - - TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - host_page_size = getpagesize(); - TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0, - "Guest memory size is not host page size aligned."); + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - guest_page_size; - guest_test_phys_mem &= ~(host_page_size - 1); + perf_test_args.wr_fract = 1; -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing demand paging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - guest_data_prototype = malloc(host_page_size); + guest_data_prototype = malloc(perf_test_args.host_page_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); - memset(guest_data_prototype, 0xAB, host_page_size); + memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); - vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + if (use_uffd) { uffd_handler_threads = - malloc(vcpus * sizeof(*uffd_handler_threads)); + malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - uffd_args = malloc(vcpus * sizeof(*uffd_args)); + uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); TEST_ASSERT(uffd_args, "Memory allocation failed"); - pipefds = malloc(sizeof(int) * vcpus * 2); + pipefds = malloc(sizeof(int) * nr_vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - } - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vm_paddr_t vcpu_gpa; - void *vcpu_hva; - vm_vcpu_add_default(vm, vcpu_id, guest_code); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + vm_paddr_t vcpu_gpa; + void *vcpu_hva; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size); + PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size); - /* Cache the HVA pointer of the region */ - vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); + /* Cache the HVA pointer of the region */ + vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); - if (use_uffd) { /* * Set up user fault fd to handle demand paging * requests. @@ -456,53 +309,41 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], uffd_delay, &uffd_args[vcpu_id], - vcpu_hva, vcpu_memory_bytes); + vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); } - -#ifdef __x86_64__ - vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); -#endif - - vcpu_args[vcpu_id].vm = vm; - vcpu_args[vcpu_id].vcpu_id = vcpu_id; - vcpu_args[vcpu_id].gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size; } /* Export the shared variables to the guest */ - sync_global_to_guest(vm, host_page_size); - sync_global_to_guest(vm, guest_page_size); - sync_global_to_guest(vm, vcpu_args); + sync_global_to_guest(vm, perf_test_args); pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &vcpu_args[vcpu_id]); + &perf_test_args.vcpu_args[vcpu_id]); } pr_info("Started all vCPUs\n"); /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_join(vcpu_threads[vcpu_id], NULL); PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } - pr_info("All vCPU threads joined\n"); + ts_diff = timespec_diff_now(start); - clock_gettime(CLOCK_MONOTONIC, &end); + pr_info("All vCPU threads joined\n"); if (use_uffd) { char c; /* Tell the user fault fd handler threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { r = write(pipefds[vcpu_id * 2 + 1], &c, 1); TEST_ASSERT(r == 1, "Unable to write to pipefd"); @@ -510,11 +351,11 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, } } - ts_diff = timespec_sub(end, start); pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + perf_test_args.vcpu_args[0].pages * nr_vcpus / + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); kvm_vm_free(vm); @@ -568,9 +409,8 @@ static void help(char *name) int main(int argc, char *argv[]) { + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); bool mode_selected = false; - uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE; - int vcpus = 1; unsigned int mode; int opt, i; bool use_uffd = false; @@ -619,15 +459,12 @@ int main(int argc, char *argv[]) "A negative UFFD delay is not supported."); break; case 'b': - vcpu_memory_bytes = parse_size(optarg); + guest_percpu_mem_size = parse_size(optarg); break; case 'v': - vcpus = atoi(optarg); - TEST_ASSERT(vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -642,7 +479,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes); + run_test(i, use_uffd, uffd_delay); } return 0; diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c new file mode 100644 index 000000000000..85c9b8f73142 --- /dev/null +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging performance test + * + * Based on dirty_log_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2020, Google, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <pthread.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" +#include "test_util.h" + +/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ +#define TEST_HOST_LOOP_N 2UL + +/* Host variables */ +static bool host_quit; +static uint64_t iteration; +static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; + +static void *vcpu_worker(void *data) +{ + int ret; + struct kvm_vm *vm = perf_test_args.vm; + uint64_t pages_count = 0; + struct kvm_run *run; + struct timespec start; + struct timespec ts_diff; + struct timespec total = (struct timespec){0}; + struct timespec avg; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + run = vcpu_state(vm, vcpu_id); + + while (!READ_ONCE(host_quit)) { + uint64_t current_iteration = READ_ONCE(iteration); + + clock_gettime(CLOCK_MONOTONIC, &start); + ret = _vcpu_run(vm, vcpu_id); + ts_diff = timespec_diff_now(start); + + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + + pr_debug("Got sync event from vCPU %d\n", vcpu_id); + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + pr_debug("vCPU %d updated last completed iteration to %lu\n", + vcpu_id, vcpu_last_completed_iteration[vcpu_id]); + + if (current_iteration) { + pages_count += vcpu_args->pages; + total = timespec_add(total, ts_diff); + pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } else { + pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } + + while (current_iteration == READ_ONCE(iteration) && + !READ_ONCE(host_quit)) {} + } + + avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); + pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], + total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); + + return NULL; +} + +#ifdef USE_CLEAR_DIRTY_LOG +static u64 dirty_log_manual_caps; +#endif + +static void run_test(enum vm_guest_mode mode, unsigned long iterations, + uint64_t phys_offset, int wr_fract) +{ + pthread_t *vcpu_threads; + struct kvm_vm *vm; + unsigned long *bmap; + uint64_t guest_num_pages; + uint64_t host_num_pages; + int vcpu_id; + struct timespec start; + struct timespec ts_diff; + struct timespec get_dirty_log_total = (struct timespec){0}; + struct timespec vcpu_dirty_total = (struct timespec){0}; + struct timespec avg; +#ifdef USE_CLEAR_DIRTY_LOG + struct kvm_enable_cap cap = {}; + struct timespec clear_dirty_log_total = (struct timespec){0}; +#endif + + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + + perf_test_args.wr_fract = wr_fract; + + guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + host_num_pages = vm_num_host_pages(mode, guest_num_pages); + bmap = bitmap_alloc(host_num_pages); + +#ifdef USE_CLEAR_DIRTY_LOG + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = dirty_log_manual_caps; + vm_enable_cap(vm, &cap); +#endif + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + + sync_global_to_guest(vm, perf_test_args); + + /* Start the iterations */ + iteration = 0; + host_quit = false; + + clock_gettime(CLOCK_MONOTONIC, &start); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); + } + + /* Allow the vCPU to populate memory */ + pr_debug("Starting iteration %lu - Populating\n", iteration); + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n", + iteration); + + ts_diff = timespec_diff_now(start); + pr_info("Populate memory time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Enable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + KVM_MEM_LOG_DIRTY_PAGES); + ts_diff = timespec_diff_now(start); + pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + while (iteration < iterations) { + /* + * Incrementing the iteration number will start the vCPUs + * dirtying memory again. + */ + clock_gettime(CLOCK_MONOTONIC, &start); + iteration++; + + pr_debug("Starting iteration %lu\n", iteration); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n", + vcpu_id, iteration); + } + + ts_diff = timespec_diff_now(start); + vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff); + pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + + ts_diff = timespec_diff_now(start); + get_dirty_log_total = timespec_add(get_dirty_log_total, + ts_diff); + pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + host_num_pages); + + ts_diff = timespec_diff_now(start); + clear_dirty_log_total = timespec_add(clear_dirty_log_total, + ts_diff); + pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); +#endif + } + + /* Tell the vcpu thread to quit */ + host_quit = true; + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + /* Disable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + ts_diff = timespec_diff_now(start); + pr_info("Disabling dirty logging time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + avg = timespec_div(get_dirty_log_total, iterations); + pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, get_dirty_log_total.tv_sec, + get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + avg = timespec_div(clear_dirty_log_total, iterations); + pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, clear_dirty_log_total.tv_sec, + clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); +#endif + + free(bmap); + free(vcpu_threads); + ucall_uninit(vm); + kvm_vm_free(vm); +} + +struct guest_mode { + bool supported; + bool enabled; +}; +static struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_init(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +static void help(char *name) +{ + int i; + + puts(""); + printf("usage: %s [-h] [-i iterations] [-p offset] " + "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -p: specify guest physical test memory offset\n" + " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -m: specify the guest mode ID to test " + "(default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -f: specify the fraction of pages which should be written to\n" + " as opposed to simply read, in the form\n" + " 1/<fraction of pages to write>.\n" + " (default: 1 i.e. all pages are written to.)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + unsigned long iterations = TEST_HOST_LOOP_N; + bool mode_selected = false; + uint64_t phys_offset = 0; + unsigned int mode; + int opt, i; + int wr_fract = 1; + +#ifdef USE_CLEAR_DIRTY_LOG + dirty_log_manual_caps = + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + if (!dirty_log_manual_caps) { + print_skip("KVM_CLEAR_DIRTY_LOG not available"); + exit(KSFT_SKIP); + } + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); +#endif + +#ifdef __x86_64__ + guest_mode_init(VM_MODE_PXXV48_4K, true, true); +#endif +#ifdef __aarch64__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); + guest_mode_init(VM_MODE_P40V48_64K, true, true); + + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + if (limit >= 52) + guest_mode_init(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_init(VM_MODE_P48V48_4K, true, true); + guest_mode_init(VM_MODE_P48V48_64K, true, true); + } + } +#endif +#ifdef __s390x__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); +#endif + + while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'p': + phys_offset = strtoull(optarg, NULL, 0); + break; + case 'm': + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, + "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'f': + wr_fract = atoi(optarg); + TEST_ASSERT(wr_fract >= 1, + "Write fraction cannot be less than one"); + break; + case 'v': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0, + "Must have a positive number of vCPUs"); + TEST_ASSERT(nr_vcpus <= MAX_VCPUS, + "This test does not currently support\n" + "more than %d vCPUs.", MAX_VCPUS); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); + + pr_info("Test iterations: %"PRIu64"\n", iterations); + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + run_test(i, iterations, phys_offset, wr_fract); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 752ec158ac59..54da9cc20db4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -128,6 +128,78 @@ static uint64_t host_dirty_count; static uint64_t host_clear_count; static uint64_t host_track_next_count; +enum log_mode_t { + /* Only use KVM_GET_DIRTY_LOG for logging */ + LOG_MODE_DIRTY_LOG = 0, + + /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */ + LOG_MODE_CLEAR_LOG = 1, + + LOG_MODE_NUM, + + /* Run all supported modes */ + LOG_MODE_ALL = LOG_MODE_NUM, +}; + +/* Mode of logging to test. Default is to run all supported modes */ +static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; +/* Logging mode for current run */ +static enum log_mode_t host_log_mode; + +static bool clear_log_supported(void) +{ + return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); +} + +static void clear_log_create_vm_done(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = {}; + u64 manual_caps; + + manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!"); + manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = manual_caps; + vm_enable_cap(vm, &cap); +} + +static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); +} + +static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); + kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); +} + +struct log_mode { + const char *name; + /* Return true if this mode is supported, otherwise false */ + bool (*supported)(void); + /* Hook when the vm creation is done (before vcpu creation) */ + void (*create_vm_done)(struct kvm_vm *vm); + /* Hook to collect the dirty pages into the bitmap provided */ + void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages); +} log_modes[LOG_MODE_NUM] = { + { + .name = "dirty-log", + .collect_dirty_pages = dirty_log_collect_dirty_pages, + }, + { + .name = "clear-log", + .supported = clear_log_supported, + .create_vm_done = clear_log_create_vm_done, + .collect_dirty_pages = clear_log_collect_dirty_pages, + }, +}; + /* * We use this bitmap to track some pages that should have its dirty * bit set in the _next_ iteration. For example, if we detected the @@ -137,6 +209,44 @@ static uint64_t host_track_next_count; */ static unsigned long *host_bmap_track; +static void log_modes_dump(void) +{ + int i; + + printf("all"); + for (i = 0; i < LOG_MODE_NUM; i++) + printf(", %s", log_modes[i].name); + printf("\n"); +} + +static bool log_mode_supported(void) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->supported) + return mode->supported(); + + return true; +} + +static void log_mode_create_vm_done(struct kvm_vm *vm) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->create_vm_done) + mode->create_vm_done(vm); +} + +static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + TEST_ASSERT(mode->collect_dirty_pages != NULL, + "collect_dirty_pages() is required for any log mode!"); + mode->collect_dirty_pages(vm, slot, bitmap, num_pages); +} + static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -195,7 +305,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) page); } - if (test_bit_le(page, bmap)) { + if (test_and_clear_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto @@ -252,11 +362,12 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); #endif + log_mode_create_vm_done(vm); vm_vcpu_add_default(vm, vcpuid, guest_code); return vm; } @@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -#ifdef USE_CLEAR_DIRTY_LOG -static u64 dirty_log_manual_caps; -#endif - static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { @@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_vm *vm; unsigned long *bmap; + if (!log_mode_supported()) { + print_skip("Log mode '%s' not supported", + log_modes[host_log_mode].name); + return; + } + /* * We reserve page table for 2 times of extra dirty mem which * will definitely cover the original (1G+) test range. Here @@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); -#ifdef USE_CLEAR_DIRTY_LOG - struct kvm_enable_cap cap = {}; - - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); -#endif - /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, guest_test_phys_mem, @@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, while (iteration < iterations) { /* Give the vcpu thread some time to dirty some pages */ usleep(interval * 1000); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); -#ifdef USE_CLEAR_DIRTY_LOG - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - host_num_pages); -#endif + log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, + bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); iteration++; sync_global_to_guest(vm, iteration); @@ -410,6 +512,9 @@ static void help(char *name) TEST_HOST_LOOP_INTERVAL); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -M: specify the host logging mode " + "(default: run all log modes). Supported modes: \n\t"); + log_modes_dump(); printf(" -m: specify the guest mode ID to test " "(default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -429,18 +534,7 @@ int main(int argc, char *argv[]) bool mode_selected = false; uint64_t phys_offset = 0; unsigned int mode; - int opt, i; - -#ifdef USE_CLEAR_DIRTY_LOG - dirty_log_manual_caps = - kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - if (!dirty_log_manual_caps) { - print_skip("KVM_CLEAR_DIRTY_LOG not available"); - exit(KSFT_SKIP); - } - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); -#endif + int opt, i, j; #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); @@ -464,7 +558,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { + while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) { switch (opt) { case 'i': iterations = strtol(optarg, NULL, 10); @@ -486,6 +580,26 @@ int main(int argc, char *argv[]) "Guest mode ID %d too big", mode); guest_modes[mode].enabled = true; break; + case 'M': + if (!strcmp(optarg, "all")) { + host_log_mode_option = LOG_MODE_ALL; + break; + } + for (i = 0; i < LOG_MODE_NUM; i++) { + if (!strcmp(optarg, log_modes[i].name)) { + pr_info("Setting log mode to: '%s'\n", + optarg); + host_log_mode_option = i; + break; + } + } + if (i == LOG_MODE_NUM) { + printf("Log mode '%s' invalid. Please choose " + "from: ", optarg); + log_modes_dump(); + exit(1); + } + break; case 'h': default: help(argv[0]); @@ -507,7 +621,18 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (j = 0; j < LOG_MODE_NUM; j++) { + pr_info("Testing Log Mode '%s'\n", + log_modes[j].name); + host_log_mode = j; + run_test(i, iterations, interval, phys_offset); + } + } else { + host_log_mode = host_log_mode_option; + run_test(i, iterations, interval, phys_offset); + } } return 0; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 919e161dd289..7d29aa786959 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -63,9 +63,11 @@ enum vm_mem_backing_src_type { int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap); +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); @@ -149,6 +151,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_guest_debug *debug); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); @@ -294,6 +297,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); memcpy(&(g), _p, sizeof(g)); \ }) +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); + /* Common ucalls */ enum { UCALL_NONE, diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h new file mode 100644 index 000000000000..2618052057b1 --- /dev/null +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tools/testing/selftests/kvm/include/perf_test_util.h + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H +#define SELFTEST_KVM_PERF_TEST_UTIL_H + +#include "kvm_util.h" +#include "processor.h" + +#define MAX_VCPUS 512 + +#define PAGE_SHIFT_4K 12 +#define PTES_PER_4K_PT 512 + +#define TEST_MEM_SLOT_INDEX 1 + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ + +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +static uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + +/* Number of VCPUs for the test */ +static int nr_vcpus = 1; + +struct vcpu_args { + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + int vcpu_id; +}; + +struct perf_test_args { + struct kvm_vm *vm; + uint64_t host_page_size; + uint64_t guest_page_size; + int wr_fract; + + struct vcpu_args vcpu_args[MAX_VCPUS]; +}; + +static struct perf_test_args perf_test_args; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } + + GUEST_SYNC(1); + } +} + +static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES; + uint64_t guest_num_pages; + + /* Account for a few pages per-vCPU for stacks */ + pages += DEFAULT_STACK_PGS * vcpus; + + /* + * Reserve twice the ammount of memory needed to map the test region and + * the page table / stacks region, at 4k, for page tables. Do the + * calculation with 4K page size: the smallest of all archs. (e.g., 64K + * page size guest will need even less memory for page tables). + */ + pages += (2 * pages) / PTES_PER_4K_PT; + pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / + PTES_PER_4K_PT; + pages = vm_adjust_num_guest_pages(mode, pages); + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + vm = vm_create(mode, pages, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + + perf_test_args.vm = vm; + perf_test_args.guest_page_size = vm_get_page_size(vm); + perf_test_args.host_page_size = getpagesize(); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + guest_num_pages = (vcpus * vcpu_memory_bytes) / + perf_test_args.guest_page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); + +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vm_vcpu_add_default(vm, vcpu_id, guest_code); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); +#endif + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} + +#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 5eb01bf51b86..ffffa560436b 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -64,5 +64,7 @@ int64_t timespec_to_ns(struct timespec ts); struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); +struct timespec timespec_diff_now(struct timespec start); +struct timespec timespec_div(struct timespec ts, int divisor); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 82b7fe16a824..8e61340b3911 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -36,6 +36,8 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) +#define UNEXPECTED_VECTOR_PORT 0xfff0u + /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; @@ -59,7 +61,7 @@ struct gpr64_regs { struct desc64 { uint16_t limit0; uint16_t base0; - unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; uint32_t base3; uint32_t zero1; @@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void) return idt; } +static inline void outl(uint16_t port, uint32_t value) +{ + __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) @@ -338,6 +345,35 @@ uint32_t kvm_get_cpuid_max_basic(void); uint32_t kvm_get_cpuid_max_extended(void); void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); +struct ex_regs { + uint64_t rax, rcx, rdx, rbx; + uint64_t rbp, rsi, rdi; + uint64_t r8, r9, r10, r11; + uint64_t r12, r13, r14, r15; + uint64_t vector; + uint64_t error_code; + uint64_t rip; + uint64_t cs; + uint64_t rflags; +}; + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)); + +/* + * set_cpuid() - overwrites a matching cpuid entry with the provided value. + * matches based on ent->function && ent->index. returns true + * if a match was found and successfully overwritten. + * @cpuid: the kvm cpuid list to modify. + * @ent: cpuid entry to insert + */ +bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent); + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 16fa21ebb99c..e78d7e26ba61 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -48,7 +48,7 @@ #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 #define SECONDARY_EXEC_DESC 0x00000004 -#define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_ENABLE_RDTSCP 0x00000008 #define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 @@ -573,6 +573,10 @@ struct vmx_pages { void *eptp_hva; uint64_t eptp_gpa; void *eptp; + + void *apic_access_hva; + uint64_t apic_access_gpa; + void *apic_access; }; union vmx_basic { @@ -615,5 +619,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot, uint32_t eptp_memslot); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 2afa6618b396..d6c32c328e9a 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_end(ap); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index c8e0ec20d3bf..2f37b90ee1a9 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { vm_vaddr_t gva; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 74776ee228f2..126c6727a6b0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -14,6 +14,7 @@ #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> +#include <unistd.h> #include <linux/kernel.h> #define KVM_UTIL_PGS_PER_HUGEPG 512 @@ -85,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } +/* VCPU Enable Capability + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - VCPU + * cap - Capability + * + * Output Args: None + * + * Return: On success, 0. On failure a TEST_ASSERT failure is produced. + * + * Enables a capability (KVM_CAP_*) on the VCPU. + */ +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpu_id); + int r; + + TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id); + + r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap); + TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n" + " rc: %i, errno: %i", r, errno); + + return r; +} + static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); @@ -151,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; @@ -242,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) -{ - return _vm_create(mode, phy_pages, perm); -} - /* * VM Restart * @@ -664,13 +688,21 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* As needed perform madvise */ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { - ret = madvise(region->host_mem, npages * vm->page_size, - src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); - TEST_ASSERT(ret == 0, "madvise failed,\n" - " addr: %p\n" - " length: 0x%lx\n" - " src_type: %x", - region->host_mem, npages * vm->page_size, src_type); + struct stat statbuf; + + ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), + "stat /sys/kernel/mm/transparent_hugepage"); + + TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP, + "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel"); + + if (ret == 0) { + ret = madvise(region->host_mem, npages * vm->page_size, + src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); + TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x", + region->host_mem, npages * vm->page_size, src_type); + } } region->unused_phy_pages = sparsebit_alloc(); @@ -1195,6 +1227,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) do { rc = ioctl(vcpu->fd, KVM_RUN, NULL); } while (rc == -1 && errno == EINTR); + + assert_on_unhandled_exception(vm, vcpuid); + return rc; } @@ -1252,6 +1287,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, } /* + * VM VCPU Get Reg List + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * None + * + * Return: + * A pointer to an allocated struct kvm_reg_list + * + * Get the list of guest registers which are supported for + * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls + */ +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, ®_list_n); + TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); + reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); + reg_list->n = reg_list_n.n; + vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list); + return reg_list; +} + +/* * VM VCPU Regs Get * * Input Args: diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 2ef446520748..f07d383d03a1 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -50,6 +50,8 @@ struct kvm_vm { vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; + vm_vaddr_t idt; + vm_vaddr_t handlers; }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index a88c5d665725..7349bb2e1a24 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index fd589dc9bfab..9d3b0f15249a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 689e97c27ee2..8e04c0b1608e 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -4,10 +4,13 @@ * * Copyright (C) 2020, Google LLC. */ -#include <stdlib.h> + +#include <assert.h> #include <ctype.h> #include <limits.h> -#include <assert.h> +#include <stdlib.h> +#include <time.h> + #include "test_util.h" /* @@ -81,6 +84,21 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) return timespec_add_ns((struct timespec){0}, ns1 - ns2); } +struct timespec timespec_diff_now(struct timespec start) +{ + struct timespec end; + + clock_gettime(CLOCK_MONOTONIC, &end); + return timespec_sub(end, start); +} + +struct timespec timespec_div(struct timespec ts, int divisor) +{ + int64_t ns = timespec_to_ns(ts) / divisor; + + return timespec_add_ns((struct timespec){0}, ns); +} + void print_skip(const char *fmt, ...) { va_list ap; diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S new file mode 100644 index 000000000000..aaf7bc7d2ce1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -0,0 +1,81 @@ +handle_exception: + push %r15 + push %r14 + push %r13 + push %r12 + push %r11 + push %r10 + push %r9 + push %r8 + + push %rdi + push %rsi + push %rbp + push %rbx + push %rdx + push %rcx + push %rax + mov %rsp, %rdi + + call route_exception + + pop %rax + pop %rcx + pop %rdx + pop %rbx + pop %rbp + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %r12 + pop %r13 + pop %r14 + pop %r15 + + /* Discard vector and error code. */ + add $16, %rsp + iretq + +/* + * Build the handle_exception wrappers which push the vector/error code on the + * stack and an array of pointers to those wrappers. + */ +.pushsection .rodata +.globl idt_handlers +idt_handlers: +.popsection + +.macro HANDLERS has_error from to + vector = \from + .rept \to - \from + 1 + .align 8 + + /* Fetch current address and append it to idt_handlers. */ + current_handler = . +.pushsection .rodata +.quad current_handler +.popsection + + .if ! \has_error + pushq $0 + .endif + pushq $vector + jmp handle_exception + vector = vector + 1 + .endr +.endm + +.global idt_handler_code +idt_handler_code: + HANDLERS has_error=0 from=0 to=7 + HANDLERS has_error=1 from=8 to=8 + HANDLERS has_error=0 from=9 to=9 + HANDLERS has_error=1 from=10 to=14 + HANDLERS has_error=0 from=15 to=16 + HANDLERS has_error=1 from=17 to=17 + HANDLERS has_error=0 from=18 to=255 + +.section .note.GNU-stack, "", %progbits diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f6eb34eaa0d2..d10c5c05bdf0 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -12,9 +12,18 @@ #include "../kvm_util_internal.h" #include "processor.h" +#ifndef NUM_INTERRUPTS +#define NUM_INTERRUPTS 256 +#endif + +#define DEFAULT_CODE_SELECTOR 0x8 +#define DEFAULT_DATA_SELECTOR 0x10 + /* Minimum physical address used for virtual translation tables. */ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 +vm_vaddr_t exception_handlers; + /* Virtual translation table structure declarations */ struct pageMapL4Entry { uint64_t present:1; @@ -392,11 +401,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->limit0 = segp->limit & 0xFFFF; desc->base0 = segp->base & 0xFFFF; desc->base1 = segp->base >> 16; - desc->s = segp->s; desc->type = segp->type; + desc->s = segp->s; desc->dpl = segp->dpl; desc->p = segp->present; desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; desc->l = segp->l; desc->db = segp->db; desc->g = segp->g; @@ -556,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es); + kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); break; @@ -1118,3 +1128,131 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) *va_bits = (entry->eax >> 8) & 0xff; } } + +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +void kvm_exit_unexpected_vector(uint32_t value) +{ + outl(UNEXPECTED_VECTOR_PORT, value); +} + +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; + + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; + } + + kvm_exit_unexpected_vector(regs->vector); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + int i; + + vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0); + vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0); + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, + DEFAULT_CODE_SELECTOR); +} + +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_sregs sregs; + + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.idt.base = vm->idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->gdt; + sregs.gdt.limit = getpagesize() - 1; + kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); + vcpu_sregs_set(vm, vcpuid, &sregs); + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ + if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO + && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT + && vcpu_state(vm, vcpuid)->io.size == 4) { + /* Grab pointer to io data */ + uint32_t *data = (void *)vcpu_state(vm, vcpuid) + + vcpu_state(vm, vcpuid)->io.data_offset; + + TEST_ASSERT(false, + "Unexpected vectored event in guest (vector:0x%x)", + *data); + } +} + +bool set_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 *ent) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; + + if (cur->function != ent->function || cur->index != ent->index) + continue; + + memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2)); + return true; + } + + return false; +} + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3) +{ + uint64_t r; + + asm volatile("vmcall" + : "=a"(r) + : "b"(a0), "c"(a1), "d"(a2), "S"(a3)); + return r; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index da4d89ad5419..a3489973e290 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index f1e00d43eea2..2448b30e8efa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -542,3 +542,12 @@ void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); } + +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); + vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c new file mode 100644 index 000000000000..b10a27485bad --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM paravirtual feature disablement + */ +#include <asm/kvm_para.h> +#include <linux/kvm_para.h> +#include <stdint.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +extern unsigned char rdmsr_start; +extern unsigned char rdmsr_end; + +static u64 do_rdmsr(u32 idx) +{ + u32 lo, hi; + + asm volatile("rdmsr_start: rdmsr;" + "rdmsr_end:" + : "=a"(lo), "=c"(hi) + : "c"(idx)); + + return (((u64) hi) << 32) | lo; +} + +extern unsigned char wrmsr_start; +extern unsigned char wrmsr_end; + +static void do_wrmsr(u32 idx, u64 val) +{ + u32 lo, hi; + + lo = val; + hi = val >> 32; + + asm volatile("wrmsr_start: wrmsr;" + "wrmsr_end:" + : : "a"(lo), "c"(idx), "d"(hi)); +} + +static int nr_gp; + +static void guest_gp_handler(struct ex_regs *regs) +{ + unsigned char *rip = (unsigned char *)regs->rip; + bool r, w; + + r = rip == &rdmsr_start; + w = rip == &wrmsr_start; + GUEST_ASSERT(r || w); + + nr_gp++; + + if (r) + regs->rip = (uint64_t)&rdmsr_end; + else + regs->rip = (uint64_t)&wrmsr_end; +} + +struct msr_data { + uint32_t idx; + const char *name; +}; + +#define TEST_MSR(msr) { .idx = msr, .name = #msr } +#define UCALL_PR_MSR 0xdeadbeef +#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr) + +/* + * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or + * written, as the KVM_CPUID_FEATURES leaf is cleared. + */ +static struct msr_data msrs_to_test[] = { + TEST_MSR(MSR_KVM_SYSTEM_TIME), + TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW), + TEST_MSR(MSR_KVM_WALL_CLOCK), + TEST_MSR(MSR_KVM_WALL_CLOCK_NEW), + TEST_MSR(MSR_KVM_ASYNC_PF_EN), + TEST_MSR(MSR_KVM_STEAL_TIME), + TEST_MSR(MSR_KVM_PV_EOI_EN), + TEST_MSR(MSR_KVM_POLL_CONTROL), + TEST_MSR(MSR_KVM_ASYNC_PF_INT), + TEST_MSR(MSR_KVM_ASYNC_PF_ACK), +}; + +static void test_msr(struct msr_data *msr) +{ + PR_MSR(msr); + do_rdmsr(msr->idx); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + + nr_gp = 0; + do_wrmsr(msr->idx, 0); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + nr_gp = 0; +} + +struct hcall_data { + uint64_t nr; + const char *name; +}; + +#define TEST_HCALL(hc) { .nr = hc, .name = #hc } +#define UCALL_PR_HCALL 0xdeadc0de +#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc) + +/* + * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding + * features have been cleared in KVM_CPUID_FEATURES. + */ +static struct hcall_data hcalls_to_test[] = { + TEST_HCALL(KVM_HC_KICK_CPU), + TEST_HCALL(KVM_HC_SEND_IPI), + TEST_HCALL(KVM_HC_SCHED_YIELD), +}; + +static void test_hcall(struct hcall_data *hc) +{ + uint64_t r; + + PR_HCALL(hc); + r = kvm_hypercall(hc->nr, 0, 0, 0, 0); + GUEST_ASSERT(r == -KVM_ENOSYS); +} + +static void guest_main(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) { + test_msr(&msrs_to_test[i]); + } + + for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) { + test_hcall(&hcalls_to_test[i]); + } + + GUEST_DONE(); +} + +static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid) +{ + struct kvm_cpuid_entry2 ent = {0}; + + ent.function = KVM_CPUID_FEATURES; + TEST_ASSERT(set_cpuid(cpuid, &ent), + "failed to clear KVM_CPUID_FEATURES leaf"); +} + +static void pr_msr(struct ucall *uc) +{ + struct msr_data *msr = (struct msr_data *)uc->args[0]; + + pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx); +} + +static void pr_hcall(struct ucall *uc) +{ + struct hcall_data *hc = (struct hcall_data *)uc->args[0]; + + pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr); +} + +static void handle_abort(struct ucall *uc) +{ + TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], + __FILE__, uc->args[1]); +} + +#define VCPU_ID 0 + +static void enter_guest(struct kvm_vm *vm) +{ + struct kvm_run *run; + struct ucall uc; + int r; + + run = vcpu_state(vm, VCPU_ID); + + while (true) { + r = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_PR_MSR: + pr_msr(&uc); + break; + case UCALL_PR_HCALL: + pr_hcall(&uc); + break; + case UCALL_ABORT: + handle_abort(&uc); + return; + case UCALL_DONE: + return; + } + } +} + +int main(void) +{ + struct kvm_enable_cap cap = {0}; + struct kvm_cpuid2 *best; + struct kvm_vm *vm; + + if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) { + pr_info("will skip kvm paravirt restriction tests.\n"); + return 0; + } + + vm = vm_create_default(VCPU_ID, 0, guest_main); + + cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID; + cap.args[0] = 1; + vcpu_enable_cap(vm, VCPU_ID, &cap); + + best = kvm_get_supported_cpuid(); + clear_kvm_cpuid_features(best); + vcpu_set_cpuid(vm, VCPU_ID, best); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + + enter_guest(vm); + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c new file mode 100644 index 000000000000..f8e761149daa --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST. + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#include <stdio.h> +#include <string.h> +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 0 + +#define UNITY (1ull << 30) +#define HOST_ADJUST (UNITY * 64) +#define GUEST_STEP (UNITY * 4) +#define ROUND(x) ((x + UNITY / 2) & -UNITY) +#define rounded_rdmsr(x) ROUND(rdmsr(x)) +#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x)) + +#define GUEST_ASSERT_EQ(a, b) do { \ + __typeof(a) _a = (a); \ + __typeof(b) _b = (b); \ + if (_a != _b) \ + ucall(UCALL_ABORT, 4, \ + "Failed guest assert: " \ + #a " == " #b, __LINE__, _a, _b); \ + } while(0) + +static void guest_code(void) +{ + u64 val = 0; + + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ + val = 1ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ + GUEST_SYNC(2); + val = 2ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC_ADJUST, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Host: setting the TSC offset. */ + GUEST_SYNC(3); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the + * host-side offset and affect both MSRs. + */ + GUEST_SYNC(4); + val = 3ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC_ADJUST, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side + * offset is now visible in MSR_IA32_TSC_ADJUST. + */ + GUEST_SYNC(5); + val = 4ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); + + GUEST_DONE(); +} + +static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) +{ + struct ucall uc; + + vcpu_args_set(vm, vcpuid, 1, vcpuid); + + vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL); + + switch (get_ucall(vm, vcpuid, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + return; + case UCALL_DONE: + return; + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%ld\n" \ + "\tvalues: %#lx, %#lx", (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2], uc.args[3]); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + } +} + +int main(void) +{ + struct kvm_vm *vm; + uint64_t val; + + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + val = 0; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ + run_vcpu(vm, VCPU_ID, 1); + val = 1ull * GUEST_STEP; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ + run_vcpu(vm, VCPU_ID, 2); + val = 2ull * GUEST_STEP; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Host: writes to MSR_IA32_TSC set the host-side offset + * and therefore do not change MSR_IA32_TSC_ADJUST. + */ + vcpu_set_msr(vm, 0, MSR_IA32_TSC, HOST_ADJUST + val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + run_vcpu(vm, VCPU_ID, 3); + + /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */ + vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, UNITY * 123456); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + ASSERT_EQ(vcpu_get_msr(vm, 0, MSR_IA32_TSC_ADJUST), UNITY * 123456); + + /* Restore previous value. */ + vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the + * host-side offset and affect both MSRs. + */ + run_vcpu(vm, VCPU_ID, 4); + val = 3ull * GUEST_STEP; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side + * offset is now visible in MSR_IA32_TSC_ADJUST. + */ + run_vcpu(vm, VCPU_ID, 5); + val = 4ull * GUEST_STEP; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c new file mode 100644 index 000000000000..cbe1b08890ff --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tests for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER + * + * Copyright (C) 2020, Amazon Inc. + * + * This is a functional test to verify that we can deflect MSR events + * into user space. + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 5 + +static u32 msr_reads, msr_writes; + +static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_deadbeef[1] = { 0x1 }; + +static void deny_msr(uint8_t *bitmap, u32 msr) +{ + u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); + + bitmap[idx / 8] &= ~(1 << (idx % 8)); +} + +static void prepare_bitmaps(void) +{ + memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000)); + memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write)); + memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000)); + memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000)); + memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read)); + + deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL); + deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK); + deny_msr(bitmap_c0000000_read, MSR_GS_BASE); +} + +struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_DENY, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .base = 0x00000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0x00000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000_write, + }, { + .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, + .base = 0x40000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_40000000, + }, { + .flags = KVM_MSR_FILTER_READ, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000_read, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000, + }, { + .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ, + .base = 0xdeadbeef, + .nmsrs = 1, + .bitmap = bitmap_deadbeef, + }, + }, +}; + +struct kvm_msr_filter no_filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, +}; + +static void guest_msr_calls(bool trapped) +{ + /* This goes into the in-kernel emulation */ + wrmsr(MSR_SYSCALL_MASK, 0); + + if (trapped) { + /* This goes into user space emulation */ + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE); + } else { + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE); + } + + /* If trapped == true, this goes into user space emulation */ + wrmsr(MSR_IA32_POWER_CTL, 0x1234); + + /* This goes into the in-kernel emulation */ + rdmsr(MSR_IA32_POWER_CTL); + + /* Invalid MSR, should always be handled by user space exit */ + GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef); + wrmsr(0xdeadbeef, 0x1234); +} + +static void guest_code(void) +{ + guest_msr_calls(true); + + /* + * Disable msr filtering, so that the kernel + * handles everything in the next round + */ + GUEST_SYNC(0); + + guest_msr_calls(false); + + GUEST_DONE(); +} + +static int handle_ucall(struct kvm_vm *vm) +{ + struct ucall uc; + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("Guest assertion not met"); + break; + case UCALL_SYNC: + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter); + break; + case UCALL_DONE: + return 1; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + return 0; +} + +static void handle_rdmsr(struct kvm_run *run) +{ + run->msr.data = run->msr.index; + msr_reads++; + + if (run->msr.index == MSR_SYSCALL_MASK || + run->msr.index == MSR_GS_BASE) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR read trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "MSR deadbeef read trap w/o inval fault"); + } +} + +static void handle_wrmsr(struct kvm_run *run) +{ + /* ignore */ + msr_writes++; + + if (run->msr.index == MSR_IA32_POWER_CTL) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for MSR_IA32_POWER_CTL incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR_IA32_POWER_CTL trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for deadbeef incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "deadbeef trap w/o inval fault"); + } +} + +int main(int argc, char *argv[]) +{ + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X86_USER_SPACE_MSR, + .args[0] = KVM_MSR_EXIT_REASON_INVAL | + KVM_MSR_EXIT_REASON_UNKNOWN | + KVM_MSR_EXIT_REASON_FILTER, + }; + struct kvm_vm *vm; + struct kvm_run *run; + int rc; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + run = vcpu_state(vm, VCPU_ID); + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, &cap); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + prepare_bitmaps(); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter); + + while (1) { + rc = _vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); + + switch (run->exit_reason) { + case KVM_EXIT_X86_RDMSR: + handle_rdmsr(run); + break; + case KVM_EXIT_X86_WRMSR: + handle_wrmsr(run); + break; + case KVM_EXIT_IO: + if (handle_ucall(vm)) + goto done; + break; + } + + } + +done: + TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space"); + TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space"); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c new file mode 100644 index 000000000000..1f65342d6cb7 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_apic_access_test + * + * Copyright (C) 2020, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * The first subtest simply checks to see that an L2 guest can be + * launched with a valid APIC-access address that is backed by a + * page of L1 physical memory. + * + * The second subtest sets the APIC-access address to a (valid) L1 + * physical address that is not backed by memory. KVM can't handle + * this situation, so resuming L2 should result in a KVM exit for + * internal error (emulation). This is not an architectural + * requirement. It is just a shortcoming of KVM. The internal error + * is unfortunate, but it's better than what used to happen! + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include <string.h> +#include <sys/ioctl.h> + +#include "kselftest.h" + +#define VCPU_ID 0 + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + control = vmreadz(SECONDARY_VM_EXEC_CONTROL); + control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmwrite(SECONDARY_VM_EXEC_CONTROL, control); + vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa); + + /* Try to launch L2 with the memory-backed APIC-access address. */ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + vmwrite(APIC_ACCESS_ADDR, high_gpa); + + /* Try to resume L2 with the unbacked APIC-access address. */ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + unsigned long apic_access_addr = ~0ul; + unsigned int paddr_width; + unsigned int vaddr_width; + vm_vaddr_t vmx_pages_gva; + unsigned long high_gpa; + struct vmx_pages *vmx; + bool done = false; + + nested_vmx_check_supported(); + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + kvm_get_cpu_address_width(&paddr_width, &vaddr_width); + high_gpa = (1ul << paddr_width) - getpagesize(); + if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) { + print_skip("No unbacked physical page available"); + exit(KSFT_SKIP); + } + + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + prepare_virtualize_apic_accesses(vmx, vm, 0); + vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa); + + while (!done) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + if (apic_access_addr == high_gpa) { + TEST_ASSERT(run->exit_reason == + KVM_EXIT_INTERNAL_ERROR, + "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->internal.suberror == + KVM_INTERNAL_ERROR_EMULATION, + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n", + run->internal.suberror); + break; + } + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + apic_access_addr = uc.args[1]; + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + } + } + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 7a17ea815736..a5ce26d548e4 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -47,9 +47,9 @@ ARCH ?= $(SUBARCH) khdr: ifndef KSFT_KHDR_INSTALL_DONE ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) - make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install + $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install else - make --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \ + $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \ ARCH=$(ARCH) -C $(top_srcdir) headers_install endif endif @@ -107,9 +107,8 @@ endif emit_tests: for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ BASENAME_TEST=`basename $$TEST`; \ - echo " \\"; \ - echo -n " \"$$BASENAME_TEST\""; \ - done; \ + echo "$(COLLECTION):$$BASENAME_TEST"; \ + done # define if isn't already. It is undefined in make O= case. ifeq ($(RM),) @@ -137,7 +136,7 @@ endif ifeq ($(OVERRIDE_TARGETS),) LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h $(OUTPUT)/%:%.c $(LOCAL_HDRS) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@ $(OUTPUT)/%.o:%.S $(COMPILE.S) $^ -o $@ diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 9d266e79c6a2..74a8d329a72c 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -9,7 +9,6 @@ EXCEPTION #CORRUPT_STACK_STRONG Crashes entire system on success CORRUPT_LIST_ADD list_add corruption CORRUPT_LIST_DEL list_del corruption -CORRUPT_USER_DS Invalid address limit on user-mode return STACK_GUARD_PAGE_LEADING STACK_GUARD_PAGE_TRAILING UNSET_SMEP CR4 bits went missing @@ -67,6 +66,5 @@ USERCOPY_STACK_FRAME_TO USERCOPY_STACK_FRAME_FROM USERCOPY_STACK_BEYOND USERCOPY_KERNEL -USERCOPY_KERNEL_DS STACKLEAK_ERASING OK: the rest of the thread stack is properly erased CFI_FORWARD_PROTO diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore index 0bc64a6d4c18..17f2d8415162 100644 --- a/tools/testing/selftests/mount/.gitignore +++ b/tools/testing/selftests/mount/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only unprivileged-remount-test +nosymfollow-test diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile index 026890744215..2d9454841644 100644 --- a/tools/testing/selftests/mount/Makefile +++ b/tools/testing/selftests/mount/Makefile @@ -3,7 +3,7 @@ CFLAGS = -Wall \ -O2 -TEST_PROGS := run_tests.sh -TEST_GEN_FILES := unprivileged-remount-test +TEST_PROGS := run_unprivileged_remount.sh run_nosymfollow.sh +TEST_GEN_FILES := unprivileged-remount-test nosymfollow-test include ../lib.mk diff --git a/tools/testing/selftests/mount/nosymfollow-test.c b/tools/testing/selftests/mount/nosymfollow-test.c new file mode 100644 index 000000000000..650d6d80a1d2 --- /dev/null +++ b/tools/testing/selftests/mount/nosymfollow-test.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/vfs.h> +#include <unistd.h> + +#ifndef MS_NOSYMFOLLOW +# define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#endif + +#ifndef ST_NOSYMFOLLOW +# define ST_NOSYMFOLLOW 0x2000 /* Do not follow symlinks */ +#endif + +#define DATA "/tmp/data" +#define LINK "/tmp/symlink" +#define TMP "/tmp" + +static void die(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, + va_list ap) +{ + ssize_t written; + char buf[4096]; + int buf_len; + int fd; + + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + if (buf_len < 0) + die("vsnprintf failed: %s\n", strerror(errno)); + + if (buf_len >= sizeof(buf)) + die("vsnprintf output truncated\n"); + + fd = open(filename, O_WRONLY); + if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; + die("open of %s failed: %s\n", filename, strerror(errno)); + } + + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + die("short write to %s\n", filename); + } else { + die("write to %s failed: %s\n", + filename, strerror(errno)); + } + } + + if (close(fd) != 0) + die("close of %s failed: %s\n", filename, strerror(errno)); +} + +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); +} + +static void write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); +} + +static void create_and_enter_ns(void) +{ + uid_t uid = getuid(); + gid_t gid = getgid(); + + if (unshare(CLONE_NEWUSER) != 0) + die("unshare(CLONE_NEWUSER) failed: %s\n", strerror(errno)); + + maybe_write_file("/proc/self/setgroups", "deny"); + write_file("/proc/self/uid_map", "0 %d 1", uid); + write_file("/proc/self/gid_map", "0 %d 1", gid); + + if (setgid(0) != 0) + die("setgid(0) failed %s\n", strerror(errno)); + if (setuid(0) != 0) + die("setuid(0) failed %s\n", strerror(errno)); + + if (unshare(CLONE_NEWNS) != 0) + die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno)); +} + +static void setup_symlink(void) +{ + int data, err; + + data = creat(DATA, O_RDWR); + if (data < 0) + die("creat failed: %s\n", strerror(errno)); + + err = symlink(DATA, LINK); + if (err < 0) + die("symlink failed: %s\n", strerror(errno)); + + if (close(data) != 0) + die("close of %s failed: %s\n", DATA, strerror(errno)); +} + +static void test_link_traversal(bool nosymfollow) +{ + int link; + + link = open(LINK, 0, O_RDWR); + if (nosymfollow) { + if ((link != -1 || errno != ELOOP)) { + die("link traversal unexpected result: %d, %s\n", + link, strerror(errno)); + } + } else { + if (link < 0) + die("link traversal failed: %s\n", strerror(errno)); + + if (close(link) != 0) + die("close of link failed: %s\n", strerror(errno)); + } +} + +static void test_readlink(void) +{ + char buf[4096]; + ssize_t ret; + + bzero(buf, sizeof(buf)); + + ret = readlink(LINK, buf, sizeof(buf)); + if (ret < 0) + die("readlink failed: %s\n", strerror(errno)); + if (strcmp(buf, DATA) != 0) + die("readlink strcmp failed: '%s' '%s'\n", buf, DATA); +} + +static void test_realpath(void) +{ + char *path = realpath(LINK, NULL); + + if (!path) + die("realpath failed: %s\n", strerror(errno)); + if (strcmp(path, DATA) != 0) + die("realpath strcmp failed\n"); + + free(path); +} + +static void test_statfs(bool nosymfollow) +{ + struct statfs buf; + int ret; + + ret = statfs(TMP, &buf); + if (ret) + die("statfs failed: %s\n", strerror(errno)); + + if (nosymfollow) { + if ((buf.f_flags & ST_NOSYMFOLLOW) == 0) + die("ST_NOSYMFOLLOW not set on %s\n", TMP); + } else { + if ((buf.f_flags & ST_NOSYMFOLLOW) != 0) + die("ST_NOSYMFOLLOW set on %s\n", TMP); + } +} + +static void run_tests(bool nosymfollow) +{ + test_link_traversal(nosymfollow); + test_readlink(); + test_realpath(); + test_statfs(nosymfollow); +} + +int main(int argc, char **argv) +{ + create_and_enter_ns(); + + if (mount("testing", TMP, "ramfs", 0, NULL) != 0) + die("mount failed: %s\n", strerror(errno)); + + setup_symlink(); + run_tests(false); + + if (mount("testing", TMP, "ramfs", MS_REMOUNT|MS_NOSYMFOLLOW, NULL) != 0) + die("remount failed: %s\n", strerror(errno)); + + run_tests(true); + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/mount/run_nosymfollow.sh b/tools/testing/selftests/mount/run_nosymfollow.sh new file mode 100755 index 000000000000..5fbbf03043a2 --- /dev/null +++ b/tools/testing/selftests/mount/run_nosymfollow.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +./nosymfollow-test diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_unprivileged_remount.sh index 4ab8f507dcba..4ab8f507dcba 100755 --- a/tools/testing/selftests/mount/run_tests.sh +++ b/tools/testing/selftests/mount/run_unprivileged_remount.sh diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 742c499328b2..61ae899cfc17 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +ipsec msg_zerocopy socket psock_fanout diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 9491bbaa0831..ef352477cac6 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -19,6 +19,8 @@ TEST_PROGS += txtimestamp.sh TEST_PROGS += vrf-xfrm-tests.sh TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py +TEST_PROGS += drop_monitor_tests.sh +TEST_PROGS += vrf_route_leaking.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -29,6 +31,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat TEST_GEN_FILES += reuseaddr_ports_exhausted TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_GEN_FILES += ipsec TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3b42c06b5985..4d5df8e1eee7 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -24,10 +24,13 @@ CONFIG_IP_NF_NAT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NFT_NAT=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y CONFIG_TEST_BLACKHOLE_DEV=m CONFIG_KALLSYMS=y +CONFIG_TRACEPOINTS=y +CONFIG_NET_DROP_MONITOR=m +CONFIG_NETDEVSIM=m +CONFIG_NET_FOU=m diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh new file mode 100755 index 000000000000..b7650e30d18b --- /dev/null +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -0,0 +1,215 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking drop monitor functionality. + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# all tests in this script. Can be overridden with -t option +TESTS=" + sw_drops + hw_drops +" + +IP="ip -netns ns1" +TC="tc -netns ns1" +DEVLINK="devlink -N ns1" +NS_EXEC="ip netns exec ns1" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + fi +} + +setup() +{ + modprobe netdevsim &> /dev/null + + set -e + ip netns add ns1 + $IP link add dummy10 up type dummy + + $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + udevadm settle + local netdev=$($NS_EXEC ls ${NETDEVSIM_PATH}/devices/${DEV}/net/) + $IP link set dev $netdev up + + set +e +} + +cleanup() +{ + $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + ip netns del ns1 +} + +sw_drops_test() +{ + echo + echo "Software drops test" + + setup + + local dir=$(mktemp -d) + + $TC qdisc add dev dummy10 clsact + $TC filter add dev dummy10 egress pref 1 handle 101 proto ip \ + flower dst_ip 192.0.2.10 action drop + + $NS_EXEC mausezahn dummy10 -a 00:11:22:33:44:55 -b 00:aa:bb:cc:dd:ee \ + -A 192.0.2.1 -B 192.0.2.10 -t udp sp=12345,dp=54321 -c 0 -q \ + -d 100msec & + timeout 5 dwdump -o sw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) != 0)) + log_test $? 0 "Capturing active software drops" + + rm ${dir}/packets.pcap + + { kill %% && wait %%; } 2>/dev/null + timeout 5 dwdump -o sw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0)) + log_test $? 0 "Capturing inactive software drops" + + rm -r $dir + + cleanup +} + +hw_drops_test() +{ + echo + echo "Hardware drops test" + + setup + + local dir=$(mktemp -d) + + $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action trap + timeout 5 dwdump -o hw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \ + | wc -l) != 0)) + log_test $? 0 "Capturing active hardware drops" + + rm ${dir}/packets.pcap + + $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action drop + timeout 5 dwdump -o hw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \ + | wc -l) == 0)) + log_test $? 0 "Capturing inactive hardware drops" + + rm -r $dir + + cleanup +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v devlink)" ]; then + echo "SKIP: Could not run test without devlink tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tshark)" ]; then + echo "SKIP: Could not run test without tshark tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v dwdump)" ]; then + echo "SKIP: Could not run test without dwdump tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v udevadm)" ]; then + echo "SKIP: Could not run test without udevadm tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v timeout)" ]; then + echo "SKIP: Could not run test without timeout tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +tshark -G fields 2> /dev/null | grep -q net_dm +if [ $? -ne 0 ]; then + echo "SKIP: tshark too old, missing net_dm dissector" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null + +for t in $TESTS +do + case $t in + sw_drops|sw) sw_drops_test;; + hw_drops|hw) hw_drops_test;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 22dc2f3d428b..eb693a3b7b4a 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -411,9 +411,16 @@ ipv6_fdb_grp_fcnal() run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103" log_test $? 2 "Route add with fdb nexthop group" + run_cmd "$IP nexthop del id 61" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 0 "Fdb entry after deleting a single nexthop" + run_cmd "$IP nexthop del id 102" log_test $? 0 "Fdb nexthop delete" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 254 "Fdb entry after deleting a nexthop group" + $IP link del dev vx10 } @@ -484,9 +491,16 @@ ipv4_fdb_grp_fcnal() run_cmd "$IP ro add 172.16.0.0/22 nhid 103" log_test $? 2 "Route add with fdb nexthop group" + run_cmd "$IP nexthop del id 12" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 0 "Fdb entry after deleting a single nexthop" + run_cmd "$IP nexthop del id 102" log_test $? 0 "Fdb nexthop delete" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 254 "Fdb entry after deleting a nexthop group" + $IP link del dev vx10 } @@ -739,6 +753,36 @@ ipv6_fcnal_runtime() run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1" log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop" + run_cmd "$IP nexthop add id 86 via 2001:db8:92::2 dev veth3" + run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop add id 124 group 86/87/88" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop del id 88" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop del id 87" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 0 "IPv6 route using a group after removing v4 gateways" + + run_cmd "$IP ro delete 2001:db8:101::1/128" + run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop replace id 124 group 86/87/88" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop replace id 88 via 2001:db8:92::2 dev veth3" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop replace id 87 via 2001:db8:92::2 dev veth3" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 0 "IPv6 route using a group after replacing v4 gateways" + $IP nexthop flush >/dev/null 2>&1 # diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 75fe24bcb9cd..9c12c4fd3afc 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -5,7 +5,7 @@ # Defines if [[ ! -v DEVLINK_DEV ]]; then - DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \ + DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \ | jq -r '.port | keys[]' | cut -d/ -f-2) if [ -z "$DEVLINK_DEV" ]; then echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it" @@ -117,6 +117,12 @@ devlink_reload() declare -A DEVLINK_ORIG +# Changing pool type from static to dynamic causes reinterpretation of threshold +# values. They therefore need to be saved before pool type is changed, then the +# pool type can be changed, and then the new values need to be set up. Therefore +# instead of saving the current state implicitly in the _set call, provide +# functions for all three primitives: save, set, and restore. + devlink_port_pool_threshold() { local port=$1; shift @@ -126,14 +132,21 @@ devlink_port_pool_threshold() | jq '.port_pool."'"$port"'"[].threshold' } -devlink_port_pool_th_set() +devlink_port_pool_th_save() { local port=$1; shift local pool=$1; shift - local th=$1; shift local key="port_pool($port,$pool).threshold" DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool) +} + +devlink_port_pool_th_set() +{ + local port=$1; shift + local pool=$1; shift + local th=$1; shift + devlink sb port pool set $port pool $pool th $th } @@ -142,8 +155,13 @@ devlink_port_pool_th_restore() local port=$1; shift local pool=$1; shift local key="port_pool($port,$pool).threshold" + local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]} + if [[ -z $orig ]]; then + echo "WARNING: Mismatched devlink_port_pool_th_restore" + else + devlink sb port pool set $port pool $pool th $orig + fi } devlink_pool_size_thtype() @@ -154,14 +172,20 @@ devlink_pool_size_thtype() | jq -r '.pool[][] | (.size, .thtype)' } +devlink_pool_size_thtype_save() +{ + local pool=$1; shift + local key="pool($pool).size_thtype" + + DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) +} + devlink_pool_size_thtype_set() { local pool=$1; shift local thtype=$1; shift local size=$1; shift - local key="pool($pool).size_thtype" - DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype } @@ -171,8 +195,12 @@ devlink_pool_size_thtype_restore() local key="pool($pool).size_thtype" local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb pool set "$DEVLINK_DEV" pool $pool \ - size ${orig[0]} thtype ${orig[1]} + if [[ -z ${orig[0]} ]]; then + echo "WARNING: Mismatched devlink_pool_size_thtype_restore" + else + devlink sb pool set "$DEVLINK_DEV" pool $pool \ + size ${orig[0]} thtype ${orig[1]} + fi } devlink_tc_bind_pool_th() @@ -185,6 +213,16 @@ devlink_tc_bind_pool_th() | jq -r '.tc_bind[][] | (.pool, .threshold)' } +devlink_tc_bind_pool_th_save() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + local key="tc_bind($port,$dir,$tc).pool_th" + + DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) +} + devlink_tc_bind_pool_th_set() { local port=$1; shift @@ -192,9 +230,7 @@ devlink_tc_bind_pool_th_set() local dir=$1; shift local pool=$1; shift local th=$1; shift - local key="tc_bind($port,$dir,$tc).pool_th" - DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) devlink sb tc bind set $port tc $tc type $dir pool $pool th $th } @@ -206,8 +242,12 @@ devlink_tc_bind_pool_th_restore() local key="tc_bind($port,$dir,$tc).pool_th" local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb tc bind set $port tc $tc type $dir \ - pool ${orig[0]} th ${orig[1]} + if [[ -z ${orig[0]} ]]; then + echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore" + else + devlink sb tc bind set $port tc $tc type $dir \ + pool ${orig[0]} th ${orig[1]} + fi } devlink_traps_num_get() @@ -509,3 +549,9 @@ devlink_cpu_port_get() echo "$DEVLINK_DEV/$cpu_dl_port_num" } + +devlink_cell_size_get() +{ + devlink sb pool show "$DEVLINK_DEV" pool 0 -j \ + | jq '.pool[][].cell_size' +} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 977fc2b326a2..927f9ba49e08 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1227,3 +1227,46 @@ stop_traffic() # Suppress noise from killing mausezahn. { kill %% && wait %%; } 2>/dev/null } + +tcpdump_start() +{ + local if_name=$1; shift + local ns=$1; shift + + capfile=$(mktemp) + capout=$(mktemp) + + if [ -z $ns ]; then + ns_cmd="" + else + ns_cmd="ip netns exec ${ns}" + fi + + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + $ns_cmd tcpdump -e -n -Q in -i $if_name \ + -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + cappid=$! + + sleep 1 +} + +tcpdump_stop() +{ + $ns_cmd kill $cappid + sleep 1 +} + +tcpdump_cleanup() +{ + rm $capfile $capout +} + +tcpdump_show() +{ + tcpdump -e -n -r $capfile 2>&1 +} diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index c33bfd7ba214..13db1cb50e57 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -31,7 +31,7 @@ mirror_test() local t0=$(tc_rule_stats_get $dev $pref) $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ - -c 10 -d 100ms -t icmp type=8 + -c 10 -d 100msec -t icmp type=8 sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh index a0b5f57d6bd3..0727e2012b68 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh @@ -215,10 +215,16 @@ switch_create() bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0 + sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0 } switch_destroy() { + sysctl_restore net.ipv4.conf.all.rp_filter + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 @@ -359,6 +365,10 @@ ns_switch_create() bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0 + sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0 } export -f ns_switch_create diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh index 1209031bc794..5d97fa347d75 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh @@ -237,10 +237,16 @@ switch_create() bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0 + sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0 } switch_destroy() { + sysctl_restore net.ipv4.conf.all.rp_filter + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 @@ -402,6 +408,10 @@ ns_switch_create() bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0 + sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0 } export -f ns_switch_create diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c new file mode 100644 index 000000000000..17ced7d6ce25 --- /dev/null +++ b/tools/testing/selftests/net/ipsec.c @@ -0,0 +1,2195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ipsec.c - Check xfrm on veth inside a net-ns. + * Copyright (c) 2018 Dmitry Safonov + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/types.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/limits.h> +#include <linux/netlink.h> +#include <linux/random.h> +#include <linux/rtnetlink.h> +#include <linux/veth.h> +#include <linux/xfrm.h> +#include <netinet/in.h> +#include <net/if.h> +#include <sched.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> + +#include "../kselftest.h" + +#define printk(fmt, ...) \ + ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__) + +#define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */ +#define MAX_PAYLOAD 2048 +#define XFRM_ALGO_KEY_BUF_SIZE 512 +#define MAX_PROCESSES (1 << 14) /* /16 mask divided by /30 subnets */ +#define INADDR_A ((in_addr_t) 0x0a000000) /* 10.0.0.0 */ +#define INADDR_B ((in_addr_t) 0xc0a80000) /* 192.168.0.0 */ + +/* /30 mask for one veth connection */ +#define PREFIX_LEN 30 +#define child_ip(nr) (4*nr + 1) +#define grchild_ip(nr) (4*nr + 2) + +#define VETH_FMT "ktst-%d" +#define VETH_LEN 12 + +static int nsfd_parent = -1; +static int nsfd_childa = -1; +static int nsfd_childb = -1; +static long page_size; + +/* + * ksft_cnt is static in kselftest, so isn't shared with children. + * We have to send a test result back to parent and count there. + * results_fd is a pipe with test feedback from children. + */ +static int results_fd[2]; + +const unsigned int ping_delay_nsec = 50 * 1000 * 1000; +const unsigned int ping_timeout = 300; +const unsigned int ping_count = 100; +const unsigned int ping_success = 80; + +static void randomize_buffer(void *buf, size_t buflen) +{ + int *p = (int *)buf; + size_t words = buflen / sizeof(int); + size_t leftover = buflen % sizeof(int); + + if (!buflen) + return; + + while (words--) + *p++ = rand(); + + if (leftover) { + int tmp = rand(); + + memcpy(buf + buflen - leftover, &tmp, leftover); + } + + return; +} + +static int unshare_open(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + if (unshare(CLONE_NEWNET) != 0) { + pr_err("unshare()"); + return -1; + } + + fd = open(netns_path, O_RDONLY); + if (fd <= 0) { + pr_err("open(%s)", netns_path); + return -1; + } + + return fd; +} + +static int switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) { + pr_err("setns()"); + return -1; + } + return 0; +} + +/* + * Running the test inside a new parent net namespace to bother less + * about cleanup on error-path. + */ +static int init_namespaces(void) +{ + nsfd_parent = unshare_open(); + if (nsfd_parent <= 0) + return -1; + + nsfd_childa = unshare_open(); + if (nsfd_childa <= 0) + return -1; + + if (switch_ns(nsfd_parent)) + return -1; + + nsfd_childb = unshare_open(); + if (nsfd_childb <= 0) + return -1; + + if (switch_ns(nsfd_parent)) + return -1; + return 0; +} + +static int netlink_sock(int *sock, uint32_t *seq_nr, int proto) +{ + if (*sock > 0) { + seq_nr++; + return 0; + } + + *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto); + if (*sock <= 0) { + pr_err("socket(AF_NETLINK)"); + return -1; + } + + randomize_buffer(seq_nr, sizeof(*seq_nr)); + + return 0; +} + +static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh) +{ + return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len)); +} + +static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */ + struct rtattr *attr = rtattr_hdr(nh); + size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size); + + if (req_sz < nl_size) { + printk("req buf is too small: %zu < %zu", req_sz, nl_size); + return -1; + } + nh->nlmsg_len = nl_size; + + attr->rta_len = RTA_LENGTH(size); + attr->rta_type = rta_type; + memcpy(RTA_DATA(attr), payload, size); + + return 0; +} + +static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + struct rtattr *ret = rtattr_hdr(nh); + + if (rtattr_pack(nh, req_sz, rta_type, payload, size)) + return 0; + + return ret; +} + +static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type) +{ + return _rtattr_begin(nh, req_sz, rta_type, 0, 0); +} + +static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + char *nlmsg_end = (char *)nh + nh->nlmsg_len; + + attr->rta_len = nlmsg_end - (char *)attr; +} + +static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz, + const char *peer, int ns) +{ + struct ifinfomsg pi; + struct rtattr *peer_attr; + + memset(&pi, 0, sizeof(pi)); + pi.ifi_family = AF_UNSPEC; + pi.ifi_change = 0xFFFFFFFF; + + peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi)); + if (!peer_attr) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer))) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns))) + return -1; + + rtattr_end(nh, peer_attr); + + return 0; +} + +static int netlink_check_answer(int sock) +{ + struct nlmsgerror { + struct nlmsghdr hdr; + int error; + struct nlmsghdr orig_msg; + } answer; + + if (recv(sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type); + return -1; + } else if (answer.error) { + printk("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + return answer.error; + } + + return 0; +} + +static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a, + const char *peerb, int ns_b) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + const char veth_type[] = "veth"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b)) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int ip4_addr_set(int sock, uint32_t seq, const char *intf, + struct in_addr addr, uint8_t prefix) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifaddrmsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifa_family = AF_INET; + req.info.ifa_prefixlen = prefix; + req.info.ifa_index = if_nametoindex(intf); + +#ifdef DEBUG + { + char addr_str[IPV4_STR_SZ] = {}; + + strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1); + + printk("ip addr set %s", addr_str); + } +#endif + + if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int link_set_up(int sock, uint32_t seq, const char *intf) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = if_nametoindex(intf); + req.info.ifi_flags = IFF_UP; + req.info.ifi_change = IFF_UP; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int ip4_route_set(int sock, uint32_t seq, const char *intf, + struct in_addr src, struct in_addr dst) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char attrbuf[MAX_PAYLOAD]; + } req; + unsigned int index = if_nametoindex(intf); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE; + req.nh.nlmsg_seq = seq; + req.rt.rtm_family = AF_INET; + req.rt.rtm_dst_len = 32; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_BOOT; + req.rt.rtm_scope = RT_SCOPE_LINK; + req.rt.rtm_type = RTN_UNICAST; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(sock); +} + +static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth, + struct in_addr tunsrc, struct in_addr tundst) +{ + if (ip4_addr_set(route_sock, (*route_seq)++, "lo", + tunsrc, PREFIX_LEN)) { + printk("Failed to set ipv4 addr"); + return -1; + } + + if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) { + printk("Failed to set ipv4 route"); + return -1; + } + + return 0; +} + +static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst) +{ + struct in_addr intsrc = inet_makeaddr(INADDR_B, src); + struct in_addr tunsrc = inet_makeaddr(INADDR_A, src); + struct in_addr tundst = inet_makeaddr(INADDR_A, dst); + int route_sock = -1, ret = -1; + uint32_t route_seq; + + if (switch_ns(nsfd)) + return -1; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) { + printk("Failed to open netlink route socket in child"); + return -1; + } + + if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) { + printk("Failed to set ipv4 addr"); + goto err; + } + + if (link_set_up(route_sock, route_seq++, veth)) { + printk("Failed to bring up %s", veth); + goto err; + } + + if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) { + printk("Failed to add tunnel route on %s", veth); + goto err; + } + ret = 0; + +err: + close(route_sock); + return ret; +} + +#define ALGO_LEN 64 +enum desc_type { + CREATE_TUNNEL = 0, + ALLOCATE_SPI, + MONITOR_ACQUIRE, + EXPIRE_STATE, + EXPIRE_POLICY, +}; +const char *desc_name[] = { + "create tunnel", + "alloc spi", + "monitor acquire", + "expire state", + "expire policy" +}; +struct xfrm_desc { + enum desc_type type; + uint8_t proto; + char a_algo[ALGO_LEN]; + char e_algo[ALGO_LEN]; + char c_algo[ALGO_LEN]; + char ae_algo[ALGO_LEN]; + unsigned int icv_len; + /* unsigned key_len; */ +}; + +enum msg_type { + MSG_ACK = 0, + MSG_EXIT, + MSG_PING, + MSG_XFRM_PREPARE, + MSG_XFRM_ADD, + MSG_XFRM_DEL, + MSG_XFRM_CLEANUP, +}; + +struct test_desc { + enum msg_type type; + union { + struct { + in_addr_t reply_ip; + unsigned int port; + } ping; + struct xfrm_desc xfrm_desc; + } body; +}; + +struct test_result { + struct xfrm_desc desc; + unsigned int res; +}; + +static void write_test_result(unsigned int res, struct xfrm_desc *d) +{ + struct test_result tr = {}; + ssize_t ret; + + tr.desc = *d; + tr.res = res; + + ret = write(results_fd[1], &tr, sizeof(tr)); + if (ret != sizeof(tr)) + pr_err("Failed to write the result in pipe %zd", ret); +} + +static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail) +{ + ssize_t bytes = write(fd, msg, sizeof(*msg)); + + /* Make sure that write/read is atomic to a pipe */ + BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF); + + if (bytes < 0) { + pr_err("write()"); + if (exit_of_fail) + exit(KSFT_FAIL); + } + if (bytes != sizeof(*msg)) { + pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg)); + if (exit_of_fail) + exit(KSFT_FAIL); + } +} + +static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail) +{ + ssize_t bytes = read(fd, msg, sizeof(*msg)); + + if (bytes < 0) { + pr_err("read()"); + if (exit_of_fail) + exit(KSFT_FAIL); + } + if (bytes != sizeof(*msg)) { + pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg)); + if (exit_of_fail) + exit(KSFT_FAIL); + } +} + +static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout, + unsigned int *server_port, int sock[2]) +{ + struct sockaddr_in server; + struct timeval t = { .tv_sec = 0, .tv_usec = u_timeout }; + socklen_t s_len = sizeof(server); + + sock[0] = socket(AF_INET, SOCK_DGRAM, 0); + if (sock[0] < 0) { + pr_err("socket()"); + return -1; + } + + server.sin_family = AF_INET; + server.sin_port = 0; + memcpy(&server.sin_addr.s_addr, &listen_ip, sizeof(struct in_addr)); + + if (bind(sock[0], (struct sockaddr *)&server, s_len)) { + pr_err("bind()"); + goto err_close_server; + } + + if (getsockname(sock[0], (struct sockaddr *)&server, &s_len)) { + pr_err("getsockname()"); + goto err_close_server; + } + + *server_port = ntohs(server.sin_port); + + if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO, (const char *)&t, sizeof t)) { + pr_err("setsockopt()"); + goto err_close_server; + } + + sock[1] = socket(AF_INET, SOCK_DGRAM, 0); + if (sock[1] < 0) { + pr_err("socket()"); + goto err_close_server; + } + + return 0; + +err_close_server: + close(sock[0]); + return -1; +} + +static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len) +{ + struct sockaddr_in server; + const struct sockaddr *dest_addr = (struct sockaddr *)&server; + char *sock_buf[buf_len]; + ssize_t r_bytes, s_bytes; + + server.sin_family = AF_INET; + server.sin_port = htons(port); + server.sin_addr.s_addr = dest_ip; + + s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server)); + if (s_bytes < 0) { + pr_err("sendto()"); + return -1; + } else if (s_bytes != buf_len) { + printk("send part of the message: %zd/%zu", s_bytes, sizeof(server)); + return -1; + } + + r_bytes = recv(sock[0], sock_buf, buf_len, 0); + if (r_bytes < 0) { + if (errno != EAGAIN) + pr_err("recv()"); + return -1; + } else if (r_bytes == 0) { /* EOF */ + printk("EOF on reply to ping"); + return -1; + } else if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) { + printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len); + return -1; + } + + return 0; +} + +static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len) +{ + struct sockaddr_in server; + const struct sockaddr *dest_addr = (struct sockaddr *)&server; + char *sock_buf[buf_len]; + ssize_t r_bytes, s_bytes; + + server.sin_family = AF_INET; + server.sin_port = htons(port); + server.sin_addr.s_addr = dest_ip; + + r_bytes = recv(sock[0], sock_buf, buf_len, 0); + if (r_bytes < 0) { + if (errno != EAGAIN) + pr_err("recv()"); + return -1; + } + if (r_bytes == 0) { /* EOF */ + printk("EOF on reply to ping"); + return -1; + } + if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) { + printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len); + return -1; + } + + s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server)); + if (s_bytes < 0) { + pr_err("sendto()"); + return -1; + } else if (s_bytes != buf_len) { + printk("send part of the message: %zd/%zu", s_bytes, sizeof(server)); + return -1; + } + + return 0; +} + +typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len); +static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from, + bool init_side, int d_port, in_addr_t to, ping_f func) +{ + struct test_desc msg; + unsigned int s_port, i, ping_succeeded = 0; + int ping_sock[2]; + char to_str[IPV4_STR_SZ] = {}, from_str[IPV4_STR_SZ] = {}; + + if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) { + printk("Failed to init ping"); + return -1; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_PING; + msg.body.ping.port = s_port; + memcpy(&msg.body.ping.reply_ip, &from, sizeof(from)); + + write_msg(cmd_fd, &msg, 0); + if (init_side) { + /* The other end sends ip to ping */ + read_msg(cmd_fd, &msg, 0); + if (msg.type != MSG_PING) + return -1; + to = msg.body.ping.reply_ip; + d_port = msg.body.ping.port; + } + + for (i = 0; i < ping_count ; i++) { + struct timespec sleep_time = { + .tv_sec = 0, + .tv_nsec = ping_delay_nsec, + }; + + ping_succeeded += !func(ping_sock, to, d_port, buf, page_size); + nanosleep(&sleep_time, 0); + } + + close(ping_sock[0]); + close(ping_sock[1]); + + strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1); + strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1); + + if (ping_succeeded < ping_success) { + printk("ping (%s) %s->%s failed %u/%u times", + init_side ? "send" : "reply", from_str, to_str, + ping_count - ping_succeeded, ping_count); + return -1; + } + +#ifdef DEBUG + printk("ping (%s) %s->%s succeeded %u/%u times", + init_side ? "send" : "reply", from_str, to_str, + ping_succeeded, ping_count); +#endif + + return 0; +} + +static int xfrm_fill_key(char *name, char *buf, + size_t buf_len, unsigned int *key_len) +{ + /* TODO: use set/map instead */ + if (strncmp(name, "digest_null", ALGO_LEN) == 0) + *key_len = 0; + else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0) + *key_len = 0; + else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0) + *key_len = 64; + else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0) + *key_len = 192; + else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0) + *key_len = 384; + else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0) + *key_len = 448; + else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0) + *key_len = 512; + else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0) + *key_len = 152; + else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0) + *key_len = 224; + else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0) + *key_len = 224; + else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0) + *key_len = 216; + else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0) + *key_len = 280; + else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0) + *key_len = 0; + + if (*key_len > buf_len) { + printk("Can't pack a key - too big for buffer"); + return -1; + } + + randomize_buffer(buf, *key_len); + + return 0; +} + +static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz, + struct xfrm_desc *desc) +{ + struct { + union { + struct xfrm_algo alg; + struct xfrm_algo_aead aead; + struct xfrm_algo_auth auth; + } u; + char buf[XFRM_ALGO_KEY_BUF_SIZE]; + } alg = {}; + size_t alen, elen, clen, aelen; + unsigned short type; + + alen = strlen(desc->a_algo); + elen = strlen(desc->e_algo); + clen = strlen(desc->c_algo); + aelen = strlen(desc->ae_algo); + + /* Verify desc */ + switch (desc->proto) { + case IPPROTO_AH: + if (!alen || elen || clen || aelen) { + printk("BUG: buggy ah desc"); + return -1; + } + strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + type = XFRMA_ALG_AUTH; + break; + case IPPROTO_COMP: + if (!clen || elen || alen || aelen) { + printk("BUG: buggy comp desc"); + return -1; + } + strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + type = XFRMA_ALG_COMP; + break; + case IPPROTO_ESP: + if (!((alen && elen) ^ aelen) || clen) { + printk("BUG: buggy esp desc"); + return -1; + } + if (aelen) { + alg.u.aead.alg_icv_len = desc->icv_len; + strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key, + sizeof(alg.buf), &alg.u.aead.alg_key_len)) + return -1; + type = XFRMA_ALG_AEAD; + } else { + + strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1); + type = XFRMA_ALG_CRYPT; + if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg))) + return -1; + + strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN); + type = XFRMA_ALG_AUTH; + if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + } + break; + default: + printk("BUG: unknown proto in desc"); + return -1; + } + + if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg))) + return -1; + + return 0; +} + +static inline uint32_t gen_spi(struct in_addr src) +{ + return htonl(inet_lnaof(src)); +} + +static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + struct xfrm_usersa_info info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = XFRM_MSG_NEWSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill selector. */ + memcpy(&req.info.sel.daddr, &dst, sizeof(dst)); + memcpy(&req.info.sel.saddr, &src, sizeof(src)); + req.info.sel.family = AF_INET; + req.info.sel.prefixlen_d = PREFIX_LEN; + req.info.sel.prefixlen_s = PREFIX_LEN; + + /* Fill id */ + memcpy(&req.info.id.daddr, &dst, sizeof(dst)); + /* Note: zero-spi cannot be deleted */ + req.info.id.spi = spi; + req.info.id.proto = desc->proto; + + memcpy(&req.info.saddr, &src, sizeof(src)); + + /* Fill lifteme_cfg */ + req.info.lft.soft_byte_limit = XFRM_INF; + req.info.lft.hard_byte_limit = XFRM_INF; + req.info.lft.soft_packet_limit = XFRM_INF; + req.info.lft.hard_packet_limit = XFRM_INF; + + req.info.family = AF_INET; + req.info.mode = XFRM_MODE_TUNNEL; + + if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc)) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + if (memcmp(&info->sel.daddr, &dst, sizeof(dst))) + return false; + + if (memcmp(&info->sel.saddr, &src, sizeof(src))) + return false; + + if (info->sel.family != AF_INET || + info->sel.prefixlen_d != PREFIX_LEN || + info->sel.prefixlen_s != PREFIX_LEN) + return false; + + if (info->id.spi != spi || info->id.proto != desc->proto) + return false; + + if (memcmp(&info->id.daddr, &dst, sizeof(dst))) + return false; + + if (memcmp(&info->saddr, &src, sizeof(src))) + return false; + + if (info->lft.soft_byte_limit != XFRM_INF || + info->lft.hard_byte_limit != XFRM_INF || + info->lft.soft_packet_limit != XFRM_INF || + info->lft.hard_packet_limit != XFRM_INF) + return false; + + if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL) + return false; + + /* XXX: check xfrm algo, see xfrm_state_pack_algo(). */ + + return true; +} + +static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + char attrbuf[MAX_PAYLOAD]; + } req; + struct { + struct nlmsghdr nh; + union { + struct xfrm_usersa_info info; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } answer; + struct xfrm_address_filter filter = {}; + bool found = false; + + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(0); + req.nh.nlmsg_type = XFRM_MSG_GETSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nh.nlmsg_seq = seq; + + /* + * Add dump filter by source address as there may be other tunnels + * in this netns (if tests run in parallel). + */ + filter.family = AF_INET; + filter.splen = 0x1f; /* 0xffffffff mask see addr_match() */ + memcpy(&filter.saddr, &src, sizeof(src)); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER, + &filter, sizeof(filter))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + while (1) { + if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return -1; + } + if (answer.nh.nlmsg_type == NLMSG_ERROR) { + printk("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + return -1; + } else if (answer.nh.nlmsg_type == NLMSG_DONE) { + if (found) + return 0; + printk("didn't find allocated xfrm state in dump"); + return -1; + } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) { + if (xfrm_usersa_found(&answer.info, spi, src, dst, desc)) + found = true; + } + } +} + +static int xfrm_set(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, + struct xfrm_desc *desc) +{ + int err; + + err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc); + if (err) { + printk("Failed to add xfrm state"); + return -1; + } + + err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc); + if (err) { + printk("Failed to add xfrm state"); + return -1; + } + + /* Check dumps for XFRM_MSG_GETSA */ + err = xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc); + err |= xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc); + if (err) { + printk("Failed to check xfrm state"); + return -1; + } + + return 0; +} + +static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, uint8_t dir, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userpolicy_info info; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrm_user_tmpl tmpl; + + memset(&req, 0, sizeof(req)); + memset(&tmpl, 0, sizeof(tmpl)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = XFRM_MSG_NEWPOLICY; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill selector. */ + memcpy(&req.info.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.info.sel.saddr, &src, sizeof(tunsrc)); + req.info.sel.family = AF_INET; + req.info.sel.prefixlen_d = PREFIX_LEN; + req.info.sel.prefixlen_s = PREFIX_LEN; + + /* Fill lifteme_cfg */ + req.info.lft.soft_byte_limit = XFRM_INF; + req.info.lft.hard_byte_limit = XFRM_INF; + req.info.lft.soft_packet_limit = XFRM_INF; + req.info.lft.hard_packet_limit = XFRM_INF; + + req.info.dir = dir; + + /* Fill tmpl */ + memcpy(&tmpl.id.daddr, &dst, sizeof(dst)); + /* Note: zero-spi cannot be deleted */ + tmpl.id.spi = spi; + tmpl.id.proto = proto; + tmpl.family = AF_INET; + memcpy(&tmpl.saddr, &src, sizeof(src)); + tmpl.mode = XFRM_MODE_TUNNEL; + tmpl.aalgos = (~(uint32_t)0); + tmpl.ealgos = (~(uint32_t)0); + tmpl.calgos = (~(uint32_t)0); + + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_prepare(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, + XFRM_POLICY_OUT, tunsrc, tundst, proto)) { + printk("Failed to add xfrm policy"); + return -1; + } + + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, + XFRM_POLICY_IN, tunsrc, tundst, proto)) { + printk("Failed to add xfrm policy"); + return -1; + } + + return 0; +} + +static int xfrm_policy_del(int xfrm_sock, uint32_t seq, + struct in_addr src, struct in_addr dst, uint8_t dir, + struct in_addr tunsrc, struct in_addr tundst) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userpolicy_id id; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id)); + req.nh.nlmsg_type = XFRM_MSG_DELPOLICY; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill id */ + memcpy(&req.id.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.id.sel.saddr, &src, sizeof(tunsrc)); + req.id.sel.family = AF_INET; + req.id.sel.prefixlen_d = PREFIX_LEN; + req.id.sel.prefixlen_s = PREFIX_LEN; + req.id.dir = dir; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_cleanup(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst) +{ + if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst, + XFRM_POLICY_OUT, tunsrc, tundst)) { + printk("Failed to add xfrm policy"); + return -1; + } + + if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src, + XFRM_POLICY_IN, tunsrc, tundst)) { + printk("Failed to add xfrm policy"); + return -1; + } + + return 0; +} + +static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_usersa_id id; + char attrbuf[MAX_PAYLOAD]; + } req; + xfrm_address_t saddr = {}; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id)); + req.nh.nlmsg_type = XFRM_MSG_DELSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + memcpy(&req.id.daddr, &dst, sizeof(dst)); + req.id.family = AF_INET; + req.id.proto = proto; + /* Note: zero-spi cannot be deleted */ + req.id.spi = spi; + + memcpy(&saddr, &src, sizeof(src)); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_delete(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto)) { + printk("Failed to remove xfrm state"); + return -1; + } + + if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto)) { + printk("Failed to remove xfrm state"); + return -1; + } + + return 0; +} + +static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq, + uint32_t spi, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userspi_info spi; + } req; + struct { + struct nlmsghdr nh; + union { + struct xfrm_usersa_info info; + int error; + }; + } answer; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.spi)); + req.nh.nlmsg_type = XFRM_MSG_ALLOCSPI; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + + req.spi.info.family = AF_INET; + req.spi.min = spi; + req.spi.max = spi; + req.spi.info.id.proto = proto; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) { + uint32_t new_spi = htonl(answer.info.id.spi); + + if (new_spi != spi) { + printk("allocated spi is different from requested: %#x != %#x", + new_spi, spi); + return KSFT_FAIL; + } + return KSFT_PASS; + } else if (answer.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type); + return KSFT_FAIL; + } + + printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error)); + return (answer.error) ? KSFT_FAIL : KSFT_PASS; +} + +static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups) +{ + struct sockaddr_nl snl = {}; + socklen_t addr_len; + int ret = -1; + + snl.nl_family = AF_NETLINK; + snl.nl_groups = groups; + + if (netlink_sock(sock, seq, proto)) { + printk("Failed to open xfrm netlink socket"); + return -1; + } + + if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) { + pr_err("bind()"); + goto out_close; + } + + addr_len = sizeof(snl); + if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) { + pr_err("getsockname()"); + goto out_close; + } + if (addr_len != sizeof(snl)) { + printk("Wrong address length %d", addr_len); + goto out_close; + } + if (snl.nl_family != AF_NETLINK) { + printk("Wrong address family %d", snl.nl_family); + goto out_close; + } + return 0; + +out_close: + close(*sock); + return ret; +} + +static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_acquire acq; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrm_user_tmpl xfrm_tmpl = {}; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.acq)); + req.nh.nlmsg_type = XFRM_MSG_ACQUIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + req.acq.policy.sel.family = AF_INET; + req.acq.aalgos = 0xfeed; + req.acq.ealgos = 0xbaad; + req.acq.calgos = 0xbabe; + + xfrm_tmpl.family = AF_INET; + xfrm_tmpl.id.proto = IPPROTO_ESP; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl))) + goto out_close; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad + || req.acq.calgos != 0xbabe) { + printk("xfrm_user_acquire has changed %x %x %x", + req.acq.aalgos, req.acq.ealgos, req.acq.calgos); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int xfrm_expire_state(int xfrm_sock, uint32_t *seq, + unsigned int nr, struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_expire expire; + int error; + }; + } req; + struct in_addr src, dst; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + + if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) { + printk("Failed to add xfrm state"); + return KSFT_FAIL; + } + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire)); + req.nh.nlmsg_type = XFRM_MSG_EXPIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst)); + req.expire.state.id.spi = gen_spi(src); + req.expire.state.id.proto = desc->proto; + req.expire.state.family = AF_INET; + req.expire.hard = 0xff; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.expire.hard != 0x1) { + printk("expire.hard is not set: %x", req.expire.hard); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq, + unsigned int nr, struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_polexpire expire; + int error; + }; + } req; + struct in_addr src, dst, tunsrc, tundst; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, child_ip(nr)); + tundst = inet_makeaddr(INADDR_A, grchild_ip(nr)); + + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, + XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) { + printk("Failed to add xfrm policy"); + return KSFT_FAIL; + } + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire)); + req.nh.nlmsg_type = XFRM_MSG_POLEXPIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + /* Fill selector. */ + memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.expire.pol.sel.saddr, &src, sizeof(tunsrc)); + req.expire.pol.sel.family = AF_INET; + req.expire.pol.sel.prefixlen_d = PREFIX_LEN; + req.expire.pol.sel.prefixlen_s = PREFIX_LEN; + req.expire.pol.dir = XFRM_POLICY_OUT; + req.expire.hard = 0xff; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.expire.hard != 0x1) { + printk("expire.hard is not set: %x", req.expire.hard); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int child_serv(int xfrm_sock, uint32_t *seq, + unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) +{ + struct in_addr src, dst, tunsrc, tundst; + struct test_desc msg; + int ret = KSFT_FAIL; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, child_ip(nr)); + tundst = inet_makeaddr(INADDR_A, grchild_ip(nr)); + + /* UDP pinging without xfrm */ + if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) { + printk("ping failed before setting xfrm"); + return KSFT_FAIL; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_PREPARE; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + + if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) { + printk("failed to prepare xfrm"); + goto cleanup; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_ADD; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) { + printk("failed to set xfrm"); + goto delete; + } + + /* UDP pinging with xfrm tunnel */ + if (do_ping(cmd_fd, buf, page_size, tunsrc, + true, 0, 0, udp_ping_send)) { + printk("ping failed for xfrm"); + goto delete; + } + + ret = KSFT_PASS; +delete: + /* xfrm delete */ + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_DEL; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + + if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) { + printk("failed ping to remove xfrm"); + ret = KSFT_FAIL; + } + +cleanup: + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_CLEANUP; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) { + printk("failed ping to cleanup xfrm"); + ret = KSFT_FAIL; + } + return ret; +} + +static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) +{ + struct xfrm_desc desc; + struct test_desc msg; + int xfrm_sock = -1; + uint32_t seq; + + if (switch_ns(nsfd_childa)) + exit(KSFT_FAIL); + + if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) { + printk("Failed to open xfrm netlink socket"); + exit(KSFT_FAIL); + } + + /* Check that seq sock is ready, just for sure. */ + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_ACK; + write_msg(cmd_fd, &msg, 1); + read_msg(cmd_fd, &msg, 1); + if (msg.type != MSG_ACK) { + printk("Ack failed"); + exit(KSFT_FAIL); + } + + for (;;) { + ssize_t received = read(test_desc_fd, &desc, sizeof(desc)); + int ret; + + if (received == 0) /* EOF */ + break; + + if (received != sizeof(desc)) { + pr_err("read() returned %zd", received); + exit(KSFT_FAIL); + } + + switch (desc.type) { + case CREATE_TUNNEL: + ret = child_serv(xfrm_sock, &seq, nr, + cmd_fd, buf, &desc); + break; + case ALLOCATE_SPI: + ret = xfrm_state_allocspi(xfrm_sock, &seq, + -1, desc.proto); + break; + case MONITOR_ACQUIRE: + ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr); + break; + case EXPIRE_STATE: + ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc); + break; + case EXPIRE_POLICY: + ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); + break; + default: + printk("Unknown desc type %d", desc.type); + exit(KSFT_FAIL); + } + write_test_result(ret, &desc); + } + + close(xfrm_sock); + + msg.type = MSG_EXIT; + write_msg(cmd_fd, &msg, 1); + exit(KSFT_PASS); +} + +static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf, + struct test_desc *msg, int xfrm_sock, uint32_t *seq) +{ + struct in_addr src, dst, tunsrc, tundst; + bool tun_reply; + struct xfrm_desc *desc = &msg->body.xfrm_desc; + + src = inet_makeaddr(INADDR_B, grchild_ip(nr)); + dst = inet_makeaddr(INADDR_B, child_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr)); + tundst = inet_makeaddr(INADDR_A, child_ip(nr)); + + switch (msg->type) { + case MSG_EXIT: + exit(KSFT_PASS); + case MSG_ACK: + write_msg(cmd_fd, msg, 1); + break; + case MSG_PING: + tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t)); + /* UDP pinging without xfrm */ + if (do_ping(cmd_fd, buf, page_size, tun_reply ? tunsrc : src, + false, msg->body.ping.port, + msg->body.ping.reply_ip, udp_ping_reply)) { + printk("ping failed before setting xfrm"); + } + break; + case MSG_XFRM_PREPARE: + if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, + desc->proto)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to prepare xfrm"); + } + break; + case MSG_XFRM_ADD: + if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to set xfrm"); + } + break; + case MSG_XFRM_DEL: + if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, + desc->proto)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to remove xfrm"); + } + break; + case MSG_XFRM_CLEANUP: + if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) { + printk("failed to cleanup xfrm"); + } + break; + default: + printk("got unknown msg type %d", msg->type); + }; +} + +static int grand_child_f(unsigned int nr, int cmd_fd, void *buf) +{ + struct test_desc msg; + int xfrm_sock = -1; + uint32_t seq; + + if (switch_ns(nsfd_childb)) + exit(KSFT_FAIL); + + if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) { + printk("Failed to open xfrm netlink socket"); + exit(KSFT_FAIL); + } + + do { + read_msg(cmd_fd, &msg, 1); + grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq); + } while (1); + + close(xfrm_sock); + exit(KSFT_FAIL); +} + +static int start_child(unsigned int nr, char *veth, int test_desc_fd[2]) +{ + int cmd_sock[2]; + void *data_map; + pid_t child; + + if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr))) + return -1; + + if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr))) + return -1; + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } else if (child) { + /* in parent - selftest */ + return switch_ns(nsfd_parent); + } + + if (close(test_desc_fd[1])) { + pr_err("close()"); + return -1; + } + + /* child */ + data_map = mmap(0, page_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (data_map == MAP_FAILED) { + pr_err("mmap()"); + return -1; + } + + randomize_buffer(data_map, page_size); + + if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) { + pr_err("socketpair()"); + return -1; + } + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } else if (child) { + if (close(cmd_sock[0])) { + pr_err("close()"); + return -1; + } + return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map); + } + if (close(cmd_sock[1])) { + pr_err("close()"); + return -1; + } + return grand_child_f(nr, cmd_sock[0], data_map); +} + +static void exit_usage(char **argv) +{ + printk("Usage: %s [nr_process]", argv[0]); + exit(KSFT_FAIL); +} + +static int __write_desc(int test_desc_fd, struct xfrm_desc *desc) +{ + ssize_t ret; + + ret = write(test_desc_fd, desc, sizeof(*desc)); + + if (ret == sizeof(*desc)) + return 0; + + pr_err("Writing test's desc failed %ld", ret); + + return -1; +} + +static int write_desc(int proto, int test_desc_fd, + char *a, char *e, char *c, char *ae) +{ + struct xfrm_desc desc = {}; + + desc.type = CREATE_TUNNEL; + desc.proto = proto; + + if (a) + strncpy(desc.a_algo, a, ALGO_LEN - 1); + if (e) + strncpy(desc.e_algo, e, ALGO_LEN - 1); + if (c) + strncpy(desc.c_algo, c, ALGO_LEN - 1); + if (ae) + strncpy(desc.ae_algo, ae, ALGO_LEN - 1); + + return __write_desc(test_desc_fd, &desc); +} + +int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP }; +char *ah_list[] = { + "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)", + "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)", + "xcbc(aes)", "cmac(aes)" +}; +char *comp_list[] = { + "deflate", +#if 0 + /* No compression backend realization */ + "lzs", "lzjh" +#endif +}; +char *e_list[] = { + "ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)", + "cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)", + "cbc(twofish)", "rfc3686(ctr(aes))" +}; +char *ae_list[] = { +#if 0 + /* not implemented */ + "rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))", + "rfc7539esp(chacha20,poly1305)" +#endif +}; + +const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \ + + (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \ + + ARRAY_SIZE(ae_list); + +static int write_proto_plan(int fd, int proto) +{ + unsigned int i; + + switch (proto) { + case IPPROTO_AH: + for (i = 0; i < ARRAY_SIZE(ah_list); i++) { + if (write_desc(proto, fd, ah_list[i], 0, 0, 0)) + return -1; + } + break; + case IPPROTO_COMP: + for (i = 0; i < ARRAY_SIZE(comp_list); i++) { + if (write_desc(proto, fd, 0, 0, comp_list[i], 0)) + return -1; + } + break; + case IPPROTO_ESP: + for (i = 0; i < ARRAY_SIZE(ah_list); i++) { + int j; + + for (j = 0; j < ARRAY_SIZE(e_list); j++) { + if (write_desc(proto, fd, ah_list[i], + e_list[j], 0, 0)) + return -1; + } + } + for (i = 0; i < ARRAY_SIZE(ae_list); i++) { + if (write_desc(proto, fd, 0, 0, 0, ae_list[i])) + return -1; + } + break; + default: + printk("BUG: Specified unknown proto %d", proto); + return -1; + } + + return 0; +} + +/* + * Some structures in xfrm uapi header differ in size between + * 64-bit and 32-bit ABI: + * + * 32-bit UABI | 64-bit UABI + * -------------------------------------|------------------------------------- + * sizeof(xfrm_usersa_info) = 220 | sizeof(xfrm_usersa_info) = 224 + * sizeof(xfrm_userpolicy_info) = 164 | sizeof(xfrm_userpolicy_info) = 168 + * sizeof(xfrm_userspi_info) = 228 | sizeof(xfrm_userspi_info) = 232 + * sizeof(xfrm_user_acquire) = 276 | sizeof(xfrm_user_acquire) = 280 + * sizeof(xfrm_user_expire) = 224 | sizeof(xfrm_user_expire) = 232 + * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 + * + * Check the affected by the UABI difference structures. + */ +const unsigned int compat_plan = 4; +static int write_compat_struct_tests(int test_desc_fd) +{ + struct xfrm_desc desc = {}; + + desc.type = ALLOCATE_SPI; + desc.proto = IPPROTO_AH; + strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1); + + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = MONITOR_ACQUIRE; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = EXPIRE_STATE; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = EXPIRE_POLICY; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + return 0; +} + +static int write_test_plan(int test_desc_fd) +{ + unsigned int i; + pid_t child; + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } + if (child) { + if (close(test_desc_fd)) + printk("close(): %m"); + return 0; + } + + if (write_compat_struct_tests(test_desc_fd)) + exit(KSFT_FAIL); + + for (i = 0; i < ARRAY_SIZE(proto_list); i++) { + if (write_proto_plan(test_desc_fd, proto_list[i])) + exit(KSFT_FAIL); + } + + exit(KSFT_PASS); +} + +static int children_cleanup(void) +{ + unsigned ret = KSFT_PASS; + + while (1) { + int status; + pid_t p = wait(&status); + + if ((p < 0) && errno == ECHILD) + break; + + if (p < 0) { + pr_err("wait()"); + return KSFT_FAIL; + } + + if (!WIFEXITED(status)) { + ret = KSFT_FAIL; + continue; + } + + if (WEXITSTATUS(status) == KSFT_FAIL) + ret = KSFT_FAIL; + } + + return ret; +} + +typedef void (*print_res)(const char *, ...); + +static int check_results(void) +{ + struct test_result tr = {}; + struct xfrm_desc *d = &tr.desc; + int ret = KSFT_PASS; + + while (1) { + ssize_t received = read(results_fd[0], &tr, sizeof(tr)); + print_res result; + + if (received == 0) /* EOF */ + break; + + if (received != sizeof(tr)) { + pr_err("read() returned %zd", received); + return KSFT_FAIL; + } + + switch (tr.res) { + case KSFT_PASS: + result = ksft_test_result_pass; + break; + case KSFT_FAIL: + default: + result = ksft_test_result_fail; + ret = KSFT_FAIL; + } + + result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n", + desc_name[d->type], (unsigned int)d->proto, d->a_algo, + d->e_algo, d->c_algo, d->ae_algo, d->icv_len); + } + + return ret; +} + +int main(int argc, char **argv) +{ + unsigned int nr_process = 1; + int route_sock = -1, ret = KSFT_SKIP; + int test_desc_fd[2]; + uint32_t route_seq; + unsigned int i; + + if (argc > 2) + exit_usage(argv); + + if (argc > 1) { + char *endptr; + + errno = 0; + nr_process = strtol(argv[1], &endptr, 10); + if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN)) + || (errno != 0 && nr_process == 0) + || (endptr == argv[1]) || (*endptr != '\0')) { + printk("Failed to parse [nr_process]"); + exit_usage(argv); + } + + if (nr_process > MAX_PROCESSES || !nr_process) { + printk("nr_process should be between [1; %u]", + MAX_PROCESSES); + exit_usage(argv); + } + } + + srand(time(NULL)); + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 1) + ksft_exit_skip("sysconf(): %m\n"); + + if (pipe2(test_desc_fd, O_DIRECT) < 0) + ksft_exit_skip("pipe(): %m\n"); + + if (pipe2(results_fd, O_DIRECT) < 0) + ksft_exit_skip("pipe(): %m\n"); + + if (init_namespaces()) + ksft_exit_skip("Failed to create namespaces\n"); + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + ksft_exit_skip("Failed to open netlink route socket\n"); + + for (i = 0; i < nr_process; i++) { + char veth[VETH_LEN]; + + snprintf(veth, VETH_LEN, VETH_FMT, i); + + if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) { + close(route_sock); + ksft_exit_fail_msg("Failed to create veth device"); + } + + if (start_child(i, veth, test_desc_fd)) { + close(route_sock); + ksft_exit_fail_msg("Child %u failed to start", i); + } + } + + if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1])) + ksft_exit_fail_msg("close(): %m"); + + ksft_set_plan(proto_plan + compat_plan); + + if (write_test_plan(test_desc_fd[1])) + ksft_exit_fail_msg("Failed to write test plan to pipe"); + + ret = check_results(); + + if (children_cleanup() == KSFT_FAIL) + exit(KSFT_FAIL); + + exit(ret); +} diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index aa254aefc2c3..00bb158b4a5d 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -5,7 +5,8 @@ KSFT_KHDR_INSTALL := 1 CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh +TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ + simult_flows.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 8df5cb8f71ff..741a1c4f4ae8 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,5 @@ CONFIG_MPTCP=y +CONFIG_IPV6=y CONFIG_MPTCP_IPV6=y CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 090620c3e10c..77bb62feb872 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -54,6 +54,7 @@ static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; +static bool cfg_remove; static int cfg_wait; static void die_usage(void) @@ -65,8 +66,8 @@ static void die_usage(void) fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); fprintf(stderr, "\t-p num -- use port num\n"); - fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n"); - fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n"); + fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); + fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); fprintf(stderr, "\t-u -- check mptcp ulp\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); exit(1); @@ -271,6 +272,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (cfg_join && first && do_w > 100) do_w = 100; + if (cfg_remove && do_w > 50) + do_w = 50; + bw = write(fd, buf, do_w); if (bw < 0) perror("write"); @@ -281,6 +285,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) first = false; } + if (cfg_remove) + usleep(200000); + return bw; } @@ -428,7 +435,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_join) + if (cfg_join || cfg_remove) usleep(cfg_wait); close(peerfd); @@ -686,7 +693,7 @@ static void maybe_close(int fd) { unsigned int r = rand(); - if (!cfg_join && (r & 1)) + if (!(cfg_join || cfg_remove) && (r & 1)) close(fd); } @@ -822,13 +829,18 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) { + while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) { switch (c) { case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; cfg_wait = 400000; break; + case 'r': + cfg_remove = true; + cfg_mode = CFG_MODE_POLL; + cfg_wait = 400000; + break; case 'l': listen_mode = true; break; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 57d75b7f6220..2cfd87d94db8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -14,9 +14,8 @@ capture=false timeout=30 ipv6=true ethtool_random_on=true -tc_delay="$((RANDOM%400))" +tc_delay="$((RANDOM%50))" tc_loss=$((RANDOM%101)) -tc_reorder="" testmode="" sndbuf=0 rcvbuf=0 @@ -444,9 +443,9 @@ do_transfer() duration=$(printf "(duration %05sms)" $duration) if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2 - echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" - echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" cat "$capout" @@ -628,30 +627,32 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns4" $sender dead:beef:3::1 done -[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss +[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms echo -n "INFO: Using loss of $tc_loss " test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " +reorder_delay=$(($tc_delay / 4)) + if [ -z "${tc_reorder}" ]; then reorder1=$((RANDOM%10)) reorder1=$((100 - reorder1)) reorder2=$((RANDOM%100)) - if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then + if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then tc_reorder="reorder ${reorder1}% ${reorder2}%" - echo -n "$tc_reorder " + echo -n "$tc_reorder with delay ${reorder_delay}ms " fi elif [ "$tc_reorder" = "0" ];then tc_reorder="" -elif [ "$tc_delay" -gt 0 ];then +elif [ "$reorder_delay" -gt 0 ];then # reordering requires some delay tc_reorder="reorder $tc_reorder" - echo -n "$tc_reorder " + echo -n "$tc_reorder with delay ${reorder_delay}ms " fi echo "on ns3eth4" -tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder +tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder for sender in $ns1 $ns2 $ns3 $ns4;do run_tests_lo "$ns1" "$sender" 10.0.1.1 1 diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f39c1129ce5f..08f53d86dedc 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,6 +8,7 @@ cin="" cout="" ksft_skip=4 timeout=30 +mptcp_connect="" capture=0 TEST_COUNT=0 @@ -132,6 +133,8 @@ do_transfer() cl_proto="$3" srv_proto="$4" connect_addr="$5" + rm_nr_ns1="$6" + rm_nr_ns2="$7" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -156,14 +159,44 @@ do_transfer() sleep 1 fi - ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & + if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then + mptcp_connect="./mptcp_connect -j" + else + mptcp_connect="./mptcp_connect -r" + fi + + ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & spid=$! sleep 1 - ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & cpid=$! + if [ $rm_nr_ns1 -gt 0 ]; then + counter=1 + sleep 1 + + while [ $counter -le $rm_nr_ns1 ] + do + ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + fi + + if [ $rm_nr_ns2 -gt 0 ]; then + counter=1 + sleep 1 + + while [ $counter -le $rm_nr_ns2 ] + do + ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + fi + wait $cpid retc=$? wait $spid @@ -176,9 +209,9 @@ do_transfer() if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo " client exit code $retc, server $rets" 1>&2 - echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" - echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" cat "$capout" @@ -219,7 +252,24 @@ run_tests() connect_addr="$3" lret=0 - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0 + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return + fi +} + +run_remove_tests() +{ + listener_ns="$1" + connector_ns="$2" + connect_addr="$3" + rm_nr_ns1="$4" + rm_nr_ns2="$5" + lret=0 + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2} lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -276,6 +326,80 @@ chk_join_nr() fi } +chk_add_nr() +{ + local add_nr=$1 + local echo_nr=$2 + local count + local dump_stats + + printf "%-39s %s" " " "add" + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$add_nr" ]; then + echo "[fail] got $count ADD_ADDR[s] expected $add_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - echo " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$echo_nr" ]; then + echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + +chk_rm_nr() +{ + local rm_addr_nr=$1 + local rm_subflow_nr=$2 + local count + local dump_stats + + printf "%-39s %s" " " "rm " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$rm_addr_nr" ]; then + echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - sf " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$rm_subflow_nr" ]; then + echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) @@ -332,6 +456,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "unused signal address" 0 0 0 +chk_add_nr 1 1 # accept and use add_addr reset @@ -340,6 +465,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address" 1 1 1 +chk_add_nr 1 1 # accept and use add_addr with an additional subflow # note: signal address in server ns and local addresses in client ns must @@ -352,6 +478,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal" 2 2 2 +chk_add_nr 1 1 # accept and use add_addr with additional subflows reset @@ -362,6 +489,59 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 +chk_add_nr 1 1 + +# single subflow, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 0 1 +chk_join_nr "remove single subflow" 1 1 1 +chk_rm_nr 1 1 + +# multiple subflows, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 0 2 +chk_join_nr "remove multiple subflows" 2 2 2 +chk_rm_nr 2 2 + +# single address, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_remove_tests $ns1 $ns2 10.0.1.1 1 0 +chk_join_nr "remove single address" 1 1 1 +chk_add_nr 1 1 +chk_rm_nr 0 0 + +# subflow and signal, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 1 1 +chk_join_nr "remove subflow and signal" 2 2 2 +chk_add_nr 1 1 +chk_rm_nr 1 1 + +# subflows and signal, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 1 2 +chk_join_nr "remove subflows and signal" 3 3 3 +chk_add_nr 1 1 +chk_rm_nr 2 2 # single subflow, syncookies reset_with_cookies @@ -396,6 +576,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address with syn cookies" 1 1 1 +chk_add_nr 1 1 # test cookie with subflow and signal reset_with_cookies @@ -405,6 +586,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal w cookies" 2 2 2 +chk_add_nr 1 1 # accept and use add_addr with additional subflows reset_with_cookies @@ -415,5 +597,6 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflows and signal w. cookies" 3 3 3 +chk_add_nr 1 1 exit $ret diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh new file mode 100755 index 000000000000..2f649b431456 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -0,0 +1,293 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" +capture=false +ksft_skip=4 +timeout=30 +test_cnt=1 +ret=0 +bail=0 + +usage() { + echo "Usage: $0 [ -b ] [ -c ] [ -d ]" + echo -e "\t-b: bail out after first error, otherwise runs al testcases" + echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" + echo -e "\t-d: debug this script" +} + +cleanup() +{ + rm -f "$cin" "$cout" + rm -f "$sin" "$sout" + rm -f "$capout" + + local netns + for netns in "$ns1" "$ns2" "$ns3";do + ip netns del $netns + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +# "$ns1" ns2 ns3 +# ns1eth1 ns2eth1 ns2eth3 ns3eth1 +# netem +# ns1eth2 ns2eth2 +# netem + +setup() +{ + large=$(mktemp) + small=$(mktemp) + sout=$(mktemp) + cout=$(mktemp) + capout=$(mktemp) + size=$((2048 * 4096)) + dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1 + dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1 + + trap cleanup EXIT + + for i in "$ns1" "$ns2" "$ns3";do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up + done + + ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" + ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2" + ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3" + + ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1 + ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad + ip -net "$ns1" link set ns1eth1 up mtu 1500 + ip -net "$ns1" route add default via 10.0.1.2 + ip -net "$ns1" route add default via dead:beef:1::2 + + ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2 + ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad + ip -net "$ns1" link set ns1eth2 up mtu 1500 + ip -net "$ns1" route add default via 10.0.2.2 metric 101 + ip -net "$ns1" route add default via dead:beef:2::2 metric 101 + + ip netns exec "$ns1" ./pm_nl_ctl limits 1 1 + ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow + ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0 + + ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 + ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad + ip -net "$ns2" link set ns2eth1 up mtu 1500 + + ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2 + ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad + ip -net "$ns2" link set ns2eth2 up mtu 1500 + + ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3 + ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad + ip -net "$ns2" link set ns2eth3 up mtu 1500 + ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1 + ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1 + + ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1 + ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad + ip -net "$ns3" link set ns3eth1 up mtu 1500 + ip -net "$ns3" route add default via 10.0.3.2 + ip -net "$ns3" route add default via dead:beef:3::2 + + ip netns exec "$ns3" ./pm_nl_ctl limits 1 1 +} + +# $1: ns, $2: port +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +do_transfer() +{ + local cin=$1 + local sin=$2 + local max_time=$3 + local port + port=$((10000+$test_cnt)) + test_cnt=$((test_cnt+1)) + + :> "$cout" + :> "$sout" + :> "$capout" + + local addr_port + addr_port=$(printf "%s:%d" ${connect_addr} ${port}) + + if $capture; then + local capuser + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + local capfile="${rndh}-${port}" + local capopt="-i any -s 65535 -B 32768 ${capuser}" + + ip netns exec ${ns3} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + local cappid_listener=$! + + ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! + + sleep 1 + fi + + ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" & + local spid=$! + + wait_local_port_listen "${ns3}" "${port}" + + local start + start=$(date +%s%3N) + ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" & + local cpid=$! + + wait $cpid + local retc=$? + wait $spid + local rets=$? + + local stop + stop=$(date +%s%3N) + + if $capture; then + sleep 1 + kill ${cappid_listener} + kill ${cappid_connector} + fi + + local duration + duration=$((stop-start)) + + cmp $sin $cout > /dev/null 2>&1 + local cmps=$? + cmp $cin $sout > /dev/null 2>&1 + local cmpc=$? + + printf "%16s" "$duration max $max_time " + if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ + [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \ + [ $duration -lt $max_time ]; then + echo "[ OK ]" + cat "$capout" + return 0 + fi + + echo " [ fail ]" + echo "client exit code $retc, server $rets" 1>&2 + echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2 + ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" + echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2 + ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port" + ls -l $sin $cout + ls -l $cin $sout + + cat "$capout" + return 1 +} + +run_test() +{ + local rate1=$1 + local rate2=$2 + local delay1=$3 + local delay2=$4 + local lret + local dev + shift 4 + local msg=$* + + [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1="" + [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2="" + + for dev in ns1eth1 ns1eth2; do + tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1 + done + for dev in ns2eth1 ns2eth2; do + tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1 + done + tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 + tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 + tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 + tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 + + # time is measure in ms + local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) )) + + # mptcp_connect will do some sleeps to allow the mp_join handshake + # completion + time=$((time + 1350)) + + printf "%-50s" "$msg" + do_transfer $small $large $((time * 11 / 10)) + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + [ $bail -eq 0 ] || exit $ret + fi + + printf "%-50s" "$msg - reverse direction" + do_transfer $large $small $((time * 11 / 10)) + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + [ $bail -eq 0 ] || exit $ret + fi +} + +while getopts "bcdh" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "b") + bail=1 + ;; + "c") + capture=true + ;; + "d") + set -x + ;; + "?") + usage $0 + exit 1 + ;; + esac +done + +setup +run_test 10 10 0 0 "balanced bwidth" +run_test 10 10 1 50 "balanced bwidth with unbalanced delay" + +# we still need some additional infrastructure to pass the following test-cases +# run_test 30 10 0 0 "unbalanced bwidth" +# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" +# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +exit $ret diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 93208caacbe6..f75c53ce0a2d 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -1667,6 +1667,8 @@ int main(int argc, char *argv[]) case 'R': args.type = SOCK_RAW; args.port = 0; + if (!args.protocol) + args.protocol = IPPROTO_RAW; break; case 'P': pe = getprotobyname(optarg); diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh index 6331d91b86a6..170be65e0816 100755 --- a/tools/testing/selftests/net/psock_snd.sh +++ b/tools/testing/selftests/net/psock_snd.sh @@ -45,7 +45,7 @@ echo "raw vnet hdr" echo "raw csum_off" ./in_netns.sh ./psock_snd -v -c -echo "raw csum_off with bad offset (fails)" +echo "raw csum_off with bad offset (expected to fail)" (! ./in_netns.sh ./psock_snd -v -c -C) @@ -57,7 +57,7 @@ echo "raw min size" echo "raw mtu size" ./in_netns.sh ./psock_snd -l "${mss}" -echo "raw mtu size + 1 (fails)" +echo "raw mtu size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -l "${mss_exceeds}") # fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed @@ -65,19 +65,19 @@ echo "raw mtu size + 1 (fails)" # echo "raw vlan mtu size" # ./in_netns.sh ./psock_snd -V -l "${mss}" -echo "raw vlan mtu size + 1 (fails)" +echo "raw vlan mtu size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}") echo "dgram mtu size" ./in_netns.sh ./psock_snd -d -l "${mss}" -echo "dgram mtu size + 1 (fails)" +echo "dgram mtu size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}") -echo "raw truncate hlen (fails: does not arrive)" +echo "raw truncate hlen (expected to fail: does not arrive)" (! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))") -echo "raw truncate hlen - 1 (fails: EINVAL)" +echo "raw truncate hlen - 1 (expected to fail: EINVAL)" (! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))") @@ -86,13 +86,13 @@ echo "raw truncate hlen - 1 (fails: EINVAL)" echo "raw gso min size" ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}" -echo "raw gso min size - 1 (fails)" +echo "raw gso min size - 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}") echo "raw gso max size" ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}" -echo "raw gso max size + 1 (fails)" +echo "raw gso max size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}") echo "OK. All tests passed" diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 8a2fe6d64bf2..c9ce3dfa42ee 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -520,6 +520,11 @@ kci_test_encap_fou() return $ksft_skip fi + if ! /sbin/modprobe -q -n fou; then + echo "SKIP: module fou is not found" + return $ksft_skip + fi + /sbin/modprobe -q fou ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null if [ $? -ne 0 ];then echo "FAIL: can't add fou port 7777, skipping test" diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index a61b7b3da549..00f837c9bc6c 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -123,6 +123,28 @@ void hash_zone(void *zone, unsigned int length) #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) #define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) + +static void *mmap_large_buffer(size_t need, size_t *allocated) +{ + void *buffer; + size_t sz; + + /* Attempt to use huge pages if possible. */ + sz = ALIGN_UP(need, map_align); + buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + + if (buffer == (void *)-1) { + sz = need; + buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buffer != (void *)-1) + fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n"); + } + *allocated = sz; + return buffer; +} + void *child_thread(void *arg) { unsigned long total_mmap = 0, total = 0; @@ -135,6 +157,7 @@ void *child_thread(void *arg) void *addr = NULL; double throughput; struct rusage ru; + size_t buffer_sz; int lu, fd; fd = (int)(unsigned long)arg; @@ -142,9 +165,9 @@ void *child_thread(void *arg) gettimeofday(&t0, NULL); fcntl(fd, F_SETFL, O_NDELAY); - buffer = malloc(chunk_size); - if (!buffer) { - perror("malloc"); + buffer = mmap_large_buffer(chunk_size, &buffer_sz); + if (buffer == (void *)-1) { + perror("mmap"); goto error; } if (zflg) { @@ -179,6 +202,10 @@ void *child_thread(void *arg) total_mmap += zc.length; if (xflg) hash_zone(addr, zc.length); + /* It is more efficient to unmap the pages right now, + * instead of doing this in next TCP_ZEROCOPY_RECEIVE. + */ + madvise(addr, zc.length, MADV_DONTNEED); total += zc.length; } if (zc.recv_skip_hint) { @@ -230,7 +257,7 @@ end: ru.ru_nvcsw); } error: - free(buffer); + munmap(buffer, buffer_sz); close(fd); if (zflg) munmap(raddr, chunk_size + map_align); @@ -347,6 +374,7 @@ int main(int argc, char *argv[]) uint64_t total = 0; char *host = NULL; int fd, c, on = 1; + size_t buffer_sz; char *buffer; int sflg = 0; int mss = 0; @@ -437,8 +465,8 @@ int main(int argc, char *argv[]) } do_accept(fdlisten); } - buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + buffer = mmap_large_buffer(chunk_size, &buffer_sz); if (buffer == (char *)-1) { perror("mmap"); exit(1); @@ -484,6 +512,6 @@ int main(int argc, char *argv[]) total += wr; } close(fd); - munmap(buffer, chunk_size); + munmap(buffer, buffer_sz); return 0; } diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh new file mode 100755 index 000000000000..23cf924754a5 --- /dev/null +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -0,0 +1,626 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved. +# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved. +# +# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS. +# +# +# Symmetric routing topology +# +# blue red +# +----+ .253 +----+ .253 +----+ +# | h1 |-------------------| r1 |-------------------| h2 | +# +----+ .1 +----+ .2 +----+ +# 172.16.1/24 172.16.2/24 +# 2001:db8:16:1/64 2001:db8:16:2/64 +# +# +# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route +# to the outgoing vrf red for the n2 network and red has a route back to n1. +# The red VRF interface has a MTU of 1400. +# +# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the +# output of the command to check that a ttl expired error is received. +# +# The second test runs traceroute from h1 to h2 and parses the output to check +# for a hop on r1. +# +# The third test sends a ping with a packet size of 1450 from h1 to h2 and +# parses the output of the command to check that a fragmentation error is +# received. +# +# +# Asymmetric routing topology +# +# This topology represents a customer setup where the issue with icmp errors +# and VRF route leaking was initialy reported. The MTU test isn't done here +# because of the lack of a return route in the red VRF. +# +# blue red +# .253 +----+ .253 +# +----| r1 |----+ +# | +----+ | +# +----+ | | +----+ +# | h1 |--------------+ +--------------| h2 | +# +----+ .1 | | .2 +----+ +# 172.16.1/24 | +----+ | 172.16.2/24 +# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64 +# .254 +----+ .254 +# +# +# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the +# outgoing vrf red for the n2 network but red doesn't have a route back to n1. +# Route from h2 to h1 goes through r2. +# +# The objective is to check that the incoming vrf routing table is selected +# to send an ICMP error back to the source when the ttl of a packet reaches 1 +# while it is forwarded between different vrfs. + +VERBOSE=0 +PAUSE_ON_FAIL=no +DEFAULT_TTYPE=sym + +H1_N1=172.16.1.0/24 +H1_N1_6=2001:db8:16:1::/64 + +H1_N1_IP=172.16.1.1 +R1_N1_IP=172.16.1.253 +R2_N1_IP=172.16.1.254 + +H1_N1_IP6=2001:db8:16:1::1 +R1_N1_IP6=2001:db8:16:1::253 +R2_N1_IP6=2001:db8:16:1::254 + +H2_N2=172.16.2.0/24 +H2_N2_6=2001:db8:16:2::/64 + +H2_N2_IP=172.16.2.2 +R1_N2_IP=172.16.2.253 +R2_N2_IP=172.16.2.254 + +H2_N2_IP6=2001:db8:16:2::2 +R1_N2_IP6=2001:db8:16:2::253 +R2_N2_IP6=2001:db8:16:2::254 + +################################################################################ +# helpers + +log_section() +{ + echo + echo "###########################################################################" + echo "$*" + echo "###########################################################################" + echo +} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read -r a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +run_cmd() +{ + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + # shellcheck disable=SC2086 + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then + echo "$out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +run_cmd_grep() +{ + local grep_pattern="$1" + shift + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + # shellcheck disable=SC2086 + out=$(eval $cmd 2>&1) + if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then + echo "$out" + fi + + echo "$out" | grep -q "$grep_pattern" + rc=$? + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# setup and teardown + +cleanup() +{ + local ns + + for ns in h1 h2 r1 r2; do + ip netns del $ns 2>/dev/null + done +} + +setup_vrf() +{ + local ns=$1 + + ip -netns "${ns}" rule del pref 0 + ip -netns "${ns}" rule add pref 32765 from all lookup local + ip -netns "${ns}" -6 rule del pref 0 + ip -netns "${ns}" -6 rule add pref 32765 from all lookup local +} + +create_vrf() +{ + local ns=$1 + local vrf=$2 + local table=$3 + + ip -netns "${ns}" link add "${vrf}" type vrf table "${table}" + ip -netns "${ns}" link set "${vrf}" up + ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192 + ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192 + + ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}" + ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad +} + +setup_sym() +{ + local ns + + # make sure we are starting with a clean slate + cleanup + + # + # create nodes as namespaces + # + for ns in h1 h2 r1; do + ip netns add $ns + ip -netns $ns link set lo up + + case "${ns}" in + h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + ;; + r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + esac + done + + # + # create interconnects + # + ip -netns h1 link add eth0 type veth peer name r1h1 + ip -netns h1 link set r1h1 netns r1 name eth0 up + + ip -netns h2 link add eth0 type veth peer name r1h2 + ip -netns h2 link set r1h2 netns r1 name eth1 up + + # + # h1 + # + ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24 + ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad + ip -netns h1 link set eth0 up + + # h1 to h2 via r1 + ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 + ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 + + # + # h2 + # + ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns h2 link set eth0 up + + # h2 to h1 via r1 + ip -netns h2 route add default via ${R1_N2_IP} dev eth0 + ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0 + + # + # r1 + # + setup_vrf r1 + create_vrf r1 blue 1101 + create_vrf r1 red 1102 + ip -netns r1 link set mtu 1400 dev eth1 + ip -netns r1 link set eth0 vrf blue up + ip -netns r1 link set eth1 vrf red up + ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + + # Route leak from blue to red + ip -netns r1 route add vrf blue ${H2_N2} dev red + ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + + # Route leak from red to blue + ip -netns r1 route add vrf red ${H1_N1} dev blue + ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue + + + # Wait for ip config to settle + sleep 2 +} + +setup_asym() +{ + local ns + + # make sure we are starting with a clean slate + cleanup + + # + # create nodes as namespaces + # + for ns in h1 h2 r1 r2; do + ip netns add $ns + ip -netns $ns link set lo up + + case "${ns}" in + h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + ;; + r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + esac + done + + # + # create interconnects + # + ip -netns h1 link add eth0 type veth peer name r1h1 + ip -netns h1 link set r1h1 netns r1 name eth0 up + + ip -netns h1 link add eth1 type veth peer name r2h1 + ip -netns h1 link set r2h1 netns r2 name eth0 up + + ip -netns h2 link add eth0 type veth peer name r1h2 + ip -netns h2 link set r1h2 netns r1 name eth1 up + + ip -netns h2 link add eth1 type veth peer name r2h2 + ip -netns h2 link set r2h2 netns r2 name eth1 up + + # + # h1 + # + ip -netns h1 link add br0 type bridge + ip -netns h1 link set br0 up + ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns h1 link set eth0 master br0 up + ip -netns h1 link set eth1 master br0 up + + # h1 to h2 via r1 + ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 + ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 + + # + # h2 + # + ip -netns h2 link add br0 type bridge + ip -netns h2 link set br0 up + ip -netns h2 addr add dev br0 ${H2_N2_IP}/24 + ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad + ip -netns h2 link set eth0 master br0 up + ip -netns h2 link set eth1 master br0 up + + # h2 to h1 via r2 + ip -netns h2 route add default via ${R2_N2_IP} dev br0 + ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0 + + # + # r1 + # + setup_vrf r1 + create_vrf r1 blue 1101 + create_vrf r1 red 1102 + ip -netns r1 link set mtu 1400 dev eth1 + ip -netns r1 link set eth0 vrf blue up + ip -netns r1 link set eth1 vrf red up + ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + + # Route leak from blue to red + ip -netns r1 route add vrf blue ${H2_N2} dev red + ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + + # No route leak from red to blue + + # + # r2 + # + ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24 + ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad + + # Wait for ip config to settle + sleep 2 +} + +check_connectivity() +{ + ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 + log_test $? 0 "Basic IPv4 connectivity" + return $? +} + +check_connectivity6() +{ + ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 + log_test $? 0 "Basic IPv6 connectivity" + return $? +} + +check_traceroute() +{ + if [ ! -x "$(command -v traceroute)" ]; then + echo "SKIP: Could not run IPV4 test without traceroute" + return 1 + fi +} + +check_traceroute6() +{ + if [ ! -x "$(command -v traceroute6)" ]; then + echo "SKIP: Could not run IPV6 test without traceroute6" + return 1 + fi +} + +ipv4_traceroute() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute" + + check_traceroute || return + + setup_"$ttype" + + check_connectivity || return + + run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP} + log_test $? 0 "Traceroute reports a hop on r1" +} + +ipv4_traceroute_asym() +{ + ipv4_traceroute asym +} + +ipv6_traceroute() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute" + + check_traceroute6 || return + + setup_"$ttype" + + check_connectivity6 || return + + run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6} + log_test $? 0 "Traceroute6 reports a hop on r1" +} + +ipv6_traceroute_asym() +{ + ipv6_traceroute asym +} + +ipv4_ping_ttl() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping" + + setup_"$ttype" + + check_connectivity || return + + run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP} + log_test $? 0 "Ping received ICMP ttl exceeded" +} + +ipv4_ping_ttl_asym() +{ + ipv4_ping_ttl asym +} + +ipv4_ping_frag() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping" + + setup_"$ttype" + + check_connectivity || return + + run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} + log_test $? 0 "Ping received ICMP Frag needed" +} + +ipv4_ping_frag_asym() +{ + ipv4_ping_frag asym +} + +ipv6_ping_ttl() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping" + + setup_"$ttype" + + check_connectivity6 || return + + run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} + log_test $? 0 "Ping received ICMP Hop limit" +} + +ipv6_ping_ttl_asym() +{ + ipv6_ping_ttl asym +} + +ipv6_ping_frag() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping" + + setup_"$ttype" + + check_connectivity6 || return + + run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} + log_test $? 0 "Ping received ICMP Packet too big" +} + +ipv6_ping_frag_asym() +{ + ipv6_ping_frag asym +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -4 Run IPv4 tests only + -6 Run IPv6 tests only + -t TEST Run only TEST + -p Pause on fail + -v verbose mode (show commands and output) +EOF +} + +################################################################################ +# main + +# Some systems don't have a ping6 binary anymore +command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) + +TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym" +TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym" + +ret=0 +nsuccess=0 +nfail=0 + +while getopts :46t:pvh o +do + case $o in + 4) TESTS=ipv4;; + 6) TESTS=ipv6;; + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=1;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# +# show user test config +# +if [ -z "$TESTS" ]; then + TESTS="$TESTS_IPV4 $TESTS_IPV6" +elif [ "$TESTS" = "ipv4" ]; then + TESTS="$TESTS_IPV4" +elif [ "$TESTS" = "ipv6" ]; then + TESTS="$TESTS_IPV6" +fi + +for t in $TESTS +do + case $t in + ipv4_ping_ttl|ping) ipv4_ping_ttl;;& + ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;& + ipv4_traceroute|traceroute) ipv4_traceroute;;& + ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;& + ipv4_ping_frag|ping) ipv4_ping_frag;;& + + ipv6_ping_ttl|ping) ipv6_ping_ttl;;& + ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;& + ipv6_traceroute|traceroute) ipv6_traceroute;;& + ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;& + ipv6_ping_frag|ping) ipv6_ping_frag;;& + + # setup namespaces and config, but do not run any tests + setup_sym|setup) setup_sym; exit 0;; + setup_asym) setup_asym; exit 0;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +cleanup + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} + +exit $ret diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore new file mode 100644 index 000000000000..8448f74adfec --- /dev/null +++ b/tools/testing/selftests/netfilter/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +nf-queue diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c index 29c73bce38fa..9e56b9d47037 100644 --- a/tools/testing/selftests/netfilter/nf-queue.c +++ b/tools/testing/selftests/netfilter/nf-queue.c @@ -17,9 +17,12 @@ struct options { bool count_packets; + bool gso_enabled; int verbose; unsigned int queue_num; unsigned int timeout; + uint32_t verdict; + uint32_t delay_ms; }; static unsigned int queue_stats[5]; @@ -27,7 +30,7 @@ static struct options opts; static void help(const char *p) { - printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p); + printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); } static int parse_attr_cb(const struct nlattr *attr, void *data) @@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num) } static struct nlmsghdr * -nfq_build_verdict(char *buf, int id, int queue_num, int verd) +nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd) { struct nfqnl_msg_verdict_hdr vh = { .verdict = htonl(verd), @@ -189,9 +192,6 @@ static void print_stats(void) unsigned int last, total; int i; - if (!opts.count_packets) - return; - total = 0; last = queue_stats[0]; @@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void) nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num); - flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID; + flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0; + flags |= NFQA_CFG_F_UID_GID; mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); @@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void) return nl; } +static void sleep_ms(uint32_t delay) +{ + struct timespec ts = { .tv_sec = delay / 1000 }; + + delay %= 1000; + + ts.tv_nsec = delay * 1000llu * 1000llu; + + nanosleep(&ts, NULL); +} + static int mainloop(void) { unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; @@ -278,7 +290,7 @@ static int mainloop(void) ret = mnl_socket_recvfrom(nl, buf, buflen); if (ret == -1) { - if (errno == ENOBUFS) + if (errno == ENOBUFS || errno == EINTR) continue; if (errno == EAGAIN) { @@ -298,7 +310,10 @@ static int mainloop(void) } id = ret - MNL_CB_OK; - nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT); + if (opts.delay_ms) + sleep_ms(opts.delay_ms); + + nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { perror("mnl_socket_sendto"); exit(EXIT_FAILURE); @@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "chvt:q:")) != -1) { + while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) { switch (c) { case 'c': opts.count_packets = true; @@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv) if (opts.queue_num > 0xffff) opts.queue_num = 0; break; + case 'Q': + opts.verdict = atoi(optarg); + if (opts.verdict > 0xffff) { + fprintf(stderr, "Expected destination queue number\n"); + exit(1); + } + + opts.verdict <<= 16; + opts.verdict |= NF_QUEUE; + break; + case 'd': + opts.delay_ms = atoi(optarg); + if (opts.delay_ms == 0) { + fprintf(stderr, "Expected nonzero delay (in milliseconds)\n"); + exit(1); + } + break; case 't': opts.timeout = atoi(optarg); break; + case 'G': + opts.gso_enabled = false; + break; case 'v': opts.verbose++; break; } } + + if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) { + fprintf(stderr, "Cannot use same destination and source queue\n"); + exit(1); + } } int main(int argc, char *argv[]) { int ret; + opts.verdict = NF_ACCEPT; + opts.gso_enabled = true; + parse_opts(argc, argv); ret = mainloop(); diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index d250b84dd5bc..087f0e6e71ce 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -7,8 +7,7 @@ ksft_skip=4 sfx=$(mktemp -u "XXXXXXXX") ns0="ns0-$sfx" -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then +if ! nft --version > /dev/null 2>&1; then echo "SKIP: Could not run test without nft tool" exit $ksft_skip fi @@ -24,6 +23,8 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo trap cleanup EXIT +currentyear=$(date +%G) +lastyear=$((currentyear-1)) ip netns exec "$ns0" nft -f /dev/stdin <<EOF table inet filter { counter iifcount {} @@ -33,6 +34,9 @@ table inet filter { counter infproto4count {} counter il4protocounter {} counter imarkcounter {} + counter icpu0counter {} + counter ilastyearcounter {} + counter icurrentyearcounter {} counter oifcount {} counter oifnamecount {} @@ -54,6 +58,9 @@ table inet filter { meta nfproto ipv4 counter name "infproto4count" meta l4proto icmp counter name "il4protocounter" meta mark 42 counter name "imarkcounter" + meta cpu 0 counter name "icpu0counter" + meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter + meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter } chain output { @@ -84,11 +91,10 @@ check_one_counter() local want="packets $2" local verbose="$3" - cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want") - if [ $? -ne 0 ];then + if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then echo "FAIL: $cname, want \"$want\", got" ret=1 - ip netns exec "$ns0" nft list counter inet filter $counter + ip netns exec "$ns0" nft list counter inet filter $cname fi } @@ -100,8 +106,7 @@ check_lo_counters() for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \ oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \ - il4protocounter \ - ol4protocounter \ + il4protocounter icurrentyearcounter ol4protocounter \ ; do check_one_counter "$counter" "$want" "$verbose" done @@ -116,9 +121,22 @@ check_one_counter oskuidcounter "1" true check_one_counter oskgidcounter "1" true check_one_counter imarkcounter "1" true check_one_counter omarkcounter "1" true +check_one_counter ilastyearcounter "0" true if [ $ret -eq 0 ];then echo "OK: nftables meta iif/oif counters at expected values" +else + exit $ret +fi + +#First CPU execution and counter +taskset -p 01 $$ > /dev/null +ip netns exec "$ns0" nft reset counters > /dev/null +ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null +check_one_counter icpu0counter "2" true + +if [ $ret -eq 0 ];then + echo "OK: nftables meta cpu counter at expected values" fi exit $ret diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh index 6898448b4266..3d202b90b33d 100755 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX") ns1="ns1-$sfx" ns2="ns2-$sfx" nsrouter="nsrouter-$sfx" +timeout=4 cleanup() { @@ -20,6 +21,7 @@ cleanup() ip netns del ${nsrouter} rm -f "$TMPFILE0" rm -f "$TMPFILE1" + rm -f "$TMPFILE2" "$TMPFILE3" } nft --version > /dev/null 2>&1 @@ -42,6 +44,8 @@ fi TMPFILE0=$(mktemp) TMPFILE1=$(mktemp) +TMPFILE2=$(mktemp) +TMPFILE3=$(mktemp) trap cleanup EXIT ip netns add ${ns1} @@ -83,7 +87,7 @@ load_ruleset() { local name=$1 local prio=$2 -ip netns exec ${nsrouter} nft -f - <<EOF +ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF table inet $name { chain nfq { ip protocol icmp queue bypass @@ -118,7 +122,7 @@ EOF load_counter_ruleset() { local prio=$1 -ip netns exec ${nsrouter} nft -f - <<EOF +ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF table inet countrules { chain pre { type filter hook prerouting priority $prio; policy accept; @@ -175,7 +179,7 @@ test_ping_router() { test_queue_blackhole() { local proto=$1 -ip netns exec ${nsrouter} nft -f - <<EOF +ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF table $proto blackh { chain forward { type filter hook forward priority 0; policy accept; @@ -184,10 +188,10 @@ table $proto blackh { } EOF if [ $proto = "ip" ] ;then - ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null + ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null lret=$? elif [ $proto = "ip6" ]; then - ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null + ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null lret=$? else lret=111 @@ -214,8 +218,8 @@ test_queue() local last="" # spawn nf-queue listeners - ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" & - ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" & + ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" & + ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" & sleep 1 test_ping ret=$? @@ -250,11 +254,11 @@ test_queue() test_tcp_forward() { - ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 & + ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout & local nfqpid=$! tmpfile=$(mktemp) || exit 1 - dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile + dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & local rpid=$! @@ -270,15 +274,13 @@ test_tcp_forward() test_tcp_localhost() { - tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1% - tmpfile=$(mktemp) || exit 1 - dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile + dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & local rpid=$! - ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 & + ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout & local nfqpid=$! sleep 1 @@ -287,6 +289,47 @@ test_tcp_localhost() wait $rpid [ $? -eq 0 ] && echo "PASS: tcp via loopback" + wait 2>/dev/null +} + +test_tcp_localhost_requeue() +{ +ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 + } + chain post { + type filter hook postrouting priority 0; policy accept; + tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 + } +} +EOF + tmpfile=$(mktemp) || exit 1 + dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile + ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & + local rpid=$! + + ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" & + + # nfqueue 1 will be called via output hook. But this time, + # re-queue the packet to nfqueue program on queue 2. + ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" & + + sleep 1 + ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null + rm -f "$tmpfile" + + wait + + if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then + echo "FAIL: lost packets during requeue?!" 1>&2 + return + fi + + echo "PASS: tcp via loopback and re-queueing" } ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -328,5 +371,6 @@ test_queue 20 test_tcp_forward test_tcp_localhost +test_tcp_localhost_requeue exit $ret diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index bb11de90c0c9..f6f2965e17af 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,3 +4,4 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index 7758c98be015..0930e2411dfb 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -204,7 +204,10 @@ TEST_F(child, fetch_fd) fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); ASSERT_GE(fd, 0); - EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd)); + ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd); + if (ret < 0 && errno == ENOSYS) + SKIP(return, "kcmp() syscall not supported"); + EXPECT_EQ(ret, 0); ret = fcntl(fd, F_GETFD); ASSERT_GE(ret, 0); diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index b9fe75fc3e51..8a59438ccc78 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -6,7 +6,6 @@ #include <inttypes.h> #include <limits.h> #include <linux/types.h> -#include <linux/wait.h> #include <sched.h> #include <signal.h> #include <stdbool.h> diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 4b115444dfe9..610811275357 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -3,7 +3,6 @@ #define _GNU_SOURCE #include <errno.h> #include <linux/types.h> -#include <linux/wait.h> #include <poll.h> #include <signal.h> #include <stdbool.h> diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 1f085b922c6e..6e2f2cd400ca 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,7 +16,6 @@ #include <unistd.h> #include <sys/socket.h> #include <sys/stat.h> -#include <linux/kcmp.h> #include "pidfd.h" #include "../clone3/clone3_selftests.h" diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index c585aaa2acd8..529eb700ac26 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -330,7 +330,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void) ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n", test_name, PID_RECYCLE); case PIDFD_SKIP: - ksft_print_msg("%s test: Skipping test\n", test_name); + ksft_test_result_skip("%s test: Skipping test\n", test_name); ret = 0; break; case PIDFD_XFAIL: diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 55ef15184057..cb53a8b777e6 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -55,8 +55,6 @@ #include <setjmp.h> #include <signal.h> -#include <asm/cputable.h> - #include "utils.h" #include "instructions.h" @@ -64,6 +62,7 @@ int bufsize; int debug; int testing; volatile int gotsig; +bool prefixes_enabled; char *cipath = "/dev/fb0"; long cioffset; @@ -77,7 +76,12 @@ void sighandler(int sig, siginfo_t *info, void *ctx) } gotsig = sig; #ifdef __powerpc64__ - ucp->uc_mcontext.gp_regs[PT_NIP] += 4; + if (prefixes_enabled) { + u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP]; + ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4); + } else { + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; + } #else ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4; #endif @@ -262,8 +266,12 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) } rc = 0; - /* offset = 0 no alignment fault, so skip */ - for (offset = 1; offset < 16; offset++) { + /* + * offset = 0 is aligned but tests the workaround for the P9N + * DD2.1 vector CI load issue (see 5080332c2c89 "powerpc/64s: + * Add workaround for P9 vector CI load issue") + */ + for (offset = 0; offset < 16; offset++) { width = 16; /* vsx == 16 bytes */ r = 0; @@ -648,6 +656,8 @@ int main(int argc, char *argv[]) exit(1); } + prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1); + rc |= test_harness(test_alignment_handler_vsx_206, "test_alignment_handler_vsx_206"); rc |= test_harness(test_alignment_handler_vsx_207, diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c index d50cc05df495..96554e2794d1 100644 --- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c +++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c @@ -481,6 +481,12 @@ int main(int argc, char *argv[]) else printf("futex"); + if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC)) + touch_altivec = 0; + + if (!have_hwcap(PPC_FEATURE_HAS_VSX)) + touch_vector = 0; + printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n", cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no", touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no"); diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index cfa6eedcb66c..845db6273a1b 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -10,4 +10,4 @@ include ../../lib.mk $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread -$(TEST_GEN_PROGS): ../harness.c +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c index 288a4e2ad156..e76611e608af 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c @@ -63,6 +63,8 @@ int dscr_default(void) unsigned long i, *status[THREADS]; unsigned long orig_dscr_default; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + orig_dscr_default = get_default_dscr(); /* Initial DSCR default */ diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c index aefcd8d8759b..32fcf2b324b1 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c @@ -21,6 +21,8 @@ int dscr_explicit(void) { unsigned long i, dscr = 0; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + srand(getpid()); set_dscr(dscr); diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c index 7c1cb46397c6..c6a81b2d6b91 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c @@ -44,6 +44,8 @@ int dscr_inherit_exec(void) unsigned long i, dscr = 0; pid_t pid; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + for (i = 0; i < COUNT; i++) { dscr++; if (dscr > DSCR_MAX) diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c index 04297a69ab59..f9dfd3d3c2d5 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c @@ -22,6 +22,8 @@ int dscr_inherit(void) unsigned long i, dscr = 0; pid_t pid; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + srand(getpid()); set_dscr(dscr); diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c index 02f6b4efde14..fbbdffdb2e5d 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -77,6 +77,8 @@ int dscr_sysfs(void) unsigned long orig_dscr_default; int i, j; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + orig_dscr_default = get_default_dscr(); for (i = 0; i < COUNT; i++) { for (j = 0; j < DSCR_MAX; j++) { diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c index 37be2c25f277..191ed126f118 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c @@ -56,6 +56,8 @@ int dscr_sysfs_thread(void) unsigned long orig_dscr_default; int i, j; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + orig_dscr_default = get_default_dscr(); for (i = 0; i < COUNT; i++) { for (j = 0; j < DSCR_MAX; j++) { diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c index eaf785d11eed..e09072446dd3 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c @@ -36,6 +36,8 @@ int dscr_user(void) { int i; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + check_dscr(""); for (i = 0; i < COUNT; i++) { diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh index 8a8d0f456946..0d783e1065c8 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -1,17 +1,19 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only +KSELFTESTS_SKIP=4 + . ./eeh-functions.sh if ! eeh_supported ; then echo "EEH not supported on this system, skipping" - exit 0; + exit $KSELFTESTS_SKIP; fi if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then echo "debugfs EEH testing files are missing. Is debugfs mounted?" - exit 1; + exit $KSELFTESTS_SKIP; fi pre_lspci=`mktemp` @@ -84,4 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)" lspci | diff -u $pre_lspci - rm -f $pre_lspci -exit $failed +test "$failed" == 0 +exit $? diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 71d2924f5b8b..b7d188fc87c7 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -12,6 +12,7 @@ #include <stdbool.h> #include <linux/auxvec.h> #include <linux/perf_event.h> +#include <asm/cputable.h> #include "reg.h" /* Avoid headaches with PRI?64 - just use %ll? always */ @@ -35,13 +36,17 @@ int pick_online_cpu(void); int read_debugfs_file(char *debugfs_file, int *result); int write_debugfs_file(char *debugfs_file, int result); int read_sysfs_file(char *debugfs_file, char *result, size_t result_size); -void set_dscr(unsigned long val); int perf_event_open_counter(unsigned int type, unsigned long config, int group_fd); int perf_event_enable(int fd); int perf_event_disable(int fd); int perf_event_reset(int fd); +struct perf_event_read { + __u64 nr; + __u64 l1d_misses; +}; + #if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30) #include <unistd.h> #include <sys/syscall.h> diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c index a864ed7e2008..fd747b2ffcfc 100644 --- a/tools/testing/selftests/powerpc/mm/bad_accesses.c +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -139,5 +139,6 @@ static int test(void) int main(void) { + test_harness_set_timeout(300); return test_harness(test, "bad_accesses"); } diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 2980abca31e0..2070a1e2b3a5 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -9,7 +9,6 @@ #include <stdbool.h> #include <string.h> #include <sys/prctl.h> -#include <asm/cputable.h> #include "event.h" #include "utils.h" diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c index a96d512a18c4..a5dfa9bf3b9f 100644 --- a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c +++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c @@ -20,6 +20,9 @@ static int l3_bank_test(void) char *p; int i; + // The L3 bank logic is only used on Power8 or later + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + p = malloc(MALLOC_SIZE); FAIL_IF(!p); diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c index 2d37942bf72b..ad32a09a6540 100644 --- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c +++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c @@ -12,8 +12,6 @@ #include <string.h> #include <sys/prctl.h> -#include <asm/cputable.h> - #include "event.h" #include "lib.h" #include "utils.h" diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index fc477dfe86a2..2e0d86e0687e 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -20,6 +20,8 @@ #include <signal.h> #include <sys/types.h> #include <sys/wait.h> +#include <sys/syscall.h> +#include <linux/limits.h> #include "ptrace.h" #define SPRN_PVR 0x11F @@ -44,6 +46,7 @@ struct gstruct { }; static volatile struct gstruct gstruct __attribute__((aligned(512))); +static volatile char cwd[PATH_MAX] __attribute__((aligned(8))); static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) { @@ -138,6 +141,9 @@ static void test_workload(void) write_var(len); } + /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */ + syscall(__NR_getcwd, &cwd, PATH_MAX); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */ write_var(1); @@ -150,6 +156,9 @@ static void test_workload(void) else read_var(1); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */ + syscall(__NR_getcwd, &cwd, PATH_MAX); + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */ gstruct.a[rand() % A_LEN] = 'a'; @@ -293,6 +302,24 @@ static int test_set_debugreg(pid_t child_pid) return 0; } +static int test_set_debugreg_kernel_userspace(pid_t child_pid) +{ + unsigned long wp_addr = (unsigned long)cwd; + char *name = "PTRACE_SET_DEBUGREG"; + + /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */ + wp_addr &= ~0x7UL; + wp_addr |= (1Ul << DABR_READ_SHIFT); + wp_addr |= (1UL << DABR_WRITE_SHIFT); + wp_addr |= (1UL << DABR_TRANSLATION_SHIFT); + ptrace_set_debugreg(child_pid, wp_addr); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8); + + ptrace_set_debugreg(child_pid, 0); + return 0; +} + static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, unsigned long addr, int len) { @@ -338,6 +365,22 @@ static void test_sethwdebug_exact(pid_t child_pid) ptrace_delhwdebug(child_pid, wh); } +static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr = (unsigned long)&cwd; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT"; + int len = 1; /* hardcoded in kernel */ + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */ + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} + static void test_sethwdebug_range_aligned(pid_t child_pid) { struct ppc_hw_breakpoint info; @@ -452,9 +495,10 @@ static void run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) { test_set_debugreg(child_pid); + test_set_debugreg_kernel_userspace(child_pid); + test_sethwdebug_exact(child_pid); + test_sethwdebug_exact_kernel_userspace(child_pid); if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) { - test_sethwdebug_exact(child_pid); - test_sethwdebug_range_aligned(child_pid); if (dawr || is_8xx) { test_sethwdebug_range_unaligned(child_pid); diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index f795e06f5ae3..4257a1f156bb 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only rfi_flush +entry_flush diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index eadbbff50be6..f25e854fe370 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush spectre_v2 +TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2 top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include @@ -11,3 +11,5 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c $(OUTPUT)/spectre_v2: CFLAGS += -m64 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S +$(OUTPUT)/rfi_flush: flush_utils.c +$(OUTPUT)/entry_flush: flush_utils.c diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c new file mode 100644 index 000000000000..78cf914fa321 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include <sys/types.h> +#include <stdint.h> +#include <malloc.h> +#include <unistd.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include "utils.h" +#include "flush_utils.h" + +int entry_flush_test(void) +{ + char *p; + int repetitions = 10; + int fd, passes = 0, iter, rc = 0; + struct perf_event_read v; + __u64 l1d_misses_total = 0; + unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; + int rfi_flush_orig; + int entry_flush, entry_flush_orig; + + SKIP_IF(geteuid() != 0); + + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + perror("Unable to read powerpc/rfi_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (rfi_flush_orig != 0) { + if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + perror("error writing to powerpc/rfi_flush debugfs file"); + FAIL_IF(1); + } + } + + entry_flush = entry_flush_orig; + + fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + FAIL_IF(fd < 0); + + p = (char *)memalign(zero_size, CACHELINE_SIZE); + + FAIL_IF(perf_event_enable(fd)); + + // disable L1 prefetching + set_dscr(1); + + iter = repetitions; + + /* + * We expect to see l1d miss for each cacheline access when entry_flush + * is set. Allow a small variation on this. + */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + +again: + FAIL_IF(perf_event_reset(fd)); + + syscall_loop(p, iterations, zero_size); + + FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); + + if (entry_flush && v.l1d_misses >= l1d_misses_expected) + passes++; + else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2)) + passes++; + + l1d_misses_total += v.l1d_misses; + + while (--iter) + goto again; + + if (passes < repetitions) { + printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n", + entry_flush, l1d_misses_total, entry_flush ? '<' : '>', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + repetitions - passes, repetitions); + rc = 1; + } else { + printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n", + entry_flush, l1d_misses_total, entry_flush ? '>' : '<', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + passes, repetitions); + } + + if (entry_flush == entry_flush_orig) { + entry_flush = !entry_flush_orig; + if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + iter = repetitions; + l1d_misses_total = 0; + passes = 0; + goto again; + } + + perf_event_disable(fd); + close(fd); + + set_dscr(0); + + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + perror("unable to restore original value of powerpc/rfi_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush debugfs file"); + return 1; + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(entry_flush_test, "entry_flush_test"); +} diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c new file mode 100644 index 000000000000..0c3c4c40c7fb --- /dev/null +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include <sys/types.h> +#include <stdint.h> +#include <unistd.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include "utils.h" +#include "flush_utils.h" + +static inline __u64 load(void *addr) +{ + __u64 tmp; + + asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); + + return tmp; +} + +void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size) +{ + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + getppid(); + } +} + +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +void set_dscr(unsigned long val) +{ + static int init; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h new file mode 100644 index 000000000000..07a5eb301466 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +/* + * Copyright 2018 IBM Corporation. + */ + +#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H +#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H + +#define CACHELINE_SIZE 128 + +void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size); + +void set_dscr(unsigned long val); + +#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */ diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 0a7d0afb26b8..7565fd786640 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -14,32 +14,8 @@ #include <string.h> #include <stdio.h> #include "utils.h" +#include "flush_utils.h" -#define CACHELINE_SIZE 128 - -struct perf_event_read { - __u64 nr; - __u64 l1d_misses; -}; - -static inline __u64 load(void *addr) -{ - __u64 tmp; - - asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); - - return tmp; -} - -static void syscall_loop(char *p, unsigned long iterations, - unsigned long zero_size) -{ - for (unsigned long i = 0; i < iterations; i++) { - for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) - load(p + j); - getppid(); - } -} int rfi_flush_test(void) { @@ -50,16 +26,33 @@ int rfi_flush_test(void) __u64 l1d_misses_total = 0; unsigned long iterations = 100000, zero_size = 24 * 1024; unsigned long l1d_misses_expected; - int rfi_flush_org, rfi_flush; + int rfi_flush_orig, rfi_flush; + int have_entry_flush, entry_flush_orig; SKIP_IF(geteuid() != 0); - if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) { + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); } - rfi_flush = rfi_flush_org; + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + have_entry_flush = 0; + } else { + have_entry_flush = 1; + + if (entry_flush_orig != 0) { + if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + } + } + + rfi_flush = rfi_flush_orig; fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); FAIL_IF(fd < 0); @@ -68,6 +61,7 @@ int rfi_flush_test(void) FAIL_IF(perf_event_enable(fd)); + // disable L1 prefetching set_dscr(1); iter = repetitions; @@ -109,8 +103,8 @@ again: repetitions * l1d_misses_expected / 2, passes, repetitions); - if (rfi_flush == rfi_flush_org) { - rfi_flush = !rfi_flush_org; + if (rfi_flush == rfi_flush_orig) { + rfi_flush = !rfi_flush_orig; if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) { perror("error writing to powerpc/rfi_flush debugfs file"); return 1; @@ -126,11 +120,19 @@ again: set_dscr(0); - if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) { + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { perror("unable to restore original value of powerpc/rfi_flush debugfs file"); return 1; } + if (have_entry_flush) { + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush " + "debugfs file"); + return 1; + } + } + return rc; } diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index c8d82b784102..adc2b7294e5f 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -134,6 +134,9 @@ int spectre_v2_test(void) s64 miss_percent; bool is_p9; + // The PMU events we use only work on Power8 or later + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + state = get_sysfs_state(); if (state == UNKNOWN) { printf("Error: couldn't determine spectre_v2 mitigation state?\n"); diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c index 979df3d98368..cb2f18855c8d 100644 --- a/tools/testing/selftests/powerpc/stringloops/memcmp.c +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -4,7 +4,7 @@ #include <string.h> #include <sys/mman.h> #include <time.h> -#include <asm/cputable.h> + #include "utils.h" #define SIZE 256 diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S index cc4930467235..7887f78cf072 100644 --- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S +++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S @@ -3,9 +3,13 @@ .data .balign 8 -message: +success_message: .ascii "success: switch_endian_test\n\0" + .balign 8 +failure_message: + .ascii "failure: switch_endian_test\n\0" + .section ".toc" .balign 8 pattern: @@ -64,6 +68,9 @@ FUNC_START(_start) li r0, __NR_switch_endian sc + tdi 0, 0, 0x48 // b +8 if the endian was switched + b .Lfail // exit if endian didn't switch + #include "check-reversed.S" /* Flip back, r0 already has the switch syscall number */ @@ -71,12 +78,20 @@ FUNC_START(_start) #include "check.S" + ld r4, success_message@got(%r2) + li r5, 28 // strlen(success_message) + li r14, 0 // exit status +.Lout: li r0, __NR_write li r3, 1 /* stdout */ - ld r4, message@got(%r2) - li r5, 28 /* strlen(message3) */ sc li r0, __NR_exit - li r3, 0 + mr r3, r14 sc b . + +.Lfail: + ld r4, failure_message@got(%r2) + li r5, 28 // strlen(failure_message) + li r14, 1 + b .Lout diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index 01b22775ca87..b63f8459c704 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -TEST_GEN_PROGS := ipc_unmuxed +TEST_GEN_PROGS := ipc_unmuxed rtas_filter CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c new file mode 100644 index 000000000000..03b487f18d00 --- /dev/null +++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2005-2020 IBM Corporation. + * + * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/) + */ + +#include <byteswap.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <string.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdarg.h> +#include <stdlib.h> +#include <fcntl.h> +#include <errno.h> +#include "utils.h" + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define cpu_to_be32(x) bswap_32(x) +#define be32_to_cpu(x) bswap_32(x) +#else +#define cpu_to_be32(x) (x) +#define be32_to_cpu(x) (x) +#endif + +#define RTAS_IO_ASSERT -1098 /* Unexpected I/O Error */ +#define RTAS_UNKNOWN_OP -1099 /* No Firmware Implementation of Function */ +#define BLOCK_SIZE 4096 +#define PAGE_SIZE 4096 +#define MAX_PAGES 64 + +static const char *ofdt_rtas_path = "/proc/device-tree/rtas"; + +typedef __be32 uint32_t; +struct rtas_args { + __be32 token; + __be32 nargs; + __be32 nret; + __be32 args[16]; + __be32 *rets; /* Pointer to return values in args[]. */ +}; + +struct region { + uint64_t addr; + uint32_t size; + struct region *next; +}; + +int read_entire_file(int fd, char **buf, size_t *len) +{ + size_t buf_size = 0; + size_t off = 0; + int rc; + + *buf = NULL; + do { + buf_size += BLOCK_SIZE; + if (*buf == NULL) + *buf = malloc(buf_size); + else + *buf = realloc(*buf, buf_size); + + if (*buf == NULL) + return -ENOMEM; + + rc = read(fd, *buf + off, BLOCK_SIZE); + if (rc < 0) + return -EIO; + + off += rc; + } while (rc == BLOCK_SIZE); + + if (len) + *len = off; + + return 0; +} + +static int open_prop_file(const char *prop_path, const char *prop_name, int *fd) +{ + char *path; + int len; + + /* allocate enough for two string, a slash and trailing NULL */ + len = strlen(prop_path) + strlen(prop_name) + 1 + 1; + path = malloc(len); + if (path == NULL) + return -ENOMEM; + + snprintf(path, len, "%s/%s", prop_path, prop_name); + + *fd = open(path, O_RDONLY); + free(path); + if (*fd < 0) + return -errno; + + return 0; +} + +static int get_property(const char *prop_path, const char *prop_name, + char **prop_val, size_t *prop_len) +{ + int rc, fd; + + rc = open_prop_file(prop_path, prop_name, &fd); + if (rc) + return rc; + + rc = read_entire_file(fd, prop_val, prop_len); + close(fd); + + return rc; +} + +int rtas_token(const char *call_name) +{ + char *prop_buf = NULL; + size_t len; + int rc; + + rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len); + if (rc < 0) { + rc = RTAS_UNKNOWN_OP; + goto err; + } + + rc = be32_to_cpu(*(int *)prop_buf); + +err: + free(prop_buf); + return rc; +} + +static int read_kregion_bounds(struct region *kregion) +{ + char *buf; + int fd; + int rc; + + fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY); + if (fd < 0) { + printf("Could not open rmo_buffer file\n"); + return RTAS_IO_ASSERT; + } + + rc = read_entire_file(fd, &buf, NULL); + close(fd); + if (rc) { + free(buf); + return rc; + } + + sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size); + free(buf); + + if (!(kregion->size && kregion->addr) || + (kregion->size > (PAGE_SIZE * MAX_PAGES))) { + printf("Unexpected kregion bounds\n"); + return RTAS_IO_ASSERT; + } + + return 0; +} + +static int rtas_call(const char *name, int nargs, + int nrets, ...) +{ + struct rtas_args args; + __be32 *rets[16]; + int i, rc, token; + va_list ap; + + va_start(ap, nrets); + + token = rtas_token(name); + if (token == RTAS_UNKNOWN_OP) { + // We don't care if the call doesn't exist + printf("call '%s' not available, skipping...", name); + rc = RTAS_UNKNOWN_OP; + goto err; + } + + args.token = cpu_to_be32(token); + args.nargs = cpu_to_be32(nargs); + args.nret = cpu_to_be32(nrets); + + for (i = 0; i < nargs; i++) + args.args[i] = (__be32) va_arg(ap, unsigned long); + + for (i = 0; i < nrets; i++) + rets[i] = (__be32 *) va_arg(ap, unsigned long); + + rc = syscall(__NR_rtas, &args); + if (rc) { + rc = -errno; + goto err; + } + + if (nrets) { + *(rets[0]) = be32_to_cpu(args.args[nargs]); + + for (i = 1; i < nrets; i++) { + *(rets[i]) = args.args[nargs + i]; + } + } + +err: + va_end(ap); + return rc; +} + +static int test(void) +{ + struct region rmo_region; + uint32_t rmo_start; + uint32_t rmo_end; + __be32 rets[1]; + int rc; + + // Test a legitimate harmless call + // Expected: call succeeds + printf("Test a permitted call, no parameters... "); + rc = rtas_call("get-time-of-day", 0, 1, rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP); + + // Test a prohibited call + // Expected: call returns -EINVAL + printf("Test a prohibited call... "); + rc = rtas_call("nvram-fetch", 0, 1, rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + // Get RMO + rc = read_kregion_bounds(&rmo_region); + if (rc) { + printf("Couldn't read RMO region bounds, skipping remaining cases\n"); + return 0; + } + rmo_start = rmo_region.addr; + rmo_end = rmo_start + rmo_region.size - 1; + printf("RMO range: %08x - %08x\n", rmo_start, rmo_end); + + // Test a permitted call, user-supplied size, buffer inside RMO + // Expected: call succeeds + printf("Test a permitted call, user-supplied size, buffer inside RMO... "); + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start), + cpu_to_be32(rmo_end - rmo_start + 1), rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP); + + // Test a permitted call, user-supplied size, buffer start outside RMO + // Expected: call returns -EINVAL + printf("Test a permitted call, user-supplied size, buffer start outside RMO... "); + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1), + cpu_to_be32(4000), rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + // Test a permitted call, user-supplied size, buffer end outside RMO + // Expected: call returns -EINVAL + printf("Test a permitted call, user-supplied size, buffer end outside RMO... "); + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start), + cpu_to_be32(rmo_end - rmo_start + 2), rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + // Test a permitted call, fixed size, buffer end outside RMO + // Expected: call returns -EINVAL + printf("Test a permitted call, fixed size, buffer end outside RMO... "); + rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test, "rtas_filter"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c index 977558497c16..29e5f26af7b9 100644 --- a/tools/testing/selftests/powerpc/tm/tm-poison.c +++ b/tools/testing/selftests/powerpc/tm/tm-poison.c @@ -26,7 +26,7 @@ int tm_poison_test(void) { - int pid; + int cpu, pid; cpu_set_t cpuset; uint64_t poison = 0xdeadbeefc0dec0fe; uint64_t unknown = 0; @@ -35,10 +35,13 @@ int tm_poison_test(void) SKIP_IF(!have_htm()); - /* Attach both Child and Parent to CPU 0 */ + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + + // Attach both Child and Parent to the same CPU CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); - sched_setaffinity(0, sizeof(cpuset), &cpuset); + CPU_SET(cpu, &cpuset); + FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0); pid = fork(); if (!pid) { diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c index 17becf3dcee4..794d574db784 100644 --- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -33,19 +33,13 @@ #include "utils.h" #include "tm.h" -int num_loops = 10000; +int num_loops = 1000000; int passed = 1; void tfiar_tfhar(void *in) { - int i, cpu; unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd; - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - cpu = (unsigned long)in >> 1; - CPU_SET(cpu, &cpuset); - sched_setaffinity(0, sizeof(cpuset), &cpuset); + int i; /* TFIAR: Last bit has to be high so userspace can read register */ tfiar = ((unsigned long)in) + 1; diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c index 601f0c1d450d..c75960af8018 100644 --- a/tools/testing/selftests/powerpc/tm/tm-trap.c +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -247,8 +247,7 @@ void *pong(void *not_used) int tm_trap_test(void) { uint16_t k = 1; - - int rc; + int cpu, rc; pthread_attr_t attr; cpu_set_t cpuset; @@ -267,9 +266,12 @@ int tm_trap_test(void) usr1_sa.sa_sigaction = usr1_signal_handler; sigaction(SIGUSR1, &usr1_sa, NULL); - /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */ + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + + // Set only one CPU in the mask. Both threads will be bound to that CPU. CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); + CPU_SET(cpu, &cpuset); /* Init pthread attribute */ rc = pthread_attr_init(&attr); diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c index 2ca2fccb0a3e..a1348a5f721a 100644 --- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c +++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c @@ -338,16 +338,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) int tm_unavailable_test(void) { - int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ + int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ pthread_t t1; pthread_attr_t attr; cpu_set_t cpuset; SKIP_IF(!have_htm()); - /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */ + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + + // Set only one CPU in the mask. Both threads will be bound to that CPU. CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); + CPU_SET(cpu, &cpuset); /* Init pthread attribute. */ rc = pthread_attr_init(&attr); diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h index c402464b038f..c5a1e5c163fc 100644 --- a/tools/testing/selftests/powerpc/tm/tm.h +++ b/tools/testing/selftests/powerpc/tm/tm.h @@ -6,9 +6,8 @@ #ifndef _SELFTESTS_POWERPC_TM_TM_H #define _SELFTESTS_POWERPC_TM_TM_H -#include <asm/tm.h> -#include <asm/cputable.h> #include <stdbool.h> +#include <asm/tm.h> #include "utils.h" diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 18b6a773d5c7..1f36ee1a909a 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -10,7 +10,6 @@ #include <fcntl.h> #include <link.h> #include <sched.h> -#include <signal.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -273,40 +272,6 @@ int perf_event_reset(int fd) return 0; } -static void sigill_handler(int signr, siginfo_t *info, void *unused) -{ - static int warned = 0; - ucontext_t *ctx = (ucontext_t *)unused; - unsigned long *pc = &UCONTEXT_NIA(ctx); - - /* mtspr 3,RS to check for move to DSCR below */ - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { - if (!warned++) - printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); - *pc += 4; - } else { - printf("SIGILL at %p\n", pc); - abort(); - } -} - -void set_dscr(unsigned long val) -{ - static int init = 0; - struct sigaction sa; - - if (!init) { - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigill_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGILL, &sa, NULL)) - perror("sigill_handler"); - init = 1; - } - - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); -} - int using_hash_mmu(bool *using_hash) { char line[128]; @@ -318,7 +283,9 @@ int using_hash_mmu(bool *using_hash) rc = 0; while (fgets(line, sizeof(line), f) != NULL) { - if (strcmp(line, "MMU : Hash\n") == 0) { + if (!strcmp(line, "MMU : Hash\n") || + !strcmp(line, "platform : Cell\n") || + !strcmp(line, "platform : PowerMac\n")) { *using_hash = true; goto out; } diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index 471e2aa28077..fb4fe9188806 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include <errno.h> #include <sched.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 9f6d000c0245..8511dcfe67c7 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b7849089..e7ceabed7f51 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -15,7 +15,6 @@ */ // Test that values in /proc/uptime increment monotonically // while shifting across CPUs. -#define _GNU_SOURCE #undef NDEBUG #include <assert.h> #include <unistd.h> diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore index 7bebf9534a86..792318aaa30c 100644 --- a/tools/testing/selftests/ptrace/.gitignore +++ b/tools/testing/selftests/ptrace/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only get_syscall_info peeksiginfo +vmaccess diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh index 7d3c2be66c64..d4bec538086d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh @@ -1,12 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0+ # -# Analyze a given results directory for rcuperf performance measurements, +# Analyze a given results directory for rcuscale performance measurements, # looking for ftrace data. Exits with 0 if data was found, analyzed, and -# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after +# printed. Intended to be invoked from kvm-recheck-rcuscale.sh after # argument checking. # -# Usage: kvm-recheck-rcuperf-ftrace.sh resdir +# Usage: kvm-recheck-rcuscale-ftrace.sh resdir # # Copyright (C) IBM Corporation, 2016 # diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh index db0375a57f28..aa745152a525 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh @@ -1,9 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0+ # -# Analyze a given results directory for rcuperf performance measurements. +# Analyze a given results directory for rcuscale scalability measurements. # -# Usage: kvm-recheck-rcuperf.sh resdir +# Usage: kvm-recheck-rcuscale.sh resdir # # Copyright (C) IBM Corporation, 2016 # @@ -20,7 +20,7 @@ fi PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH . functions.sh -if kvm-recheck-rcuperf-ftrace.sh $i +if kvm-recheck-rcuscale-ftrace.sh $i then # ftrace data was successfully analyzed, call it good! exit 0 @@ -30,12 +30,12 @@ configfile=`echo $i | sed -e 's/^.*\///'` sed -e 's/^\[[^]]*]//' < $i/console.log | awk ' -/-perf: .* gps: .* batches:/ { +/-scale: .* gps: .* batches:/ { ngps = $9; nbatches = $11; } -/-perf: .*writer-duration/ { +/-scale: .*writer-duration/ { gptimes[++n] = $5 / 1000.; sum += $5 / 1000.; } @@ -43,7 +43,7 @@ awk ' END { newNR = asort(gptimes); if (newNR <= 0) { - print "No rcuperf records found???" + print "No rcuscale records found???" exit; } pct50 = int(newNR * 50 / 100); @@ -79,5 +79,5 @@ END { print "99th percentile grace-period duration: " gptimes[pct99]; print "Maximum grace-period duration: " gptimes[newNR]; print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches; - print "Computed from rcuperf printk output."; + print "Computed from rcuscale printk output."; }' diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh new file mode 100755 index 000000000000..671bfee4fcef --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Analyze a given results directory for rcutorture progress. +# +# Usage: kvm-recheck-rcu.sh resdir +# +# Copyright (C) Facebook, 2020 +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +i="$1" +if test -d "$i" -a -r "$i" +then + : +else + echo Unreadable results directory: $i + exit 1 +fi +. functions.sh + +configfile=`echo $i | sed -e 's/^.*\///'` +nscfs="`grep 'scf_invoked_count ver:' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* scf_invoked_count ver: //' -e 's/ .*$//' | tr -d '\015'`" +if test -z "$nscfs" +then + echo "$configfile ------- " +else + dur="`sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`" + if test -z "$dur" + then + rate="" + else + nscfss=`awk -v nscfs=$nscfs -v dur=$dur ' + BEGIN { print nscfs / dur }' < /dev/null` + rate=" ($nscfss/s)" + fi + echo "${configfile} ------- ${nscfs} SCF handler invocations$rate" +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index e07779a62634..6dc2b49b85ea 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -66,6 +66,7 @@ config_override_param () { echo > $T/KcList config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`" config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`" +config_override_param "--gdb options" KcList "$TORTURE_KCONFIG_GDB_ARG" config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG" config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG" config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG" @@ -152,7 +153,11 @@ qemu_append="`identify_qemu_append "$QEMU"`" boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" # Generate kernel-version-specific boot parameters boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`" -echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd +if test -n "$TORTURE_BOOT_GDB_ARG" +then + boot_args="$boot_args $TORTURE_BOOT_GDB_ARG" +fi +echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd if test -n "$TORTURE_BUILDONLY" then @@ -171,14 +176,26 @@ echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log # Attempt to run qemu ( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & commandcompleted=0 -sleep 10 # Give qemu's pid a chance to reach the file -if test -s "$resdir/qemu_pid" +if test -z "$TORTURE_KCONFIG_GDB_ARG" then - qemu_pid=`cat "$resdir/qemu_pid"` - echo Monitoring qemu job at pid $qemu_pid -else - qemu_pid="" - echo Monitoring qemu job at yet-as-unknown pid + sleep 10 # Give qemu's pid a chance to reach the file + if test -s "$resdir/qemu_pid" + then + qemu_pid=`cat "$resdir/qemu_pid"` + echo Monitoring qemu job at pid $qemu_pid + else + qemu_pid="" + echo Monitoring qemu job at yet-as-unknown pid + fi +fi +if test -n "$TORTURE_KCONFIG_GDB_ARG" +then + echo Waiting for you to attach a debug session, for example: > /dev/tty + echo " gdb $base_resdir/vmlinux" > /dev/tty + echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty + echo " target remote :1234" > /dev/tty + echo " continue" > /dev/tty + kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` fi while : do diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index e655983b7429..6eb1d3f6524d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -31,6 +31,9 @@ TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" +TORTURE_KCONFIG_GDB_ARG="" +TORTURE_BOOT_GDB_ARG="" +TORTURE_QEMU_GDB_ARG="" TORTURE_KCONFIG_KASAN_ARG="" TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" @@ -46,6 +49,7 @@ jitter="-1" usage () { echo "Usage: $scriptname optional arguments:" + echo " --allcpus" echo " --bootargs kernel-boot-arguments" echo " --bootimage relative-path-to-kernel-boot-image" echo " --buildonly" @@ -55,17 +59,19 @@ usage () { echo " --defconfig string" echo " --dryrun sched|script" echo " --duration minutes" + echo " --gdb" + echo " --help" echo " --interactive" echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]" echo " --kconfig Kconfig-options" echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" - echo " --memory megabytes | nnnG" + echo " --memory megabytes|nnnG" echo " --no-initrd" echo " --qemu-args qemu-arguments" echo " --qemu-cmd qemu-system-..." echo " --results absolute-pathname" - echo " --torture rcu" + echo " --torture lock|rcu|rcuscale|refscale|scf" echo " --trust-make" exit 1 } @@ -126,6 +132,14 @@ do dur=$(($2*60)) shift ;; + --gdb) + TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG + TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG + TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG + ;; + --help|-h) + usage + ;; --interactive) TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE ;; @@ -184,13 +198,13 @@ do shift ;; --torture) - checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\|refscale\)$' '^--' + checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--' TORTURE_SUITE=$2 shift - if test "$TORTURE_SUITE" = rcuperf || test "$TORTURE_SUITE" = refscale + if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale then # If you really want jitter for refscale or - # rcuperf, specify it after specifying the rcuperf + # rcuscale, specify it after specifying the rcuscale # or the refscale. (But why jitter in these cases?) jitter=0 fi @@ -248,6 +262,15 @@ do done touch $T/cfgcpu configs_derep="`echo $configs_derep | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`" +if test -n "$TORTURE_KCONFIG_GDB_ARG" +then + if test "`echo $configs_derep | wc -w`" -gt 1 + then + echo "The --config list is: $configs_derep." + echo "Only one --config permitted with --gdb, terminating." + exit 1 + fi +fi for CF1 in $configs_derep do if test -f "$CONFIGFRAG/$CF1" @@ -323,6 +346,9 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG +TORTURE_KCONFIG_GDB_ARG="$TORTURE_KCONFIG_GDB_ARG"; export TORTURE_KCONFIG_GDB_ARG +TORTURE_BOOT_GDB_ARG="$TORTURE_BOOT_GDB_ARG"; export TORTURE_BOOT_GDB_ARG +TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 71a9f43a3918..e03338091a06 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -33,8 +33,8 @@ then fi cat /dev/null > $file.diags -# Check for proper termination, except for rcuperf and refscale. -if test "$TORTURE_SUITE" != rcuperf && test "$TORTURE_SUITE" != refscale +# Check for proper termination, except for rcuscale and refscale. +if test "$TORTURE_SUITE" != rcuscale && test "$TORTURE_SUITE" != refscale then # check for abject failure @@ -67,6 +67,7 @@ then grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | + sed -e 's/^.*ver: //' | awk ' BEGIN { ver = 0; @@ -74,13 +75,13 @@ then } { - if (!badseq && ($5 + 0 != $5 || $5 <= ver)) { + if (!badseq && ($1 + 0 != $1 || $1 <= ver)) { badseqno1 = ver; - badseqno2 = $5; + badseqno2 = $1; badseqnr = NR; badseq = 1; } - ver = $5 + ver = $1 } END { diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 2dde0d9964e3..4f95f8544f3f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -16,5 +16,6 @@ CONFIG_RCU_NOCB_CPU=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y +CONFIG_PROVE_RCU_LIST=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon deleted file mode 100644 index a09816b8c0f3..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon +++ /dev/null @@ -1,2 +0,0 @@ -CONFIG_RCU_PERF_TEST=y -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST index c9f56cf20775..c9f56cf20775 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon new file mode 100644 index 000000000000..87caa0e932c7 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon @@ -0,0 +1,2 @@ +CONFIG_RCU_SCALE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY index fb05ef5279b4..fb05ef5279b4 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE index 721cfda76ab2..721cfda76ab2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 index 7629f5dd73b2..7629f5dd73b2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh index 777d5b0c190f..0333e9b18522 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh @@ -11,6 +11,6 @@ # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo $1 rcuperf.shutdown=1 \ - rcuperf.verbose=1 + echo $1 rcuscale.shutdown=1 \ + rcuscale.verbose=1 } diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFLIST b/tools/testing/selftests/rcutorture/configs/scf/CFLIST new file mode 100644 index 000000000000..4d62eb4a39f9 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/CFLIST @@ -0,0 +1,2 @@ +NOPREEMPT +PREEMPT diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFcommon b/tools/testing/selftests/rcutorture/configs/scf/CFcommon new file mode 100644 index 000000000000..c11ab91f49f5 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/CFcommon @@ -0,0 +1,2 @@ +CONFIG_SCF_TORTURE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT new file mode 100644 index 000000000000..b8429d6c6ebc --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT @@ -0,0 +1,9 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=n +CONFIG_NO_HZ_FULL=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot new file mode 100644 index 000000000000..d6a7fa097c2e --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot @@ -0,0 +1 @@ +nohz_full=1 diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT new file mode 100644 index 000000000000..ae4992b141b0 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT @@ -0,0 +1,9 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh new file mode 100644 index 000000000000..d3d9e35d3d55 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Torture-suite-dependent shell functions for the rest of the scripts. +# +# Copyright (C) Facebook, 2020 +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +# scftorture_param_onoff bootparam-string config-file +# +# Adds onoff scftorture module parameters to kernels having it. +scftorture_param_onoff () { + if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" + then + echo CPU-hotplug kernel, adding scftorture onoff. 1>&2 + echo scftorture.onoff_interval=1000 scftorture.onoff_holdoff=30 + fi +} + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `scftorture_param_onoff "$1" "$2"` \ + scftorture.stat_interval=15 \ + scftorture.shutdown_secs=$3 \ + scftorture.verbose=1 \ + scf +} diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt index 933b4fd12327..41a4255865d4 100644 --- a/tools/testing/selftests/rcutorture/doc/initrd.txt +++ b/tools/testing/selftests/rcutorture/doc/initrd.txt @@ -1,12 +1,11 @@ -The rcutorture scripting tools automatically create the needed initrd -directory using dracut. Failing that, this tool will create an initrd -containing a single statically linked binary named "init" that loops -over a very long sleep() call. In both cases, this creation is done -by tools/testing/selftests/rcutorture/bin/mkinitrd.sh. +The rcutorture scripting tools automatically create an initrd containing +a single statically linked binary named "init" that loops over a +very long sleep() call. In both cases, this creation is done by +tools/testing/selftests/rcutorture/bin/mkinitrd.sh. -However, if you are attempting to run rcutorture on a system that does -not have dracut installed, and if you don't like the notion of static -linking, you might wish to press an existing initrd into service: +However, if you don't like the notion of statically linked bare-bones +userspace environments, you might wish to press an existing initrd +into service: ------------------------------------------------------------------------ cd tools/testing/selftests/rcutorture @@ -15,24 +14,3 @@ mkdir initrd cd initrd cpio -id < /tmp/initrd.img.zcat # Manually verify that initrd contains needed binaries and libraries. ------------------------------------------------------------------------- - -Interestingly enough, if you are running rcutorture, you don't really -need userspace in many cases. Running without userspace has the -advantage of allowing you to test your kernel independently of the -distro in place, the root-filesystem layout, and so on. To make this -happen, put the following script in the initrd's tree's "/init" file, -with 0755 mode. - ------------------------------------------------------------------------- -#!/bin/sh - -while : -do - sleep 10 -done ------------------------------------------------------------------------- - -This approach also allows most of the binaries and libraries in the -initrd filesystem to be dispensed with, which can save significant -space in rcutorture's "res" directory. diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt index 449cf579d6f9..b2fc247976b1 100644 --- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt +++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt @@ -1,8 +1,33 @@ -This document describes one way to create the rcu-test-image file -that contains the filesystem used by the guest-OS kernel. There are -probably much better ways of doing this, and this filesystem could no -doubt be smaller. It is probably also possible to simply download -an appropriate image from any number of places. +Normally, a minimal initrd is created automatically by the rcutorture +scripting. But minimal really does mean "minimal", namely just a single +root directory with a single statically linked executable named "init": + +$ size tools/testing/selftests/rcutorture/initrd/init + text data bss dec hex filename + 328 0 8 336 150 tools/testing/selftests/rcutorture/initrd/init + +Suppose you need to run some scripts, perhaps to monitor or control +some aspect of the rcutorture testing. This will require a more fully +filled-out userspace, perhaps containing libraries, executables for +the shell and other utilities, and soforth. In that case, place your +desired filesystem here: + + tools/testing/selftests/rcutorture/initrd + +For example, your tools/testing/selftests/rcutorture/initrd/init might +be a script that does any needed mount operations and starts whatever +scripts need starting to properly monitor or control your testing. +The next rcutorture build will then incorporate this filesystem into +the kernel image that is passed to qemu. + +Or maybe you need a real root filesystem for some reason, in which case +please read on! + +The remainder of this document describes one way to create the +rcu-test-image file that contains the filesystem used by the guest-OS +kernel. There are probably much better ways of doing this, and this +filesystem could no doubt be smaller. It is probably also possible to +simply download an appropriate image from any number of places. That said, here are the commands: @@ -36,7 +61,7 @@ References: https://help.ubuntu.com/community/JeOSVMBuilder http://wiki.libvirt.org/page/UbuntuKVMWalkthrough http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe" - http://www.landley.net/writing/rootfs-howto.html - http://en.wikipedia.org/wiki/Initrd - http://en.wikipedia.org/wiki/Cpio + https://www.landley.net/writing/rootfs-howto.html + https://en.wikipedia.org/wiki/Initrd + https://en.wikipedia.org/wiki/Cpio http://wiki.libvirt.org/page/UbuntuKVMWalkthrough diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh new file mode 100755 index 000000000000..609a4ef9300e --- /dev/null +++ b/tools/testing/selftests/run_kselftest.sh @@ -0,0 +1,93 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run installed kselftest tests. +# +BASE_DIR=$(realpath $(dirname $0)) +cd $BASE_DIR +TESTS="$BASE_DIR"/kselftest-list.txt +if [ ! -r "$TESTS" ] ; then + echo "$0: Could not find list of tests to run ($TESTS)" >&2 + available="" +else + available="$(cat "$TESTS")" +fi + +. ./kselftest/runner.sh +ROOT=$PWD + +usage() +{ + cat <<EOF +Usage: $0 [OPTIONS] + -s | --summary Print summary with detailed log in output.log + -t | --test COLLECTION:TEST Run TEST from COLLECTION + -c | --collection COLLECTION Run all tests from COLLECTION + -l | --list List the available collection:test entries + -d | --dry-run Don't actually run any tests + -h | --help Show this usage info +EOF + exit $1 +} + +COLLECTIONS="" +TESTS="" +dryrun="" +while true; do + case "$1" in + -s | --summary) + logfile="$BASE_DIR"/output.log + cat /dev/null > $logfile + shift ;; + -t | --test) + TESTS="$TESTS $2" + shift 2 ;; + -c | --collection) + COLLECTIONS="$COLLECTIONS $2" + shift 2 ;; + -l | --list) + echo "$available" + exit 0 ;; + -n | --dry-run) + dryrun="echo" + shift ;; + -h | --help) + usage 0 ;; + "") + break ;; + *) + usage 1 ;; + esac +done + +# Add all selected collections to the explicit test list. +if [ -n "$COLLECTIONS" ]; then + for collection in $COLLECTIONS ; do + found="$(echo "$available" | grep "^$collection:")" + if [ -z "$found" ] ; then + echo "No such collection '$collection'" >&2 + exit 1 + fi + TESTS="$TESTS $found" + done +fi +# Replace available test list with explicitly selected tests. +if [ -n "$TESTS" ]; then + valid="" + for test in $TESTS ; do + found="$(echo "$available" | grep "^${test}$")" + if [ -z "$found" ] ; then + echo "No such test '$test'" >&2 + exit 1 + fi + valid="$valid $found" + done + available="$(echo "$valid" | sed -e 's/ /\n/g')" +fi + +collections=$(echo "$available" | cut -d: -f1 | uniq) +for collection in $collections ; do + [ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg + tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2) + ($dryrun cd "$collection" && $dryrun run_many $tests) +done diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 4a180439ee9e..26c72f2b61b1 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1758,10 +1758,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) * and the code is stored as a positive value. \ */ \ if (_result < 0) { \ - SYSCALL_RET(_regs) = -result; \ + SYSCALL_RET(_regs) = -_result; \ (_regs).ccr |= 0x10000000; \ } else { \ - SYSCALL_RET(_regs) = result; \ + SYSCALL_RET(_regs) = _result; \ (_regs).ccr &= ~0x10000000; \ } \ } while (0) @@ -1804,8 +1804,8 @@ TEST_F(TRACE_poke, getpid_runs_normally) #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] #elif defined(__sh__) # define ARCH_REGS struct pt_regs -# define SYSCALL_NUM(_regs) (_regs).gpr[3] -# define SYSCALL_RET(_regs) (_regs).gpr[0] +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# define SYSCALL_RET(_regs) (_regs).regs[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index bb543bf69d69..361235ad574b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -100,7 +100,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower.*handle", "matchCount": "1", "teardown": [ @@ -119,7 +119,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": " dst_mac e4:11:22:11:4a:51", "matchCount": "0", "teardown": [ diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile index b4fd9a934654..3a5936cc10ab 100644 --- a/tools/testing/selftests/timens/Makefile +++ b/tools/testing/selftests/timens/Makefile @@ -1,4 +1,4 @@ -TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec +TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex TEST_GEN_PROGS_EXTENDED := gettime_perf CFLAGS := -Wall -Werror -pthread diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c new file mode 100644 index 000000000000..6b2b9264e851 --- /dev/null +++ b/tools/testing/selftests/timens/futex.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> + +#include <linux/unistd.h> +#include <linux/futex.h> +#include <stdio.h> +#include <string.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> + +#include "log.h" +#include "timens.h" + +#define NSEC_PER_SEC 1000000000ULL + +static int run_test(int clockid) +{ + int futex_op = FUTEX_WAIT_BITSET; + struct timespec timeout, end; + int val = 0; + + if (clockid == CLOCK_REALTIME) + futex_op |= FUTEX_CLOCK_REALTIME; + + clock_gettime(clockid, &timeout); + timeout.tv_nsec += NSEC_PER_SEC / 10; // 100ms + if (timeout.tv_nsec > NSEC_PER_SEC) { + timeout.tv_sec++; + timeout.tv_nsec -= NSEC_PER_SEC; + } + + if (syscall(__NR_futex, &val, futex_op, 0, + &timeout, 0, FUTEX_BITSET_MATCH_ANY) >= 0) { + ksft_test_result_fail("futex didn't return ETIMEDOUT\n"); + return 1; + } + + if (errno != ETIMEDOUT) { + ksft_test_result_fail("futex didn't return ETIMEDOUT: %s\n", + strerror(errno)); + return 1; + } + + clock_gettime(clockid, &end); + + if (end.tv_sec < timeout.tv_sec || + (end.tv_sec == timeout.tv_sec && end.tv_nsec < timeout.tv_nsec)) { + ksft_test_result_fail("futex slept less than 100ms\n"); + return 1; + } + + + ksft_test_result_pass("futex with the %d clockid\n", clockid); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int status, len, fd; + char buf[4096]; + pid_t pid; + struct timespec mtime_now; + + nscheck(); + + ksft_set_plan(2); + + clock_gettime(CLOCK_MONOTONIC, &mtime_now); + + if (unshare_timens()) + return 1; + + len = snprintf(buf, sizeof(buf), "%d %d 0", + CLOCK_MONOTONIC, 70 * 24 * 3600); + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) + return pr_perror("/proc/self/timens_offsets"); + + if (write(fd, buf, len) != len) + return pr_perror("/proc/self/timens_offsets"); + + close(fd); + + pid = fork(); + if (pid < 0) + return pr_perror("Unable to fork"); + if (pid == 0) { + int ret = 0; + + ret |= run_test(CLOCK_REALTIME); + ret |= run_test(CLOCK_MONOTONIC); + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); + return 0; + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("Unable to wait the child process"); + + if (WIFEXITED(status)) + return WEXITSTATUS(status); + + return 1; +} diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config index 3ba674b64fa9..69dd0d1aa30b 100644 --- a/tools/testing/selftests/vm/config +++ b/tools/testing/selftests/vm/config @@ -3,3 +3,4 @@ CONFIG_USERFAULTFD=y CONFIG_TEST_VMALLOC=m CONFIG_DEVICE_PRIVATE=y CONFIG_TEST_HMM=m +CONFIG_GUP_BENCHMARK=y diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 0a28a6a29581..c9404ef9698e 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -45,7 +45,7 @@ struct hmm_buffer { #define TWOMEG (1 << 21) #define HMM_BUFFER_SIZE (1024 << 12) #define HMM_PATH_MAX 64 -#define NTIMES 256 +#define NTIMES 10 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index d77f4829f1e0..74c69b75f6f5 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -316,6 +316,14 @@ pp sleep 3 n2 ping -W 1 -c 1 192.168.241.1 n1 wg set wg0 peer "$pub2" persistent-keepalive 0 +# Test that sk_bound_dev_if works +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 +# What about when the mark changes and the packet must be rerouted? +n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1 +n1 ping -c 1 -W 1 192.168.241.2 # First the boring case +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case +n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1 + # Test that onion routing works, even when it loops n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 ip1 addr add 192.168.242.1/24 dev wg0 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index d531de13c95b..4eecb432a66c 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -18,10 +18,12 @@ CONFIG_NF_NAT=y CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MARK=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_NAT=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y |