From 69ca3d29a75554122b998e8dfa20117766f52f48 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 May 2021 16:31:40 -0700 Subject: mptcp: update selftest for fallback due to OoO The previous commit noted that we can have fallback scenario due to OoO (or packet drop). Update the self-tests accordingly Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 3c4cb72ed8a4..9ca5f1ba461e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -501,6 +501,7 @@ do_transfer() local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -518,10 +519,14 @@ do_transfer() "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then - printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ - "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 - rets=1 + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ooo_now} -eq 0 ]; then + printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ + "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 + rets=1 + else + printf "[ Note ] fallback due to TCP OoO" + fi fi if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then -- cgit v1.2.3 From 6c1ced2f701618e912be6c549139d58c180419ea Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 1 Jun 2021 19:53:56 +0800 Subject: perf tools: Copy uapi/asm/perf_regs.h from the kernel for MIPS To allow the build to complete on older systems, where those files are either not uptodate, lacking some recent additions or not present at all. And check if the copy drifts from the kernel. This commit is similar with commit 12f020338a2c ("tools: Copy uapi/asm/perf_regs.h from the kernel") With this commit, we can avoid the following build error in any case: tools/perf/arch/mips/include/perf_regs.h:7:10: fatal error: asm/perf_regs.h: No such file or directory #include ^~~~~~~~~~~~~~~~~ compilation terminated. Signed-off-by: Tiezhu Yang Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1622548436-12472-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/mips/include/uapi/asm/perf_regs.h | 40 ++++++++++++++++++++++++++++ tools/perf/Makefile.config | 1 - tools/perf/check-headers.sh | 1 + 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 tools/arch/mips/include/uapi/asm/perf_regs.h (limited to 'tools') diff --git a/tools/arch/mips/include/uapi/asm/perf_regs.h b/tools/arch/mips/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..d0f4ecd616cf --- /dev/null +++ b/tools/arch/mips/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_MIPS_PERF_REGS_H +#define _ASM_MIPS_PERF_REGS_H + +enum perf_event_mips_regs { + PERF_REG_MIPS_PC, + PERF_REG_MIPS_R1, + PERF_REG_MIPS_R2, + PERF_REG_MIPS_R3, + PERF_REG_MIPS_R4, + PERF_REG_MIPS_R5, + PERF_REG_MIPS_R6, + PERF_REG_MIPS_R7, + PERF_REG_MIPS_R8, + PERF_REG_MIPS_R9, + PERF_REG_MIPS_R10, + PERF_REG_MIPS_R11, + PERF_REG_MIPS_R12, + PERF_REG_MIPS_R13, + PERF_REG_MIPS_R14, + PERF_REG_MIPS_R15, + PERF_REG_MIPS_R16, + PERF_REG_MIPS_R17, + PERF_REG_MIPS_R18, + PERF_REG_MIPS_R19, + PERF_REG_MIPS_R20, + PERF_REG_MIPS_R21, + PERF_REG_MIPS_R22, + PERF_REG_MIPS_R23, + PERF_REG_MIPS_R24, + PERF_REG_MIPS_R25, + PERF_REG_MIPS_R26, + PERF_REG_MIPS_R27, + PERF_REG_MIPS_R28, + PERF_REG_MIPS_R29, + PERF_REG_MIPS_R30, + PERF_REG_MIPS_R31, + PERF_REG_MIPS_MAX = PERF_REG_MIPS_R31 + 1, +}; +#endif /* _ASM_MIPS_PERF_REGS_H */ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 406a9519145e..73df23dd664c 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -90,7 +90,6 @@ endif ifeq ($(ARCH),mips) NO_PERF_REGS := 0 CFLAGS += -I$(OUTPUT)arch/mips/include/generated - CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi LIBUNWIND_LIBS = -lunwind -lunwind-mips endif diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index dd8ff287e930..c783558332b8 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -39,6 +39,7 @@ arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk arch/arm/include/uapi/asm/perf_regs.h arch/arm64/include/uapi/asm/perf_regs.h +arch/mips/include/uapi/asm/perf_regs.h arch/powerpc/include/uapi/asm/perf_regs.h arch/s390/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/perf_regs.h -- cgit v1.2.3 From 3cb17cce1e76ccc5499915a4d7e095a1ad6bf7ff Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Tue, 1 Jun 2021 17:27:50 +0800 Subject: perf probe: Fix NULL pointer dereference in convert_variable_location() If we just check whether the variable can be converted, 'tvar' should be a null pointer. However, the null pointer check is missing in the 'Constant value' execution path. The following cases can trigger this problem: $ cat test.c #include void main(void) { int a; const int b = 1; asm volatile("mov %1, %0" : "=r"(a): "i"(b)); printf("a: %d\n", a); } $ gcc test.c -o test -O -g $ sudo ./perf probe -x ./test -L "main" 0 void main(void) { 2 int a; const int b = 1; asm volatile("mov %1, %0" : "=r"(a): "i"(b)); 6 printf("a: %d\n", a); } $ sudo ./perf probe -x ./test -V "main:6" Segmentation fault The check on 'tvar' is added. If 'tavr' is a null pointer, we return 0 to indicate that the variable can be converted. Now, we can successfully show the variables that can be accessed. $ sudo ./perf probe -x ./test -V "main:6" Available variables at main:6 @ char* __fmt int a int b However, the variable 'b' cannot be tracked. $ sudo ./perf probe -x ./test -D "main:6 b" Failed to find the location of the 'b' variable at this address. Perhaps it has been optimized out. Use -V with the --range option to show 'b' location range. Error: Failed to add events. This is because __die_find_variable_cb() did not successfully match variable 'b', which has the DW_AT_const_value attribute instead of DW_AT_location. We added support for DW_AT_const_value in __die_find_variable_cb(). With this modification, we can successfully track the variable 'b'. $ sudo ./perf probe -x ./test -D "main:6 b" p:probe_test/main_L6 /home/lhf/test:0x1156 b=\1:s32 Fixes: 66f69b219716 ("perf probe: Support DW_AT_const_value constant value") Signed-off-by: Li Huafei Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Frank Ch. Eigler Cc: Jianlin Lv Cc: Jiri Olsa Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Yang Jihong Cc: Zhang Jinhao http://lore.kernel.org/lkml/20210601092750.169601-1-lihuafei1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 8 ++++++-- tools/perf/util/probe-finder.c | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index b2f4920e19a6..7d2ba8419b0c 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -975,9 +975,13 @@ static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data) if ((tag == DW_TAG_formal_parameter || tag == DW_TAG_variable) && die_compare_name(die_mem, fvp->name) && - /* Does the DIE have location information or external instance? */ + /* + * Does the DIE have location information or const value + * or external instance? + */ (dwarf_attr(die_mem, DW_AT_external, &attr) || - dwarf_attr(die_mem, DW_AT_location, &attr))) + dwarf_attr(die_mem, DW_AT_location, &attr) || + dwarf_attr(die_mem, DW_AT_const_value, &attr))) return DIE_FIND_CB_END; if (dwarf_haspc(die_mem, fvp->addr)) return DIE_FIND_CB_CONTINUE; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 866f2d514d72..b029c29ce227 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -190,6 +190,9 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, immediate_value_is_supported()) { Dwarf_Sword snum; + if (!tvar) + return 0; + dwarf_formsdata(&attr, &snum); ret = asprintf(&tvar->value, "\\%ld", (long)snum); -- cgit v1.2.3 From 4f2abe91922ba02bb419d91d92a518e4c805220b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 27 May 2021 11:28:35 -0700 Subject: perf record: Move probing cgroup sampling support I found that checking cgroup sampling support using the missing features doesn't work on old kernels. Because it added both attr.cgroup bit and PERF_SAMPLE_CGROUP bit, it needs to check whichever comes first (usually the actual event, not dummy). But it only checks the attr.cgroup bit which is set only in the dummy event so cannot detect failtures due the sample bits. Also we don't ignore the missing feature and retry, it'd be better checking it with the API probing logic. Committer notes: Extracted the minimal part to check using the new cgroup API probe routine, the part that removes the cgroup member can be left for further discussion. Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210527182835.1634339-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 6 ++++++ tools/perf/util/perf_api_probe.c | 10 ++++++++++ tools/perf/util/perf_api_probe.h | 1 + 3 files changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3337b5f93336..84803abeb942 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2714,6 +2714,12 @@ int cmd_record(int argc, const char **argv) rec->no_buildid = true; } + if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { + pr_err("Kernel has no cgroup sampling support.\n"); + err = -EINVAL; + goto out_opts; + } + if (rec->opts.kcore) rec->data.is_dir = true; diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 829af17a0867..020411682a3c 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -103,6 +103,11 @@ static void perf_probe_build_id(struct evsel *evsel) evsel->core.attr.build_id = 1; } +static void perf_probe_cgroup(struct evsel *evsel) +{ + evsel->core.attr.cgroup = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -182,3 +187,8 @@ bool perf_can_record_build_id(void) { return perf_probe_api(perf_probe_build_id); } + +bool perf_can_record_cgroup(void) +{ + return perf_probe_api(perf_probe_cgroup); +} diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h index f12ca55f509a..b104168efb15 100644 --- a/tools/perf/util/perf_api_probe.h +++ b/tools/perf/util/perf_api_probe.h @@ -12,5 +12,6 @@ bool perf_can_record_switch_events(void); bool perf_can_record_text_poke_events(void); bool perf_can_sample_identifier(void); bool perf_can_record_build_id(void); +bool perf_can_record_cgroup(void); #endif // __PERF_API_PROBE_H -- cgit v1.2.3 From d3fddc355a4a4415e8d43d1faae1be713d65cf5e Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 17 May 2021 16:12:54 +0800 Subject: perf stat: Fix error return code in bperf__load() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Committer notes: Added the missing {} for the now multiline 'if' block, fixing this error: CC /tmp/build/perf/util/bpf_counter.o util/bpf_counter.c: In function ‘bperf__load’: util/bpf_counter.c:523:9: error: this ‘if’ clause does not guard... [-Werror=misleading-indentation] 523 | if (evsel->bperf_leader_link_fd < 0 && | ^~ util/bpf_counter.c:526:17: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’ 526 | goto out; | ^~~~ cc1: all warnings being treated as errors Fixes: 7fac83aaf2eecc9e ("perf stat: Introduce 'bperf' to share hardware PMCs with BPF") Reported-by: Hulk Robot Signed-off-by: Yu Kuai Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: Song Liu Cc: Yu Kuai Cc: Zhang Yi Link: http://lore.kernel.org/lkml/20210517081254.1561564-1-yukuai3@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_counter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 974f10e356f0..5ed674a2f55e 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -521,9 +521,10 @@ static int bperf__load(struct evsel *evsel, struct target *target) evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id); if (evsel->bperf_leader_link_fd < 0 && - bperf_reload_leader_program(evsel, attr_map_fd, &entry)) + bperf_reload_leader_program(evsel, attr_map_fd, &entry)) { + err = -1; goto out; - + } /* * The bpf_link holds reference to the leader program, and the * leader program holds reference to the maps. Therefore, if @@ -550,6 +551,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) /* Step 2: load the follower skeleton */ evsel->follower_skel = bperf_follower_bpf__open(); if (!evsel->follower_skel) { + err = -1; pr_err("Failed to open follower skeleton\n"); goto out; } -- cgit v1.2.3 From f677ec94f6fb9d895f40403bd54236f7763c29db Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 28 May 2021 11:10:50 +0200 Subject: perf test: Test 17 fails with make LIBPFM4=1 on s390 z/VM This test case fails on s390 virtual machine z/VM which has no PMU support when the perf tool is built with LIBPFM4=1. Using make LIBPFM4=1 builds the perf tool with support for libpfm event notation. The command line flag --pfm-events is valid: # ./perf record --pfm-events cycles -- true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (2 samples) ] # However the command 'perf test -Fv 17' fails on s390 z/VM virtual machine with LIBPFM4=1: # perf test -Fv 17 17: Setup struct perf_event_attr : --- start --- ..... running './tests/attr/test-record-group2' unsupp './tests/attr/test-record-group2' running './tests/attr/test-record-pfm-period' expected exclude_hv=0, got 1 FAILED './tests/attr/test-record-pfm-period' - match failure ---- end ---- Setup struct perf_event_attr: FAILED! When --pfm-event system is not supported, the test returns unsupported and continues. Here is an example using a virtual machine on x86 and Fedora 34: [root@f33 perf]# perf test -Fv 17 17: Setup struct perf_event_attr : --- start --- ..... running './tests/attr/test-record-group2' unsupp './tests/attr/test-record-group2' running './tests/attr/test-record-pfm-period' unsupp './tests/attr/test-record-pfm-period' .... The issue is file ./tests/attr/test-record-pfm-period which requires perf event attribute member exclude_hv to be zero. This is not the case on s390 where the value of exclude_hv is one when executing on a z/VM virtual machine without PMU hardware support. Fix this by allowing value exlucde_hv to be zero or one. Output before: # /usr/bin/python ./tests/attr.py -d ./tests/attr/ -t \ test-record-pfm-period -p ./perf -vvv 2>&1| fgrep match matching [event:base-record] match: [event:base-record] matches [] FAILED './tests/attr//test-record-pfm-period' - match failure # Output after: # /usr/bin/python ./tests/attr.py -d ./tests/attr/ -t \ test-record-pfm-period -p ./perf -vvv 2>&1| fgrep match matching [event:base-record] match: [event:base-record] matches ['event-1-0-6', 'event-1-0-5'] matched Background: Using libpfm library ends up in this function call sequence pfm_get_perf_event_encoding() +-- pfm_get_os_event_encoding() +-- pfmlib_perf_event_encode() is called when no hardware specific PMU unit can be detected as in the s390 z/VM virtual machine case. This uses the "perf_events generic PMU" data structure which sets exclude_hv to 1 per default. Using this PMU that test case always fails. That is the reason why exclude_hv attribute setting varies. Version 2: As suggested by Ian Rogers make perf_event_attribute member exclude_hv more robust and accept value 0 or 1 to handle more test cases which might fail on s390 virtual machine z/VM. Suggested-by: Ian Rogers Signed-off-by: Thomas Richter Reviewed-by: Ian Rogers Cc: Heiko Carstens Cc: Ian Rogers Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20210528091050.245838-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/base-record | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 4a7b8deef3fd..8c10955eff93 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -16,7 +16,7 @@ pinned=0 exclusive=0 exclude_user=0 exclude_kernel=0|1 -exclude_hv=0 +exclude_hv=0|1 exclude_idle=0 mmap=1 comm=1 -- cgit v1.2.3 From 2dc065eae56df804e4da5f8a9e4139033f7ea605 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jun 2021 14:22:40 -0700 Subject: perf evsel: Add missing cloning of evsel->use_config_name The evsel__clone() should copy all fields in the evsel which are set during the event parsing. But it missed the use_config_name field. Fixes: 12279429d862 ("perf stat: Uniquify hybrid event name") Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210602212241.2175005-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + tools/perf/util/evsel.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4a3cd1b5bb33..a8d8463f8ee5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -428,6 +428,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->auto_merge_stats = orig->auto_merge_stats; evsel->collect_stat = orig->collect_stat; evsel->weak_group = orig->weak_group; + evsel->use_config_name = orig->use_config_name; if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 75cf5dbfe208..bdad52a06438 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -83,8 +83,10 @@ struct evsel { bool collect_stat; bool weak_group; bool bpf_counter; + bool use_config_name; int bpf_fd; struct bpf_object *bpf_obj; + struct list_head config_terms; }; /* @@ -116,10 +118,8 @@ struct evsel { bool merged_stat; bool reset_group; bool errored; - bool use_config_name; struct hashmap *per_pkg_mask; struct evsel *leader; - struct list_head config_terms; int err; int cpu_iter; struct { -- cgit v1.2.3 From 3cc84399e9b60463bc39cf352ffd8bccb92e02bd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Jun 2021 14:22:41 -0700 Subject: perf stat: Honor event config name on --no-merge If user gave an event name explicitly, it should be displayed in the output as is. But with --no-merge option it adds a pmu name at the end so might confuse users. Actually this is true for hybrid pmus, I think we should do the same for others. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210602212241.2175005-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a76fff5e7d83..ca326f98c7a2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -541,7 +541,7 @@ static void uniquify_event_name(struct evsel *counter) char *config; int ret = 0; - if (counter->uniquified_name || + if (counter->uniquified_name || counter->use_config_name || !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, strlen(counter->pmu_name))) return; @@ -555,10 +555,8 @@ static void uniquify_event_name(struct evsel *counter) } } else { if (perf_pmu__has_hybrid()) { - if (!counter->use_config_name) { - ret = asprintf(&new_name, "%s/%s/", - counter->pmu_name, counter->name); - } + ret = asprintf(&new_name, "%s/%s/", + counter->pmu_name, counter->name); } else { ret = asprintf(&new_name, "%s [%s]", counter->name, counter->pmu_name); -- cgit v1.2.3 From 69c9ffed6cede9c11697861f654946e3ae95a930 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 3 Jun 2021 00:08:33 +0200 Subject: perf symbol-elf: Fix memory leak by freeing sdt_note.args Reported by ASan. Signed-off-by: Riccardo Mancini Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Fabian Hemmer Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Remi Bernon Cc: Jiri Slaby Link: http://lore.kernel.org/lkml/20210602220833.285226-1-rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 4c56aa837434..a73345730ba9 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -2412,6 +2412,7 @@ int cleanup_sdt_note_list(struct list_head *sdt_notes) list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) { list_del_init(&pos->note_list); + zfree(&pos->args); zfree(&pos->name); zfree(&pos->provider); free(pos); -- cgit v1.2.3 From 67069a1f0fe5f9eeca86d954fff2087f5542a008 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 3 Jun 2021 00:40:23 +0200 Subject: perf env: Fix memory leak of bpf_prog_info_linear member ASan reported a memory leak caused by info_linear not being deallocated. The info_linear was allocated during in perf_event__synthesize_one_bpf_prog(). This patch adds the corresponding free() when bpf_prog_info_node is freed in perf_env__purge_bpf(). $ sudo ./perf record -- sleep 5 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.025 MB perf.data (8 samples) ] ================================================================= ==297735==ERROR: LeakSanitizer: detected memory leaks Direct leak of 7688 byte(s) in 19 object(s) allocated from: #0 0x4f420f in malloc (/home/user/linux/tools/perf/perf+0x4f420f) #1 0xc06a74 in bpf_program__get_prog_info_linear /home/user/linux/tools/lib/bpf/libbpf.c:11113:16 #2 0xb426fe in perf_event__synthesize_one_bpf_prog /home/user/linux/tools/perf/util/bpf-event.c:191:16 #3 0xb42008 in perf_event__synthesize_bpf_events /home/user/linux/tools/perf/util/bpf-event.c:410:9 #4 0x594596 in record__synthesize /home/user/linux/tools/perf/builtin-record.c:1490:8 #5 0x58c9ac in __cmd_record /home/user/linux/tools/perf/builtin-record.c:1798:8 #6 0x58990b in cmd_record /home/user/linux/tools/perf/builtin-record.c:2901:8 #7 0x7b2a20 in run_builtin /home/user/linux/tools/perf/perf.c:313:11 #8 0x7b12ff in handle_internal_command /home/user/linux/tools/perf/perf.c:365:8 #9 0x7b2583 in run_argv /home/user/linux/tools/perf/perf.c:409:2 #10 0x7b0d79 in main /home/user/linux/tools/perf/perf.c:539:3 #11 0x7fa357ef6b74 in __libc_start_main /usr/src/debug/glibc-2.33-8.fc34.x86_64/csu/../csu/libc-start.c:332:16 Signed-off-by: Riccardo Mancini Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Yonghong Song Link: http://lore.kernel.org/lkml/20210602224024.300485-1-rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 9130f6fad8d5..bc5e4f294e9e 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -144,6 +144,7 @@ static void perf_env__purge_bpf(struct perf_env *env) node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); rb_erase(&node->rb_node, root); + free(node->info_linear); free(node); } -- cgit v1.2.3 From acf2492b51c9a3c4dfb947f4d3477a86d315150f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 4 Jun 2021 17:17:30 +0200 Subject: wireguard: selftests: remove old conntrack kconfig value On recent kernels, this config symbol is no longer used. Reported-by: Rui Salvaterra Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Cc: stable@vger.kernel.org Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- tools/testing/selftests/wireguard/qemu/kernel.config | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 4eecb432a66c..74db83a0aedd 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y -CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y -- cgit v1.2.3 From f8873d11d4121aad35024f9379e431e0c83abead Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 4 Jun 2021 17:17:31 +0200 Subject: wireguard: selftests: make sure rp_filter is disabled on vethc Some distros may enable strict rp_filter by default, which will prevent vethc from receiving the packets with an unrouteable reverse path address. Reported-by: Hangbin Liu Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Cc: stable@vger.kernel.org Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- tools/testing/selftests/wireguard/netns.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 7ed7cd95e58f..ebc4ee0fe179 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_prefixlength 0 ip1 -4 route add default dev wg0 table 51820 ip1 -4 rule add not fwmark 51820 table 51820 ip1 -4 rule add table main suppress_prefixlength 0 +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter' # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. n1 ping -W 1 -c 100 -f 192.168.99.7 n1 ping -W 1 -c 100 -f abab::1111 -- cgit v1.2.3 From 263e88d678baa1a2e3f2d5afbdcd9fd3feb80a4d Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 4 Jun 2021 20:01:30 -0700 Subject: proc: add .gitignore for proc-subset-pid selftest This new selftest needs an entry in the .gitignore file otherwise git will try to track the binary. Link: https://lkml.kernel.org/r/20210601164305.11776-1-dmatlack@google.com Fixes: 268af17ada5855 ("selftests: proc: test subset=pid") Signed-off-by: David Matlack Acked-by: Christian Brauner Cc: Shuah Khan Cc: Alexey Dobriyan Cc: Alexey Gladkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/proc/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index bed4b5318a86..8f3e72e626fa 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -10,6 +10,7 @@ /proc-self-map-files-002 /proc-self-syscall /proc-self-wchan +/proc-subset-pid /proc-uptime-001 /proc-uptime-002 /read -- cgit v1.2.3 From 11fc79fc9f2e395aa39fa5baccae62767c5d8280 Mon Sep 17 00:00:00 2001 From: Kev Jackson Date: Mon, 7 Jun 2021 14:08:35 +0100 Subject: libbpf: Fixes incorrect rx_ring_setup_done When calling xsk_socket__create_shared(), the logic at line 1097 marks a boolean flag true within the xsk_umem structure to track setup progress in order to support multiple calls to the function. However, instead of marking umem->tx_ring_setup_done, the code incorrectly sets umem->rx_ring_setup_done. This leads to improper behaviour when creating and destroying xsk and umem structures. Multiple calls to this function is documented as supported. Fixes: ca7a83e2487a ("libbpf: Only create rx and tx XDP rings when necessary") Signed-off-by: Kev Jackson Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/YL4aU4f3Aaik7CN0@linux-dev --- tools/lib/bpf/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 6061431ee04c..e9b619aa0cdf 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -1094,7 +1094,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, goto out_put_ctx; } if (xsk->fd == umem->fd) - umem->rx_ring_setup_done = true; + umem->tx_ring_setup_done = true; } err = xsk_get_mmap_offsets(xsk->fd, &off); -- cgit v1.2.3 From e8ba0b2b64126381643bb50df3556b139a60545a Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 11:42:16 +0800 Subject: tools/bootconfig: Fix error return code in apply_xbc() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Link: https://lkml.kernel.org/r/20210508034216.2277-1-thunder.leizhen@huawei.com Fixes: a995e6bc0524 ("tools/bootconfig: Fix to check the write failure correctly") Reported-by: Hulk Robot Acked-by: Masami Hiramatsu Signed-off-by: Zhen Lei Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 7362bef1a368..6cd6080cac04 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -399,6 +399,7 @@ static int apply_xbc(const char *path, const char *xbc_path) } /* TODO: Ensure the @path is initramfs/initrd image */ if (fstat(fd, &stat) < 0) { + ret = -errno; pr_err("Failed to get the size of %s\n", path); goto out; } -- cgit v1.2.3 From 824afd55e95c3cb12c55d297a0ae408be1779cc8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 May 2021 12:06:33 +0900 Subject: tools/bootconfig: Fix a build error accroding to undefined fallthrough Since the "fallthrough" is defined only in the kernel, building lib/bootconfig.c as a part of user-space tools causes a build error. Add a dummy fallthrough to avoid the build error. Link: https://lkml.kernel.org/r/162087519356.442660.11385099982318160180.stgit@devnote2 Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 4c1ca831adb1 ("Revert "lib: Revert use of fallthrough pseudo-keyword in lib/"") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/include/linux/bootconfig.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h index 078cbd2ba651..de7f30f99af3 100644 --- a/tools/bootconfig/include/linux/bootconfig.h +++ b/tools/bootconfig/include/linux/bootconfig.h @@ -4,4 +4,8 @@ #include "../../../../include/linux/bootconfig.h" +#ifndef fallthrough +# define fallthrough +#endif + #endif -- cgit v1.2.3 From 82944421243e5988898f54266687586ba07d889e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Jun 2021 13:48:17 +0200 Subject: selftests: netfilter: add fib test case There is a bug report on netfilter.org bugzilla pointing to fib expression dropping ipv6 DAD packets. Add a test case that demonstrates this problem. Next patch excludes icmpv6 packets coming from any to linklocal. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/nft_fib.sh | 221 +++++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/nft_fib.sh (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3171069a6b46..cd6430b39982 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ +TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh new file mode 100755 index 000000000000..6caf6ac8c285 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_fib.sh @@ -0,0 +1,221 @@ +#!/bin/bash +# +# This tests the fib expression. +# +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +nsrouter="nsrouter-$sfx" +timeout=4 + +log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) + +cleanup() +{ + ip netns del ${ns1} + ip netns del ${ns2} + ip netns del ${nsrouter} + + [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ${nsrouter} +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace" + exit $ksft_skip +fi + +trap cleanup EXIT + +dmesg | grep -q ' nft_rpfilter: ' +if [ $? -eq 0 ]; then + dmesg -c | grep ' nft_rpfilter: ' + echo "WARN: a previous test run has failed" 1>&2 +fi + +sysctl -q net.netfilter.nf_log_all_netns=1 +ip netns add ${ns1} +ip netns add ${ns2} + +load_ruleset() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <&2 + ip netns exec ${ns} nft list table inet filter + return 1 + fi + + if [ $want -gt 0 ]; then + echo "PASS: fib expression did drop packets for $address" + fi + + return 0 +} + +load_ruleset ${nsrouter} +load_ruleset ${ns1} +load_ruleset ${ns2} + +ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2} + +ip -net ${nsrouter} link set lo up +ip -net ${nsrouter} link set veth0 up +ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0 +ip -net ${nsrouter} addr add dead:1::1/64 dev veth0 + +ip -net ${nsrouter} link set veth1 up +ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1 +ip -net ${nsrouter} addr add dead:2::1/64 dev veth1 + +ip -net ${ns1} link set lo up +ip -net ${ns1} link set eth0 up + +ip -net ${ns2} link set lo up +ip -net ${ns2} link set eth0 up + +ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr add dead:1::99/64 dev eth0 +ip -net ${ns1} route add default via 10.0.1.1 +ip -net ${ns1} route add default via dead:1::1 + +ip -net ${ns2} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns2} addr add dead:2::99/64 dev eth0 +ip -net ${ns2} route add default via 10.0.2.1 +ip -net ${ns2} route add default via dead:2::1 + +test_ping() { + local daddr4=$1 + local daddr6=$2 + + ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null + ret=$? + if [ $ret -ne 0 ];then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2 + return 1 + fi + + ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null + ret=$? + if [ $ret -ne 0 ];then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2 + return 1 + fi + + return 0 +} + +ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + +sleep 3 + +test_ping 10.0.2.1 dead:2::1 || exit 1 +check_drops || exit 1 + +test_ping 10.0.2.99 dead:2::99 || exit 1 +check_drops || exit 1 + +echo "PASS: fib expression did not cause unwanted packet drops" + +ip netns exec ${nsrouter} nft flush table inet filter + +ip -net ${ns1} route del default +ip -net ${ns1} -6 route del default + +ip -net ${ns1} addr del 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr del dead:1::99/64 dev eth0 + +ip -net ${ns1} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns1} addr add dead:2::99/64 dev eth0 + +ip -net ${ns1} route add default via 10.0.2.1 +ip -net ${ns1} -6 route add default via dead:2::1 + +ip -net ${nsrouter} addr add dead:2::1/64 dev veth0 + +# switch to ruleset that doesn't log, this time +# its expected that this does drop the packets. +load_ruleset_count ${nsrouter} + +# ns1 has a default route, but nsrouter does not. +# must not check return value, ping to 1.1.1.1 will +# fail. +check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1 +check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1 + +ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null +check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1 + +sleep 2 +ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null +check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1 + +exit 0 -- cgit v1.2.3 From 584fd3b31889852d0d6f3dd1e3d8e9619b660d2c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 7 Jun 2021 11:45:58 +0200 Subject: objtool: Fix .symtab_shndx handling for elf_create_undef_symbol() When an ELF object uses extended symbol section indexes (IOW it has a .symtab_shndx section), these must be kept in sync with the regular symbol table (.symtab). So for every new symbol we emit, make sure to also emit a .symtab_shndx value to keep the arrays of equal size. Note: since we're writing an UNDEF symbol, most GElf_Sym fields will be 0 and we can repurpose one (st_size) to host the 0 for the xshndx value. Fixes: 2f2f7e47f052 ("objtool: Add elf_create_undef_symbol()") Reported-by: Nick Desaulniers Suggested-by: Fangrui Song Signed-off-by: Peter Zijlstra (Intel) Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/YL3q1qFO9QIRL/BA@hirez.programming.kicks-ass.net --- tools/objtool/elf.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 743c2e9d0f56..41bca1d13d8e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -717,7 +717,7 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str) struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) { - struct section *symtab; + struct section *symtab, *symtab_shndx; struct symbol *sym; Elf_Data *data; Elf_Scn *s; @@ -769,6 +769,29 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) symtab->len += data->d_size; symtab->changed = true; + symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); + if (symtab_shndx) { + s = elf_getscn(elf->elf, symtab_shndx->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return NULL; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return NULL; + } + + data->d_buf = &sym->sym.st_size; /* conveniently 0 */ + data->d_size = sizeof(Elf32_Word); + data->d_align = 4; + data->d_type = ELF_T_WORD; + + symtab_shndx->len += 4; + symtab_shndx->changed = true; + } + sym->sec = find_section_by_index(elf, 0); elf_add_symbol(elf, sym); -- cgit v1.2.3 From 2395da0e17935ce9158cdfae433962bdb6cbfa67 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 10 Jun 2021 15:59:43 -0700 Subject: selftests: mptcp: enable syncookie only in absence of reorders Syncookie validation may fail for OoO packets, causing spurious resets and self-tests failures, so let's force syncookie only for tests iteration with no OoO. Fixes: fed61c4b584c ("selftests: mptcp: make 2nd net namespace use tcp syn cookies unconditionally") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/198 Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 9ca5f1ba461e..2b495dc8d78e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -197,9 +197,6 @@ ip -net "$ns4" link set ns4eth3 up ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2 -# use TCP syn cookies, even if no flooding was detected. -ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 - set_ethtool_flags() { local ns="$1" local dev="$2" @@ -737,6 +734,14 @@ for sender in $ns1 $ns2 $ns3 $ns4;do exit $ret fi + # ns1<->ns2 is not subject to reordering/tc delays. Use it to test + # mptcp syncookie support. + if [ $sender = $ns1 ]; then + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 + else + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1 + fi + run_tests "$ns2" $sender 10.0.1.2 run_tests "$ns2" $sender dead:beef:1::2 run_tests "$ns2" $sender 10.0.2.1 -- cgit v1.2.3 From 2d49b721dc18c113d5221f4cf5a6104eb66cb7f2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Jun 2021 09:04:29 +0200 Subject: objtool: Only rewrite unconditional retpoline thunk calls It turns out that the compilers generate conditional branches to the retpoline thunks like: 5d5: 0f 85 00 00 00 00 jne 5db 5d7: R_X86_64_PLT32 __x86_indirect_thunk_r11-0x4 while the rewrite can only handle JMP/CALL to the thunks. The result is the alternative wrecking the code. Make sure to skip writing the alternatives for conditional branches. Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls") Reported-by: Lukasz Majczak Reported-by: Nathan Chancellor Signed-off-by: Peter Zijlstra (Intel) Tested-by: Nathan Chancellor --- tools/objtool/arch/x86/decode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 24295d39713b..523aa4157f80 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -747,6 +747,10 @@ int arch_rewrite_retpolines(struct objtool_file *file) list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC) + continue; + if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) continue; -- cgit v1.2.3 From 197eecb6ecae0b04bd694432f640ff75597fed9c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sat, 5 Jun 2021 13:29:57 +0800 Subject: perf session: Correct buffer copying when peeking events When peeking an event, it has a short path and a long path. The short path uses the session pointer "one_mmap_addr" to directly fetch the event; and the long path needs to read out the event header and the following event data from file and fill into the buffer pointer passed through the argument "buf". The issue is in the long path that it copies the event header and event data into the same destination address which pointer "buf", this means the event header is overwritten. We are just lucky to run into the short path in most cases, so we don't hit the issue in the long path. This patch adds the offset "hdr_sz" to the pointer "buf" when copying the event data, so that it can reserve the event header which can be used properly by its caller. Fixes: 5a52f33adf02 ("perf session: Add perf_session__peek_event()") Signed-off-by: Leo Yan Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210605052957.1070720-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 106b3d60881a..e59242c361ce 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1723,6 +1723,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, if (event->header.size < hdr_sz || event->header.size > buf_sz) return -1; + buf += hdr_sz; rest = event->header.size - hdr_sz; if (readn(fd, buf, rest) != (ssize_t)rest) -- cgit v1.2.3 From 36524112aba3246d1240c1791c72b26fa54008a3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Jun 2021 13:46:18 -0300 Subject: tools headers cpufeatures: Sync with the kernel sources To pick the changes in: fb35d30fe5b06cc2 ("x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX]") e7b6385b01d8e9fb ("x86/cpufeatures: Add Intel SGX hardware bits") 1478b99a76534b6c ("x86/cpufeatures: Mark ENQCMD as disabled when configured out") That don't cause any change in the tools, just silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h Cc: Borislav Petkov Cc: Fenghua Yu Cc: Sean Christopherson Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/disabled-features.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index b7dd944dc867..8f28fafa98b3 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -56,11 +56,8 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -#ifdef CONFIG_IOMMU_SUPPORT -# define DISABLE_ENQCMD 0 -#else -# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) -#endif +/* Force disable because it's broken beyond repair */ +#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 -- cgit v1.2.3 From b87b04f5019e821c8c6c7761f258402e43500a1f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 12 Jun 2021 18:24:59 -0600 Subject: ipv4: Fix device used for dst_alloc with local routes Oliver reported a use case where deleting a VRF device can hang waiting for the refcnt to drop to 0. The root cause is that the dst is allocated against the VRF device but cached on the loopback device. The use case (added to the selftests) has an implicit VRF crossing due to the ordering of the FIB rules (lookup local is before the l3mdev rule, but the problem occurs even if the FIB rules are re-ordered with local after l3mdev because the VRF table does not have a default route to terminate the lookup). The end result is is that the FIB lookup returns the loopback device as the nexthop, but the ingress device is in a VRF. The mismatch causes the dst alloc against the VRF device but then cached on the loopback. The fix is to bring the trick used for IPv6 (see ip6_rt_get_dev_rcu): pick the dst alloc device based the fib lookup result but with checks that the result has a nexthop device (e.g., not an unreachable or prohibit entry). Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") Reported-by: Oliver Herms Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 15 ++++++++++++++- tools/testing/selftests/net/fib_tests.sh | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6787c55f6ab..6a36ac98476f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2056,6 +2056,19 @@ martian_source: return err; } +/* get device for dst_alloc with local routes */ +static struct net_device *ip_rt_get_dev(struct net *net, + const struct fib_result *res) +{ + struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; + struct net_device *dev = NULL; + + if (nhc) + dev = l3mdev_master_dev_rcu(nhc->nhc_dev); + + return dev ? : net->loopback_dev; +} + /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet @@ -2212,7 +2225,7 @@ local_input: } } - rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 76d9487fb03c..5abe92d55b69 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1384,12 +1384,37 @@ ipv4_rt_replace() ipv4_rt_replace_mpath } +# checks that cached input route on VRF port is deleted +# when VRF is deleted +ipv4_local_rt_cache() +{ + run_cmd "ip addr add 10.0.0.1/32 dev lo" + run_cmd "ip netns add test-ns" + run_cmd "ip link add veth-outside type veth peer name veth-inside" + run_cmd "ip link add vrf-100 type vrf table 1100" + run_cmd "ip link set veth-outside master vrf-100" + run_cmd "ip link set veth-inside netns test-ns" + run_cmd "ip link set veth-outside up" + run_cmd "ip link set vrf-100 up" + run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100" + run_cmd "ip netns exec test-ns ip link set veth-inside up" + run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside" + run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside" + run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1" + run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1" + run_cmd "ip link delete vrf-100" + + # if we do not hang test is a success + log_test $? 0 "Cached route removed from VRF port device" +} + ipv4_route_test() { route_setup ipv4_rt_add ipv4_rt_replace + ipv4_local_rt_cache route_cleanup } -- cgit v1.2.3 From 973377ffe8148180b2651825b92ae91988141b05 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 31 May 2021 12:34:24 +0000 Subject: bpf, selftests: Adjust few selftest outcomes wrt unreachable code In almost all cases from test_verifier that have been changed in here, we've had an unreachable path with a load from a register which has an invalid address on purpose. This was basically to make sure that we never walk this path and to have the verifier complain if it would otherwise. Change it to match on the right error for unprivileged given we now test these paths under speculative execution. There's one case where we match on exact # of insns_processed. Due to the extra path, this will of course mismatch on unprivileged. Thus, restrict the test->insn_processed check to privileged-only. In one other case, we result in a 'pointer comparison prohibited' error. This is similarly due to verifying an 'invalid' branch where we end up with a value pointer on one side of the comparison. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 2 +- tools/testing/selftests/bpf/verifier/and.c | 2 ++ tools/testing/selftests/bpf/verifier/bounds.c | 14 ++++++++++++++ tools/testing/selftests/bpf/verifier/dead_code.c | 2 ++ tools/testing/selftests/bpf/verifier/jmp32.c | 22 ++++++++++++++++++++++ tools/testing/selftests/bpf/verifier/jset.c | 10 ++++++---- tools/testing/selftests/bpf/verifier/unpriv.c | 2 ++ .../selftests/bpf/verifier/value_ptr_arith.c | 7 ++++--- 8 files changed, 53 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 1512092e1e68..3a9e332c5e36 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1147,7 +1147,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } } - if (test->insn_processed) { + if (!unpriv && test->insn_processed) { uint32_t insn_processed; char *proc; diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c index ca8fdb1b3f01..7d7ebee5cc7a 100644 --- a/tools/testing/selftests/bpf/verifier/and.c +++ b/tools/testing/selftests/bpf/verifier/and.c @@ -61,6 +61,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 0 }, diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 8a1caf46ffbc..e061e8799ce2 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -508,6 +508,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT }, { @@ -528,6 +530,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT }, { @@ -569,6 +573,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, @@ -589,6 +595,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, @@ -609,6 +617,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, @@ -674,6 +684,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, @@ -695,6 +707,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index 17fe33a75034..2c8935b3e65d 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -8,6 +8,8 @@ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, }, diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index bd5cae4a7f73..1c857b2fbdf0 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -87,6 +87,8 @@ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -150,6 +152,8 @@ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -213,6 +217,8 @@ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -280,6 +286,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -348,6 +356,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -416,6 +426,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -484,6 +496,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -552,6 +566,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -620,6 +636,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -688,6 +706,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -756,6 +776,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c index 8dcd4e0383d5..11fc68da735e 100644 --- a/tools/testing/selftests/bpf/verifier/jset.c +++ b/tools/testing/selftests/bpf/verifier/jset.c @@ -82,8 +82,8 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .retval_unpriv = 1, - .result_unpriv = ACCEPT, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .retval = 1, .result = ACCEPT, }, @@ -141,7 +141,8 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .result_unpriv = ACCEPT, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -162,6 +163,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .result_unpriv = ACCEPT, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index bd436df5cc32..111801aea5e3 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -420,6 +420,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R7 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 0, }, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 7ae2859d495c..a3e593ddfafc 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -120,7 +120,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", + .errstr_unpriv = "R2 pointer comparison prohibited", .retval = 0, }, { @@ -159,7 +159,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), // fake-dead code; targeted from branch A to - // prevent dead code sanitization + // prevent dead code sanitization, rejected + // via branch B however BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -167,7 +168,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", + .errstr_unpriv = "R0 invalid mem access 'inv'", .retval = 0, }, { -- cgit v1.2.3 From 0fd158b89b50b3a31c97a639ff496e1c59686e97 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 16 Jun 2021 16:03:21 +0200 Subject: selftests: net: veth: make test compatible with dash veth.sh is a shell script that uses /bin/sh; some distro (Ubuntu for example) use dash as /bin/sh and in this case the test reports the following error: # ./veth.sh: 21: local: -r: bad variable name # ./veth.sh: 21: local: -r: bad variable name This happens because dash doesn't support the option "-r" with local. Moreover, in case of missing bpf object, the script is exiting -1, that is an illegal number for dash: exit: Illegal number: -1 Change the script to be compatible both with bash and dash and prevent the errors above. Signed-off-by: Andrea Righi Signed-off-by: David S. Miller --- tools/testing/selftests/net/veth.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 2fedc0781ce8..11d7cdb898c0 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -18,7 +18,8 @@ ret=0 cleanup() { local ns - local -r jobs="$(jobs -p)" + local jobs + readonly jobs="$(jobs -p)" [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null rm -f $STATS @@ -108,7 +109,7 @@ chk_gro() { if [ ! -f ../bpf/xdp_dummy.o ]; then echo "Missing xdp_dummy helper. Build bpf selftest first" - exit -1 + exit 1 fi create_ns -- cgit v1.2.3 From 1b29df0e2e802cb15a5196c936f494161ec97502 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 16 Jun 2021 16:57:27 +0200 Subject: selftests: net: use bash to run udpgro_fwd test case udpgro_fwd.sh contains many bash specific operators ("[[", "local -r"), but it's using /bin/sh; in some distro /bin/sh is mapped to /bin/dash, that doesn't support such operators. Force the test to use /bin/bash explicitly and prevent false positive test failures. Signed-off-by: Andrea Righi Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index a8fa64136282..7f26591f236b 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 readonly BASE="ns-$(mktemp -u XXXXXX)" -- cgit v1.2.3 From 7e9838b7915e29ae0dfe4a3e5f007c9dc6ab9b45 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Fri, 18 Jun 2021 13:04:36 +0200 Subject: selftests/net: Add icmp.sh for testing ICMP dummy address responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new icmp.sh selftest for testing that the kernel will respond correctly with an ICMP unreachable message with the dummy (192.0.0.8) source address when there are no IPv4 addresses configured to use as source addresses. Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/icmp.sh | 74 +++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100755 tools/testing/selftests/net/icmp.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh new file mode 100755 index 000000000000..e4b04cd1644a --- /dev/null +++ b/tools/testing/selftests/net/icmp.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for checking ICMP response with dummy address instead of 0.0.0.0. +# Sets up two namespaces like: +# +----------------------+ +--------------------+ +# | ns1 | v4-via-v6 routes: | ns2 | +# | | ' | | +# | +--------+ -> 172.16.1.0/24 -> +--------+ | +# | | veth0 +--------------------------+ veth0 | | +# | +--------+ <- 172.16.0.0/24 <- +--------+ | +# | 172.16.0.1 | | 2001:db8:1::2/64 | +# | 2001:db8:1::2/64 | | | +# +----------------------+ +--------------------+ +# +# And then tries to ping 172.16.1.1 from ns1. This results in a "net +# unreachable" message being sent from ns2, but there is no IPv4 address set in +# that address space, so the kernel should substitute the dummy address +# 192.0.0.8 defined in RFC7600. + +NS1=ns1 +NS2=ns2 +H1_IP=172.16.0.1/32 +H1_IP6=2001:db8:1::1 +RT1=172.16.1.0/24 +PINGADDR=172.16.1.1 +RT2=172.16.0.0/24 +H2_IP6=2001:db8:1::2 + +TMPFILE=$(mktemp) + +cleanup() +{ + rm -f "$TMPFILE" + ip netns del $NS1 + ip netns del $NS2 +} + +trap cleanup EXIT + +# Namespaces +ip netns add $NS1 +ip netns add $NS2 + +# Connectivity +ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2 +ip -netns $NS1 link set dev veth0 up +ip -netns $NS2 link set dev veth0 up +ip -netns $NS1 addr add $H1_IP dev veth0 +ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad +ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad +ip -netns $NS1 route add $RT1 via inet6 $H2_IP6 +ip -netns $NS2 route add $RT2 via inet6 $H1_IP6 + +# Make sure ns2 will respond with ICMP unreachable +ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1 + +# Run the test - a ping runs in the background, and we capture ICMP responses +# with tcpdump; -c 1 means it should exit on the first ping, but add a timeout +# in case something goes wrong +ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null & +ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null + +# Parse response and check for dummy address +# tcpdump output looks like: +# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92 +RESP_IP=$(awk '{print $2}' < $TMPFILE) +if [[ "$RESP_IP" != "192.0.0.8" ]]; then + echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8" + exit 1 +else + echo "OK" + exit 0 +fi -- cgit v1.2.3 From fc96ec4d5d4155c61cbafd49fb2dd403c899a9f4 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 10 Jun 2021 22:32:59 +0800 Subject: perf metricgroup: Fix find_evsel_group() event selector The following command segfaults on my x86 broadwell: $ ./perf stat -M frontend_bound,retiring,backend_bound,bad_speculation sleep 1 WARNING: grouped events cpus do not match, disabling group: anon group { raw 0x10e } anon group { raw 0x10e } perf: util/evsel.c:1596: get_group_fd: Assertion `!(!leader->core.fd)' failed. Aborted (core dumped) The issue shows itself as a use-after-free in evlist__check_cpu_maps(), whereby the leader of an event selector (evsel) has been deleted (yet we still attempt to verify for an evsel). Fundamentally the problem comes from metricgroup__setup_events() -> find_evsel_group(), and has developed from the previous fix attempt in commit 9c880c24cb0d ("perf metricgroup: Fix for metrics containing duration_time"). The problem now is that the logic in checking if an evsel is in the same group is subtly broken for the "cycles" event. For the "cycles" event, the pmu_name is NULL; however the logic in find_evsel_group() may set an event matched against "cycles" as used, when it should not be. This leads to a condition where an evsel is set, yet its leader is not. Fix the check for evsel pmu_name by not matching evsels when either has a NULL pmu_name. There is still a pre-existing metric issue whereby the ordering of the metrics may break the 'stat' function, as discussed at: https://lore.kernel.org/lkml/49c6fccb-b716-1bf0-18a6-cace1cdb66b9@huawei.com/ Fixes: 9c880c24cb0d ("perf metricgroup: Fix for metrics containing duration_time") Signed-off-by: John Garry Tested-by: Arnaldo Carvalho de Melo # On a Thinkpad T450S Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1623335580-187317-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 8336dd8e8098..c456fdeae06a 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -162,10 +162,10 @@ static bool contains_event(struct evsel **metric_events, int num_events, return false; } -static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2) +static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2) { if (!ev1->pmu_name || !ev2->pmu_name) - return false; + return true; return !strcmp(ev1->pmu_name, ev2->pmu_name); } @@ -288,7 +288,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, */ if (!has_constraint && ev->leader != metric_events[i]->leader && - evsel_same_pmu(ev->leader, metric_events[i]->leader)) + evsel_same_pmu_or_none(ev->leader, metric_events[i]->leader)) break; if (!strcmp(metric_events[i]->name, ev->name)) { set_bit(ev->idx, evlist_used); -- cgit v1.2.3 From fe7a98b9d9b36e5c8a22d76b67d29721f153f66e Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 10 Jun 2021 22:33:00 +0800 Subject: perf metricgroup: Return error code from metricgroup__add_metric_sys_event_iter() The error code is not set at all in the sys event iter function. This may lead to an uninitialized value of "ret" in metricgroup__add_metric() when no CPU metric is added. Fix by properly setting the error code. It is not necessary to init "ret" to 0 in metricgroup__add_metric(), as if we have no CPU or sys event metric matching, then "has_match" should be 0 and "ret" is set to -EINVAL. However gcc cannot detect that it may not have been set after the map_for_each_metric() loop for CPU metrics, which is strange. Fixes: be335ec28efa8 ("perf metricgroup: Support adding metrics for system PMUs") Signed-off-by: John Garry Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1623335580-187317-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index c456fdeae06a..d3cf2dee36c8 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1073,16 +1073,18 @@ static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); if (ret) - return ret; + goto out; ret = resolve_metric(d->metric_no_group, d->metric_list, NULL, d->ids); if (ret) - return ret; + goto out; *(d->has_match) = true; - return *d->ret; +out: + *(d->ret) = ret; + return ret; } static int metricgroup__add_metric(const char *metric, bool metric_no_group, -- cgit v1.2.3 From c087e9480cf33672ef2c6cce4348d754988b8437 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Sat, 12 Jun 2021 19:37:48 +0200 Subject: perf machine: Fix refcount usage when processing PERF_RECORD_KSYMBOL ASan reported a memory leak of BPF-related ksymbols map and dso. The leak is caused by refount never reaching 0, due to missing __put calls in the function machine__process_ksymbol_register. Once the dso is inserted in the map, dso__put() should be called (map__new2() increases the refcount to 2). The same thing applies for the map when it's inserted into maps (maps__insert() increases the refcount to 2). $ sudo ./perf record -- sleep 5 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.025 MB perf.data (8 samples) ] ================================================================= ==297735==ERROR: LeakSanitizer: detected memory leaks Direct leak of 6992 byte(s) in 19 object(s) allocated from: #0 0x4f43c7 in calloc (/home/user/linux/tools/perf/perf+0x4f43c7) #1 0x8e4e53 in map__new2 /home/user/linux/tools/perf/util/map.c:216:20 #2 0x8cf68c in machine__process_ksymbol_register /home/user/linux/tools/perf/util/machine.c:778:10 [...] Indirect leak of 8702 byte(s) in 19 object(s) allocated from: #0 0x4f43c7 in calloc (/home/user/linux/tools/perf/perf+0x4f43c7) #1 0x8728d7 in dso__new_id /home/user/linux/tools/perf/util/dso.c:1256:20 #2 0x872015 in dso__new /home/user/linux/tools/perf/util/dso.c:1295:9 #3 0x8cf623 in machine__process_ksymbol_register /home/user/linux/tools/perf/util/machine.c:774:21 [...] Indirect leak of 1520 byte(s) in 19 object(s) allocated from: #0 0x4f43c7 in calloc (/home/user/linux/tools/perf/perf+0x4f43c7) #1 0x87b3da in symbol__new /home/user/linux/tools/perf/util/symbol.c:269:23 #2 0x888954 in map__process_kallsym_symbol /home/user/linux/tools/perf/util/symbol.c:710:8 [...] Indirect leak of 1406 byte(s) in 19 object(s) allocated from: #0 0x4f43c7 in calloc (/home/user/linux/tools/perf/perf+0x4f43c7) #1 0x87b3da in symbol__new /home/user/linux/tools/perf/util/symbol.c:269:23 #2 0x8cfbd8 in machine__process_ksymbol_register /home/user/linux/tools/perf/util/machine.c:803:8 [...] Signed-off-by: Riccardo Mancini Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiapeng Chong Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Tommi Rantala Link: http://lore.kernel.org/lkml/20210612173751.188582-1-rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 3ff4936a15a4..da19be7da284 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -776,10 +776,10 @@ static int machine__process_ksymbol_register(struct machine *machine, if (dso) { dso->kernel = DSO_SPACE__KERNEL; map = map__new2(0, dso); + dso__put(dso); } if (!dso || !map) { - dso__put(dso); return -ENOMEM; } @@ -792,6 +792,7 @@ static int machine__process_ksymbol_register(struct machine *machine, map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; maps__insert(&machine->kmaps, map); + map__put(map); dso__set_loaded(dso); if (is_bpf_image(event->ksymbol.name)) { -- cgit v1.2.3 From 482698c2f848f9dee1a5bd949793c2fe6a71adc5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Jun 2021 11:42:13 -0700 Subject: perf test: Fix non-bash issue with stat bpf counters $(( .. )) is a bash feature but the test's interpreter is !/bin/sh, switch the code to use expr. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20210617184216.2075588-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat_bpf_counters.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh index 22eb31e48ca7..2f9948b3d943 100755 --- a/tools/perf/tests/shell/stat_bpf_counters.sh +++ b/tools/perf/tests/shell/stat_bpf_counters.sh @@ -11,9 +11,9 @@ compare_number() second_num=$2 # upper bound is first_num * 110% - upper=$(( $first_num + $first_num / 10 )) + upper=$(expr $first_num + $first_num / 10 ) # lower bound is first_num * 90% - lower=$(( $first_num - $first_num / 10 )) + lower=$(expr $first_num - $first_num / 10 ) if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then echo "The difference between $first_num and $second_num are greater than 10%." -- cgit v1.2.3 From ef83f9efe8461b8fd71eb60b53dbb6a5dd7b39e9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 19 Jun 2021 10:09:08 -0300 Subject: perf beauty: Update copy of linux/socket.h with the kernel sources To pick the changes in: ea6932d70e223e02 ("net: make get_net_ns return error if NET_NS is disabled") That don't result in any changes in the tables generated from that header. This silences this perf build warning: Warning: Kernel ABI header at 'tools/perf/trace/beauty/include/linux/socket.h' differs from latest version at 'include/linux/socket.h' diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Cc: Changbin Du Cc: David S. Miller Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/linux/socket.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index b8fc5c53ba6f..0d8e3dcb7f88 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); extern int __sys_shutdown_sock(struct socket *sock, int how); extern int __sys_shutdown(int fd, int how); - -extern struct ns_common *get_net_ns(struct ns_common *ns); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 17d27fc314cba0205eec8966735a7a241cc8a5e0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 19 Jun 2021 10:11:46 -0300 Subject: tools headers UAPI: Sync asm-generic/unistd.h with the kernel original To pick the changes in: 8b1462b67f23da54 ("quota: finish disable quotactl_path syscall") Those headers are used in some arches to generate the syscall table used in 'perf trace' to translate syscall numbers into strings. This addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Cc: Jan Kara Cc: Marcin Juszkiewicz Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 6de5a7fc066b..d2a942086fcb 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_path 443 -__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +/* 443 is reserved for quotactl_path */ #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) -- cgit v1.2.3 From 1792a59eab9593de2eae36c40c5a22d70f52c026 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 19 Jun 2021 10:15:22 -0300 Subject: tools headers UAPI: Sync linux/in.h copy with the kernel sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick the changes in: 321827477360934d ("icmp: don't send out ICMP messages with a source address of 0.0.0.0") That don't result in any change in tooling, as INADDR_ are not used to generate id->string tables used by 'perf trace'. This addresses this build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h' diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h Cc: David S. Miller Cc: Toke Høiland-Jørgensen Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/in.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 7d6687618d80..d1b327036ae4 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -289,6 +289,9 @@ struct sockaddr_in { /* Address indicating an error return. */ #define INADDR_NONE ((unsigned long int) 0xffffffff) +/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */ +#define INADDR_DUMMY ((unsigned long int) 0xc0000008) + /* Network number for local host loopback. */ #define IN_LOOPBACKNET 127 -- cgit v1.2.3 From 309505dd56854c1f9744c9a2b8aa40d897002bca Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 24 Jun 2021 15:09:31 +0800 Subject: KVM: selftests: Fix mapping length truncation in m{,un}map() max_mem_slots is now declared as uint32_t. The result of (0x200000 * 32767) is unexpectedly truncated to be 0xffe00000, whilst we actually need to allocate about, 63GB. Cast max_mem_slots to size_t in both mmap() and munmap() to fix the length truncation. We'll otherwise see the failure on arm64 thanks to the access_ok() checking in __kvm_set_memory_region(), as the unmapped VA happen to go beyond the task's allowed address space. # ./set_memory_region_test Allowed number of memory slots: 32767 Adding slots 0..32766, each memory region with 2048K size ==== Test Assertion Failure ==== set_memory_region_test.c:391: ret == 0 pid=94861 tid=94861 errno=22 - Invalid argument 1 0x00000000004015a7: test_add_max_memory_regions at set_memory_region_test.c:389 2 (inlined by) main at set_memory_region_test.c:426 3 0x0000ffffb8e67bdf: ?? ??:0 4 0x00000000004016db: _start at :? KVM_SET_USER_MEMORY_REGION IOCTL failed, rc: -1 errno: 22 slot: 2615 Fixes: 3bf0fcd75434 ("KVM: selftests: Speed up set_memory_region_test") Signed-off-by: Zenghui Yu Message-Id: <20210624070931.565-1-yuzenghui@huawei.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/set_memory_region_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 978f5b5f4dc0..d8812f27648c 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -376,7 +376,7 @@ static void test_add_max_memory_regions(void) pr_info("Adding slots 0..%i, each memory region with %dK size\n", (max_mem_slots - 1), MEM_REGION_SIZE >> 10); - mem = mmap(NULL, MEM_REGION_SIZE * max_mem_slots + alignment, + mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); @@ -401,7 +401,7 @@ static void test_add_max_memory_regions(void) TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); - munmap(mem, MEM_REGION_SIZE * max_mem_slots + alignment); + munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); munmap(mem_extra, MEM_REGION_SIZE); kvm_vm_free(vm); } -- cgit v1.2.3 From fb5dad4084f0ea6b6df5fe90f157531ca6e20681 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 8 Jun 2021 14:39:54 +0200 Subject: KVM: selftests: introduce P44V64 for z196 and EC12 Older machines like z196 and zEC12 do only support 44 bits of physical addresses. Make this the default and check via IBC if we are on a later machine. We then add P47V64 as an additional model. Reviewed-by: David Hildenbrand Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger Link: https://lore.kernel.org/kvm/20210701153853.33063-1-borntraeger@de.ibm.com/ Fixes: 1bc603af73dd ("KVM: selftests: introduce P47V64 for s390x") --- tools/testing/selftests/kvm/include/kvm_util.h | 3 ++- tools/testing/selftests/kvm/lib/guest_modes.c | 16 ++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++++ 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 35739567189e..74d73532fce9 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -44,6 +44,7 @@ enum vm_guest_mode { VM_MODE_P40V48_64K, VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, NUM_VM_MODES, }; @@ -61,7 +62,7 @@ enum vm_guest_mode { #elif defined(__s390x__) -#define VM_MODE_DEFAULT VM_MODE_P47V64_4K +#define VM_MODE_DEFAULT VM_MODE_P44V64_4K #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 16) diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 25bff307c71f..c330f414ef96 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -22,6 +22,22 @@ void guest_modes_append_default(void) } } #endif +#ifdef __s390x__ + { + int kvm_fd, vm_fd; + struct kvm_s390_vm_cpu_processor info; + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0); + kvm_device_access(vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info, false); + close(vm_fd); + close(kvm_fd); + /* Starting with z13 we have 47bits of physical address */ + if (info.ibc >= 0x30) + guest_mode_append(VM_MODE_P47V64_4K, true, true); + } +#endif } void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a2b732cf96ea..8606000c439e 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -176,6 +176,7 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", + [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -194,6 +195,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { { 40, 48, 0x10000, 16 }, { 0, 0, 0x1000, 12 }, { 47, 64, 0x1000, 12 }, + { 44, 64, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -282,6 +284,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) case VM_MODE_P47V64_4K: vm->pgtable_levels = 5; break; + case VM_MODE_P44V64_4K: + vm->pgtable_levels = 5; + break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); } -- cgit v1.2.3 From cd4220d23bf3f43cf720e82bdee681f383433ae2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 1 Jul 2021 17:42:24 +0200 Subject: KVM: selftests: do not require 64GB in set_memory_region_test Unless the user sets overcommit_memory or has plenty of swap, the latest changes to the testcase will result in ENOMEM failures for hosts with less than 64GB RAM. As we do not use much of the allocated memory, we can use MAP_NORESERVE to avoid this error. Cc: Zenghui Yu Cc: vkuznets@redhat.com Cc: wanghaibin.wang@huawei.com Cc: stable@vger.kernel.org Fixes: 309505dd5685 ("KVM: selftests: Fix mapping length truncation in m{,un}map()") Tested-by: Zenghui Yu Link: https://lore.kernel.org/kvm/20210701160425.33666-1-borntraeger@de.ibm.com/ Signed-off-by: Christian Borntraeger --- tools/testing/selftests/kvm/set_memory_region_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index d8812f27648c..d31f54ac4e98 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -377,7 +377,8 @@ static void test_add_max_memory_regions(void) (max_mem_slots - 1), MEM_REGION_SIZE >> 10); mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); -- cgit v1.2.3