From 818e41995bf3654e27c7f470a77cb4f6e90d338e Mon Sep 17 00:00:00 2001 From: Ethan Adams Date: Thu, 14 Mar 2024 15:20:12 -0700 Subject: perf build: Fix out of tree build related to installation of sysreg-defs [ Upstream commit efae55bb78cf8722c7df01cd974197dfd13ece39 ] It seems that a previous modification to sysreg-defs, which corrected emitting the header to the specified output directory, exposed missing subdir, prefix variables. This breaks out of tree builds of perf as the file is now built into the output directory, but still tries to descend into output directory as a subdir. Fixes: a29ee6aea7030786 ("perf build: Ensure sysreg-defs Makefile respects output dir") Reviewed-by: Oliver Upton Reviewed-by: Tycho Andersen Signed-off-by: Ethan Adams Tested-by: Tycho Andersen Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240314222012.47193-1-j.ethan.adams@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Makefile.perf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 04d89d2ed209..d769aa447fb7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -458,18 +458,19 @@ SHELL = $(SHELL_PATH) arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools ifneq ($(OUTPUT),) - arm64_gen_sysreg_outdir := $(OUTPUT) + arm64_gen_sysreg_outdir := $(abspath $(OUTPUT)) else arm64_gen_sysreg_outdir := $(CURDIR) endif arm64-sysreg-defs: FORCE - $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) + $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \ + prefix= subdir= arm64-sysreg-defs-clean: $(call QUIET_CLEAN,arm64-sysreg-defs) $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \ - clean > /dev/null + prefix= subdir= clean > /dev/null beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/ linux_uapi_dir := $(srctree)/tools/include/uapi/linux -- cgit v1.2.3 From a1b6bc10666e47d45a8c35c1f6b8e19c4cf9f205 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 29 Feb 2024 23:46:36 -0800 Subject: perf record: Delete session after stopping sideband thread [ Upstream commit 88ce0106a1f603bf360cb397e8fe293f8298fabb ] The session has a header in it which contains a perf env with bpf_progs. The bpf_progs are accessed by the sideband thread and so the sideband thread must be stopped before the session is deleted, to avoid a use after free. This error was detected by AddressSanitizer in the following: ==2054673==ERROR: AddressSanitizer: heap-use-after-free on address 0x61d000161e00 at pc 0x55769289de54 bp 0x7f9df36d4ab0 sp 0x7f9df36d4aa8 READ of size 8 at 0x61d000161e00 thread T1 #0 0x55769289de53 in __perf_env__insert_bpf_prog_info util/env.c:42 #1 0x55769289dbb1 in perf_env__insert_bpf_prog_info util/env.c:29 #2 0x557692bbae29 in perf_env__add_bpf_info util/bpf-event.c:483 #3 0x557692bbb01a in bpf_event__sb_cb util/bpf-event.c:512 #4 0x5576928b75f4 in perf_evlist__poll_thread util/sideband_evlist.c:68 #5 0x7f9df96a63eb in start_thread nptl/pthread_create.c:444 #6 0x7f9df9726a4b in clone3 ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 0x61d000161e00 is located 384 bytes inside of 2136-byte region [0x61d000161c80,0x61d0001624d8) freed by thread T0 here: #0 0x7f9dfa6d7288 in __interceptor_free libsanitizer/asan/asan_malloc_linux.cpp:52 #1 0x557692978d50 in perf_session__delete util/session.c:319 #2 0x557692673959 in __cmd_record tools/perf/builtin-record.c:2884 #3 0x55769267a9f0 in cmd_record tools/perf/builtin-record.c:4259 #4 0x55769286710c in run_builtin tools/perf/perf.c:349 #5 0x557692867678 in handle_internal_command tools/perf/perf.c:402 #6 0x557692867a40 in run_argv tools/perf/perf.c:446 #7 0x557692867fae in main tools/perf/perf.c:562 #8 0x7f9df96456c9 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 Fixes: 657ee5531903339b ("perf evlist: Introduce side band thread") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Christian Brauner Cc: Disha Goel Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: K Prateek Nayak Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Tim Chen Cc: Yicong Yang Link: https://lore.kernel.org/r/20240301074639.2260708-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-record.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ff7e1d6cfcd2..40d2c1c48666 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2881,10 +2881,10 @@ out_delete_session: } #endif zstd_fini(&session->zstd_data); - perf_session__delete(session); - if (!opts->no_bpf_event) evlist__stop_sb_thread(rec->sb_evlist); + + perf_session__delete(session); return status; } -- cgit v1.2.3 From 51325935ddd82a21ee5980feafc67e76283590f8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 29 Feb 2024 23:46:38 -0800 Subject: perf test: Use a single fd for the child process out/err [ Upstream commit e120f7091a25460a19967380725558c36bca7c6c ] Switch from dumping err then out, to a single file descriptor for both of them. This allows the err and output to be correctly interleaved in verbose output. Fixes: b482f5f8e0168f1e ("perf tests: Add option to run tests in parallel") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Christian Brauner Cc: Disha Goel Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: K Prateek Nayak Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Tim Chen Cc: Yicong Yang Link: https://lore.kernel.org/r/20240301074639.2260708-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/builtin-test.c | 37 ++++++------------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index d13ee7683d9d..e05b370b1e2b 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -274,11 +274,8 @@ static int finish_test(struct child_test *child_test, int width) struct test_suite *t = child_test->test; int i = child_test->test_num; int subi = child_test->subtest; - int out = child_test->process.out; int err = child_test->process.err; - bool out_done = out <= 0; bool err_done = err <= 0; - struct strbuf out_output = STRBUF_INIT; struct strbuf err_output = STRBUF_INIT; int ret; @@ -290,11 +287,9 @@ static int finish_test(struct child_test *child_test, int width) pr_info("%3d: %-*s:\n", i + 1, width, test_description(t, -1)); /* - * Busy loop reading from the child's stdout and stderr that are set to - * be non-blocking until EOF. + * Busy loop reading from the child's stdout/stderr that are set to be + * non-blocking until EOF. */ - if (!out_done) - fcntl(out, F_SETFL, O_NONBLOCK); if (!err_done) fcntl(err, F_SETFL, O_NONBLOCK); if (verbose > 1) { @@ -303,11 +298,8 @@ static int finish_test(struct child_test *child_test, int width) else pr_info("%3d: %s:\n", i + 1, test_description(t, -1)); } - while (!out_done || !err_done) { - struct pollfd pfds[2] = { - { .fd = out, - .events = POLLIN | POLLERR | POLLHUP | POLLNVAL, - }, + while (!err_done) { + struct pollfd pfds[1] = { { .fd = err, .events = POLLIN | POLLERR | POLLHUP | POLLNVAL, }, @@ -317,21 +309,7 @@ static int finish_test(struct child_test *child_test, int width) /* Poll to avoid excessive spinning, timeout set for 1000ms. */ poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/1000); - if (!out_done && pfds[0].revents) { - errno = 0; - len = read(out, buf, sizeof(buf) - 1); - - if (len <= 0) { - out_done = errno != EAGAIN; - } else { - buf[len] = '\0'; - if (verbose > 1) - fprintf(stdout, "%s", buf); - else - strbuf_addstr(&out_output, buf); - } - } - if (!err_done && pfds[1].revents) { + if (!err_done && pfds[0].revents) { errno = 0; len = read(err, buf, sizeof(buf) - 1); @@ -354,14 +332,10 @@ static int finish_test(struct child_test *child_test, int width) pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi)); else pr_info("%3d: %s:\n", i + 1, test_description(t, -1)); - fprintf(stdout, "%s", out_output.buf); fprintf(stderr, "%s", err_output.buf); } - strbuf_release(&out_output); strbuf_release(&err_output); print_test_result(t, i, subi, ret, width); - if (out > 0) - close(out); if (err > 0) close(err); return 0; @@ -394,6 +368,7 @@ static int start_test(struct test_suite *test, int i, int subi, struct child_tes (*child)->process.no_stdout = 1; (*child)->process.no_stderr = 1; } else { + (*child)->process.stdout_to_stderr = 1; (*child)->process.out = -1; (*child)->process.err = -1; } -- cgit v1.2.3 From 62bf20dea38c478cadac060c8178b549ed7c482a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 21 Mar 2024 11:13:30 -0300 Subject: perf probe: Add missing libgen.h header needed for using basename() [ Upstream commit 581037151910126a7934e369e4b6ac70eda9a703 ] This prototype is obtained indirectly, by luck, from some other header in probe-event.c in most systems, but recently exploded on alpine:edge: 8 13.39 alpine:edge : FAIL gcc version 13.2.1 20240309 (Alpine 13.2.1_git20240309) util/probe-event.c: In function 'convert_exec_to_group': util/probe-event.c:225:16: error: implicit declaration of function 'basename' [-Werror=implicit-function-declaration] 225 | ptr1 = basename(exec_copy); | ^~~~~~~~ util/probe-event.c:225:14: error: assignment to 'char *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion] 225 | ptr1 = basename(exec_copy); | ^ cc1: all warnings being treated as errors make[3]: *** [/git/perf-6.8.0/tools/build/Makefile.build:158: util] Error 2 Fix it by adding the libgen.h header where basename() is prototyped. Fixes: fb7345bbf7fad9bf ("perf probe: Support basic dwarf-based operations on uprobe events") Cc: Masami Hiramatsu Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/probe-event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 2a0ad9ecf0a2..5c12459e9765 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 5522002a91fbb1ed9395ddccc7d306b8a4e431a2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 22 Mar 2024 15:43:12 -0700 Subject: perf annotate: Get rid of duplicate --group option item [ Upstream commit 374af9f1f06b5e991c810d2e4983d6f58df32136 ] The options array in cmd_annotate() has duplicate --group options. It only needs one and let's get rid of the other. $ perf annotate -h 2>&1 | grep group --group Show event group information together --group Show event group information together Fixes: 7ebaf4890f63eb90 ("perf annotate: Support '--group' option") Reviewed-by: Kan Liang Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jin Yao Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240322224313.423181-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-annotate.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6c1cc797692d..9cd97fd76bb5 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -809,8 +809,6 @@ int cmd_annotate(int argc, const char **argv) "Enable symbol demangling"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), - OPT_BOOLEAN(0, "group", &symbol_conf.event_group, - "Show event group information together"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, -- cgit v1.2.3 From 7896ced6ad186e47e4713cf98804e68cd57e43f1 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Mon, 1 Apr 2024 14:27:23 +0800 Subject: perf sched timehist: Fix -g/--call-graph option failure [ Upstream commit 6e4b398770d5023eb6383da9360a23bd537c155b ] When 'perf sched' enables the call-graph recording, sample_type of dummy event does not have PERF_SAMPLE_CALLCHAIN, timehist_check_attr() checks that the evsel does not have a callchain, and set show_callchain to 0. Currently 'perf sched timehist' only saves callchain when processing the 'sched:sched_switch event', timehist_check_attr() only needs to determine whether the event has PERF_SAMPLE_CALLCHAIN. Before: # perf sched record -g true [ perf record: Woken up 0 times to write data ] [ perf record: Captured and wrote 4.153 MB perf.data (7536 samples) ] # perf sched timehist Samples do not have callchains. time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) --------------- ------ ------------------------------ --------- --------- --------- 147851.826019 [0000] perf[285035] 0.000 0.000 0.000 147851.826029 [0000] migration/0[15] 0.000 0.003 0.009 147851.826063 [0001] perf[285035] 0.000 0.000 0.000 147851.826069 [0001] migration/1[21] 0.000 0.003 0.006 After: # perf sched record -g true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.572 MB perf.data (822 samples) ] # perf sched timehist time cpu task name waittime sch delay runtime [tid/pid] (msec) (msec) (msec) ----------- --- --------------- -------- -------- ----- 4193.035164 [0] perf[277062] 0.000 0.000 0.000 __traceiter_sched_switch <- __traceiter_sched_switch <- __sched_text_start <- preempt_schedule_common <- __cond_resched <- __wait_for_common <- wait_for_completion 4193.035174 [0] migration/0[15] 0.000 0.003 0.009 __traceiter_sched_switch <- __traceiter_sched_switch <- __sched_text_start <- smpboot_thread_fn <- kthread <- ret_from_fork 4193.035207 [1] perf[277062] 0.000 0.000 0.000 __traceiter_sched_switch <- __traceiter_sched_switch <- __sched_text_start <- preempt_schedule_common <- __cond_resched <- __wait_for_common <- wait_for_completion 4193.035214 [1] migration/1[21] 0.000 0.003 0.007 __traceiter_sched_switch <- __traceiter_sched_switch <- __sched_text_start <- smpboot_thread_fn <- kthread <- ret_from_fork Fixes: 9c95e4ef06572349 ("perf evlist: Add evlist__findnew_tracking_event() helper") Reviewed-by: Ian Rogers Signed-off-by: Yang Jihong Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yang Jihong Link: https://lore.kernel.org/r/20240401062724.1006010-2-yangjihong@bytedance.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-sched.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b248c433529a..1bfb22347371 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2963,8 +2963,11 @@ static int timehist_check_attr(struct perf_sched *sched, return -1; } - if (sched->show_callchain && !evsel__has_callchain(evsel)) { - pr_info("Samples do not have callchains.\n"); + /* only need to save callchain related to sched_switch event */ + if (sched->show_callchain && + evsel__name_is(evsel, "sched:sched_switch") && + !evsel__has_callchain(evsel)) { + pr_info("Samples of sched_switch event do not have callchains.\n"); sched->show_callchain = 0; symbol_conf.use_callchain = 0; } -- cgit v1.2.3 From b26b078fbf4e9e2b91350a7549692d56d7831678 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 4 Apr 2024 08:48:05 +0200 Subject: perf report: Fix PAI counter names for s390 virtual machines [ Upstream commit b74bc5a633a7d72f89141d481d835e73bda3c3ae ] s390 introduced the Processor Activity Instrumentation (PAI) counter facility on LPAR and virtual machines z/VM for models 3931 and 3932. These counters are stored as raw data in the perf.data file and are displayed with: # perf report -i /tmp//perfout-635468 -D | grep Counter Counter:007 Value:0x00000000000186a0 Counter:032 Value:0x0000000000000001 Counter:032 Value:0x0000000000000001 Counter:032 Value:0x0000000000000001 # However on z/VM virtual machines, the counter names are not retrieved from the PMU and are shown as ''. This is caused by the CPU string saved in the mapfile.csv for this machine: ^IBM.393[12].*3\.7.[[:xdigit:]]+$,3,cf_z16,core This string contains the CPU Measurement facility first and second version number and authorization level (3\.7.[[:xdigit:]]+). These numbers do not apply to the PAI counter facility. In fact they can be omitted. Shorten the CPU identification string for this machine to manufacturer and model. This is sufficient for all PMU devices. Output after: # perf report -i /tmp//perfout-635468 -D | grep Counter Counter:007 km_aes_128 Value:0x00000000000186a0 Counter:032 kma_gcm_aes_256 Value:0x0000000000000001 Counter:032 kma_gcm_aes_256 Value:0x0000000000000001 Counter:032 kma_gcm_aes_256 Value:0x0000000000000001 # Fixes: b539deafbadb2fc6 ("perf report: Add s390 raw data interpretation for PAI counters") Reviewed-by: Ian Rogers Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Cc: Heiko Carstens Cc: Namhyung Kim Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20240404064806.1362876-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/pmu-events/arch/s390/mapfile.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv index a918e1af77a5..b22648d12751 100644 --- a/tools/perf/pmu-events/arch/s390/mapfile.csv +++ b/tools/perf/pmu-events/arch/s390/mapfile.csv @@ -5,4 +5,4 @@ Family-model,Version,Filename,EventType ^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core ^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core ^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_z15,core -^IBM.393[12].*3\.7.[[:xdigit:]]+$,3,cf_z16,core +^IBM.393[12].*$,3,cf_z16,core -- cgit v1.2.3 From 5b3ed9348cbaed559fd4675dc3786e3c8f68b132 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 4 Apr 2024 08:48:06 +0200 Subject: perf stat: Do not fail on metrics on s390 z/VM systems [ Upstream commit c2f3d7dfc7373d53286f2a5c882d3397a5070adc ] On s390 z/VM virtual machines command 'perf list' also displays metrics: # perf list | grep -A 20 'Metric Groups:' Metric Groups: No_group: cpi [Cycles per Instruction] est_cpi [Estimated Instruction Complexity CPI infinite Level 1] finite_cpi [Cycles per Instructions from Finite cache/memory] l1mp [Level One Miss per 100 Instructions] l2p [Percentage sourced from Level 2 cache] l3p [Percentage sourced from Level 3 on same chip cache] l4lp [Percentage sourced from Level 4 Local cache on same book] l4rp [Percentage sourced from Level 4 Remote cache on different book] memp [Percentage sourced from memory] .... # The command # perf stat -M cpi -- true event syntax error: '{CPU_CYCLES/metric-id=CPU_CYCLES/.....' \___ Bad event or PMU Unable to find PMU or event on a PMU of 'CPU_CYCLES' event syntax error: '{CPU_CYCLES/metric-id=CPU_CYCLES/...' \___ Cannot find PMU `CPU_CYCLES'. Missing kernel support? # fails. 'perf stat' should not fail on metrics when the referenced CPU Counter Measurement PMU is not available. Output after: # perf stat -M est_cpi -- sleep 1 Performance counter stats for 'sleep 1': 1,000,887,494 ns duration_time # 0.00 est_cpi 1.000887494 seconds time elapsed 0.000143000 seconds user 0.000662000 seconds sys # Fixes: 7f76b31130680fb3 ("perf list: Add IBM z16 event description for s390") Suggested-by: Ian Rogers Reviewed-by: Ian Rogers Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Namhyung Kim Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20240404064806.1362876-2-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- .../pmu-events/arch/s390/cf_z16/transaction.json | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json index ec2ff78e2b5f..3ab1d3a6638c 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json +++ b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json @@ -2,71 +2,71 @@ { "BriefDescription": "Transaction count", "MetricName": "transaction", - "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL" + "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL if has_event(TX_C_TEND) else 0" }, { "BriefDescription": "Cycles per Instruction", "MetricName": "cpi", - "MetricExpr": "CPU_CYCLES / INSTRUCTIONS" + "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(INSTRUCTIONS) else 0" }, { "BriefDescription": "Problem State Instruction Ratio", "MetricName": "prbstate", - "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100" + "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0" }, { "BriefDescription": "Level One Miss per 100 Instructions", "MetricName": "l1mp", - "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100" + "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0" }, { "BriefDescription": "Percentage sourced from Level 2 cache", "MetricName": "l2p", - "MetricExpr": "((DCW_REQ + DCW_REQ_IV + ICW_REQ + ICW_REQ_IV) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_REQ + DCW_REQ_IV + ICW_REQ + ICW_REQ_IV) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ) else 0" }, { "BriefDescription": "Percentage sourced from Level 3 on same chip cache", "MetricName": "l3p", - "MetricExpr": "((DCW_REQ_CHIP_HIT + DCW_ON_CHIP + DCW_ON_CHIP_IV + DCW_ON_CHIP_CHIP_HIT + ICW_REQ_CHIP_HIT + ICW_ON_CHIP + ICW_ON_CHIP_IV + ICW_ON_CHIP_CHIP_HIT) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_REQ_CHIP_HIT + DCW_ON_CHIP + DCW_ON_CHIP_IV + DCW_ON_CHIP_CHIP_HIT + ICW_REQ_CHIP_HIT + ICW_ON_CHIP + ICW_ON_CHIP_IV + ICW_ON_CHIP_CHIP_HIT) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ_CHIP_HIT) else 0" }, { "BriefDescription": "Percentage sourced from Level 4 Local cache on same book", "MetricName": "l4lp", - "MetricExpr": "((DCW_REQ_DRAWER_HIT + DCW_ON_CHIP_DRAWER_HIT + DCW_ON_MODULE + DCW_ON_DRAWER + IDCW_ON_MODULE_IV + IDCW_ON_MODULE_CHIP_HIT + IDCW_ON_MODULE_DRAWER_HIT + IDCW_ON_DRAWER_IV + IDCW_ON_DRAWER_CHIP_HIT + IDCW_ON_DRAWER_DRAWER_HIT + ICW_REQ_DRAWER_HIT + ICW_ON_CHIP_DRAWER_HIT + ICW_ON_MODULE + ICW_ON_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_REQ_DRAWER_HIT + DCW_ON_CHIP_DRAWER_HIT + DCW_ON_MODULE + DCW_ON_DRAWER + IDCW_ON_MODULE_IV + IDCW_ON_MODULE_CHIP_HIT + IDCW_ON_MODULE_DRAWER_HIT + IDCW_ON_DRAWER_IV + IDCW_ON_DRAWER_CHIP_HIT + IDCW_ON_DRAWER_DRAWER_HIT + ICW_REQ_DRAWER_HIT + ICW_ON_CHIP_DRAWER_HIT + ICW_ON_MODULE + ICW_ON_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ_DRAWER_HIT) else 0" }, { "BriefDescription": "Percentage sourced from Level 4 Remote cache on different book", "MetricName": "l4rp", - "MetricExpr": "((DCW_OFF_DRAWER + IDCW_OFF_DRAWER_IV + IDCW_OFF_DRAWER_CHIP_HIT + IDCW_OFF_DRAWER_DRAWER_HIT + ICW_OFF_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_OFF_DRAWER + IDCW_OFF_DRAWER_IV + IDCW_OFF_DRAWER_CHIP_HIT + IDCW_OFF_DRAWER_DRAWER_HIT + ICW_OFF_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_OFF_DRAWER) else 0" }, { "BriefDescription": "Percentage sourced from memory", "MetricName": "memp", - "MetricExpr": "((DCW_ON_CHIP_MEMORY + DCW_ON_MODULE_MEMORY + DCW_ON_DRAWER_MEMORY + DCW_OFF_DRAWER_MEMORY + ICW_ON_CHIP_MEMORY + ICW_ON_MODULE_MEMORY + ICW_ON_DRAWER_MEMORY + ICW_OFF_DRAWER_MEMORY) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_ON_CHIP_MEMORY + DCW_ON_MODULE_MEMORY + DCW_ON_DRAWER_MEMORY + DCW_OFF_DRAWER_MEMORY + ICW_ON_CHIP_MEMORY + ICW_ON_MODULE_MEMORY + ICW_ON_DRAWER_MEMORY + ICW_OFF_DRAWER_MEMORY) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_ON_CHIP_MEMORY) else 0" }, { "BriefDescription": "Cycles per Instructions from Finite cache/memory", "MetricName": "finite_cpi", - "MetricExpr": "L1C_TLB2_MISSES / INSTRUCTIONS" + "MetricExpr": "L1C_TLB2_MISSES / INSTRUCTIONS if has_event(L1C_TLB2_MISSES) else 0" }, { "BriefDescription": "Estimated Instruction Complexity CPI infinite Level 1", "MetricName": "est_cpi", - "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS)" + "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(INSTRUCTIONS) else 0" }, { "BriefDescription": "Estimated Sourcing Cycles per Level 1 Miss", "MetricName": "scpl1m", - "MetricExpr": "L1C_TLB2_MISSES / (L1I_DIR_WRITES + L1D_DIR_WRITES)" + "MetricExpr": "L1C_TLB2_MISSES / (L1I_DIR_WRITES + L1D_DIR_WRITES) if has_event(L1C_TLB2_MISSES) else 0" }, { "BriefDescription": "Estimated TLB CPU percentage of Total CPU", "MetricName": "tlb_percent", - "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / CPU_CYCLES) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) * 100" + "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / CPU_CYCLES) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) * 100 if has_event(CPU_CYCLES) else 0" }, { "BriefDescription": "Estimated Cycles per TLB Miss", "MetricName": "tlb_miss", - "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / (DTLB2_WRITES + ITLB2_WRITES)) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES))" + "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / (DTLB2_WRITES + ITLB2_WRITES)) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) if has_event(DTLB2_MISSES) else 0" } ] -- cgit v1.2.3 From e6917adb9a04c47b870ec98e8182df66c9c92cf0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 4 Apr 2024 10:57:08 -0700 Subject: perf annotate: Fix annotation_calc_lines() to pass correct address to get_srcline() [ Upstream commit aaf494cf483a1a835c44e942861429b30a00cab0 ] It should pass a proper address (i.e. suitable for objdump or addr2line) to get_srcline() in order to work correctly. It used to pass an address with map__rip_2objdump() as the second argument but later it's changed to use notes->start. It's ok in normal cases but it can be changed when annotate_opts.full_addr is set. So let's convert the address directly instead of using the notes->start. Also the last argument is an IP to print symbol offset if requested. So it should pass symbol-relative address. Fixes: 7d18a824b5e57ddd ("perf annotate: Toggle full address <-> offset display") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240404175716.1225482-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/annotate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 50ca92255ff6..2ebe2fe92a10 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -3025,7 +3025,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m annotation__update_column_widths(notes); } -static void annotation__calc_lines(struct annotation *notes, struct map *map, +static void annotation__calc_lines(struct annotation *notes, struct map_symbol *ms, struct rb_root *root) { struct annotation_line *al; @@ -3033,6 +3033,7 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, list_for_each_entry(al, ¬es->src->source, node) { double percent_max = 0.0; + u64 addr; int i; for (i = 0; i < al->data_nr; i++) { @@ -3048,8 +3049,9 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, if (percent_max <= 0.5) continue; - al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL, - false, true, notes->start + al->offset); + addr = map__rip_2objdump(ms->map, ms->sym->start); + al->path = get_srcline(map__dso(ms->map), addr + al->offset, NULL, + false, true, ms->sym->start + al->offset); insert_source_line(&tmp_root, al); } @@ -3060,7 +3062,7 @@ static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) { struct annotation *notes = symbol__annotation(ms->sym); - annotation__calc_lines(notes, ms->map, root); + annotation__calc_lines(notes, ms, root); } int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) -- cgit v1.2.3 From e2d9443cacdc5ffeaeabd050cae9b5fe11959f00 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 10 Apr 2024 11:34:52 +0100 Subject: perf tests: Make "test data symbol" more robust on Neoverse N1 [ Upstream commit 256ef072b3842273ce703db18b603b051aca95fe ] To prevent anyone from seeing a test failure appear as a regression and thinking that it was caused by their code change, insert some noise into the loop which makes it immune to sampling bias issues (errata 1694299). The "test data symbol" test can fail with any unrelated change that shifts the loop into an unfortunate position in the Perf binary which is almost impossible to debug as the root cause of the test failure. Ultimately it's caused by the referenced errata. Fixes: 60abedb8aa902b06 ("perf test: Introduce script for data symbol testing") Reviewed-by: Ian Rogers Signed-off-by: James Clark Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Spoorthy S Link: https://lore.kernel.org/r/20240410103458.813656-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/workloads/datasym.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/tests/workloads/datasym.c b/tools/perf/tests/workloads/datasym.c index ddd40bc63448..8e08fc75a973 100644 --- a/tools/perf/tests/workloads/datasym.c +++ b/tools/perf/tests/workloads/datasym.c @@ -16,6 +16,22 @@ static int datasym(int argc __maybe_unused, const char **argv __maybe_unused) { for (;;) { buf1.data1++; + if (buf1.data1 == 123) { + /* + * Add some 'noise' in the loop to work around errata + * 1694299 on Arm N1. + * + * Bias exists in SPE sampling which can cause the load + * and store instructions to be skipped entirely. This + * comes and goes randomly depending on the offset the + * linker places the datasym loop at in the Perf binary. + * With an extra branch in the middle of the loop that + * isn't always taken, the instruction stream is no + * longer a continuous repeating pattern that interacts + * badly with the bias. + */ + buf1.data1++; + } buf1.data2 += buf1.data1; } return 0; -- cgit v1.2.3 From bef6f79c722ed9504b2d523e2c7f66dffd631e8e Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 10 Apr 2024 11:34:53 +0100 Subject: perf tests: Apply attributes to all events in object code reading test [ Upstream commit 2dade41a533f337447b945239b87ff31a8857890 ] PERF_PMU_CAP_EXTENDED_HW_TYPE results in multiple events being opened on heterogeneous systems. Currently this test only sets its required attributes on the first event. Not disabling enable_on_exec on the other events causes the test to fail because the forked objdump processes are sampled. No tracking event is opened so Perf only knows about its own mappings causing the objdump samples to give the following error: $ perf test -vvv "object code reading" Reading object code for memory address: 0xffff9aaa55ec thread__find_map failed ---- end(-1) ---- 24: Object code reading : FAILED! Fixes: 251aa040244a3b17 ("perf parse-events: Wildcard most "numeric" events") Reviewed-by: Ian Rogers Signed-off-by: James Clark Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Spoorthy S Link: https://lore.kernel.org/r/20240410103458.813656-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/code-reading.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 7a3a7bbbec71..29d2f3ee4e10 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -637,11 +637,11 @@ static int do_test_code_reading(bool try_kcore) evlist__config(evlist, &opts, NULL); - evsel = evlist__first(evlist); - - evsel->core.attr.comm = 1; - evsel->core.attr.disabled = 1; - evsel->core.attr.enable_on_exec = 0; + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.comm = 1; + evsel->core.attr.disabled = 1; + evsel->core.attr.enable_on_exec = 0; + } ret = evlist__open(evlist); if (ret < 0) { -- cgit v1.2.3 From 4211ba4096905dc2feec840d13b7b7f767a92107 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 10 Apr 2024 11:34:54 +0100 Subject: perf map: Remove kernel map before updating start and end addresses [ Upstream commit df12e21d4e15e48a5e7d12e58f1a00742c4177d0 ] In a debug build there is validation that mmap lists are sorted when taking a lock. In machine__update_kernel_mmap() the start and end addresses are updated resulting in an unsorted list before the map is removed from the list. When the map is removed, the lock is taken which triggers the validation and the failure: $ perf test "object code reading" --- start --- perf: util/maps.c:88: check_invariants: Assertion `map__start(prev) <= map__start(map)' failed. Aborted Fix it by updating the addresses after removal, but before insertion. The bug depends on the ordering and type of debug info on the system and doesn't reproduce everywhere. Fixes: 659ad3492b913c90 ("perf maps: Switch from rbtree to lazily sorted array for addresses") Reviewed-by: Ian Rogers Signed-off-by: James Clark Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Spoorthy S Link: https://lore.kernel.org/r/20240410103458.813656-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/machine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 527517db3182..07c22f765fab 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1549,8 +1549,8 @@ static int machine__update_kernel_mmap(struct machine *machine, updated = map__get(orig); machine->vmlinux_map = updated; - machine__set_kernel_mmap(machine, start, end); maps__remove(machine__kernel_maps(machine), orig); + machine__set_kernel_mmap(machine, start, end); err = maps__insert(machine__kernel_maps(machine), updated); map__put(orig); -- cgit v1.2.3 From 330dfeaefd1943a9302035229083fa51ab570dcb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 11 Apr 2024 10:54:47 +0300 Subject: perf record: Fix debug message placement for test consumption [ Upstream commit 792bc998baf9ae17297b1f93b1edc3ca34a0b7e2 ] evlist__config() might mess up the debug output consumed by test "Test per-thread recording" in "Miscellaneous Intel PT testing". Move it out from between the debug prints: "perf record opening and mmapping events" and "perf record done opening and mmapping events" Fixes: da4062021e0e6da5 ("perf tools: Add debug messages and comments for testing") Closes: https://lore.kernel.org/linux-perf-users/ZhVfc5jYLarnGzKa@x1/ Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20240411075447.17306-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-record.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 40d2c1c48666..6aeae398ec28 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1355,8 +1355,6 @@ static int record__open(struct record *rec) struct record_opts *opts = &rec->opts; int rc = 0; - evlist__config(evlist, opts, &callchain_param); - evlist__for_each_entry(evlist, pos) { try_again: if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { @@ -2483,6 +2481,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) evlist__uniquify_name(rec->evlist); + evlist__config(rec->evlist, opts, &callchain_param); + /* Debug message used by test scripts */ pr_debug3("perf record opening and mmapping events\n"); if (record__open(rec) != 0) { -- cgit v1.2.3 From bb1f43ffaca05db0827db06d6b57c56f503357ff Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 5 Apr 2024 21:09:10 -0700 Subject: perf bench uprobe: Remove lib64 from libc.so.6 binary path [ Upstream commit 459fee7b508231cd4622b3bd94aaa85e8e16b888 ] bpf_program__attach_uprobe_opts will search LD_LIBRARY_PATH and so specifying `/lib64` is unnecessary and causes failures for libc.so.6 paths like `/lib/x86_64-linux-gnu/libc.so.6`. Fixes: 7b47623b8cae8149 ("perf bench uprobe trace_printk: Add entry attaching an BPF program that does a trace_printk") Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrei Vagin Cc: Ingo Molnar Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240406040911.1603801-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/bench/uprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c index 5c71fdc419dd..b722ff88fe7d 100644 --- a/tools/perf/bench/uprobe.c +++ b/tools/perf/bench/uprobe.c @@ -47,7 +47,7 @@ static const char * const bench_uprobe_usage[] = { #define bench_uprobe__attach_uprobe(prog) \ skel->links.prog = bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.prog, \ /*pid=*/-1, \ - /*binary_path=*/"/lib64/libc.so.6", \ + /*binary_path=*/"libc.so.6", \ /*func_offset=*/0, \ /*opts=*/&uprobe_opts); \ if (!skel->links.prog) { \ -- cgit v1.2.3 From 602089a6a50acb6f4561ee9dc58795b8287bb478 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 12 Apr 2024 11:33:08 -0700 Subject: perf dwarf-aux: Check pointer offset when checking variables [ Upstream commit 645af3fb62bf12911ce1fc79efa676dae9a8289b ] In match_var_offset(), it checks the offset range with the target type only for non-pointer types. But it also needs to check the pointer types with the target type. This is because there can be more than one pointer variable located in the same register. Let's look at the following example. It's looking up a variable for reg3 at tcp_get_info+0x62. It found "sk" variable but it wasn't the right one since it accesses beyond the target type (struct 'sock' in this case) size. ----------------------------------------------------------- find data type for 0x7bc(reg3) at tcp_get_info+0x62 CU for net/ipv4/tcp.c (die:0x7b5f516) frame base: cfa=0 fbreg=6 offset: 1980 is bigger than size: 760 check variable "sk" failed (die: 0x7b92b2c) variable location: reg3 type='struct sock' size=0x2f8 (die:0x7b63c3a) Actually there was another variable "tp" in the function and it's located at the same (reg3) because it's just type-casted like below. void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); ... The 'struct tcp_sock' contains the 'struct sock' at offset 0 so it can just use the same address as a pointer to tcp_sock. That means it should match variables correctly by checking the offset and size. Actually it cannot distinguish if the offset was smaller than the size of the original struct sock. But I think it's fine as they are the same at that part. So let's check the target type size and retry if it doesn't match. Now it succeeded to find the correct variable. ----------------------------------------------------------- find data type for 0x7bc(reg3) at tcp_get_info+0x62 CU for net/ipv4/tcp.c (die:0x7b5f516) frame base: cfa=0 fbreg=6 found "tp" in scope=1/1 (die: 0x7b92b16) type_offset=0x7bc variable location: reg3 type='struct tcp_sock' size=0xa68 (die:0x7b81380) Fixes: bc10db8eb8955fbc ("perf annotate-data: Support stack variables") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240412183310.2518474-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 2791126069b4..f7f7171539d3 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1280,7 +1280,7 @@ struct find_var_data { #define DWARF_OP_DIRECT_REGS 32 static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data, - u64 addr_offset, u64 addr_type) + u64 addr_offset, u64 addr_type, bool is_pointer) { Dwarf_Die type_die; Dwarf_Word size; @@ -1294,6 +1294,12 @@ static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data, if (die_get_real_type(die_mem, &type_die) == NULL) return false; + if (is_pointer && dwarf_tag(&type_die) == DW_TAG_pointer_type) { + /* Get the target type of the pointer */ + if (die_get_real_type(&type_die, &type_die) == NULL) + return false; + } + if (dwarf_aggregate_size(&type_die, &size) < 0) return false; @@ -1361,31 +1367,38 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) if (data->is_fbreg && ops->atom == DW_OP_fbreg && data->offset >= (int)ops->number && check_allowed_ops(ops, nops) && - match_var_offset(die_mem, data, data->offset, ops->number)) + match_var_offset(die_mem, data, data->offset, ops->number, + /*is_pointer=*/false)) return DIE_FIND_CB_END; /* Only match with a simple case */ if (data->reg < DWARF_OP_DIRECT_REGS) { /* pointer variables saved in a register 0 to 31 */ if (ops->atom == (DW_OP_reg0 + data->reg) && - check_allowed_ops(ops, nops)) + check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->offset, 0, + /*is_pointer=*/true)) return DIE_FIND_CB_END; /* Local variables accessed by a register + offset */ if (ops->atom == (DW_OP_breg0 + data->reg) && check_allowed_ops(ops, nops) && - match_var_offset(die_mem, data, data->offset, ops->number)) + match_var_offset(die_mem, data, data->offset, ops->number, + /*is_pointer=*/false)) return DIE_FIND_CB_END; } else { /* pointer variables saved in a register 32 or above */ if (ops->atom == DW_OP_regx && ops->number == data->reg && - check_allowed_ops(ops, nops)) + check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->offset, 0, + /*is_pointer=*/true)) return DIE_FIND_CB_END; /* Local variables accessed by a register + offset */ if (ops->atom == DW_OP_bregx && data->reg == ops->number && check_allowed_ops(ops, nops) && - match_var_offset(die_mem, data, data->offset, ops->number2)) + match_var_offset(die_mem, data, data->offset, ops->number2, + /*is_poitner=*/false)) return DIE_FIND_CB_END; } } @@ -1447,7 +1460,8 @@ static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg) continue; if (check_allowed_ops(ops, nops) && - match_var_offset(die_mem, data, data->addr, ops->number)) + match_var_offset(die_mem, data, data->addr, ops->number, + /*is_pointer=*/false)) return DIE_FIND_CB_END; } return DIE_FIND_CB_SIBLING; -- cgit v1.2.3 From edb18262c71377ac7fa5423763a0e720b9be8591 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 16 Apr 2024 10:00:13 -0700 Subject: perf docs: Document bpf event modifier [ Upstream commit eb4d27cf9aef3e6c9bcaf8fa1a1cadc2433d847b ] Document that 'b' is used as a modifier to make an event use a BPF counter. Fixes: 01bd8efcec444468 ("perf stat: Introduce ':b' modifier") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Song Liu Cc: Thomas Richter Link: https://lore.kernel.org/r/20240416170014.985191-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Documentation/perf-list.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 3b12595193c9..6bf2468f59d3 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -71,6 +71,7 @@ counted. The following modifiers exist: D - pin the event to the PMU W - group is weak and will fallback to non-group if not schedulable, e - group or event are exclusive and do not share the PMU + b - use BPF aggregration (see perf stat --bpf-counters) The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: -- cgit v1.2.3 From b6023b2cdc4664884babdaf4ceb8fc409dd11517 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 26 Mar 2024 11:37:49 +0000 Subject: perf test shell arm_coresight: Increase buffer size for Coresight basic tests [ Upstream commit 10b6ee3b597b1b1b4dc390aaf9d589664af31df9 ] These tests record in a mode that includes kernel trace but look for samples of a userspace process. This makes them sensitive to any kernel compilation options that increase the amount of time spent in the kernel. If the trace buffer is completely filled before userspace is reached then the test will fail. Double the buffer size to fix this. The other tests in the same file aren't sensitive to this for various reasons, for example the iterate devices test filters by userspace trace only. But in order to keep coverage of all the modes, increase the buffer size rather than filtering by userspace for the basic tests. Fixes: d1efa4a0a696e487 ("perf cs-etm: Add separate decode paths for timeless and per-thread modes") Reviewed-by: Anshuman Khandual Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Link: https://lore.kernel.org/r/20240326113749.257250-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/shell/test_arm_coresight.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 65dd85207125..3302ea0b9672 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -188,7 +188,7 @@ arm_cs_etm_snapshot_test() { arm_cs_etm_basic_test() { echo "Recording trace with '$*'" - perf record -o ${perfdata} "$@" -- ls > /dev/null 2>&1 + perf record -o ${perfdata} "$@" -m,8M -- ls > /dev/null 2>&1 perf_script_branch_samples ls && perf_report_branch_samples ls && -- cgit v1.2.3 From 9f8d046ff326dd363957f4431196484f9a484604 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 26 Mar 2024 10:32:23 +0200 Subject: perf intel-pt: Fix unassigned instruction op (discovered by MemorySanitizer) [ Upstream commit e101a05f79fd4ee3e89d2f3fb716493c33a33708 ] MemorySanitizer discovered instances where the instruction op value was not assigned.: WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x5581c00a76b3 in intel_pt_sample_flags tools/perf/util/intel-pt.c:1527:17 Uninitialized value was stored to memory at #0 0x5581c005ddf8 in intel_pt_walk_insn tools/perf/util/intel-pt-decoder/intel-pt-decoder.c:1256:25 The op value is used to set branch flags for branch instructions encountered when walking the code, so fix by setting op to INTEL_PT_OP_OTHER in other cases. Fixes: 4c761d805bb2d2ea ("perf intel-pt: Fix intel_pt_fup_event() assumptions about setting state type") Reported-by: Ian Rogers Signed-off-by: Adrian Hunter Tested-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Closes: https://lore.kernel.org/linux-perf-users/20240320162619.1272015-1-irogers@google.com/ Link: https://lore.kernel.org/r/20240326083223.10883-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 ++ tools/perf/util/intel-pt.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index b450178e3420..e733f6b1f7ac 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1319,6 +1319,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder, bool no_tip) bool ret = false; decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.insn_op = INTEL_PT_OP_OTHER; + decoder->state.insn_len = 0; if (decoder->set_fup_cfe_ip || decoder->set_fup_cfe) { bool ip = decoder->set_fup_cfe_ip; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index f38893e0b036..4db9a098f592 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -764,6 +764,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, addr_location__init(&al); intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; if (to_ip && *ip == to_ip) goto out_no_cache; @@ -898,6 +899,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (to_ip && *ip == to_ip) { intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; goto out_no_cache; } -- cgit v1.2.3 From e70ad7979baaff56ba08e6e188c30e713ddd48c8 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Tue, 7 May 2024 14:50:26 +0800 Subject: perf bench internals inject-build-id: Fix trap divide when collecting just one DSO [ Upstream commit d9180e23fbfa3875424d3a6b28b71b072862a52a ] 'perf bench internals inject-build-id' suffers from the following error when only one DSO is collected. # perf bench internals inject-build-id -v Collected 1 DSOs traps: internals-injec[2305] trap divide error ip:557566ba6394 sp:7ffd4de97fe0 error:0 in perf[557566b2a000+23d000] Build-id injection benchmark Iteration #1 Floating point exception This patch removes the unnecessary minus one from the divisor which also corrects the randomization range. Signed-off-by: He Zhe Fixes: 0bf02a0d80427f26 ("perf bench: Add build-id injection benchmark") Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20240507065026.2652929-1-zhe.he@windriver.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/bench/inject-buildid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 49331743c743..a759eb2328be 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -362,7 +362,7 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss) return -1; for (i = 0; i < nr_mmaps; i++) { - int idx = rand() % (nr_dsos - 1); + int idx = rand() % nr_dsos; struct bench_dso *dso = &dsos[idx]; u64 timestamp = rand() % 1000000; -- cgit v1.2.3 From 2f0d8e230841af6a404914a5c901877fc0647c43 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 7 May 2024 11:35:38 -0700 Subject: perf ui browser: Don't save pointer to stack memory [ Upstream commit 769e6a1e15bdbbaf2b0d2f37c24f2c53268bd21f ] ui_browser__show() is capturing the input title that is stack allocated memory in hist_browser__run(). Avoid a use after return by strdup-ing the string. Committer notes: Further explanation from Ian Rogers: My command line using tui is: $ sudo bash -c 'rm /tmp/asan.log*; export ASAN_OPTIONS="log_path=/tmp/asan.log"; /tmp/perf/perf mem record -a sleep 1; /tmp/perf/perf mem report' I then go to the perf annotate view and quit. This triggers the asan error (from the log file): ``` ==1254591==ERROR: AddressSanitizer: stack-use-after-return on address 0x7f2813331920 at pc 0x7f28180 65991 bp 0x7fff0a21c750 sp 0x7fff0a21bf10 READ of size 80 at 0x7f2813331920 thread T0 #0 0x7f2818065990 in __interceptor_strlen ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:461 #1 0x7f2817698251 in SLsmg_write_wrapped_string (/lib/x86_64-linux-gnu/libslang.so.2+0x98251) #2 0x7f28176984b9 in SLsmg_write_nstring (/lib/x86_64-linux-gnu/libslang.so.2+0x984b9) #3 0x55c94045b365 in ui_browser__write_nstring ui/browser.c:60 #4 0x55c94045c558 in __ui_browser__show_title ui/browser.c:266 #5 0x55c94045c776 in ui_browser__show ui/browser.c:288 #6 0x55c94045c06d in ui_browser__handle_resize ui/browser.c:206 #7 0x55c94047979b in do_annotate ui/browsers/hists.c:2458 #8 0x55c94047fb17 in evsel__hists_browse ui/browsers/hists.c:3412 #9 0x55c940480a0c in perf_evsel_menu__run ui/browsers/hists.c:3527 #10 0x55c940481108 in __evlist__tui_browse_hists ui/browsers/hists.c:3613 #11 0x55c9404813f7 in evlist__tui_browse_hists ui/browsers/hists.c:3661 #12 0x55c93ffa253f in report__browse_hists tools/perf/builtin-report.c:671 #13 0x55c93ffa58ca in __cmd_report tools/perf/builtin-report.c:1141 #14 0x55c93ffaf159 in cmd_report tools/perf/builtin-report.c:1805 #15 0x55c94000c05c in report_events tools/perf/builtin-mem.c:374 #16 0x55c94000d96d in cmd_mem tools/perf/builtin-mem.c:516 #17 0x55c9400e44ee in run_builtin tools/perf/perf.c:350 #18 0x55c9400e4a5a in handle_internal_command tools/perf/perf.c:403 #19 0x55c9400e4e22 in run_argv tools/perf/perf.c:447 #20 0x55c9400e53ad in main tools/perf/perf.c:561 #21 0x7f28170456c9 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 #22 0x7f2817045784 in __libc_start_main_impl ../csu/libc-start.c:360 #23 0x55c93ff544c0 in _start (/tmp/perf/perf+0x19a4c0) (BuildId: 84899b0e8c7d3a3eaa67b2eb35e3d8b2f8cd4c93) Address 0x7f2813331920 is located in stack of thread T0 at offset 32 in frame #0 0x55c94046e85e in hist_browser__run ui/browsers/hists.c:746 This frame has 1 object(s): [32, 192) 'title' (line 747) <== Memory access at offset 32 is inside this variable HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork ``` hist_browser__run isn't on the stack so the asan error looks legit. There's no clean init/exit on struct ui_browser so I may be trading a use-after-return for a memory leak, but that seems look a good trade anyway. Fixes: 05e8b0804ec4 ("perf ui browser: Stop using 'self'") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Ben Gainey Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: K Prateek Nayak Cc: Li Dong Cc: Mark Rutland Cc: Namhyung Kim Cc: Oliver Upton Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sun Haiyong Cc: Tim Chen Cc: Yanteng Si Cc: Yicong Yang Link: https://lore.kernel.org/r/20240507183545.1236093-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/ui/browser.c | 4 +++- tools/perf/ui/browser.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 603d11283cbd..c4cdf2ea69b7 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -287,7 +287,8 @@ int ui_browser__show(struct ui_browser *browser, const char *title, mutex_lock(&ui__lock); __ui_browser__show_title(browser, title); - browser->title = title; + free(browser->title); + browser->title = strdup(title); zfree(&browser->helpline); va_start(ap, helpline); @@ -304,6 +305,7 @@ void ui_browser__hide(struct ui_browser *browser) mutex_lock(&ui__lock); ui_helpline__pop(); zfree(&browser->helpline); + zfree(&browser->title); mutex_unlock(&ui__lock); } diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 510ce4554050..6e98d5f8f71c 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -21,7 +21,7 @@ struct ui_browser { u8 extra_title_lines; int current_color; void *priv; - const char *title; + char *title; char *helpline; const char *no_samples_msg; void (*refresh_dimensions)(struct ui_browser *browser); -- cgit v1.2.3 From e4a7f29a7eead60fa3263018496dfb4c1fdf55d3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 7 May 2024 11:35:39 -0700 Subject: perf annotate: Fix memory leak in annotated_source [ Upstream commit a3f7768bcf48281df14d98715f076c5656571527 ] Freeing hash map doesn't free the entries added to the hashmap, add the missing free(). Fixes: d3e7cad6f36d9e80 ("perf annotate: Add a hashmap for symbol histogram") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Ben Gainey Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: K Prateek Nayak Cc: Li Dong Cc: Mark Rutland Cc: Namhyung Kim Cc: Oliver Upton Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sun Haiyong Cc: Tim Chen Cc: Yanteng Si Cc: Yicong Yang Link: https://lore.kernel.org/r/20240507183545.1236093-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/annotate.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 2ebe2fe92a10..617b98da377e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -887,9 +887,15 @@ static struct annotated_source *annotated_source__new(void) static __maybe_unused void annotated_source__delete(struct annotated_source *src) { + struct hashmap_entry *cur; + size_t bkt; + if (src == NULL) return; + hashmap__for_each_entry(src->samples, cur, bkt) + zfree(&cur->pvalue); + hashmap__free(src->samples); zfree(&src->histograms); free(src); -- cgit v1.2.3 From 0f9af3bc8deb8e9c6b093af33590cc8bc27c95a8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 7 May 2024 20:52:58 -0700 Subject: perf ui browser: Avoid SEGV on title [ Upstream commit 90f01afb0dfafbc9b094bb61e61a4ac297d9d0d2 ] If the title is NULL then it can lead to a SEGV. Fixes: 769e6a1e15bdbbaf ("perf ui browser: Don't save pointer to stack memory") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240508035301.1554434-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/ui/browser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index c4cdf2ea69b7..19503e838738 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -203,7 +203,7 @@ void ui_browser__refresh_dimensions(struct ui_browser *browser) void ui_browser__handle_resize(struct ui_browser *browser) { ui__refresh_dimensions(false); - ui_browser__show(browser, browser->title, ui_helpline__current); + ui_browser__show(browser, browser->title ?: "", ui_helpline__current); ui_browser__refresh(browser); } -- cgit v1.2.3 From d0fddddc1bcd5e7a309fa462b810c61df31898b9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 7 May 2024 20:53:00 -0700 Subject: perf report: Avoid SEGV in report__setup_sample_type() [ Upstream commit 45b4f402a6b782352c4bafcff682bfb01da9ca05 ] In some cases evsel->name is lazily initialized in evsel__name(). If not initialized passing NULL to strstr() leads to a SEGV. Fixes: ccb17caecfbd542f ("perf report: Set PERF_SAMPLE_DATA_SRC bit for Arm SPE event") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240508035301.1554434-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index dcd93ee5fc24..5b684d2ab4be 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -428,7 +428,7 @@ static int report__setup_sample_type(struct report *rep) * compatibility, set the bit if it's an old perf data file. */ evlist__for_each_entry(session->evlist, evsel) { - if (strstr(evsel->name, "arm_spe") && + if (strstr(evsel__name(evsel), "arm_spe") && !(sample_type & PERF_SAMPLE_DATA_SRC)) { evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC; sample_type |= PERF_SAMPLE_DATA_SRC; -- cgit v1.2.3 From 3b210d94a522b77c26295e39e0dfb5f9d3a735cf Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 7 May 2024 20:53:01 -0700 Subject: perf thread: Fixes to thread__new() related to initializing comm [ Upstream commit 3536c2575e88a890cf696b4ccd3da36bc937853b ] Freeing the thread on failure won't work with reference count checking, use thread__delete(). Don't allocate the comm_str, use a stack allocation instead. Fixes: f6005cafebab72f8 ("perf thread: Add reference count checking") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240508035301.1554434-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/thread.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 1aa8962dcf52..515726489e36 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -39,12 +39,13 @@ int thread__init_maps(struct thread *thread, struct machine *machine) struct thread *thread__new(pid_t pid, pid_t tid) { - char *comm_str; - struct comm *comm; RC_STRUCT(thread) *_thread = zalloc(sizeof(*_thread)); struct thread *thread; if (ADD_RC_CHK(thread, _thread) != NULL) { + struct comm *comm; + char comm_str[32]; + thread__set_pid(thread, pid); thread__set_tid(thread, tid); thread__set_ppid(thread, -1); @@ -56,13 +57,8 @@ struct thread *thread__new(pid_t pid, pid_t tid) init_rwsem(thread__namespaces_lock(thread)); init_rwsem(thread__comm_lock(thread)); - comm_str = malloc(32); - if (!comm_str) - goto err_thread; - - snprintf(comm_str, 32, ":%d", tid); + snprintf(comm_str, sizeof(comm_str), ":%d", tid); comm = comm__new(comm_str, 0, false); - free(comm_str); if (!comm) goto err_thread; @@ -76,7 +72,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) return thread; err_thread: - free(thread); + thread__delete(thread); return NULL; } -- cgit v1.2.3 From 96064187ef85675ae4fdb67d9c7d17e8026a869a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Mar 2024 22:50:54 -0700 Subject: perf dwarf-aux: Add die_collect_vars() [ Upstream commit 932dcc2c39aedf54ef291bc0b4129a54f5fe1e84 ] The die_collect_vars() is to find all variable information in the scope including function parameters. The struct die_var_type is to save the type of the variable with the location (reg and offset) as well as where it's defined in the code (addr). Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20240319055115.4063940-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: c9d492378fae ("perf dwarf-aux: Fix build with HAVE_DWARF_CFI_SUPPORT") Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 118 +++++++++++++++++++++++++++++++++----------- tools/perf/util/dwarf-aux.h | 17 +++++++ 2 files changed, 107 insertions(+), 28 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index f7f7171539d3..ff1874ae0832 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1136,6 +1136,40 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } +#if defined(HAVE_DWARF_GETLOCATIONS_SUPPORT) || defined(HAVE_DWARF_CFI_SUPPORT) +static int reg_from_dwarf_op(Dwarf_Op *op) +{ + switch (op->atom) { + case DW_OP_reg0 ... DW_OP_reg31: + return op->atom - DW_OP_reg0; + case DW_OP_breg0 ... DW_OP_breg31: + return op->atom - DW_OP_breg0; + case DW_OP_regx: + case DW_OP_bregx: + return op->number; + default: + break; + } + return -1; +} + +static int offset_from_dwarf_op(Dwarf_Op *op) +{ + switch (op->atom) { + case DW_OP_reg0 ... DW_OP_reg31: + case DW_OP_regx: + return 0; + case DW_OP_breg0 ... DW_OP_breg31: + return op->number; + case DW_OP_bregx: + return op->number2; + default: + break; + } + return -1; +} +#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT || HAVE_DWARF_CFI_SUPPORT */ + #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE @@ -1493,41 +1527,69 @@ Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, *offset = data.offset; return result; } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ -#ifdef HAVE_DWARF_CFI_SUPPORT -static int reg_from_dwarf_op(Dwarf_Op *op) +static int __die_collect_vars_cb(Dwarf_Die *die_mem, void *arg) { - switch (op->atom) { - case DW_OP_reg0 ... DW_OP_reg31: - return op->atom - DW_OP_reg0; - case DW_OP_breg0 ... DW_OP_breg31: - return op->atom - DW_OP_breg0; - case DW_OP_regx: - case DW_OP_bregx: - return op->number; - default: - break; - } - return -1; + struct die_var_type **var_types = arg; + Dwarf_Die type_die; + int tag = dwarf_tag(die_mem); + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + struct die_var_type *vt; + + if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + /* + * Only collect the first location as it can reconstruct the + * remaining state by following the instructions. + * start = 0 means it covers the whole range. + */ + if (dwarf_getlocations(&attr, 0, &base, &start, &end, &ops, &nops) <= 0) + return DIE_FIND_CB_SIBLING; + + if (die_get_real_type(die_mem, &type_die) == NULL) + return DIE_FIND_CB_SIBLING; + + vt = malloc(sizeof(*vt)); + if (vt == NULL) + return DIE_FIND_CB_END; + + vt->die_off = dwarf_dieoffset(&type_die); + vt->addr = start; + vt->reg = reg_from_dwarf_op(ops); + vt->offset = offset_from_dwarf_op(ops); + vt->next = *var_types; + *var_types = vt; + + return DIE_FIND_CB_SIBLING; } -static int offset_from_dwarf_op(Dwarf_Op *op) +/** + * die_collect_vars - Save all variables and parameters + * @sc_die: a scope DIE + * @var_types: a pointer to save the resulting list + * + * Save all variables and parameters in the @sc_die and save them to @var_types. + * The @var_types is a singly-linked list containing type and location info. + * Actual type can be retrieved using dwarf_offdie() with 'die_off' later. + * + * Callers should free @var_types. + */ +void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types) { - switch (op->atom) { - case DW_OP_reg0 ... DW_OP_reg31: - case DW_OP_regx: - return 0; - case DW_OP_breg0 ... DW_OP_breg31: - return op->number; - case DW_OP_bregx: - return op->number2; - default: - break; - } - return -1; + Dwarf_Die die_mem; + + die_find_child(sc_die, __die_collect_vars_cb, (void *)var_types, &die_mem); } +#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ +#ifdef HAVE_DWARF_CFI_SUPPORT /** * die_get_cfa - Get frame base information * @dwarf: a Dwarf info diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 85dd527ae1f7..efafd3a1f5b6 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -135,6 +135,15 @@ void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, /* Get the list of including scopes */ int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes); +/* Variable type information */ +struct die_var_type { + struct die_var_type *next; + u64 die_off; + u64 addr; + int reg; + int offset; +}; + #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /* Get byte offset range of given variable DIE */ @@ -150,6 +159,9 @@ Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, Dwarf_Addr addr, Dwarf_Die *die_mem, int *offset); +/* Save all variables and parameters in this scope */ +void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types); + #else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, @@ -178,6 +190,11 @@ static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unu return NULL; } +static inline void die_collect_vars(Dwarf_Die *sc_die __maybe_unused, + struct die_var_type **var_types __maybe_unused) +{ +} + #endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ #ifdef HAVE_DWARF_CFI_SUPPORT -- cgit v1.2.3 From b9684ba58ea31b9b00806b54a860b1db2392187e Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 8 May 2024 15:14:57 +0100 Subject: perf dwarf-aux: Fix build with HAVE_DWARF_CFI_SUPPORT [ Upstream commit c9d492378faec5c1fb8ea1534c620f7320bbb23a ] check_allowed_ops() is used from both HAVE_DWARF_GETLOCATIONS_SUPPORT and HAVE_DWARF_CFI_SUPPORT sections, so move it into the right place so that it's available when either are defined. This shows up when doing a static cross compile for arm64: $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- LDFLAGS="-static" \ EXTRA_PERFLIBS="-lexpat" util/dwarf-aux.c:1723:6: error: implicit declaration of function 'check_allowed_ops' Fixes: 55442cc2f22d0727 ("perf dwarf-aux: Check allowed DWARF Ops") Reviewed-by: Ian Rogers Signed-off-by: James Clark Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240508141458.439017-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 56 ++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index ff1874ae0832..f93e57e2fc42 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1168,6 +1168,34 @@ static int offset_from_dwarf_op(Dwarf_Op *op) } return -1; } + +static bool check_allowed_ops(Dwarf_Op *ops, size_t nops) +{ + /* The first op is checked separately */ + ops++; + nops--; + + /* + * It needs to make sure if the location expression matches to the given + * register and offset exactly. Thus it rejects any complex expressions + * and only allows a few of selected operators that doesn't change the + * location. + */ + while (nops) { + switch (ops->atom) { + case DW_OP_stack_value: + case DW_OP_deref_size: + case DW_OP_deref: + case DW_OP_piece: + break; + default: + return false; + } + ops++; + nops--; + } + return true; +} #endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT || HAVE_DWARF_CFI_SUPPORT */ #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT @@ -1345,34 +1373,6 @@ static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data, return true; } -static bool check_allowed_ops(Dwarf_Op *ops, size_t nops) -{ - /* The first op is checked separately */ - ops++; - nops--; - - /* - * It needs to make sure if the location expression matches to the given - * register and offset exactly. Thus it rejects any complex expressions - * and only allows a few of selected operators that doesn't change the - * location. - */ - while (nops) { - switch (ops->atom) { - case DW_OP_stack_value: - case DW_OP_deref_size: - case DW_OP_deref: - case DW_OP_piece: - break; - default: - return false; - } - ops++; - nops--; - } - return true; -} - /* Only checks direct child DIEs in the given scope. */ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) { -- cgit v1.2.3 From 253738a2e59681eb1c6772c385b7b81d675f7718 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 7 May 2024 15:12:05 +0100 Subject: perf symbols: Remove map from list before updating addresses [ Upstream commit 9fe410a7ef483a9aca08bf620d8ddfd35ac99bc7 ] Make the order of operations remove, update, add. Updating addresses before the map is removed causes the ordering check to fail when the map is removed. This can be reproduced when running Perf on an Arm system with a static kernel and Perf uses kcore rather than other sources: $ perf record -- ls $ perf report util/maps.c:96: check_invariants: Assertion `map__end(prev) <= map__start(map) || map__start(prev) == map__start(map)' failed Fixes: 659ad3492b913c90 ("perf maps: Switch from rbtree to lazily sorted array for addresses") Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240507141210.195939-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9ebdb8e13c0b..cd825241e07d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1375,13 +1375,15 @@ static int dso__load_kcore(struct dso *dso, struct map *map, if (RC_CHK_EQUAL(new_map, replacement_map)) { struct map *map_ref; - map__set_start(map, map__start(new_map)); - map__set_end(map, map__end(new_map)); - map__set_pgoff(map, map__pgoff(new_map)); - map__set_mapping_type(map, map__mapping_type(new_map)); /* Ensure maps are correctly ordered */ map_ref = map__get(map); maps__remove(kmaps, map_ref); + + map__set_start(map_ref, map__start(new_map)); + map__set_end(map_ref, map__end(new_map)); + map__set_pgoff(map_ref, map__pgoff(new_map)); + map__set_mapping_type(map_ref, map__mapping_type(new_map)); + err = maps__insert(kmaps, map_ref); map__put(map_ref); map__put(new_map); -- cgit v1.2.3 From 53ba56e438ef7c88d6948d4294a807a5e581fb1b Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 7 May 2024 15:12:07 +0100 Subject: perf symbols: Update kcore map before merging in remaining symbols [ Upstream commit f30232b20fadea8c0f2f43f764bc06e51e8cfcdf ] When loading kcore, the main vmlinux map is updated in the same loop that merges the remaining maps. If a map that overlaps is merged in before kcore, the list can become unsortable when the main map addresses are updated. This will later trigger the check_invariants() assert: $ perf record $ perf report util/maps.c:96: check_invariants: Assertion `map__end(prev) <= map__start(map) || map__start(prev) == map__start(map)' failed. Aborted Fix it by moving the main map update prior to the loop so that maps__merge_in() can split it if necessary. Fixes: 659ad3492b913c90 ("perf maps: Switch from rbtree to lazily sorted array for addresses") Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240507141210.195939-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cd825241e07d..a1ca1b8156fe 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1287,7 +1287,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, { struct maps *kmaps = map__kmaps(map); struct kcore_mapfn_data md; - struct map *replacement_map = NULL; + struct map *map_ref, *replacement_map = NULL; struct machine *machine; bool is_64_bit; int err, fd; @@ -1365,6 +1365,24 @@ static int dso__load_kcore(struct dso *dso, struct map *map, if (!replacement_map) replacement_map = list_entry(md.maps.next, struct map_list_node, node)->map; + /* + * Update addresses of vmlinux map. Re-insert it to ensure maps are + * correctly ordered. Do this before using maps__merge_in() for the + * remaining maps so vmlinux gets split if necessary. + */ + map_ref = map__get(map); + maps__remove(kmaps, map_ref); + + map__set_start(map_ref, map__start(replacement_map)); + map__set_end(map_ref, map__end(replacement_map)); + map__set_pgoff(map_ref, map__pgoff(replacement_map)); + map__set_mapping_type(map_ref, map__mapping_type(replacement_map)); + + err = maps__insert(kmaps, map_ref); + map__put(map_ref); + if (err) + goto out_err; + /* Add new maps */ while (!list_empty(&md.maps)) { struct map_list_node *new_node = list_entry(md.maps.next, struct map_list_node, node); @@ -1372,24 +1390,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, list_del_init(&new_node->node); - if (RC_CHK_EQUAL(new_map, replacement_map)) { - struct map *map_ref; - - /* Ensure maps are correctly ordered */ - map_ref = map__get(map); - maps__remove(kmaps, map_ref); - - map__set_start(map_ref, map__start(new_map)); - map__set_end(map_ref, map__end(new_map)); - map__set_pgoff(map_ref, map__pgoff(new_map)); - map__set_mapping_type(map_ref, map__mapping_type(new_map)); - - err = maps__insert(kmaps, map_ref); - map__put(map_ref); - map__put(new_map); - if (err) - goto out_err; - } else { + /* skip if replacement_map, already inserted above */ + if (!RC_CHK_EQUAL(new_map, replacement_map)) { /* * Merge kcore map into existing maps, * and ensure that current maps (eBPF) -- cgit v1.2.3 From 870535ca0319670308807560ccfd3d0b34dc4373 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 7 May 2024 15:12:08 +0100 Subject: perf symbols: Fix ownership of string in dso__load_vmlinux() [ Upstream commit 25626e19ae6df34f336f235b6b3dbd1b566d2738 ] The linked commit updated dso__load_vmlinux() to call dso__set_long_name() before loading the symbols. Loading the symbols may not succeed but dso__set_long_name() takes ownership of the string. The two callers of this function free the string themselves on failure cases, resulting in the following error: $ perf record -- ls $ perf report free(): double free detected in tcache 2 Fix it by always taking ownership of the string, even on failure. This means the string is either freed at the very first early exit condition, or later when the dso is deleted or the long name is replaced. Now no special return value is needed to signify that the caller needs to free the string. Fixes: e59fea47f83e8a9a ("perf symbols: Fix DSO kernel load and symbol process to correctly map DSO to its long_name, type and adjust_symbols") Reviewed-by: Ian Rogers Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240507141210.195939-5-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a1ca1b8156fe..68dbeae8d2bf 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1973,6 +1973,10 @@ out: return ret; } +/* + * Always takes ownership of vmlinux when vmlinux_allocated == true, even if + * it returns an error. + */ int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated) { @@ -1991,8 +1995,11 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, else symtab_type = DSO_BINARY_TYPE__VMLINUX; - if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) + if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) { + if (vmlinux_allocated) + free((char *) vmlinux); return -1; + } /* * dso__load_sym() may copy 'dso' which will result in the copies having @@ -2035,7 +2042,6 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map) err = dso__load_vmlinux(dso, map, filename, true); if (err > 0) goto out; - free(filename); } out: return err; @@ -2187,7 +2193,6 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) err = dso__load_vmlinux(dso, map, filename, true); if (err > 0) return err; - free(filename); } if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) { -- cgit v1.2.3 From 48ef87147247ee4c80626ef65c8c7b3da05946e8 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Thu, 9 May 2024 17:34:24 -0700 Subject: perf daemon: Fix file leak in daemon_session__control [ Upstream commit 09541603462c399c7408d50295db99b4b8042eaa ] The open() function returns -1 on error. The 'control' and 'ack' file descriptors are both initialized with open() and further validated with 'if' statement. 'if (!control)' would evaluate to 'true' if returned value on error were '0' but it is actually '-1'. Fixes: edcaa47958c7438b ("perf daemon: Add 'ping' command") Signed-off-by: Samasth Norway Ananda Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240510003424.2016914-1-samasth.norway.ananda@oracle.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-daemon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 83954af36753..de76bbc50bfb 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -523,7 +523,7 @@ static int daemon_session__control(struct daemon_session *session, session->base, SESSION_CONTROL); control = open(control_path, O_WRONLY|O_NONBLOCK); - if (!control) + if (control < 0) return -1; if (do_ack) { @@ -532,7 +532,7 @@ static int daemon_session__control(struct daemon_session *session, session->base, SESSION_ACK); ack = open(ack_path, O_RDONLY, O_NONBLOCK); - if (!ack) { + if (ack < 0) { close(control); return -1; } -- cgit v1.2.3 From 9816300dd636dcf35491d4d5963c5406eea601b2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 10 May 2024 14:04:51 -0700 Subject: perf annotate: Fix segfault on sample histogram [ Upstream commit 9ef30265a483f0405e4f7b3f15cda251b9a2c7da ] A symbol can have no samples, then accessing the annotated_source->samples hashmap will result in a segfault. Fixes: a3f7768bcf48281d ("perf annotate: Fix memory leak in annotated_source") Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240510210452.2449944-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/annotate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 617b98da377e..79d082155c2f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -893,10 +893,11 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src if (src == NULL) return; - hashmap__for_each_entry(src->samples, cur, bkt) - zfree(&cur->pvalue); - - hashmap__free(src->samples); + if (src->samples) { + hashmap__for_each_entry(src->samples, cur, bkt) + zfree(&cur->pvalue); + hashmap__free(src->samples); + } zfree(&src->histograms); free(src); } -- cgit v1.2.3 From 5e14d9883c3c0a39e3473d4720e618b7ad1dd227 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 9 May 2024 22:13:09 -0700 Subject: perf stat: Don't display metric header for non-leader uncore events [ Upstream commit 193a9e30207f54777ff42d0d8be8389edc522277 ] On an Intel tigerlake laptop a metric like: { "BriefDescription": "Test", "MetricExpr": "imc_free_running@data_read@ + imc_free_running@data_write@", "MetricGroup": "Test", "MetricName": "Test", "ScaleUnit": "6.103515625e-5MiB" }, Will have 4 events: uncore_imc_free_running_0/data_read/ uncore_imc_free_running_0/data_write/ uncore_imc_free_running_1/data_read/ uncore_imc_free_running_1/data_write/ If aggregration is disabled with metric-only 2 column headers are needed: $ perf stat -M test --metric-only -A -a sleep 1 Performance counter stats for 'system wide': MiB Test MiB Test CPU0 1821.0 1820.5 But when not, the counts aggregated in the metric leader and only 1 column should be shown: $ perf stat -M test --metric-only -a sleep 1 Performance counter stats for 'system wide': MiB Test 5909.4 1.001258915 seconds time elapsed Achieve this by skipping events that aren't metric leaders when printing column headers and aggregation isn't disabled. The bug is long standing, the fixes tag is set to a refactor as that is as far back as is reasonable to backport. Fixes: 088519f318be3a41 ("perf stat: Move the display functions to stat-display.c") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kaige Ye Cc: Kan Liang Cc: K Prateek Nayak Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yicong Yang Link: https://lore.kernel.org/r/20240510051309.2452468-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/stat-display.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bfc1d705f437..91d2f7f65df7 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1223,6 +1223,9 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { + if (config->aggr_mode != AGGR_NONE && counter->metric_leader != counter) + continue; + os.evsel = counter; perf_stat__print_shadow_stats(config, counter, 0, -- cgit v1.2.3 From ba3bbb34392fedcb0dd1d2a335fd8009f9fce161 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 7 Mar 2024 16:19:14 -0800 Subject: perf tools: Use pmus to describe type from attribute [ Upstream commit 7093882067e2e2f88d3449c35c5f0f3f566c8a26 ] When dumping a perf_event_attr, use pmus to find the PMU and its name by the type number. This allows dynamically added PMUs to be described. Before: $ perf stat -vv -e data_read true ... perf_event_attr: type 24 size 136 config 0x20ff sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ... After: $ perf stat -vv -e data_read true ... perf_event_attr: type 24 (uncore_imc_free_running_0) size 136 config 0x20ff sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ... However, it also means that when we have a PMU name we prefer it to a hard coded name: Before: $ perf stat -vv -e faults true ... perf_event_attr: type 1 (PERF_TYPE_SOFTWARE) size 136 config 0x2 (PERF_COUNT_SW_PAGE_FAULTS) sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ... After: $ perf stat -vv -e faults true ... perf_event_attr: type 1 (software) size 136 config 0x2 (PERF_COUNT_SW_PAGE_FAULTS) sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ... It feels more consistent to do this, rather than only prefer a PMU name when a hard coded name isn't available. Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Yang Jihong Link: https://lore.kernel.org/r/20240308001915.4060155-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: d9c5f5f94c2d ("perf pmu: Count sys and cpuid JSON events separately") Signed-off-by: Sasha Levin --- tools/perf/util/perf_event_attr_fprintf.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 8f04d3b7f3ec..29e66835da3a 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -7,6 +7,8 @@ #include #include #include "util/evsel_fprintf.h" +#include "util/pmu.h" +#include "util/pmus.h" #include "trace-event.h" struct bit_names { @@ -75,9 +77,12 @@ static void __p_read_format(char *buf, size_t size, u64 value) } #define ENUM_ID_TO_STR_CASE(x) case x: return (#x); -static const char *stringify_perf_type_id(u64 value) +static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type) { - switch (value) { + if (pmu) + return pmu->name; + + switch (type) { ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT) @@ -175,9 +180,9 @@ do { \ #define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64) #define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64) -static void __p_type_id(char *buf, size_t size, u64 value) +static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t size, u64 value) { - print_id_unsigned(stringify_perf_type_id(value)); + print_id_unsigned(stringify_perf_type_id(pmu, value)); } static void __p_config_hw_id(char *buf, size_t size, u64 value) @@ -246,7 +251,7 @@ static void __p_config_id(char *buf, size_t size, u32 type, u64 value) #define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) #define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) #define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) -#define p_type_id(val) __p_type_id(buf, BUF_SIZE, val) +#define p_type_id(val) __p_type_id(pmu, buf, BUF_SIZE, val) #define p_config_id(val) __p_config_id(buf, BUF_SIZE, attr->type, val) #define PRINT_ATTRn(_n, _f, _p, _a) \ @@ -262,6 +267,7 @@ do { \ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv) { + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); char buf[BUF_SIZE]; int ret = 0; -- cgit v1.2.3 From 30716f8db676addcb10117f16c2b69f92d6b9085 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 7 Mar 2024 16:19:15 -0800 Subject: perf tools: Add/use PMU reverse lookup from config to name [ Upstream commit 67ee8e71daabb8632931b7559e5c8a4b69a427f8 ] Add perf_pmu__name_from_config that does a reverse lookup from a config number to an alias name. The lookup is expensive as the config is computed for every alias by filling in a perf_event_attr, but this is only done when verbose output is enabled. The lookup also only considers config, and not config1, config2 or config3. An example of the output: $ perf stat -vv -e data_read true ... perf_event_attr: type 24 (uncore_imc_free_running_0) size 136 config 0x20ff (data_read) sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ... Committer notes: Fix the python binding build by adding dummies for not strictly needed perf_pmu__name_from_config() and perf_pmus__find_by_type(). Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Yang Jihong Link: https://lore.kernel.org/r/20240308001915.4060155-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: d9c5f5f94c2d ("perf pmu: Count sys and cpuid JSON events separately") Signed-off-by: Sasha Levin --- tools/perf/util/perf_event_attr_fprintf.c | 10 ++++++++-- tools/perf/util/pmu.c | 18 ++++++++++++++++++ tools/perf/util/pmu.h | 1 + tools/perf/util/python.c | 10 ++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 29e66835da3a..59fbbba79697 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -222,8 +222,14 @@ static void __p_config_tracepoint_id(char *buf, size_t size, u64 value) } #endif -static void __p_config_id(char *buf, size_t size, u32 type, u64 value) +static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value) { + const char *name = perf_pmu__name_from_config(pmu, value); + + if (name) { + print_id_hex(name); + return; + } switch (type) { case PERF_TYPE_HARDWARE: return __p_config_hw_id(buf, size, value); @@ -252,7 +258,7 @@ static void __p_config_id(char *buf, size_t size, u32 type, u64 value) #define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) #define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) #define p_type_id(val) __p_type_id(pmu, buf, BUF_SIZE, val) -#define p_config_id(val) __p_config_id(buf, BUF_SIZE, attr->type, val) +#define p_config_id(val) __p_config_id(pmu, buf, BUF_SIZE, attr->type, val) #define PRINT_ATTRn(_n, _f, _p, _a) \ do { \ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f39cbbc1a7ec..8695b47491f0 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -2085,3 +2085,21 @@ void perf_pmu__delete(struct perf_pmu *pmu) zfree(&pmu->id); free(pmu); } + +const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) +{ + struct perf_pmu_alias *event; + + if (!pmu) + return NULL; + + pmu_add_cpu_aliases(pmu); + list_for_each_entry(event, &pmu->aliases, list) { + struct perf_event_attr attr = {.config = 0,}; + int ret = perf_pmu__config(pmu, &attr, &event->terms, NULL); + + if (ret == 0 && config == attr.config) + return event->name; + } + return NULL; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index e35d985206db..2430083b151d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -273,5 +273,6 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); struct perf_pmu *perf_pmus__find_core_pmu(void); +const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config); #endif /* __PMU_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 075c0f79b1b9..0aeb97c11c03 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -103,6 +103,16 @@ int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char return EOF; } +const char *perf_pmu__name_from_config(struct perf_pmu *pmu __maybe_unused, u64 config __maybe_unused) +{ + return NULL; +} + +struct perf_pmu *perf_pmus__find_by_type(unsigned int type __maybe_unused) +{ + return NULL; +} + int perf_pmus__num_core_pmus(void) { return 1; -- cgit v1.2.3 From 0675ff8fed442770a9874977429a76e80efb61ed Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 2 May 2024 14:35:07 -0700 Subject: perf pmu: Assume sysfs events are always the same case [ Upstream commit 7b6dd7a923281a7ccb980a0f768d6926721eb3cc ] Perf event names aren't case sensitive. For sysfs events the entire directory of events is read then iterated comparing names in a case insensitive way, most often to see if an event is present. Consider: $ perf stat -e inst_retired.any true The event inst_retired.any may be present in any PMU, so every PMU's sysfs events are loaded and then searched with strcasecmp to see if any match. This event is only present on the cpu PMU as a JSON event so a lot of events were loaded from sysfs unnecessarily just to prove an event didn't exist there. This change avoids loading all the events by assuming sysfs event names are always either lower or uppercase. It uses file exists and only loads the events when the desired event is present. For the example above, the number of openat calls measured by 'perf trace' on a tigerlake laptop goes from 325 down to 255. The reduction will be larger for machines with many PMUs, particularly replicated uncore PMUs. Ensure pmu_aliases_parse() is called before all uses of the aliases list, but remove some "pmu->sysfs_aliases_loaded" tests as they are now part of the function. Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Bjorn Helgaas Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: Jonathan Corbet Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Cc: Ravi Bangoria Cc: Thomas Richter Link: https://lore.kernel.org/r/20240502213507.2339733-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: d9c5f5f94c2d ("perf pmu: Count sys and cpuid JSON events separately") Signed-off-by: Sasha Levin --- tools/perf/util/pmu.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8695b47491f0..feab54606a99 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -425,9 +425,30 @@ static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, { struct perf_pmu_alias *alias; - if (load && !pmu->sysfs_aliases_loaded) - pmu_aliases_parse(pmu); + if (load && !pmu->sysfs_aliases_loaded) { + bool has_sysfs_event; + char event_file_name[FILENAME_MAX + 8]; + /* + * Test if alias/event 'name' exists in the PMU's sysfs/events + * directory. If not skip parsing the sysfs aliases. Sysfs event + * name must be all lower or all upper case. + */ + scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name); + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = tolower(event_file_name[i]); + + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); + if (!has_sysfs_event) { + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = toupper(event_file_name[i]); + + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); + } + if (has_sysfs_event) + pmu_aliases_parse(pmu); + + } list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, name)) return alias; @@ -1632,9 +1653,7 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) { size_t nr; - if (!pmu->sysfs_aliases_loaded) - pmu_aliases_parse(pmu); - + pmu_aliases_parse(pmu); nr = pmu->sysfs_aliases; if (pmu->cpu_aliases_added) @@ -1693,6 +1712,7 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); + pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { size_t buf_used; @@ -2093,6 +2113,7 @@ const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) if (!pmu) return NULL; + pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { struct perf_event_attr attr = {.config = 0,}; -- cgit v1.2.3 From 2a33c2b03c2903faa2185be972a715b314cfaee6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 10 May 2024 17:36:01 -0700 Subject: perf pmu: Count sys and cpuid JSON events separately [ Upstream commit d9c5f5f94c2d356fdf3503f7fcaf254512bc032d ] Sys events are eagerly loaded as each event has a compat option that may mean the event is or isn't associated with the PMU. These shouldn't be counted as loaded_json_events as that is used for JSON events matching the CPUID that may or may not have been loaded. The mismatch causes issues on ARM64 that uses sys events. Fixes: e6ff1eed3584362d ("perf pmu: Lazily add JSON events") Closes: https://lore.kernel.org/lkml/20240510024729.1075732-1-justin.he@arm.com/ Reported-by: Jia He Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240511003601.2666907-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/pmu.c | 70 +++++++++++++++++++++++++++++++++++---------------- tools/perf/util/pmu.h | 6 +++-- 2 files changed, 53 insertions(+), 23 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index feab54606a99..cc349d9cb0f9 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -36,6 +36,18 @@ struct perf_pmu perf_pmu__fake = { #define UNIT_MAX_LEN 31 /* max length for event unit name */ +enum event_source { + /* An event loaded from /sys/devices//events. */ + EVENT_SRC_SYSFS, + /* An event loaded from a CPUID matched json file. */ + EVENT_SRC_CPU_JSON, + /* + * An event loaded from a /sys/devices//identifier matched json + * file. + */ + EVENT_SRC_SYS_JSON, +}; + /** * struct perf_pmu_alias - An event either read from sysfs or builtin in * pmu-events.c, created by parsing the pmu-events json files. @@ -521,7 +533,7 @@ static int update_alias(const struct pmu_event *pe, static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, const char *desc, const char *val, FILE *val_fd, - const struct pmu_event *pe) + const struct pmu_event *pe, enum event_source src) { struct perf_pmu_alias *alias; int ret; @@ -573,25 +585,30 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, } snprintf(alias->unit, sizeof(alias->unit), "%s", unit); } - if (!pe) { - /* Update an event from sysfs with json data. */ - struct update_alias_data data = { - .pmu = pmu, - .alias = alias, - }; - + switch (src) { + default: + case EVENT_SRC_SYSFS: alias->from_sysfs = true; if (pmu->events_table) { + /* Update an event from sysfs with json data. */ + struct update_alias_data data = { + .pmu = pmu, + .alias = alias, + }; if (pmu_events_table__find_event(pmu->events_table, pmu, name, update_alias, &data) == 0) - pmu->loaded_json_aliases++; + pmu->cpu_json_aliases++; } - } - - if (!pe) pmu->sysfs_aliases++; - else - pmu->loaded_json_aliases++; + break; + case EVENT_SRC_CPU_JSON: + pmu->cpu_json_aliases++; + break; + case EVENT_SRC_SYS_JSON: + pmu->sys_json_aliases++; + break; + + } list_add_tail(&alias->list, &pmu->aliases); return 0; } @@ -667,7 +684,8 @@ static int pmu_aliases_parse(struct perf_pmu *pmu) } if (perf_pmu__new_alias(pmu, name, /*desc=*/ NULL, - /*val=*/ NULL, file, /*pe=*/ NULL) < 0) + /*val=*/ NULL, file, /*pe=*/ NULL, + EVENT_SRC_SYSFS) < 0) pr_debug("Cannot set up %s\n", name); fclose(file); } @@ -921,7 +939,8 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe, { struct perf_pmu *pmu = vdata; - perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, pe); + perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, + pe, EVENT_SRC_CPU_JSON); return 0; } @@ -956,13 +975,14 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, return 0; if (pmu_uncore_alias_match(pe->pmu, pmu->name) && - pmu_uncore_identifier_match(pe->compat, pmu->id)) { + pmu_uncore_identifier_match(pe->compat, pmu->id)) { perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, - pe); + pe, + EVENT_SRC_SYS_JSON); } return 0; @@ -1056,6 +1076,12 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char pmu->max_precise = pmu_max_precise(dirfd, pmu); pmu->alias_name = pmu_find_alias_name(pmu, dirfd); pmu->events_table = perf_pmu__find_events_table(pmu); + /* + * Load the sys json events/aliases when loading the PMU as each event + * may have a different compat regular expression. We therefore can't + * know the number of sys json events/aliases without computing the + * regular expressions for them all. + */ pmu_add_sys_aliases(pmu); list_add_tail(&pmu->list, pmus); @@ -1654,12 +1680,14 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) size_t nr; pmu_aliases_parse(pmu); - nr = pmu->sysfs_aliases; + nr = pmu->sysfs_aliases + pmu->sys_json_aliases;; if (pmu->cpu_aliases_added) - nr += pmu->loaded_json_aliases; + nr += pmu->cpu_json_aliases; else if (pmu->events_table) - nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->loaded_json_aliases; + nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->cpu_json_aliases; + else + assert(pmu->cpu_json_aliases == 0); return pmu->selectable ? nr + 1 : nr; } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 2430083b151d..77c59ebc0526 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -123,8 +123,10 @@ struct perf_pmu { const struct pmu_events_table *events_table; /** @sysfs_aliases: Number of sysfs aliases loaded. */ uint32_t sysfs_aliases; - /** @sysfs_aliases: Number of json event aliases loaded. */ - uint32_t loaded_json_aliases; + /** @cpu_json_aliases: Number of json event aliases loaded specific to the CPUID. */ + uint32_t cpu_json_aliases; + /** @sys_json_aliases: Number of json event aliases loaded matching the PMU's identifier. */ + uint32_t sys_json_aliases; /** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */ bool sysfs_aliases_loaded; /** -- cgit v1.2.3