summaryrefslogtreecommitdiff
path: root/tools/lib/perf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 23:42:32 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 23:42:32 +0300
commit7b58b82b86c8b65a2b57a4c6cb96a460654f9e09 (patch)
treea13e19f216389f16f1cb6641d54751f167482515 /tools/lib/perf
parent02f9a04d76b76b80b05ddc33ceabe806b84fda3c (diff)
parentab0809af0bee88b689ba289ec8c40aa2be3a17ec (diff)
downloadlinux-7b58b82b86c8b65a2b57a4c6cb96a460654f9e09.tar.xz
Merge tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo: "New features: perf ftrace: - Add -n/--use-nsec option to the 'latency' subcommand. Default: usecs: $ sudo perf ftrace latency -T dput -a sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 2098375 | ############################# | 1 - 2 us | 61 | | 2 - 4 us | 33 | | 4 - 8 us | 13 | | 8 - 16 us | 124 | | 16 - 32 us | 123 | | 32 - 64 us | 1 | | 64 - 128 us | 0 | | 128 - 256 us | 1 | | 256 - 512 us | 0 | | Better granularity with nsec: $ sudo perf ftrace latency -T dput -a -n sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 ns | 0 | | 2 - 4 ns | 0 | | 4 - 8 ns | 0 | | 8 - 16 ns | 0 | | 16 - 32 ns | 0 | | 32 - 64 ns | 0 | | 64 - 128 ns | 1163434 | ############## | 128 - 256 ns | 914102 | ############# | 256 - 512 ns | 884 | | 512 - 1024 ns | 613 | | 1 - 2 us | 31 | | 2 - 4 us | 17 | | 4 - 8 us | 7 | | 8 - 16 us | 123 | | 16 - 32 us | 83 | | perf lock: - Add -c/--combine-locks option to merge lock instances in the same class into a single entry. # perf lock report -c Name acquired contended avg wait(ns) total wait(ns) max wait(ns) min wait(ns) rcu_read_lock 251225 0 0 0 0 0 hrtimer_bases.lock 39450 0 0 0 0 0 &sb->s_type->i_l... 10301 1 662 662 662 662 ptlock_ptr(page) 10173 2 701 1402 760 642 &(ei->i_block_re... 8732 0 0 0 0 0 &xa->xa_lock 8088 0 0 0 0 0 &base->lock 6705 0 0 0 0 0 &p->pi_lock 5549 0 0 0 0 0 &dentry->d_lockr... 5010 4 1274 5097 1844 789 &ep->lock 3958 0 0 0 0 0 - Add -F/--field option to customize the list of fields to output: $ perf lock report -F contended,wait_max -k avg_wait Name contended max wait(ns) avg wait(ns) slock-AF_INET6 1 23543 23543 &lruvec->lru_lock 5 18317 11254 slock-AF_INET6 1 10379 10379 rcu_node_1 1 2104 2104 &dentry->d_lockr... 1 1844 1844 &dentry->d_lockr... 1 1672 1672 &newf->file_lock 15 2279 1025 &dentry->d_lockr... 1 792 792 - Add --synth=no option for record, as there is no need to symbolize, lock names comes from the tracepoints. 
perf record: - Threaded recording, opt-in, via the new --threads command line option. - Improve AMD IBS (Instruction-Based Sampling) error handling messages. perf script: - Add 'brstackinsnlen' field (use it with -F) for branch stacks. - Output branch sample type in 'perf script'. perf report: - Add "addr_from" and "addr_to" sort dimensions. - Print branch stack entry type in 'perf report --dump-raw-trace' - Fix symbolization for chrooted workloads. Hardware tracing: Intel PT: - Add CFE (Control Flow Event) and EVD (Event Data) packets support. - Add MODE.Exec IFLAG bit support. Explanation about these features from the "Intel® 64 and IA-32 architectures software developer’s manual combined volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" PDF at: https://cdrdv2.intel.com/v1/dl/getContent/671200 At page 3951: "32.2.4 Event Trace is a capability that exposes details about the asynchronous events, when they are generated, and when their corresponding software event handler completes execution. These include: o Interrupts, including NMI and SMI, including the interrupt vector when defined. o Faults, exceptions including the fault vector. - Page faults additionally include the page fault address, when in context. o Event handler returns, including IRET and RSM. o VM exits and VM entries.¹ - VM exits include the values written to the “exit reason” and “exit qualification” VMCS fields. INIT and SIPI events. o TSX aborts, including the abort status returned for the RTM instructions. o Shutdown. Additionally, it provides indication of the status of the Interrupt Flag (IF), to indicate when interrupts are masked" ARM CoreSight: - Use advertised caps/min_interval as default sample_period on ARM spe. - Update deduction of TRCCONFIGR register for branch broadcast on ARM's CoreSight ETM. 
Vendor Events (JSON): Intel: - Update events and metrics for: Alderlake, Broadwell, Broadwell DE, BroadwellX, CascadelakeX, Elkhartlake, Bonnell, Goldmont, GoldmontPlus, Westmere EP-DP, Haswell, HaswellX, Icelake, IcelakeX, Ivybridge, Ivytown, Jaketown, Knights Landing, Nehalem EP, Sandybridge, Silvermont, Skylake, Skylake Server, SkylakeX, Tigerlake, TremontX, Westmere EP-SP, and Westmere EX. ARM: - Add support for HiSilicon CPA PMU aliasing. perf stat: - Fix forked applications enablement of counters. - The 'slots' should only be printed in a different order than the one specified on the command line when 'topdown' events are present, fix it. Miscellaneous: - Sync msr-index, cpufeatures header files with the kernel sources. - Stop using some deprecated libbpf APIs in 'perf trace'. - Fix some spelling mistakes. - Refactor the maps pointers usage to pave the way for using refcount debugging. - Only offer the --tui option on perf top, report and annotate when perf was built with libslang. - Don't mention --to-ctf in 'perf data --help' when not linking with the required library, libbabeltrace. - Use ARRAY_SIZE() instead of ad hoc equivalent, spotted by array_size.cocci. - Enhance the matching of sub-commands abbreviations: 'perf c2c rec' -> 'perf c2c record' 'perf c2c recport' -> error - Set build-id using build-id header on new mmap records. - Fix generation of 'perf --version' string. perf test: - Add test for the arm_spe event. - Add test to check unwinding using frame-pointer (fp) mode on arm64. - Make metric testing more robust in 'perf test'. - Add error message for unsupported branch stack cases. libperf: - Add API for allocating new thread map array. - Fix typo in perf_evlist__open() failure error messages in libperf tests. 
perf c2c: - Replace bitmap_weight() with bitmap_empty() where appropriate" * tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (143 commits) perf evsel: Improve AMD IBS (Instruction-Based Sampling) error handling messages perf python: Add perf_env stubs that will be needed in evsel__open_strerror() perf tools: Enhance the matching of sub-commands abbreviations libperf tests: Fix typo in perf_evlist__open() failure error messages tools arm64: Import cputype.h perf lock: Add -F/--field option to control output perf lock: Extend struct lock_key to have print function perf lock: Add --synth=no option for record tools headers cpufeatures: Sync with the kernel sources tools headers cpufeatures: Sync with the kernel sources perf stat: Fix forked applications enablement of counters tools arch x86: Sync the msr-index.h copy with the kernel sources perf evsel: Make evsel__env() always return a valid env perf build-id: Fix spelling mistake "Cant" -> "Can't" perf header: Fix spelling mistake "could't" -> "couldn't" perf script: Add 'brstackinsnlen' for branch stacks perf parse-events: Move slots only with topdown perf ftrace latency: Update documentation perf ftrace latency: Add -n/--use-nsec option perf tools: Fix version kernel tag ...
Diffstat (limited to 'tools/lib/perf')
-rw-r--r--tools/lib/perf/Documentation/libperf.txt7
-rw-r--r--tools/lib/perf/include/perf/threadmap.h7
-rw-r--r--tools/lib/perf/libperf.map1
-rw-r--r--tools/lib/perf/tests/test-evlist.c8
-rw-r--r--tools/lib/perf/tests/test-threadmap.c41
-rw-r--r--tools/lib/perf/threadmap.c36
6 files changed, 77 insertions, 23 deletions
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 32c5051c24eb..a8f1a237931b 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -62,11 +62,12 @@ SYNOPSIS
struct perf_thread_map;
struct perf_thread_map *perf_thread_map__new_dummy(void);
+ struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array);
- void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
- char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
+ void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid);
+ char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
int perf_thread_map__nr(struct perf_thread_map *threads);
- pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
+ pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
void perf_thread_map__put(struct perf_thread_map *map);
diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h
index a7c50de8d010..8b40e7777cea 100644
--- a/tools/lib/perf/include/perf/threadmap.h
+++ b/tools/lib/perf/include/perf/threadmap.h
@@ -8,11 +8,12 @@
struct perf_thread_map;
LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void);
+LIBPERF_API struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array);
-LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
-LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
+LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid);
+LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);
-LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
+LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map);
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 6fa0d651576b..190b56ae923a 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -12,6 +12,7 @@ LIBPERF_0.0.1 {
perf_cpu_map__empty;
perf_cpu_map__max;
perf_cpu_map__has;
+ perf_thread_map__new_array;
perf_thread_map__new_dummy;
perf_thread_map__set_pid;
perf_thread_map__comm;
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index fa854c83b7e7..ed616fc19b4f 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -69,7 +69,7 @@ static int test_stat_cpu(void)
perf_evlist__set_maps(evlist, cpus, NULL);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__for_each_evsel(evlist, evsel) {
cpus = perf_evsel__cpus(evsel);
@@ -130,7 +130,7 @@ static int test_stat_thread(void)
perf_evlist__set_maps(evlist, NULL, threads);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__for_each_evsel(evlist, evsel) {
perf_evsel__read(evsel, 0, 0, &counts);
@@ -187,7 +187,7 @@ static int test_stat_thread_enable(void)
perf_evlist__set_maps(evlist, NULL, threads);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__for_each_evsel(evlist, evsel) {
perf_evsel__read(evsel, 0, 0, &counts);
@@ -507,7 +507,7 @@ static int test_stat_multiplexing(void)
perf_evlist__set_maps(evlist, NULL, threads);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__enable(evlist);
diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c
index 5e2a0291e94c..f728ad7002bb 100644
--- a/tools/lib/perf/tests/test-threadmap.c
+++ b/tools/lib/perf/tests/test-threadmap.c
@@ -11,9 +11,43 @@ static int libperf_print(enum libperf_print_level level,
return vfprintf(stderr, fmt, ap);
}
+static int test_threadmap_array(int nr, pid_t *array)
+{
+ struct perf_thread_map *threads;
+ int i;
+
+ threads = perf_thread_map__new_array(nr, array);
+ __T("Failed to allocate new thread map", threads);
+
+ __T("Unexpected number of threads", perf_thread_map__nr(threads) == nr);
+
+ for (i = 0; i < nr; i++) {
+ __T("Unexpected initial value of thread",
+ perf_thread_map__pid(threads, i) == (array ? array[i] : -1));
+ }
+
+ for (i = 1; i < nr; i++)
+ perf_thread_map__set_pid(threads, i, i * 100);
+
+ __T("Unexpected value of thread 0",
+ perf_thread_map__pid(threads, 0) == (array ? array[0] : -1));
+
+ for (i = 1; i < nr; i++) {
+ __T("Unexpected thread value",
+ perf_thread_map__pid(threads, i) == i * 100);
+ }
+
+ perf_thread_map__put(threads);
+
+ return 0;
+}
+
+#define THREADS_NR 10
int test_threadmap(int argc, char **argv)
{
struct perf_thread_map *threads;
+ pid_t thr_array[THREADS_NR];
+ int i;
__T_START;
@@ -27,6 +61,13 @@ int test_threadmap(int argc, char **argv)
perf_thread_map__put(threads);
perf_thread_map__put(threads);
+ test_threadmap_array(THREADS_NR, NULL);
+
+ for (i = 0; i < THREADS_NR; i++)
+ thr_array[i] = i + 100;
+
+ test_threadmap_array(THREADS_NR, thr_array);
+
__T_END;
return tests_failed == 0 ? 0 : -1;
}
diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c
index e92c368b0a6c..07968f3ea093 100644
--- a/tools/lib/perf/threadmap.c
+++ b/tools/lib/perf/threadmap.c
@@ -32,28 +32,38 @@ struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, in
#define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr)
-void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid)
+void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid)
{
- map->map[thread].pid = pid;
+ map->map[idx].pid = pid;
}
-char *perf_thread_map__comm(struct perf_thread_map *map, int thread)
+char *perf_thread_map__comm(struct perf_thread_map *map, int idx)
{
- return map->map[thread].comm;
+ return map->map[idx].comm;
}
-struct perf_thread_map *perf_thread_map__new_dummy(void)
+struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array)
{
- struct perf_thread_map *threads = thread_map__alloc(1);
+ struct perf_thread_map *threads = thread_map__alloc(nr_threads);
+ int i;
+
+ if (!threads)
+ return NULL;
+
+ for (i = 0; i < nr_threads; i++)
+ perf_thread_map__set_pid(threads, i, array ? array[i] : -1);
+
+ threads->nr = nr_threads;
+ refcount_set(&threads->refcnt, 1);
- if (threads != NULL) {
- perf_thread_map__set_pid(threads, 0, -1);
- threads->nr = 1;
- refcount_set(&threads->refcnt, 1);
- }
return threads;
}
+struct perf_thread_map *perf_thread_map__new_dummy(void)
+{
+ return perf_thread_map__new_array(1, NULL);
+}
+
static void perf_thread_map__delete(struct perf_thread_map *threads)
{
if (threads) {
@@ -85,7 +95,7 @@ int perf_thread_map__nr(struct perf_thread_map *threads)
return threads ? threads->nr : 1;
}
-pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread)
+pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx)
{
- return map->map[thread].pid;
+ return map->map[idx].pid;
}