summaryrefslogtreecommitdiff
path: root/tools/perf/arch/x86/util
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-15 02:31:23 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-15 02:31:23 +0300
commit1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa (patch)
treea391eed8ae206613b48e02e56e6ad5c4432d8767 /tools/perf/arch/x86/util
parent63bd30f249dcf0a7ce16967935cecee8feec24bb (diff)
parent0f66dfe7b91d2743cc71dfff37af503215b204ef (diff)
downloadlinux-1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa.tar.xz
Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim: "perf stat: - Support new 'cluster' aggregation mode for shared resources depending on the hardware configuration: $ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1 Performance counter stats for 'system wide': S0-D0-CLS0 2 85,051,822 cycles S0-D0-CLS0 2 73,909,908 instructions # 0.87 insn per cycle S0-D0-CLS2 2 93,365,918 cycles S0-D0-CLS2 2 83,006,158 instructions # 0.89 insn per cycle S0-D0-CLS4 2 104,157,523 cycles S0-D0-CLS4 2 53,234,396 instructions # 0.51 insn per cycle S0-D0-CLS6 2 65,891,079 cycles S0-D0-CLS6 2 41,478,273 instructions # 0.63 insn per cycle 1.002407989 seconds time elapsed - Various fixes and cleanups for event metrics including NaN handling perf script: - Use libcapstone if available to disassemble the instructions. This enables 'perf script -F disasm' and 'perf script --insn-trace=disasm' (for Intel-PT): $ perf script -F event,ip,disasm cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa9839d25 movq %rax, %r14 cycles:P: ffffffffa9cdcaf0 endbr64 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa99c4de5 movq 0x30(%rcx), %r8 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa9907983 movl 0x68(%rbx), %eax cycles:P: ffffffffa988d428 wrmsr - Expose sample ID / stream ID to python scripts perf test: - Add more perf test cases from Redhat internal test suites. This time it adds the base infra and a few perf probe tests. More to come. :) - Add 'perf test -p' for parallel execution and fix some issues found by the parallel test - Support symbol test to print symbols in given (active) module: $ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko --- start --- Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko Overlapping symbols: 7a990-7a9a0 l __pfx_ext4_exit_fs 7a990-7a9a0 g __pfx_cleanup_module Overlapping symbols: 7a9a0-7aa1c l ext4_exit_fs 7a9a0-7aa1c g cleanup_module ... JSON metric updates: - A new round of Intel metric updates - Support Power11 PVR (compatible to Power10) - Fix cache latency events on Zen 4 to set SliceId properly Internal: - Fix reference counting for 'map' data structure, tireless work from Ian! - More memory optimization for struct thread and annotate histogram. Now, 'perf report' (TUI) and 'perf annotate' should be much lighter-weight in terms of memory footprint - Support cross-arch perf register access. Clean up the build configuration so that it can detect arch-register support at runtime. This can allow to parse register data in sample which was recorded in a different arch Others: - Sync task state in 'perf sched' to kernel using trace event fields. The task states have been changed so tools cannot assume a fixed encoding - Clean up 'perf mem' to generalize the arch-specific events - Add support for local and global variables to data type profiling. This would increase the success rate of type resolution with DWARF - Add short option -H for --hierarchy in 'perf report' and 'perf top'" * tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits) perf annotate: Add comments in the data structures perf annotate: Remove sym_hist.addr[] array perf annotate: Calculate instruction overhead using hashmap perf annotate: Add a hashmap for symbol histogram perf threads: Reduce table size from 256 to 8 perf threads: Switch from rbtree to hashmap perf threads: Move threads to its own files perf machine: Move machine's threads into its own abstraction perf machine: Move fprintf to for_each loop and a callback perf trace: Ignore thread hashing in summary perf report: Sort child tasks by tid perf vendor events amd: Fix Zen 4 cache latency events perf version: Display availability of OpenCSD support perf vendor events intel: Add umasks/occ_sel to PCU events. perf map: Fix map reference count issues libperf evlist: Avoid out-of-bounds access perf lock contention: Account contending locks too perf metrics: Fix segv for metrics with no events perf metrics: Fix metric matching perf pmu: Fix a potential memory leak in perf_pmu__lookup() ...
Diffstat (limited to 'tools/perf/arch/x86/util')
-rw-r--r--tools/perf/arch/x86/util/mem-events.c99
-rw-r--r--tools/perf/arch/x86/util/mem-events.h10
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c7
-rw-r--r--tools/perf/arch/x86/util/pmu.c19
-rw-r--r--tools/perf/arch/x86/util/tsc.c4
5 files changed, 45 insertions, 94 deletions
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index 191b372f9a2d..62df03e91c7e 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -1,93 +1,28 @@
// SPDX-License-Identifier: GPL-2.0
-#include "util/pmu.h"
-#include "util/pmus.h"
-#include "util/env.h"
-#include "map_symbol.h"
-#include "mem-events.h"
#include "linux/string.h"
-#include "env.h"
+#include "util/map_symbol.h"
+#include "util/mem-events.h"
+#include "mem-events.h"
-static char mem_loads_name[100];
-static bool mem_loads_name__init;
-static char mem_stores_name[100];
#define MEM_LOADS_AUX 0x8203
-#define MEM_LOADS_AUX_NAME "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P"
-#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }
-static struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = {
- E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"),
- E("ldlat-stores", "%s/mem-stores/P", "%s/events/mem-stores"),
- E(NULL, NULL, NULL),
+struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = {
+ E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "mem-loads", true, 0),
+ E("ldlat-stores", "%s/mem-stores/P", "mem-stores", false, 0),
+ E(NULL, NULL, NULL, false, 0),
};
-static struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
- E(NULL, NULL, NULL),
- E(NULL, NULL, NULL),
- E("mem-ldst", "ibs_op//", "ibs_op"),
+struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX] = {
+ E("ldlat-loads", "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P", "mem-loads", true, MEM_LOADS_AUX),
+ E("ldlat-stores", "%s/mem-stores/P", "mem-stores", false, 0),
+ E(NULL, NULL, NULL, false, 0),
};
-struct perf_mem_event *perf_mem_events__ptr(int i)
-{
- if (i >= PERF_MEM_EVENTS__MAX)
- return NULL;
-
- if (x86__is_amd_cpu())
- return &perf_mem_events_amd[i];
-
- return &perf_mem_events_intel[i];
-}
-
-bool is_mem_loads_aux_event(struct evsel *leader)
-{
- struct perf_pmu *pmu = perf_pmus__find("cpu");
-
- if (!pmu)
- pmu = perf_pmus__find("cpu_core");
-
- if (pmu && !perf_pmu__have_event(pmu, "mem-loads-aux"))
- return false;
-
- return leader->core.attr.config == MEM_LOADS_AUX;
-}
-
-const char *perf_mem_events__name(int i, const char *pmu_name)
-{
- struct perf_mem_event *e = perf_mem_events__ptr(i);
-
- if (!e)
- return NULL;
-
- if (i == PERF_MEM_EVENTS__LOAD) {
- if (mem_loads_name__init && !pmu_name)
- return mem_loads_name;
-
- if (!pmu_name) {
- mem_loads_name__init = true;
- pmu_name = "cpu";
- }
-
- if (perf_pmus__have_event(pmu_name, "mem-loads-aux")) {
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
- MEM_LOADS_AUX_NAME, pmu_name, pmu_name,
- perf_mem_events__loads_ldlat);
- } else {
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
- e->name, pmu_name,
- perf_mem_events__loads_ldlat);
- }
- return mem_loads_name;
- }
-
- if (i == PERF_MEM_EVENTS__STORE) {
- if (!pmu_name)
- pmu_name = "cpu";
-
- scnprintf(mem_stores_name, sizeof(mem_stores_name),
- e->name, pmu_name);
- return mem_stores_name;
- }
-
- return e->name;
-}
+struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
+ E(NULL, NULL, NULL, false, 0),
+ E(NULL, NULL, NULL, false, 0),
+ E("mem-ldst", "%s//", NULL, false, 0),
+};
diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h
new file mode 100644
index 000000000000..f55c8d3b7d59
--- /dev/null
+++ b/tools/perf/arch/x86/util/mem-events.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_MEM_EVENTS_H
+#define _X86_MEM_EVENTS_H
+
+extern struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX];
+
+extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX];
+
+#endif /* _X86_MEM_EVENTS_H */
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index b813502a2727..12fd93f04802 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -13,7 +13,7 @@
#include "../../../util/pmu.h"
#include "../../../util/pmus.h"
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
SMPL_REG(BX, PERF_REG_X86_BX),
SMPL_REG(CX, PERF_REG_X86_CX),
@@ -276,6 +276,11 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+ return sample_reg_masks;
+}
+
uint64_t arch__intr_reg_mask(void)
{
struct perf_event_attr attr = {
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 469555ae9b3c..c3d89d6ba1bf 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -15,6 +15,7 @@
#include "../../../util/pmu.h"
#include "../../../util/fncache.h"
#include "../../../util/pmus.h"
+#include "mem-events.h"
#include "env.h"
void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
@@ -30,14 +31,14 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
pmu->selectable = true;
}
#endif
-}
-
-int perf_pmus__num_mem_pmus(void)
-{
- /* AMD uses IBS OP pmu and not a core PMU for perf mem/c2c */
- if (x86__is_amd_cpu())
- return 1;
- /* Intel uses core pmus for perf mem/c2c */
- return perf_pmus__num_core_pmus();
+ if (x86__is_amd_cpu()) {
+ if (!strcmp(pmu->name, "ibs_op"))
+ pmu->mem_events = perf_mem_events_amd;
+ } else if (pmu->is_core) {
+ if (perf_pmu__have_event(pmu, "mem-loads-aux"))
+ pmu->mem_events = perf_mem_events_intel_aux;
+ else
+ pmu->mem_events = perf_mem_events_intel;
+ }
}
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 9b99f48b923c..e2d6cfe21057 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -33,7 +33,7 @@ static double cpuinfo_tsc_freq(void)
cpuinfo = fopen("/proc/cpuinfo", "r");
if (!cpuinfo) {
- pr_err("Failed to read /proc/cpuinfo for TSC frequency");
+ pr_err("Failed to read /proc/cpuinfo for TSC frequency\n");
return NAN;
}
while (getline(&line, &len, cpuinfo) > 0) {
@@ -48,7 +48,7 @@ static double cpuinfo_tsc_freq(void)
}
out:
if (fpclassify(result) == FP_ZERO)
- pr_err("Failed to find TSC frequency in /proc/cpuinfo");
+ pr_err("Failed to find TSC frequency in /proc/cpuinfo\n");
free(line);
fclose(cpuinfo);