summaryrefslogtreecommitdiff
path: root/tools/perf/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-30 21:35:41 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-30 21:35:41 +0300
commitb30d7a77c53ec04a6d94683d7680ec406b7f3ac8 (patch)
tree5c8d99d15eb1a9b28810a5358b098ac18daefa71 /tools/perf/arch/x86
parentd2a6fd45c5c4a5c5fdfe6c57f74f630e61d8d9a0 (diff)
parent4d60e83dfcee794213878155463d8f7353a80864 (diff)
downloadlinux-b30d7a77c53ec04a6d94683d7680ec406b7f3ac8.tar.xz
Merge tag 'perf-tools-for-v6.5-1-2023-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next
Pull perf tools updates from Namhyung Kim: "Internal cleanup: - Refactor PMU data management to handle hybrid systems in a generic way. Do more work in the lexer so that legacy event types parse more easily. A side-effect of this is that if a PMU is specified, scanning sysfs is avoided improving start-up time. - Fix hybrid metrics, for example, the TopdownL1 works for both performance and efficiency cores on Intel machines. To support this, sort and regroup events after parsing. - Add reference count checking for the 'thread' data structure. - Lots of fixes for memory leaks in various places thanks to the ASAN and Ian's refcount checker. - Reduce the binary size by replacing static variables with local or dynamically allocated memory. - Introduce shared_mutex for annotate data to reduce memory footprint. - Make filesystem access library functions more thread safe. Test: - Organize cpu_map tests into a single suite. - Add metric value validation test to check if the values are within correct value ranges. - Add perf stat stdio output test to check if event and metric names match. - Add perf data converter JSON output test. - Fix a lot of issues reported by shellcheck(1). This is a preparation to enable shellcheck by default. - Make the large x86 new instructions test optional at build time using EXTRA_TESTS=1. - Add a test for libpfm4 events. perf script: - Add 'dsoff' outpuf field to display offset from the DSO. $ perf script -F comm,pid,event,ip,dsoff ls 2695501 cycles: 152cc73ef4b5 (/usr/lib/x86_64-linux-gnu/ld-2.31.so+0x1c4b5) ls 2695501 cycles: ffffffff99045b3e ([kernel.kallsyms]) ls 2695501 cycles: ffffffff9968e107 ([kernel.kallsyms]) ls 2695501 cycles: ffffffffc1f54afb ([kernel.kallsyms]) ls 2695501 cycles: ffffffff9968382f ([kernel.kallsyms]) ls 2695501 cycles: ffffffff99e00094 ([kernel.kallsyms]) ls 2695501 cycles: 152cc718a8d0 (/usr/lib/x86_64-linux-gnu/libselinux.so.1+0x68d0) ls 2695501 cycles: ffffffff992a6db0 ([kernel.kallsyms]) - Adjust width for large PID/TID values. perf report: - Robustify reading addr2line output for srcline by checking sentinel output before the actual data and by using timeout of 1 second. - Allow config terms (like 'name=ABC') with breakpoint events. $ perf record -e mem:0x55feb98dd169:x/name=breakpoint/ -p 19646 -- sleep 1 perf annotate: - Handle x86 instruction suffix like 'l' in 'movl' generally. - Parse instruction operands properly even with a whitespace. This is needed for llvm-objdump output. - Support RISC-V binutils lookup using the triplet prefixes. - Add '<' and '>' key to navigate to prev/next symbols in TUI. - Fix instruction association and parsing for LoongArch. perf stat: - Add --per-cache aggregation option, optionally specify a cache level like `--per-cache=L2`. $ sudo perf stat --per-cache -a -e ls_dmnd_fills_from_sys.ext_cache_remote --\ taskset -c 0-15,64-79,128-143,192-207\ perf bench sched messaging -p -t -l 100000 -g 8 # Running 'sched/messaging' benchmark: # 20 sender and receiver threads per group # 8 groups == 320 threads run Total time: 7.648 [sec] Performance counter stats for 'system wide': S0-D0-L3-ID0 16 17,145,912 ls_dmnd_fills_from_sys.ext_cache_remote S0-D0-L3-ID8 16 14,977,628 ls_dmnd_fills_from_sys.ext_cache_remote S0-D0-L3-ID16 16 262,539 ls_dmnd_fills_from_sys.ext_cache_remote S0-D0-L3-ID24 16 3,140 ls_dmnd_fills_from_sys.ext_cache_remote S0-D0-L3-ID32 16 27,403 ls_dmnd_fills_from_sys.ext_cache_remote S0-D0-L3-ID40 16 17,026 ls_dmnd_fills_from_sys.ext_cache_remote S0-D0-L3-ID48 16 7,292 ls_dmnd_fills_from_sys.ext_cache_remote S0-D0-L3-ID56 16 2,464 ls_dmnd_fills_from_sys.ext_cache_remote S1-D1-L3-ID64 16 22,489,306 ls_dmnd_fills_from_sys.ext_cache_remote S1-D1-L3-ID72 16 21,455,257 ls_dmnd_fills_from_sys.ext_cache_remote S1-D1-L3-ID80 16 11,619 ls_dmnd_fills_from_sys.ext_cache_remote S1-D1-L3-ID88 16 30,978 ls_dmnd_fills_from_sys.ext_cache_remote S1-D1-L3-ID96 16 37,628 ls_dmnd_fills_from_sys.ext_cache_remote S1-D1-L3-ID104 16 13,594 ls_dmnd_fills_from_sys.ext_cache_remote S1-D1-L3-ID112 16 10,164 ls_dmnd_fills_from_sys.ext_cache_remote S1-D1-L3-ID120 16 11,259 ls_dmnd_fills_from_sys.ext_cache_remote 7.779171484 seconds time elapsed - Change default (no event/metric) formatting for default metrics so that events are hidden and the metric and group appear. Performance counter stats for 'ls /': 1.85 msec task-clock # 0.594 CPUs utilized 0 context-switches # 0.000 /sec 0 cpu-migrations # 0.000 /sec 97 page-faults # 52.517 K/sec 2,187,173 cycles # 1.184 GHz 2,474,459 instructions # 1.13 insn per cycle 531,584 branches # 287.805 M/sec 13,626 branch-misses # 2.56% of all branches TopdownL1 # 23.5 % tma_backend_bound # 11.5 % tma_bad_speculation # 39.1 % tma_frontend_bound # 25.9 % tma_retiring - Allow --cputype option to have any PMU name (not just hybrid). - Fix output value not to added when it runs multiple times with -r option. perf list: - Show metricgroup description from JSON file called metricgroups.json. - Allow 'pfm' argument to list only libpfm4 events and check each event is supported before showing it. JSON vendor events: - Avoid event grouping using "NO_GROUP_EVENTS" constraints. The topdown events are correctly grouped even if no group exists. - Add "Default" metric group to print it in the default output. And use "DefaultMetricgroupName" to indicate the real metric group name. - Add AmpereOne core PMU events. Misc: - Define man page date correctly. - Track exception level properly on ARM CoreSight ETM. - Allow anonymous struct, union or enum when retrieving type names from DWARF. - Fix incorrect filename when calling `perf inject --jit`. - Handle PLT size correctly on LoongArch" * tag 'perf-tools-for-v6.5-1-2023-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next: (269 commits) perf test: Skip metrics w/o event name in stat STD output linter perf test: Reorder event name checks in stat STD output linter perf pmu: Remove a hard coded cpu PMU assumption perf pmus: Add notion of default PMU for JSON events perf unwind: Fix map reference counts perf test: Set PERF_EXEC_PATH for script execution perf script: Initialize buffer for regs_map() perf tests: Fix test_arm_callgraph_fp variable expansion perf symbol: Add LoongArch case in get_plt_sizes() perf test: Remove x permission from lib/stat_output.sh perf test: Rerun failed metrics with longer workload perf test: Add skip list for metrics known would fail perf test: Add metric value validation test perf jit: Fix incorrect file name in DWARF line table perf annotate: Fix instruction association and parsing for LoongArch perf annotation: Switch lock from a mutex to a sharded_mutex perf sharded_mutex: Introduce sharded_mutex tools: Fix incorrect calculation of object size by sizeof perf subcmd: Fix missing check for return value of malloc() in add_cmdname() perf parse-events: Remove unneeded semicolon ...
Diffstat (limited to 'tools/perf/arch/x86')
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c50
-rwxr-xr-xtools/perf/arch/x86/entry/syscalls/syscalltbl.sh2
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h3
-rw-r--r--tools/perf/arch/x86/tests/Build6
-rw-r--r--tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c5
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c14
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/x86/tests/hybrid.c288
-rw-r--r--tools/perf/arch/x86/tests/insn-x86.c10
-rw-r--r--tools/perf/arch/x86/tests/intel-pt-test.c14
-rw-r--r--tools/perf/arch/x86/util/Build1
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c5
-rw-r--r--tools/perf/arch/x86/util/env.c19
-rw-r--r--tools/perf/arch/x86/util/env.h7
-rw-r--r--tools/perf/arch/x86/util/evlist.c29
-rw-r--r--tools/perf/arch/x86/util/evsel.c43
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c4
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c4
-rw-r--r--tools/perf/arch/x86/util/mem-events.c36
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c15
-rw-r--r--tools/perf/arch/x86/util/pmu.c12
-rw-r--r--tools/perf/arch/x86/util/topdown.c5
22 files changed, 430 insertions, 144 deletions
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 305872692bfd..5f4ac4fc7fcf 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -1,46 +1,37 @@
// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86 instruction nmemonic table to parse disasm lines for annotate.
+ * This table is searched twice - one for exact match and another for
+ * match without a size suffix (b, w, l, q) in case of AT&T syntax.
+ *
+ * So this table should not have entries with the suffix unless it's
+ * a complete different instruction than ones without the suffix.
+ */
static struct ins x86__instructions[] = {
{ .name = "adc", .ops = &mov_ops, },
- { .name = "adcb", .ops = &mov_ops, },
- { .name = "adcl", .ops = &mov_ops, },
{ .name = "add", .ops = &mov_ops, },
- { .name = "addl", .ops = &mov_ops, },
- { .name = "addq", .ops = &mov_ops, },
{ .name = "addsd", .ops = &mov_ops, },
- { .name = "addw", .ops = &mov_ops, },
{ .name = "and", .ops = &mov_ops, },
- { .name = "andb", .ops = &mov_ops, },
- { .name = "andl", .ops = &mov_ops, },
{ .name = "andpd", .ops = &mov_ops, },
{ .name = "andps", .ops = &mov_ops, },
- { .name = "andq", .ops = &mov_ops, },
- { .name = "andw", .ops = &mov_ops, },
{ .name = "bsr", .ops = &mov_ops, },
{ .name = "bt", .ops = &mov_ops, },
{ .name = "btr", .ops = &mov_ops, },
{ .name = "bts", .ops = &mov_ops, },
- { .name = "btsq", .ops = &mov_ops, },
{ .name = "call", .ops = &call_ops, },
- { .name = "callq", .ops = &call_ops, },
{ .name = "cmovbe", .ops = &mov_ops, },
{ .name = "cmove", .ops = &mov_ops, },
{ .name = "cmovae", .ops = &mov_ops, },
{ .name = "cmp", .ops = &mov_ops, },
- { .name = "cmpb", .ops = &mov_ops, },
- { .name = "cmpl", .ops = &mov_ops, },
- { .name = "cmpq", .ops = &mov_ops, },
- { .name = "cmpw", .ops = &mov_ops, },
{ .name = "cmpxch", .ops = &mov_ops, },
{ .name = "cmpxchg", .ops = &mov_ops, },
{ .name = "cs", .ops = &mov_ops, },
{ .name = "dec", .ops = &dec_ops, },
- { .name = "decl", .ops = &dec_ops, },
{ .name = "divsd", .ops = &mov_ops, },
{ .name = "divss", .ops = &mov_ops, },
{ .name = "gs", .ops = &mov_ops, },
{ .name = "imul", .ops = &mov_ops, },
{ .name = "inc", .ops = &dec_ops, },
- { .name = "incl", .ops = &dec_ops, },
{ .name = "ja", .ops = &jump_ops, },
{ .name = "jae", .ops = &jump_ops, },
{ .name = "jb", .ops = &jump_ops, },
@@ -54,7 +45,6 @@ static struct ins x86__instructions[] = {
{ .name = "jl", .ops = &jump_ops, },
{ .name = "jle", .ops = &jump_ops, },
{ .name = "jmp", .ops = &jump_ops, },
- { .name = "jmpq", .ops = &jump_ops, },
{ .name = "jna", .ops = &jump_ops, },
{ .name = "jnae", .ops = &jump_ops, },
{ .name = "jnb", .ops = &jump_ops, },
@@ -81,48 +71,32 @@ static struct ins x86__instructions[] = {
{ .name = "mov", .ops = &mov_ops, },
{ .name = "movapd", .ops = &mov_ops, },
{ .name = "movaps", .ops = &mov_ops, },
- { .name = "movb", .ops = &mov_ops, },
{ .name = "movdqa", .ops = &mov_ops, },
{ .name = "movdqu", .ops = &mov_ops, },
- { .name = "movl", .ops = &mov_ops, },
- { .name = "movq", .ops = &mov_ops, },
{ .name = "movsd", .ops = &mov_ops, },
{ .name = "movslq", .ops = &mov_ops, },
{ .name = "movss", .ops = &mov_ops, },
{ .name = "movupd", .ops = &mov_ops, },
{ .name = "movups", .ops = &mov_ops, },
- { .name = "movw", .ops = &mov_ops, },
{ .name = "movzbl", .ops = &mov_ops, },
{ .name = "movzwl", .ops = &mov_ops, },
{ .name = "mulsd", .ops = &mov_ops, },
{ .name = "mulss", .ops = &mov_ops, },
{ .name = "nop", .ops = &nop_ops, },
- { .name = "nopl", .ops = &nop_ops, },
- { .name = "nopw", .ops = &nop_ops, },
{ .name = "or", .ops = &mov_ops, },
- { .name = "orb", .ops = &mov_ops, },
- { .name = "orl", .ops = &mov_ops, },
{ .name = "orps", .ops = &mov_ops, },
- { .name = "orq", .ops = &mov_ops, },
{ .name = "pand", .ops = &mov_ops, },
{ .name = "paddq", .ops = &mov_ops, },
{ .name = "pcmpeqb", .ops = &mov_ops, },
{ .name = "por", .ops = &mov_ops, },
- { .name = "rclb", .ops = &mov_ops, },
- { .name = "rcll", .ops = &mov_ops, },
+ { .name = "rcl", .ops = &mov_ops, },
{ .name = "ret", .ops = &ret_ops, },
- { .name = "retq", .ops = &ret_ops, },
{ .name = "sbb", .ops = &mov_ops, },
- { .name = "sbbl", .ops = &mov_ops, },
{ .name = "sete", .ops = &mov_ops, },
{ .name = "sub", .ops = &mov_ops, },
- { .name = "subl", .ops = &mov_ops, },
- { .name = "subq", .ops = &mov_ops, },
{ .name = "subsd", .ops = &mov_ops, },
- { .name = "subw", .ops = &mov_ops, },
{ .name = "test", .ops = &mov_ops, },
- { .name = "testb", .ops = &mov_ops, },
- { .name = "testl", .ops = &mov_ops, },
+ { .name = "tzcnt", .ops = &mov_ops, },
{ .name = "ucomisd", .ops = &mov_ops, },
{ .name = "ucomiss", .ops = &mov_ops, },
{ .name = "vaddsd", .ops = &mov_ops, },
@@ -135,11 +109,9 @@ static struct ins x86__instructions[] = {
{ .name = "vsubsd", .ops = &mov_ops, },
{ .name = "vucomisd", .ops = &mov_ops, },
{ .name = "xadd", .ops = &mov_ops, },
- { .name = "xbeginl", .ops = &jump_ops, },
- { .name = "xbeginq", .ops = &jump_ops, },
+ { .name = "xbegin", .ops = &jump_ops, },
{ .name = "xchg", .ops = &mov_ops, },
{ .name = "xor", .ops = &mov_ops, },
- { .name = "xorb", .ops = &mov_ops, },
{ .name = "xorpd", .ops = &mov_ops, },
{ .name = "xorps", .ops = &mov_ops, },
};
diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
index 029a72c20b19..fa526a993845 100755
--- a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
@@ -18,7 +18,7 @@ emit() {
syscall_macro "$nr" "$entry"
}
-echo "static const char *syscalltbl_${arch}[] = {"
+echo "static const char *const syscalltbl_${arch}[] = {"
sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
grep '^[0-9]' "$in" | sort -n > $sorted_table
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 93d3b8877baa..c0421a26b875 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -6,12 +6,15 @@ struct test_suite;
/* Tests */
int test__rdpmc(struct test_suite *test, int subtest);
+#ifdef HAVE_EXTRA_TESTS
int test__insn_x86(struct test_suite *test, int subtest);
+#endif
int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest);
int test__bp_modify(struct test_suite *test, int subtest);
int test__x86_sample_parsing(struct test_suite *test, int subtest);
int test__amd_ibs_via_core_pmu(struct test_suite *test, int subtest);
+int test__hybrid(struct test_suite *test, int subtest);
extern struct test_suite *arch_tests[];
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index fd02d8181718..b87f46e5feea 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,6 +3,10 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
perf-y += sample-parsing.o
-perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-test.o
+perf-y += hybrid.o
+perf-$(CONFIG_AUXTRACE) += intel-pt-test.o
+ifeq ($(CONFIG_EXTRA_TESTS),y)
+perf-$(CONFIG_AUXTRACE) += insn-x86.o
+endif
perf-$(CONFIG_X86_64) += bp-modify.o
perf-y += amd-ibs-via-core-pmu.o
diff --git a/tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c b/tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c
index 2902798ca5c1..78b1902f6f59 100644
--- a/tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c
+++ b/tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c
@@ -44,10 +44,7 @@ int test__amd_ibs_via_core_pmu(struct test_suite *test __maybe_unused,
int ret = TEST_OK;
int fd, i;
- if (list_empty(&pmus))
- perf_pmu__scan(NULL);
-
- ibs_pmu = perf_pmu__find("ibs_op");
+ ibs_pmu = perf_pmus__find("ibs_op");
if (!ibs_pmu)
return TEST_SKIP;
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index b5c85ab8d92e..a216a5d172ed 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -4,7 +4,9 @@
#include "arch-tests.h"
#ifdef HAVE_AUXTRACE_SUPPORT
+#ifdef HAVE_EXTRA_TESTS
DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
+#endif
static struct test_case intel_pt_tests[] = {
TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder),
@@ -23,13 +25,24 @@ DEFINE_SUITE("x86 bp modify", bp_modify);
#endif
DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing);
DEFINE_SUITE("AMD IBS via core pmu", amd_ibs_via_core_pmu);
+static struct test_case hybrid_tests[] = {
+ TEST_CASE_REASON("x86 hybrid event parsing", hybrid, "not hybrid"),
+ { .name = NULL, }
+};
+
+struct test_suite suite__hybrid = {
+ .desc = "x86 hybrid",
+ .test_cases = hybrid_tests,
+};
struct test_suite *arch_tests[] = {
#ifdef HAVE_DWARF_UNWIND_SUPPORT
&suite__dwarf_unwind,
#endif
#ifdef HAVE_AUXTRACE_SUPPORT
+#ifdef HAVE_EXTRA_TESTS
&suite__insn_x86,
+#endif
&suite__intel_pt,
#endif
#if defined(__x86_64__)
@@ -37,5 +50,6 @@ struct test_suite *arch_tests[] = {
#endif
&suite__x86_sample_parsing,
&suite__amd_ibs_via_core_pmu,
+ &suite__hybrid,
NULL,
};
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index 497593be80f2..5bfec3345d59 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_X86_SP];
- map = maps__find(thread->maps, (u64)sp);
+ map = maps__find(thread__maps(thread), (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
diff --git a/tools/perf/arch/x86/tests/hybrid.c b/tools/perf/arch/x86/tests/hybrid.c
new file mode 100644
index 000000000000..eb152770f148
--- /dev/null
+++ b/tools/perf/arch/x86/tests/hybrid.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arch-tests.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "pmu.h"
+#include "pmus.h"
+#include "tests/tests.h"
+
+static bool test_config(const struct evsel *evsel, __u64 expected_config)
+{
+ return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == expected_config;
+}
+
+static bool test_perf_config(const struct perf_evsel *evsel, __u64 expected_config)
+{
+ return (evsel->attr.config & PERF_HW_EVENT_MASK) == expected_config;
+}
+
+static bool test_hybrid_type(const struct evsel *evsel, __u64 expected_config)
+{
+ return (evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT) == expected_config;
+}
+
+static int test__hybrid_hw_event_with_pmu(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ return TEST_OK;
+}
+
+static int test__hybrid_hw_group_event(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ return TEST_OK;
+}
+
+static int test__hybrid_sw_hw_group_event(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ return TEST_OK;
+}
+
+static int test__hybrid_hw_sw_group_event(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ return TEST_OK;
+}
+
+static int test__hybrid_group_modifier1(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ return TEST_OK;
+}
+
+static int test__hybrid_raw1(struct evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ perf_evlist__for_each_evsel(&evlist->core, evsel) {
+ struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->attr.type);
+
+ TEST_ASSERT_VAL("missing pmu", pmu);
+ TEST_ASSERT_VAL("unexpected pmu", !strncmp(pmu->name, "cpu_", 4));
+ TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 0x1a));
+ }
+ return TEST_OK;
+}
+
+static int test__hybrid_raw2(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0x1a));
+ return TEST_OK;
+}
+
+static int test__hybrid_cache_event(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", 0x2 == (evsel->core.attr.config & 0xffffffff));
+ return TEST_OK;
+}
+
+static int test__checkevent_pmu(struct evlist *evlist)
+{
+
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", 10 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config1", 1 == evsel->core.attr.config1);
+ TEST_ASSERT_VAL("wrong config2", 3 == evsel->core.attr.config2);
+ TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3);
+ /*
+ * The period value gets configured within evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period);
+
+ return TEST_OK;
+}
+
+struct evlist_test {
+ const char *name;
+ bool (*valid)(void);
+ int (*check)(struct evlist *evlist);
+};
+
+static const struct evlist_test test__hybrid_events[] = {
+ {
+ .name = "cpu_core/cpu-cycles/",
+ .check = test__hybrid_hw_event_with_pmu,
+ /* 0 */
+ },
+ {
+ .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}",
+ .check = test__hybrid_hw_group_event,
+ /* 1 */
+ },
+ {
+ .name = "{cpu-clock,cpu_core/cpu-cycles/}",
+ .check = test__hybrid_sw_hw_group_event,
+ /* 2 */
+ },
+ {
+ .name = "{cpu_core/cpu-cycles/,cpu-clock}",
+ .check = test__hybrid_hw_sw_group_event,
+ /* 3 */
+ },
+ {
+ .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}",
+ .check = test__hybrid_group_modifier1,
+ /* 4 */
+ },
+ {
+ .name = "r1a",
+ .check = test__hybrid_raw1,
+ /* 5 */
+ },
+ {
+ .name = "cpu_core/r1a/",
+ .check = test__hybrid_raw2,
+ /* 6 */
+ },
+ {
+ .name = "cpu_core/config=10,config1,config2=3,period=1000/u",
+ .check = test__checkevent_pmu,
+ /* 7 */
+ },
+ {
+ .name = "cpu_core/LLC-loads/",
+ .check = test__hybrid_cache_event,
+ /* 8 */
+ },
+};
+
+static int test_event(const struct evlist_test *e)
+{
+ struct parse_events_error err;
+ struct evlist *evlist;
+ int ret;
+
+ if (e->valid && !e->valid()) {
+ pr_debug("... SKIP\n");
+ return TEST_OK;
+ }
+
+ evlist = evlist__new();
+ if (evlist == NULL) {
+ pr_err("Failed allocation");
+ return TEST_FAIL;
+ }
+ parse_events_error__init(&err);
+ ret = parse_events(evlist, e->name, &err);
+ if (ret) {
+ pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+ e->name, ret, err.str);
+ parse_events_error__print(&err, e->name);
+ ret = TEST_FAIL;
+ if (strstr(err.str, "can't access trace events"))
+ ret = TEST_SKIP;
+ } else {
+ ret = e->check(evlist);
+ }
+ parse_events_error__exit(&err);
+ evlist__delete(evlist);
+
+ return ret;
+}
+
+static int combine_test_results(int existing, int latest)
+{
+ if (existing == TEST_FAIL)
+ return TEST_FAIL;
+ if (existing == TEST_SKIP)
+ return latest == TEST_OK ? TEST_SKIP : latest;
+ return latest;
+}
+
+static int test_events(const struct evlist_test *events, int cnt)
+{
+ int ret = TEST_OK;
+
+ for (int i = 0; i < cnt; i++) {
+ const struct evlist_test *e = &events[i];
+ int test_ret;
+
+ pr_debug("running test %d '%s'\n", i, e->name);
+ test_ret = test_event(e);
+ if (test_ret != TEST_OK) {
+ pr_debug("Event test failure: test %d '%s'", i, e->name);
+ ret = combine_test_results(ret, test_ret);
+ }
+ }
+
+ return ret;
+}
+
+int test__hybrid(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ if (perf_pmus__num_core_pmus() == 1)
+ return TEST_SKIP;
+
+ return test_events(test__hybrid_events, ARRAY_SIZE(test__hybrid_events));
+}
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index 735257d205b5..7b5eb8baf0f2 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -18,14 +18,14 @@ struct test_data {
const char *asm_rep;
};
-struct test_data test_data_32[] = {
+const struct test_data test_data_32[] = {
#include "insn-x86-dat-32.c"
{{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"},
{{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"},
{{0}, 0, 0, NULL, NULL, NULL},
};
-struct test_data test_data_64[] = {
+const struct test_data test_data_64[] = {
#include "insn-x86-dat-64.c"
{{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"},
{{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"},
@@ -97,7 +97,7 @@ static int get_branch(const char *branch_str)
return -1;
}
-static int test_data_item(struct test_data *dat, int x86_64)
+static int test_data_item(const struct test_data *dat, int x86_64)
{
struct intel_pt_insn intel_pt_insn;
int op, branch, ret;
@@ -147,9 +147,9 @@ static int test_data_item(struct test_data *dat, int x86_64)
return 0;
}
-static int test_data_set(struct test_data *dat_set, int x86_64)
+static int test_data_set(const struct test_data *dat_set, int x86_64)
{
- struct test_data *dat;
+ const struct test_data *dat;
int ret = 0;
for (dat = dat_set; dat->expected_length; dat++) {
diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c
index 70b7f79396b1..09d61fa736e3 100644
--- a/tools/perf/arch/x86/tests/intel-pt-test.c
+++ b/tools/perf/arch/x86/tests/intel-pt-test.c
@@ -22,7 +22,7 @@
* @new_ctx: expected new packet context
* @ctx_unchanged: the packet context must not change
*/
-static struct test_data {
+static const struct test_data {
int len;
u8 bytes[INTEL_PT_PKT_MAX_SZ];
enum intel_pt_pkt_ctx ctx;
@@ -186,7 +186,7 @@ static struct test_data {
{0, {0}, 0, {0, 0, 0}, 0, 0 },
};
-static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len)
+static int dump_packet(const struct intel_pt_pkt *packet, const u8 *bytes, int len)
{
char desc[INTEL_PT_PKT_DESC_MAX];
int ret, i;
@@ -206,14 +206,14 @@ static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len)
return TEST_OK;
}
-static void decoding_failed(struct test_data *d)
+static void decoding_failed(const struct test_data *d)
{
pr_debug("Decoding failed!\n");
pr_debug("Decoding: ");
dump_packet(&d->packet, d->bytes, d->len);
}
-static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len,
+static int fail(const struct test_data *d, struct intel_pt_pkt *packet, int len,
enum intel_pt_pkt_ctx new_ctx)
{
decoding_failed(d);
@@ -242,7 +242,7 @@ static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len,
return TEST_FAIL;
}
-static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet,
+static int test_ctx_unchanged(const struct test_data *d, struct intel_pt_pkt *packet,
enum intel_pt_pkt_ctx ctx)
{
enum intel_pt_pkt_ctx old_ctx = ctx;
@@ -258,7 +258,7 @@ static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet,
return TEST_OK;
}
-static int test_one(struct test_data *d)
+static int test_one(const struct test_data *d)
{
struct intel_pt_pkt packet;
enum intel_pt_pkt_ctx ctx = d->ctx;
@@ -307,7 +307,7 @@ static int test_one(struct test_data *d)
*/
int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
- struct test_data *d = data;
+ const struct test_data *d = data;
int ret;
for (d = data; d->len; d++) {
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 195ccfdef7aa..005907cb97d8 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -10,6 +10,7 @@ perf-y += evlist.o
perf-y += mem-events.o
perf-y += evsel.o
perf-y += iostat.o
+perf-y += env.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 330d03216b0e..354780ff1605 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -10,6 +10,7 @@
#include "../../../util/header.h"
#include "../../../util/debug.h"
#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/auxtrace.h"
#include "../../../util/intel-pt.h"
#include "../../../util/intel-bts.h"
@@ -25,8 +26,8 @@ struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
bool found_pt = false;
bool found_bts = false;
- intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
- intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+ intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
+ intel_bts_pmu = perf_pmus__find(INTEL_BTS_PMU_NAME);
evlist__for_each_entry(evlist, evsel) {
if (intel_pt_pmu && evsel->core.attr.type == intel_pt_pmu->type)
diff --git a/tools/perf/arch/x86/util/env.c b/tools/perf/arch/x86/util/env.c
new file mode 100644
index 000000000000..3e537ffb1353
--- /dev/null
+++ b/tools/perf/arch/x86/util/env.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "linux/string.h"
+#include "util/env.h"
+#include "env.h"
+
+bool x86__is_amd_cpu(void)
+{
+ struct perf_env env = { .total_mem = 0, };
+ static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */
+
+ if (is_amd)
+ goto ret;
+
+ perf_env__cpuid(&env);
+ is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD") ? 1 : -1;
+ perf_env__exit(&env);
+ret:
+ return is_amd >= 1 ? true : false;
+}
diff --git a/tools/perf/arch/x86/util/env.h b/tools/perf/arch/x86/util/env.h
new file mode 100644
index 000000000000..d78f080b6b3f
--- /dev/null
+++ b/tools/perf/arch/x86/util/env.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_ENV_H
+#define _X86_ENV_H
+
+bool x86__is_amd_cpu(void);
+
+#endif /* _X86_ENV_H */
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index d4193479a364..cbd582182932 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -1,38 +1,42 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "util/pmu.h"
+#include "util/pmus.h"
#include "util/evlist.h"
#include "util/parse-events.h"
#include "util/event.h"
-#include "util/pmu-hybrid.h"
#include "topdown.h"
+#include "evsel.h"
static int ___evlist__add_default_attrs(struct evlist *evlist,
struct perf_event_attr *attrs,
size_t nr_attrs)
{
- struct perf_cpu_map *cpus;
- struct evsel *evsel, *n;
- struct perf_pmu *pmu;
LIST_HEAD(head);
size_t i = 0;
for (i = 0; i < nr_attrs; i++)
event_attr_init(attrs + i);
- if (!perf_pmu__has_hybrid())
+ if (perf_pmus__num_core_pmus() == 1)
return evlist__add_attrs(evlist, attrs, nr_attrs);
for (i = 0; i < nr_attrs; i++) {
+ struct perf_pmu *pmu = NULL;
+
if (attrs[i].type == PERF_TYPE_SOFTWARE) {
- evsel = evsel__new(attrs + i);
+ struct evsel *evsel = evsel__new(attrs + i);
+
if (evsel == NULL)
goto out_delete_partial_list;
list_add_tail(&evsel->core.node, &head);
continue;
}
- perf_pmu__for_each_hybrid_pmu(pmu) {
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ struct perf_cpu_map *cpus;
+ struct evsel *evsel;
+
evsel = evsel__new(attrs + i);
if (evsel == NULL)
goto out_delete_partial_list;
@@ -50,8 +54,12 @@ static int ___evlist__add_default_attrs(struct evlist *evlist,
return 0;
out_delete_partial_list:
- __evlist__for_each_entry_safe(&head, n, evsel)
- evsel__delete(evsel);
+ {
+ struct evsel *evsel, *n;
+
+ __evlist__for_each_entry_safe(&head, n, evsel)
+ evsel__delete(evsel);
+ }
return -1;
}
@@ -67,8 +75,7 @@ int arch_evlist__add_default_attrs(struct evlist *evlist,
int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
{
- if (topdown_sys_has_perf_metrics() &&
- (!lhs->pmu_name || !strncmp(lhs->pmu_name, "cpu", 3))) {
+ if (topdown_sys_has_perf_metrics() && evsel__sys_has_perf_metrics(lhs)) {
/* Ensure the topdown slots comes first. */
if (strcasestr(lhs->name, "slots"))
return -1;
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index ea3972d785d1..512c2d885d24 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -4,9 +4,11 @@
#include "util/evsel.h"
#include "util/env.h"
#include "util/pmu.h"
+#include "util/pmus.h"
#include "linux/string.h"
#include "evsel.h"
#include "util/debug.h"
+#include "env.h"
#define IBS_FETCH_L3MISSONLY (1ULL << 59)
#define IBS_OP_L3MISSONLY (1ULL << 16)
@@ -16,26 +18,6 @@ void arch_evsel__set_sample_weight(struct evsel *evsel)
evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
}
-void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr)
-{
- struct perf_env env = { .total_mem = 0, } ;
-
- if (!perf_env__cpuid(&env))
- return;
-
- /*
- * On AMD, precise cycles event sampling internally uses IBS pmu.
- * But IBS does not have filtering capabilities and perf by default
- * sets exclude_guest = 1. This makes IBS pmu event init fail and
- * thus perf ends up doing non-precise sampling. Avoid it by clearing
- * exclude_guest.
- */
- if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD"))
- attr->exclude_guest = 0;
-
- free(env.cpuid);
-}
-
/* Check whether the evsel's PMU supports the perf metrics */
bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
{
@@ -50,7 +32,7 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
* should be good enough to detect the perf metrics feature.
*/
if ((evsel->core.attr.type == PERF_TYPE_RAW) &&
- pmu_have_event(pmu_name, "slots"))
+ perf_pmus__have_event(pmu_name, "slots"))
return true;
return false;
@@ -97,29 +79,16 @@ void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr)
{
struct perf_pmu *evsel_pmu, *ibs_fetch_pmu, *ibs_op_pmu;
static int warned_once;
- /* 0: Uninitialized, 1: Yes, -1: No */
- static int is_amd;
- if (warned_once || is_amd == -1)
+ if (warned_once || !x86__is_amd_cpu())
return;
- if (!is_amd) {
- struct perf_env *env = evsel__env(evsel);
-
- if (!perf_env__cpuid(env) || !env->cpuid ||
- !strstarts(env->cpuid, "AuthenticAMD")) {
- is_amd = -1;
- return;
- }
- is_amd = 1;
- }
-
evsel_pmu = evsel__find_pmu(evsel);
if (!evsel_pmu)
return;
- ibs_fetch_pmu = perf_pmu__find("ibs_fetch");
- ibs_op_pmu = perf_pmu__find("ibs_op");
+ ibs_fetch_pmu = perf_pmus__find("ibs_fetch");
+ ibs_op_pmu = perf_pmus__find("ibs_op");
if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) {
if (attr->config & IBS_FETCH_L3MISSONLY) {
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 439c2956f3e7..d2c8cac11470 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -17,7 +17,7 @@
#include "../../../util/evlist.h"
#include "../../../util/mmap.h"
#include "../../../util/session.h"
-#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/debug.h"
#include "../../../util/record.h"
#include "../../../util/tsc.h"
@@ -416,7 +416,7 @@ out_err:
struct auxtrace_record *intel_bts_recording_init(int *err)
{
- struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+ struct perf_pmu *intel_bts_pmu = perf_pmus__find(INTEL_BTS_PMU_NAME);
struct intel_bts_recording *btsr;
if (!intel_bts_pmu)
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 17336da08b58..74b70fd379df 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -23,7 +23,7 @@
#include "../../../util/mmap.h"
#include <subcmd/parse-options.h>
#include "../../../util/parse-events.h"
-#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
#include "../../../util/perf_api_probe.h"
@@ -1185,7 +1185,7 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
struct auxtrace_record *intel_pt_recording_init(int *err)
{
- struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
+ struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
struct intel_pt_recording *ptr;
if (!intel_pt_pmu)
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index f683ac702247..a8a782bcb121 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include "util/pmu.h"
+#include "util/pmus.h"
#include "util/env.h"
#include "map_symbol.h"
#include "mem-events.h"
#include "linux/string.h"
+#include "env.h"
static char mem_loads_name[100];
static bool mem_loads_name__init;
@@ -26,28 +28,12 @@ static struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
E("mem-ldst", "ibs_op//", "ibs_op"),
};
-static int perf_mem_is_amd_cpu(void)
-{
- struct perf_env env = { .total_mem = 0, };
-
- perf_env__cpuid(&env);
- if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD"))
- return 1;
- return -1;
-}
-
struct perf_mem_event *perf_mem_events__ptr(int i)
{
- /* 0: Uninitialized, 1: Yes, -1: No */
- static int is_amd;
-
if (i >= PERF_MEM_EVENTS__MAX)
return NULL;
- if (!is_amd)
- is_amd = perf_mem_is_amd_cpu();
-
- if (is_amd == 1)
+ if (x86__is_amd_cpu())
return &perf_mem_events_amd[i];
return &perf_mem_events_intel[i];
@@ -55,13 +41,13 @@ struct perf_mem_event *perf_mem_events__ptr(int i)
bool is_mem_loads_aux_event(struct evsel *leader)
{
- if (perf_pmu__find("cpu")) {
- if (!pmu_have_event("cpu", "mem-loads-aux"))
- return false;
- } else if (perf_pmu__find("cpu_core")) {
- if (!pmu_have_event("cpu_core", "mem-loads-aux"))
- return false;
- }
+ struct perf_pmu *pmu = perf_pmus__find("cpu");
+
+ if (!pmu)
+ pmu = perf_pmus__find("cpu_core");
+
+ if (pmu && !perf_pmu__have_event(pmu, "mem-loads-aux"))
+ return false;
return leader->core.attr.config == MEM_LOADS_AUX;
}
@@ -82,7 +68,7 @@ char *perf_mem_events__name(int i, char *pmu_name)
pmu_name = (char *)"cpu";
}
- if (pmu_have_event(pmu_name, "mem-loads-aux")) {
+ if (perf_pmus__have_event(pmu_name, "mem-loads-aux")) {
scnprintf(mem_loads_name, sizeof(mem_loads_name),
MEM_LOADS_AUX_NAME, pmu_name, pmu_name,
perf_mem_events__loads_ldlat);
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 0ed177991ad0..8ad4112ad10c 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -10,7 +10,7 @@
#include "../../../util/debug.h"
#include "../../../util/event.h"
#include "../../../util/pmu.h"
-#include "../../../util/pmu-hybrid.h"
+#include "../../../util/pmus.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
@@ -286,20 +286,25 @@ uint64_t arch__intr_reg_mask(void)
.disabled = 1,
.exclude_kernel = 1,
};
- struct perf_pmu *pmu;
int fd;
/*
* In an unnamed union, init it here to build on older gcc versions
*/
attr.sample_period = 1;
- if (perf_pmu__has_hybrid()) {
+ if (perf_pmus__num_core_pmus() > 1) {
+ struct perf_pmu *pmu = NULL;
+ __u64 type = PERF_TYPE_RAW;
+
/*
* The same register set is supported among different hybrid PMUs.
* Only check the first available one.
*/
- pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list);
- attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ type = pmu->type;
+ break;
+ }
+ attr.config |= type << PERF_PMU_TYPE_SHIFT;
}
event_attr_init(&attr);
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 3c0de3370d7e..65d8cdff4d5f 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -14,6 +14,8 @@
#include "../../../util/intel-bts.h"
#include "../../../util/pmu.h"
#include "../../../util/fncache.h"
+#include "../../../util/pmus.h"
+#include "env.h"
struct pmu_alias {
char *name;
@@ -168,3 +170,13 @@ char *pmu_find_alias_name(const char *name)
return __pmu_find_alias_name(name);
}
+
+int perf_pmus__num_mem_pmus(void)
+{
+ /* AMD uses IBS OP pmu and not a core PMU for perf mem/c2c */
+ if (x86__is_amd_cpu())
+ return 1;
+
+ /* Intel uses core pmus for perf mem/c2c */
+ return perf_pmus__num_core_pmus();
+}
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index 9ad5e5c7bd27..3f9a267d4501 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -2,6 +2,7 @@
#include "api/fs/fs.h"
#include "util/evsel.h"
#include "util/pmu.h"
+#include "util/pmus.h"
#include "util/topdown.h"
#include "topdown.h"
#include "evsel.h"
@@ -22,8 +23,8 @@ bool topdown_sys_has_perf_metrics(void)
* The slots event is only available when the core PMU
* supports the perf metrics feature.
*/
- pmu = perf_pmu__find_by_type(PERF_TYPE_RAW);
- if (pmu && pmu_have_event(pmu->name, "slots"))
+ pmu = perf_pmus__find_by_type(PERF_TYPE_RAW);
+ if (pmu && perf_pmu__have_event(pmu, "slots"))
has_perf_metrics = true;
cached = true;