summaryrefslogtreecommitdiff
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Build1
-rw-r--r--tools/perf/Documentation/Build.txt28
-rw-r--r--tools/perf/Documentation/perf-annotate.txt3
-rw-r--r--tools/perf/Documentation/perf-check.txt82
-rw-r--r--tools/perf/Documentation/perf-ftrace.txt48
-rw-r--r--tools/perf/Documentation/perf-kvm.txt6
-rw-r--r--tools/perf/Documentation/perf-list.txt1
-rw-r--r--tools/perf/Documentation/perf-mem.txt94
-rw-r--r--tools/perf/Documentation/perf-record.txt14
-rw-r--r--tools/perf/Documentation/perf-report.txt1
-rw-r--r--tools/perf/Documentation/perf-sched.txt9
-rw-r--r--tools/perf/Documentation/perf-script.txt5
-rw-r--r--tools/perf/Documentation/perf-stat.txt8
-rw-r--r--tools/perf/Documentation/perf-top.txt4
-rw-r--r--tools/perf/Documentation/perf-trace.txt4
-rw-r--r--tools/perf/Documentation/topdown.txt30
-rw-r--r--tools/perf/Makefile8
-rw-r--r--tools/perf/Makefile.config87
-rw-r--r--tools/perf/Makefile.perf33
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c12
-rw-r--r--tools/perf/arch/arm/util/pmu.c3
-rw-r--r--tools/perf/arch/arm64/annotate/instructions.c3
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c108
-rw-r--r--tools/perf/arch/arm64/util/hisi-ptt.c1
-rw-r--r--tools/perf/arch/loongarch/annotate/instructions.c6
-rw-r--r--tools/perf/arch/powerpc/annotate/instructions.c254
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl6
-rw-r--r--tools/perf/arch/powerpc/util/dwarf-regs.c53
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c5
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/x86/Makefile6
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c389
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_32.tbl470
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl8
-rw-r--r--tools/perf/arch/x86/util/event.c4
-rw-r--r--tools/perf/arch/x86/util/evlist.c6
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c1
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c1
-rw-r--r--tools/perf/bench/synthesize.c2
-rw-r--r--tools/perf/builtin-annotate.c77
-rw-r--r--tools/perf/builtin-buildid-list.c10
-rw-r--r--tools/perf/builtin-c2c.c47
-rw-r--r--tools/perf/builtin-check.c180
-rw-r--r--tools/perf/builtin-daemon.c19
-rw-r--r--tools/perf/builtin-diff.c38
-rw-r--r--tools/perf/builtin-evlist.c18
-rw-r--r--tools/perf/builtin-ftrace.c462
-rw-r--r--tools/perf/builtin-help.c2
-rw-r--r--tools/perf/builtin-inject.c739
-rw-r--r--tools/perf/builtin-kmem.c22
-rw-r--r--tools/perf/builtin-kvm.c22
-rw-r--r--tools/perf/builtin-kwork.c36
-rw-r--r--tools/perf/builtin-list.c2
-rw-r--r--tools/perf/builtin-lock.c44
-rw-r--r--tools/perf/builtin-mem.c161
-rw-r--r--tools/perf/builtin-record.c79
-rw-r--r--tools/perf/builtin-report.c106
-rw-r--r--tools/perf/builtin-sched.c232
-rw-r--r--tools/perf/builtin-script.c184
-rw-r--r--tools/perf/builtin-stat.c42
-rw-r--r--tools/perf/builtin-timechart.c25
-rw-r--r--tools/perf/builtin-top.c10
-rw-r--r--tools/perf/builtin-trace.c497
-rw-r--r--tools/perf/builtin-version.c43
-rw-r--r--tools/perf/builtin.h17
-rw-r--r--tools/perf/check-header_ignore_hunks/lib/list_sort.c31
-rwxr-xr-xtools/perf/check-headers.sh32
-rw-r--r--tools/perf/perf.c1
-rw-r--r--tools/perf/pmu-events/Build12
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json3
-rw-r--r--tools/perf/pmu-events/arch/arm64/thead/yitian710/sys/ali_drw.json (renamed from tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/thead/yitian710/sys/metrics.json (renamed from tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json)0
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/cache.json20
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/datasource.json40
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/frontend.json30
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/locks.json10
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/memory.json30
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/others.json106
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pipeline.json45
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pmc.json10
-rw-r--r--tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json2
-rw-r--r--tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json2
-rw-r--r--tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json2
-rw-r--r--tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json2
-rw-r--r--tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json60
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/metricgroups.json142
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json2535
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-cache.json60
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json57
-rw-r--r--tools/perf/pmu-events/empty-pmu-events.c894
-rwxr-xr-xtools/perf/pmu-events/jevents.py27
-rwxr-xr-xtools/perf/pmu-events/models.py73
-rw-r--r--tools/perf/pmu-events/pmu-events.h9
-rwxr-xr-xtools/perf/scripts/python/arm-cs-trace-disasm.py9
-rw-r--r--tools/perf/tests/bp_account.c4
-rw-r--r--tools/perf/tests/bp_signal.c3
-rw-r--r--tools/perf/tests/bp_signal_overflow.c3
-rw-r--r--tools/perf/tests/builtin-test.c1
-rw-r--r--tools/perf/tests/cpumap.c6
-rw-r--r--tools/perf/tests/dlfilter-test.c2
-rw-r--r--tools/perf/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/tests/event_update.c9
-rw-r--r--tools/perf/tests/make4
-rw-r--r--tools/perf/tests/parse-events.c6
-rw-r--r--tools/perf/tests/pmu-events.c12
-rw-r--r--tools/perf/tests/pmu.c11
-rwxr-xr-xtools/perf/tests/shell/annotate.sh3
-rw-r--r--tools/perf/tests/shell/base_probe/settings.sh48
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_adding_blacklisted.sh67
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_adding_kernel.sh3
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_basic.sh80
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_invalid_options.sh79
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_line_semantics.sh55
-rwxr-xr-xtools/perf/tests/shell/base_report/setup.sh32
-rw-r--r--tools/perf/tests/shell/base_report/stderr-whitelist.txt5
-rwxr-xr-xtools/perf/tests/shell/base_report/test_basic.sh190
-rwxr-xr-xtools/perf/tests/shell/common/check_errors_whitelisted.pl51
-rw-r--r--tools/perf/tests/shell/common/init.sh31
-rw-r--r--tools/perf/tests/shell/common/settings.sh28
-rwxr-xr-xtools/perf/tests/shell/ftrace.sh89
-rw-r--r--tools/perf/tests/shell/lib/perf_metric_validation.py10
-rw-r--r--tools/perf/tests/shell/lib/probe_vfs_getname.sh11
-rwxr-xr-xtools/perf/tests/shell/perftool-testsuite_report.sh23
-rwxr-xr-xtools/perf/tests/shell/pipe_test.sh129
-rwxr-xr-xtools/perf/tests/shell/record+probe_libc_inet_pton.sh5
-rwxr-xr-xtools/perf/tests/shell/record+script_probe_vfs_getname.sh5
-rwxr-xr-xtools/perf/tests/shell/record.sh59
-rwxr-xr-xtools/perf/tests/shell/record_bpf_filter.sh86
-rwxr-xr-xtools/perf/tests/shell/record_lbr.sh161
-rwxr-xr-xtools/perf/tests/shell/script.sh3
-rwxr-xr-xtools/perf/tests/shell/test_stat_intel_tpebs.sh19
-rwxr-xr-xtools/perf/tests/shell/test_task_analyzer.sh7
-rwxr-xr-xtools/perf/tests/shell/test_uprobe_from_different_cu.sh7
-rwxr-xr-xtools/perf/tests/shell/trace_btf_enum.sh62
-rw-r--r--tools/perf/tests/stat.c6
-rw-r--r--tools/perf/tests/tests-scripts.c37
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/tests/thread-map.c2
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c5
-rw-r--r--tools/perf/tests/workloads/Build1
-rw-r--r--tools/perf/tests/workloads/landlock.c66
-rw-r--r--tools/perf/tests/wp.c5
-rw-r--r--tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h4
-rw-r--r--tools/perf/trace/beauty/beauty.h11
-rwxr-xr-xtools/perf/trace/beauty/fs_at_flags.sh5
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h6
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fcntl.h84
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h163
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/mount.h10
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/sched.h1
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/stat.h12
-rw-r--r--tools/perf/trace/beauty/include/uapi/sound/asound.h26
-rw-r--r--tools/perf/trace/beauty/msg_flags.c4
-rw-r--r--tools/perf/trace/beauty/perf_event_open.c6
-rw-r--r--tools/perf/trace/beauty/sockaddr.c2
-rw-r--r--tools/perf/trace/beauty/timespec.c2
-rw-r--r--tools/perf/ui/browsers/annotate-data.c376
-rw-r--r--tools/perf/ui/browsers/annotate.c20
-rw-r--r--tools/perf/ui/browsers/hists.c18
-rw-r--r--tools/perf/ui/hist.c10
-rw-r--r--tools/perf/ui/stdio/hist.c4
-rw-r--r--tools/perf/util/Build10
-rw-r--r--tools/perf/util/annotate-data.c1164
-rw-r--r--tools/perf/util/annotate-data.h86
-rw-r--r--tools/perf/util/annotate.c360
-rw-r--r--tools/perf/util/annotate.h33
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c2
-rw-r--r--tools/perf/util/arm-spe.c55
-rw-r--r--tools/perf/util/auxtrace.c16
-rw-r--r--tools/perf/util/auxtrace.h21
-rw-r--r--tools/perf/util/block-info.c66
-rw-r--r--tools/perf/util/block-info.h8
-rw-r--r--tools/perf/util/bpf-event.c4
-rw-r--r--tools/perf/util/bpf-filter.c631
-rw-r--r--tools/perf/util/bpf-filter.h19
-rw-r--r--tools/perf/util/bpf-filter.l28
-rw-r--r--tools/perf/util/bpf-filter.y28
-rw-r--r--tools/perf/util/bpf_counter_cgroup.c6
-rw-r--r--tools/perf/util/bpf_ftrace.c8
-rw-r--r--tools/perf/util/bpf_kwork.c9
-rw-r--r--tools/perf/util/bpf_kwork_top.c7
-rw-r--r--tools/perf/util/bpf_lock_contention.c48
-rw-r--r--tools/perf/util/bpf_map.c3
-rw-r--r--tools/perf/util/bpf_off_cpu.c16
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c222
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.bpf.c2
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c7
-rw-r--r--tools/perf/util/bpf_skel/kwork_top.bpf.c2
-rw-r--r--tools/perf/util/bpf_skel/kwork_trace.bpf.c5
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c53
-rw-r--r--tools/perf/util/bpf_skel/lock_data.h4
-rw-r--r--tools/perf/util/bpf_skel/off_cpu.bpf.c9
-rw-r--r--tools/perf/util/bpf_skel/sample-filter.h13
-rw-r--r--tools/perf/util/bpf_skel/sample_filter.bpf.c105
-rw-r--r--tools/perf/util/bpf_skel/vmlinux/vmlinux.h7
-rw-r--r--tools/perf/util/branch.h1
-rw-r--r--tools/perf/util/build-id.c40
-rw-r--r--tools/perf/util/build-id.h8
-rw-r--r--tools/perf/util/callchain.c37
-rw-r--r--tools/perf/util/callchain.h6
-rw-r--r--tools/perf/util/cap.c63
-rw-r--r--tools/perf/util/cap.h23
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c36
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h2
-rw-r--r--tools/perf/util/cs-etm.c675
-rw-r--r--tools/perf/util/cs-etm.h12
-rw-r--r--tools/perf/util/data-convert-bt.c34
-rw-r--r--tools/perf/util/data-convert-json.c47
-rw-r--r--tools/perf/util/data.c7
-rw-r--r--tools/perf/util/debuginfo.h2
-rw-r--r--tools/perf/util/disasm.c852
-rw-r--r--tools/perf/util/disasm.h19
-rw-r--r--tools/perf/util/disasm_bpf.c195
-rw-r--r--tools/perf/util/disasm_bpf.h12
-rw-r--r--tools/perf/util/dso.c6
-rw-r--r--tools/perf/util/dso.h9
-rw-r--r--tools/perf/util/dsos.c12
-rw-r--r--tools/perf/util/dsos.h2
-rw-r--r--tools/perf/util/dump-insn.c2
-rw-r--r--tools/perf/util/dump-insn.h2
-rw-r--r--tools/perf/util/dwarf-aux.c18
-rw-r--r--tools/perf/util/dwarf-aux.h3
-rw-r--r--tools/perf/util/env.c15
-rw-r--r--tools/perf/util/env.h3
-rw-r--r--tools/perf/util/event.c54
-rw-r--r--tools/perf/util/event.h38
-rw-r--r--tools/perf/util/events_stats.h15
-rw-r--r--tools/perf/util/evlist.c89
-rw-r--r--tools/perf/util/evlist.h7
-rw-r--r--tools/perf/util/evsel.c122
-rw-r--r--tools/perf/util/evsel.h27
-rw-r--r--tools/perf/util/evsel_fprintf.c2
-rw-r--r--tools/perf/util/ftrace.h3
-rw-r--r--tools/perf/util/header.c157
-rw-r--r--tools/perf/util/header.h25
-rw-r--r--tools/perf/util/hisi-ptt.c6
-rw-r--r--tools/perf/util/hist.c63
-rw-r--r--tools/perf/util/hist.h4
-rw-r--r--tools/perf/util/include/dwarf-regs.h11
-rw-r--r--tools/perf/util/intel-bts.c37
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c5
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c2
-rw-r--r--tools/perf/util/intel-pt.c30
-rw-r--r--tools/perf/util/intel-tpebs.c432
-rw-r--r--tools/perf/util/intel-tpebs.h35
-rw-r--r--tools/perf/util/jit.h3
-rw-r--r--tools/perf/util/jitdump.c10
-rw-r--r--tools/perf/util/llvm-c-helpers.cpp197
-rw-r--r--tools/perf/util/llvm-c-helpers.h60
-rw-r--r--tools/perf/util/machine.c120
-rw-r--r--tools/perf/util/machine.h36
-rw-r--r--tools/perf/util/map.c25
-rw-r--r--tools/perf/util/map.h22
-rw-r--r--tools/perf/util/map_symbol.c18
-rw-r--r--tools/perf/util/map_symbol.h3
-rw-r--r--tools/perf/util/mem-events.c20
-rw-r--r--tools/perf/util/mem-events.h4
-rw-r--r--tools/perf/util/mem-info.c13
-rw-r--r--tools/perf/util/mem-info.h1
-rw-r--r--tools/perf/util/metricgroup.c10
-rw-r--r--tools/perf/util/mmap.c4
-rw-r--r--tools/perf/util/parse-events.c69
-rw-r--r--tools/perf/util/parse-events.h11
-rw-r--r--tools/perf/util/parse-events.l3
-rw-r--r--tools/perf/util/pmu.c75
-rw-r--r--tools/perf/util/pmu.h8
-rw-r--r--tools/perf/util/pmus.c22
-rw-r--r--tools/perf/util/pmus.h1
-rw-r--r--tools/perf/util/print-events.c3
-rw-r--r--tools/perf/util/print_insn.c14
-rw-r--r--tools/perf/util/python.c1
-rw-r--r--tools/perf/util/s390-cpumsf.c11
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c16
-rw-r--r--tools/perf/util/session.c394
-rw-r--r--tools/perf/util/session.h61
-rw-r--r--tools/perf/util/setup.py4
-rw-r--r--tools/perf/util/sort.c66
-rw-r--r--tools/perf/util/sort.h3
-rw-r--r--tools/perf/util/srcline.c59
-rw-r--r--tools/perf/util/stat-display.c3
-rw-r--r--tools/perf/util/stat-shadow.c2
-rw-r--r--tools/perf/util/symbol.c11
-rw-r--r--tools/perf/util/symbol_conf.h2
-rw-r--r--tools/perf/util/synthetic-events.c181
-rw-r--r--tools/perf/util/synthetic-events.h89
-rw-r--r--tools/perf/util/syscalltbl.c4
-rw-r--r--tools/perf/util/thread.c4
-rw-r--r--tools/perf/util/thread.h1
-rw-r--r--tools/perf/util/time-utils.c4
-rw-r--r--tools/perf/util/tool.c294
-rw-r--r--tools/perf/util/tool.h19
-rw-r--r--tools/perf/util/trace_augment.h6
-rw-r--r--tools/perf/util/tsc.c2
-rw-r--r--tools/perf/util/unwind-libunwind-local.c2
-rw-r--r--tools/perf/util/util.c12
-rw-r--r--tools/perf/util/vdso.c4
297 files changed, 15558 insertions, 4277 deletions
diff --git a/tools/perf/Build b/tools/perf/Build
index 1d4957957d75..3e486f7df94b 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -1,5 +1,6 @@
perf-bench-y += builtin-bench.o
perf-y += builtin-annotate.o
+perf-y += builtin-check.o
perf-y += builtin-config.o
perf-y += builtin-diff.o
perf-y += builtin-evlist.o
diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt
index 3766886c4bca..83dc87c662b6 100644
--- a/tools/perf/Documentation/Build.txt
+++ b/tools/perf/Documentation/Build.txt
@@ -71,3 +71,31 @@ supported by GCC. UBSan detects undefined behaviors of programs at runtime.
$ UBSAN_OPTIONS=print_stacktrace=1 ./perf record -a
If UBSan detects any problem at runtime, it outputs a “runtime error:” message.
+
+4) Cross compilation
+====================
+As Multiarch is commonly supported in Linux distributions, we can install
+libraries for multiple architectures on the same system and then cross-compile
+Linux perf. For example, Aarch64 libraries and toolchains can be installed on
+an x86_64 machine, allowing us to compile perf for an Aarch64 target.
+
+Below is the command for building the perf with dynamic linking.
+
+ $ cd /path/to/Linux
+ $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf
+
+For static linking, the option `LDFLAGS="-static"` is required.
+
+ $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \
+ LDFLAGS="-static" -C tools/perf
+
+In the embedded system world, a use case is to explicitly specify the package
+configuration paths for cross building:
+
+ $ PKG_CONFIG_SYSROOT_DIR="/path/to/cross/build/sysroot" \
+ PKG_CONFIG_LIBDIR="/usr/lib/:/usr/local/lib" \
+ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf
+
+In this case, the variable PKG_CONFIG_SYSROOT_DIR can be used alongside the
+variable PKG_CONFIG_LIBDIR or PKG_CONFIG_PATH to prepend the sysroot path to
+the library paths for cross compilation.
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index b95524bea021..156c5f37b051 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -165,6 +165,9 @@ include::itrace.txt[]
--type-stat::
Show stats for the data type annotation.
+--skip-empty::
+ Do not display empty (or dummy) events.
+
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-check.txt b/tools/perf/Documentation/perf-check.txt
new file mode 100644
index 000000000000..10f69fb6850b
--- /dev/null
+++ b/tools/perf/Documentation/perf-check.txt
@@ -0,0 +1,82 @@
+perf-check(1)
+===============
+
+NAME
+----
+perf-check - check if features are present in perf
+
+SYNOPSIS
+--------
+[verse]
+'perf check' [<options>]
+'perf check' {feature <feature_list>} [<options>]
+
+DESCRIPTION
+-----------
+With no subcommands given, 'perf check' command just prints the command
+usage on the standard output.
+
+If the subcommand 'feature' is used, then status of feature is printed
+on the standard output (unless '-q' is also passed), ie. whether it is
+compiled-in/built-in or not.
+Also, 'perf check feature' returns with exit status 0 if the feature
+is built-in, otherwise returns with exit status 1.
+
+SUBCOMMANDS
+-----------
+
+feature::
+
+ Print whether feature(s) is compiled-in or not, and also returns with an
+ exit status of 0, if passed feature(s) are compiled-in, else 1.
+
+ It expects a feature list as an argument. There can be a single feature
+ name/macro, or multiple features can also be passed as a comma-separated
+ list, in which case the exit status will be 0 only if all of the passed
+ features are compiled-in.
+
+ The feature names/macros are case-insensitive.
+
+ Example Usage:
+ perf check feature libtraceevent
+ perf check feature HAVE_LIBTRACEEVENT
+ perf check feature libtraceevent,bpf
+
+ Supported feature names/macro:
+ aio / HAVE_AIO_SUPPORT
+ bpf / HAVE_LIBBPF_SUPPORT
+ bpf_skeletons / HAVE_BPF_SKEL
+ debuginfod / HAVE_DEBUGINFOD_SUPPORT
+ dwarf / HAVE_DWARF_SUPPORT
+ dwarf_getlocations / HAVE_DWARF_GETLOCATIONS_SUPPORT
+ dwarf-unwind / HAVE_DWARF_UNWIND_SUPPORT
+ auxtrace / HAVE_AUXTRACE_SUPPORT
+ libaudit / HAVE_LIBAUDIT_SUPPORT
+ libbfd / HAVE_LIBBFD_SUPPORT
+ libcapstone / HAVE_LIBCAPSTONE_SUPPORT
+ libcrypto / HAVE_LIBCRYPTO_SUPPORT
+ libdw-dwarf-unwind / HAVE_DWARF_SUPPORT
+ libelf / HAVE_LIBELF_SUPPORT
+ libnuma / HAVE_LIBNUMA_SUPPORT
+ libopencsd / HAVE_CSTRACE_SUPPORT
+ libperl / HAVE_LIBPERL_SUPPORT
+ libpfm4 / HAVE_LIBPFM
+ libpython / HAVE_LIBPYTHON_SUPPORT
+ libslang / HAVE_SLANG_SUPPORT
+ libtraceevent / HAVE_LIBTRACEEVENT
+ libunwind / HAVE_LIBUNWIND_SUPPORT
+ lzma / HAVE_LZMA_SUPPORT
+ numa_num_possible_cpus / HAVE_LIBNUMA_SUPPORT
+ syscall_table / HAVE_SYSCALL_TABLE_SUPPORT
+ zlib / HAVE_ZLIB_SUPPORT
+ zstd / HAVE_ZSTD_SUPPORT
+
+OPTIONS
+-------
+-q::
+--quiet::
+ Do not print any messages or warnings
+
+ This can be used along with subcommands such as 'perf check feature'
+ to hide unnecessary output in test scripts, eg.
+ 'perf check feature --quiet libtraceevent'
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index d780b93fcf87..eaec8253be68 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -9,7 +9,7 @@ perf-ftrace - simple wrapper for kernel's ftrace functionality
SYNOPSIS
--------
[verse]
-'perf ftrace' {trace|latency} <command>
+'perf ftrace' {trace|latency|profile} <command>
DESCRIPTION
-----------
@@ -23,6 +23,9 @@ kernel's ftrace infrastructure.
'perf ftrace latency' calculates execution latency of a given function
(optionally with BPF) and display it as a histogram.
+ 'perf ftrace profile' show a execution profile for each function including
+ total, average, max time and the number of calls.
+
The following options apply to perf ftrace.
COMMON OPTIONS
@@ -125,6 +128,7 @@ OPTIONS for 'perf ftrace trace'
- verbose - Show process names, PIDs, timestamps, etc.
- thresh=<n> - Setup trace duration threshold in microseconds.
- depth=<n> - Set max depth for function graph tracer to follow.
+ - tail - Print function name at the end.
OPTIONS for 'perf ftrace latency'
@@ -145,6 +149,48 @@ OPTIONS for 'perf ftrace latency'
Use nano-second instead of micro-second as a base unit of the histogram.
+OPTIONS for 'perf ftrace profile'
+---------------------------------
+
+-T::
+--trace-funcs=::
+ Set function filter on the given function (or a glob pattern).
+ Multiple functions can be given by using this option more than once.
+ The function argument also can be a glob pattern. It will be passed
+ to 'set_ftrace_filter' in tracefs.
+
+-N::
+--notrace-funcs=::
+ Do not trace functions given by the argument. Like -T option, this
+ can be used more than once to specify multiple functions (or glob
+ patterns). It will be passed to 'set_ftrace_notrace' in tracefs.
+
+-G::
+--graph-funcs=::
+ Set graph filter on the given function (or a glob pattern). This is
+ useful to trace for functions executed from the given function. This
+ can be used more than once to specify multiple functions. It will be
+ passed to 'set_graph_function' in tracefs.
+
+-g::
+--nograph-funcs=::
+ Set graph notrace filter on the given function (or a glob pattern).
+ Like -G option, this is useful for the function_graph tracer only and
+ disables tracing for function executed from the given function. This
+ can be used more than once to specify multiple functions. It will be
+ passed to 'set_graph_notrace' in tracefs.
+
+-m::
+--buffer-size::
+ Set the size of per-cpu tracing buffer, <size> is expected to
+ be a number with appended unit character - B/K/M/G.
+
+-s::
+--sort=::
+ Sort the result by the given field. Available values are:
+ total, avg, max, count, name. Default is 'total'.
+
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-trace[1]
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index b66be66fe836..c26524d38f47 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -115,9 +115,9 @@ STAT LIVE OPTIONS
-m::
--mmap-pages=::
- Number of mmap data pages (must be a power of two) or size
- specification with appended unit character - B/K/M/G. The
- size is rounded up to have nearest pages power of two value.
+ Number of mmap data pages (must be a power of two) or size
+ specification in bytes with appended unit character - B/K/M/G.
+ The size is rounded up to the nearest power-of-two page value.
-a::
--all-cpus::
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 6bf2468f59d3..dea005410ec0 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -72,6 +72,7 @@ counted. The following modifiers exist:
W - group is weak and will fallback to non-group if not schedulable,
e - group or event are exclusive and do not share the PMU
b - use BPF aggregration (see perf stat --bpf-counters)
+ R - retire latency value of the event
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 47456b212e99..8a1bd9ff0f86 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -28,15 +28,8 @@ and kernel support is required. See linkperf:perf-arm-spe[1] for a setup guide.
Due to the statistical nature of SPE sampling, not every memory operation will
be sampled.
-OPTIONS
--------
-<command>...::
- Any command you can specify in a shell.
-
--i::
---input=<file>::
- Input file name.
-
+COMMON OPTIONS
+--------------
-f::
--force::
Don't do ownership validation
@@ -45,24 +38,9 @@ OPTIONS
--type=<type>::
Select the memory operation type: load or store (default: load,store)
--D::
---dump-raw-samples::
- Dump the raw decoded samples on the screen in a format that is easy to parse with
- one sample per line.
-
--x::
---field-separator=<separator>::
- Specify the field separator used when dump raw samples (-D option). By default,
- The separator is the space character.
-
--C::
---cpu=<cpu>::
- Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
- comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default
- is to monitor all CPUS.
--U::
---hide-unresolved::
- Only display entries resolved to a symbol.
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc)
-p::
--phys-data::
@@ -73,6 +51,9 @@ OPTIONS
RECORD OPTIONS
--------------
+<command>...::
+ Any command you can specify in a shell.
+
-e::
--event <event>::
Event selector. Use 'perf mem record -e list' to list available events.
@@ -85,14 +66,65 @@ RECORD OPTIONS
--all-user::
Configure all used events to run in user space.
--v::
---verbose::
- Be more verbose (show counter open errors, etc)
-
--ldlat <n>::
Specify desired latency for loads event. Supported on Intel and Arm64
processors only. Ignored on other archs.
+REPORT OPTIONS
+--------------
+-i::
+--input=<file>::
+ Input file name.
+
+-C::
+--cpu=<cpu>::
+ Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+ comma-separated list with no space: 0,1. Ranges of CPUs are specified with -
+ like 0-2. Default is to monitor all CPUS.
+
+-D::
+--dump-raw-samples::
+ Dump the raw decoded samples on the screen in a format that is easy to parse with
+ one sample per line.
+
+-s::
+--sort=<key>::
+ Group result by given key(s) - multiple keys can be specified
+ in CSV format. The keys are specific to memory samples are:
+ symbol_daddr, symbol_iaddr, dso_daddr, locked, tlb, mem, snoop,
+ dcacheline, phys_daddr, data_page_size, blocked.
+
+ - symbol_daddr: name of data symbol being executed on at the time of sample
+ - symbol_iaddr: name of code symbol being executed on at the time of sample
+ - dso_daddr: name of library or module containing the data being executed
+ on at the time of the sample
+ - locked: whether the bus was locked at the time of the sample
+ - tlb: type of tlb access for the data at the time of the sample
+ - mem: type of memory access for the data at the time of the sample
+ - snoop: type of snoop (if any) for the data at the time of the sample
+ - dcacheline: the cacheline the data address is on at the time of the sample
+ - phys_daddr: physical address of data being executed on at the time of sample
+ - data_page_size: the data page size of data being executed on at the time of sample
+ - blocked: reason of blocked load access for the data at the time of the sample
+
+ And the default sort keys are changed to local_weight, mem, sym, dso,
+ symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat.
+
+-T::
+--type-profile::
+ Show data-type profile result instead of code symbols. This requires
+ the debug information and it will change the default sort keys to:
+ mem, snoop, tlb, type.
+
+-U::
+--hide-unresolved::
+ Only display entries resolved to a symbol.
+
+-x::
+--field-separator=<separator>::
+ Specify the field separator used when dump raw samples (-D option). By default,
+ The separator is the space character.
+
In addition, for report all perf report options are valid, and for record
all perf record options.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index d6532ed97c02..242223240a08 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -273,10 +273,11 @@ OPTIONS
-m::
--mmap-pages=::
Number of mmap data pages (must be a power of two) or size
- specification with appended unit character - B/K/M/G. The
- size is rounded up to have nearest pages power of two value.
- Also, by adding a comma, the number of mmap pages for AUX
- area tracing can be specified.
+ specification in bytes with appended unit character - B/K/M/G.
+ The size is rounded up to the nearest power-of-two page value.
+ By adding a comma, an additional parameter with the same
+ semantics used for the normal mmap areas can be specified for
+ AUX tracing area.
-g::
Enables call-graph (stack chain/backtrace) recording for both
@@ -828,6 +829,11 @@ filtered through the mask provided by -C option.
only, as of now. So the applications built without the frame
pointer might see bogus addresses.
+--setup-filter=<action>::
+ Prepare BPF filter to be used by regular users. The action should be
+ either "pin" or "unpin". The filter can be used after it's pinned.
+
+
include::intel-hybrid.txt[]
SEE ALSO
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index d2b1593ef700..7c66d81ab978 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -614,6 +614,7 @@ include::itrace.txt[]
'Avg Cycles%' - block average sampled cycles / sum of total block average
sampled cycles
'Avg Cycles' - block average sampled cycles
+ 'Branch Counter' - block branch counter histogram (with -v showing the number)
--skip-empty::
Do not print 0 results in the --stat output.
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 84d49f9241b1..3db64954a267 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -212,6 +212,15 @@ OPTIONS for 'perf sched timehist'
--state::
Show task state when it switched out.
+--show-prio::
+ Show task priority.
+
+--prio::
+ Only show events for given task priority(ies). Multiple priorities can be
+ provided as a comma-separated list with no spaces: 0,120. Ranges of
+ priorities are specified with -: 120-129. A combination of both can also be
+ provided: 0,120-129.
+
OPTIONS for 'perf sched replay'
------------------------------
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index ff086ef05a0c..b72866ef270b 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -134,7 +134,7 @@ OPTIONS
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm,
insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size,
- code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat,
+ code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat, brcntr,
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
@@ -369,6 +369,9 @@ OPTIONS
--demangle-kernel::
Demangle kernel symbol names to human readable form (for C++ kernels).
+--addr2line=<path>::
+ Path to addr2line binary.
+
--header
Show perf.data header.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 29756a87ab6f..2bc063672486 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -498,6 +498,14 @@ To interpret the results it is usually needed to know on which
CPUs the workload runs on. If needed the CPUs can be forced using
taskset.
+--record-tpebs::
+Enable automatic sampling on Intel TPEBS retire_latency events (event with :R
+modifier). Without this option, perf would not capture dynamic retire_latency
+at runtime. Currently, a zero value is assigned to the retire_latency event when
+this option is not set. The TPEBS hardware feature starts from Intel Granite
+Rapids microarchitecture. This option only exists in X86_64 and is meaningful on
+Intel platforms with TPEBS feature.
+
--td-level::
Print the top-down statistics that equal the input level. It allows
users to print the interested top-down metrics level instead of the
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 667e5102075e..af3e4230c72f 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -83,8 +83,8 @@ Default is to monitor all CPUS.
-m <pages>::
--mmap-pages=<pages>::
Number of mmap data pages (must be a power of two) or size
- specification with appended unit character - B/K/M/G. The
- size is rounded up to have nearest pages power of two value.
+ specification in bytes with appended unit character - B/K/M/G.
+ The size is rounded up to the nearest power-of-two page value.
-p <pid>::
--pid=<pid>::
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index f0da8cf63e9a..6e0cc50bbc13 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -106,8 +106,8 @@ filter out the startup phase of the program, which is often very different.
-m::
--mmap-pages=::
Number of mmap data pages (must be a power of two) or size
- specification with appended unit character - B/K/M/G. The
- size is rounded up to have nearest pages power of two value.
+ specification in bytes with appended unit character - B/K/M/G.
+ The size is rounded up to the nearest power-of-two page value.
-C::
--cpu::
diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt
index ae0aee86844f..5c17fff694ee 100644
--- a/tools/perf/Documentation/topdown.txt
+++ b/tools/perf/Documentation/topdown.txt
@@ -325,6 +325,36 @@ other four level 2 metrics by subtracting corresponding metrics as below.
Fetch_Bandwidth = Frontend_Bound - Fetch_Latency
Core_Bound = Backend_Bound - Memory_Bound
+TPEBS in TopDown
+================
+
+TPEBS (Timed PEBS) is one of the new Intel PMU features provided since Granite
+Rapids microarchitecture. The TPEBS feature adds a 16 bit retire_latency field
+in the Basic Info group of the PEBS record. It records the Core cycles since the
+retirement of the previous instruction to the retirement of current instruction.
+Please refer to Section 8.4.1 of "Intel® Architecture Instruction Set Extensions
+Programming Reference" for more details about this feature. Because this feature
+extends PEBS record, sampling with weight option is required to get the
+retire_latency value.
+
+ perf record -e event_name -W ...
+
+In the most recent release of TMA, the metrics begin to use event retire_latency
+values in some of the metrics’ formulas on processors that support TPEBS feature.
+For previous generations that do not support TPEBS, the values are static and
+predefined per processor family by the hardware architects. Due to the diversity
+of workloads in execution environments, retire_latency values measured at real
+time are more accurate. Therefore, new TMA metrics that use TPEBS will provide
+more accurate performance analysis results.
+
+To support TPEBS in TMA metrics, a new modifier :R on event is added. Perf would
+capture retire_latency value of required events(event with :R in metric formula)
+with perf record. The retire_latency value would be used in metric calculation.
+Currently, this feature is supported through perf stat
+
+ perf stat -M metric_name --record-tpebs ...
+
+
[1] https://software.intel.com/en-us/top-down-microarchitecture-analysis-method-win
[2] https://sites.google.com/site/analysismethods/yasin-pubs
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 75f3f6e0a231..816d5d84816b 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -51,8 +51,14 @@ else
override DEBUG = 0
endif
+ifeq ($(JOBS),1)
+ BUILD_TYPE := sequential
+else
+ BUILD_TYPE := parallel
+endif
+
define print_msg
- @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n'
+ @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' $(BUILD_TYPE) build\n'
endef
define make
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index a4829b6532d8..4ddb27a48eed 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -31,14 +31,8 @@ $(call detected_var,SRCARCH)
ifneq ($(NO_SYSCALL_TABLE),1)
NO_SYSCALL_TABLE := 1
- ifeq ($(SRCARCH),x86)
- ifeq (${IS_64_BIT}, 1)
- NO_SYSCALL_TABLE := 0
- endif
- else
- ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips loongarch))
- NO_SYSCALL_TABLE := 0
- endif
+ ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 powerpc arm64 s390 mips loongarch))
+ NO_SYSCALL_TABLE := 0
endif
ifneq ($(NO_SYSCALL_TABLE),1)
@@ -55,8 +49,9 @@ endif
# Additional ARCH settings for x86
ifeq ($(SRCARCH),x86)
$(call detected,CONFIG_X86)
+ CFLAGS += -I$(OUTPUT)arch/x86/include/generated
ifeq (${IS_64_BIT}, 1)
- CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated
+ CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
$(call detected,CONFIG_X86_64)
@@ -152,7 +147,20 @@ ifdef LIBDW_DIR
endif
DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
- DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2
+ DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd
+
+ LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0
+ LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION)))
+ LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION)))
+
+ # Elfutils merged libebl.a into libdw.a starting from version 0.177,
+ # Link libebl.a only if libdw is older than this version.
+ ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0)
+ DWARFLIBS += -lebl
+ endif
+
+ # Must put -ldl after -lebl for dependency
+ DWARFLIBS += -ldl
endif
FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS)
@@ -182,20 +190,15 @@ endif
FEATURE_CHECK_CFLAGS-libzstd := $(LIBZSTD_CFLAGS)
FEATURE_CHECK_LDFLAGS-libzstd := $(LIBZSTD_LDFLAGS)
+# for linking with debug library, run like:
+# make DEBUG=1 PKG_CONFIG_PATH=/opt/libtraceevent/(lib|lib64)/pkgconfig
+
ifneq ($(NO_LIBTRACEEVENT),1)
ifeq ($(call get-executable,$(PKG_CONFIG)),)
$(error Error: $(PKG_CONFIG) needed by libtraceevent is missing on this system, please install it)
endif
endif
-# for linking with debug library, run like:
-# make DEBUG=1 PKG_CONFIG_PATH=/opt/libtraceevent/(lib|lib64)/pkgconfig
-FEATURE_CHECK_CFLAGS-libtraceevent := $(shell $(PKG_CONFIG) --cflags libtraceevent 2>/dev/null)
-FEATURE_CHECK_LDFLAGS-libtraceevent := $(shell $(PKG_CONFIG) --libs libtraceevent 2>/dev/null)
-
-FEATURE_CHECK_CFLAGS-libtracefs := $(shell $(PKG_CONFIG) --cflags libtracefs 2>/dev/null)
-FEATURE_CHECK_LDFLAGS-libtracefs := $(shell $(PKG_CONFIG) --libs libtracefs 2>/dev/null)
-
FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
# include ARCH specific config
-include $(src-perf)/arch/$(SRCARCH)/Makefile
@@ -233,11 +236,7 @@ endif
ifeq ($(DEBUG),0)
CORE_CFLAGS += -DNDEBUG=1
-ifeq ($(CC_NO_CLANG), 0)
- CORE_CFLAGS += -O3
-else
- CORE_CFLAGS += -O6
-endif
+CORE_CFLAGS += -O3
else
CORE_CFLAGS += -g
CXXFLAGS += -g
@@ -301,6 +300,11 @@ endif
ifdef PYTHON_CONFIG
PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null)
+ # Update the python flags for cross compilation
+ ifdef CROSS_COMPILE
+ PYTHON_NATIVE := $(shell echo $(PYTHON_EMBED_LDOPTS) | sed 's/\(-L.*\/\)\(.*-linux-gnu\).*/\2/')
+ PYTHON_EMBED_LDOPTS := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EMBED_LDOPTS))
+ endif
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
@@ -700,8 +704,8 @@ ifeq ($(BUILD_BPF_SKEL),1)
BUILD_BPF_SKEL := 0
else
CLANG_VERSION := $(shell $(CLANG) --version | head -1 | sed 's/.*clang version \([[:digit:]]\+.[[:digit:]]\+.[[:digit:]]\+\).*/\1/g')
- ifeq ($(call version-lt3,$(CLANG_VERSION),12.0.1),1)
- $(warning Warning: Disabled BPF skeletons as reliable BTF generation needs at least $(CLANG) version 12.0.1)
+ ifeq ($(call version-lt3,$(CLANG_VERSION),16.0.6),1)
+ $(warning Warning: Disabled BPF skeletons as at least $(CLANG) version 16.0.6 is reported to be a working setup with the current of BPF based perf features)
BUILD_BPF_SKEL := 0
endif
endif
@@ -902,6 +906,9 @@ else
PYTHON_SETUPTOOLS_INSTALLED := $(shell $(PYTHON) -c 'import setuptools;' 2> /dev/null && echo "yes" || echo "no")
ifeq ($(PYTHON_SETUPTOOLS_INSTALLED), yes)
PYTHON_EXTENSION_SUFFIX := $(shell $(PYTHON) -c 'from importlib import machinery; print(machinery.EXTENSION_SUFFIXES[0])')
+ ifdef CROSS_COMPILE
+ PYTHON_EXTENSION_SUFFIX := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EXTENSION_SUFFIX))
+ endif
LANG_BINDINGS += $(obj-perf)python/perf$(PYTHON_EXTENSION_SUFFIX)
else
$(warning Missing python setuptools, the python binding won't be built, please install python3-setuptools or equivalent)
@@ -972,6 +979,23 @@ ifdef BUILD_NONDISTRO
endif
endif
+ifndef NO_LIBLLVM
+ $(call feature_check,llvm-perf)
+ ifeq ($(feature-llvm-perf), 1)
+ CFLAGS += -DHAVE_LIBLLVM_SUPPORT
+ CFLAGS += $(shell $(LLVM_CONFIG) --cflags)
+ CXXFLAGS += -DHAVE_LIBLLVM_SUPPORT
+ CXXFLAGS += $(shell $(LLVM_CONFIG) --cxxflags)
+ LIBLLVM = $(shell $(LLVM_CONFIG) --libs all) $(shell $(LLVM_CONFIG) --system-libs)
+ EXTLIBS += -L$(shell $(LLVM_CONFIG) --libdir) $(LIBLLVM)
+ EXTLIBS += -lstdc++
+ $(call detected,CONFIG_LIBLLVM)
+ else
+ $(warning No libllvm 13+ found, slower source file resolution, please install llvm-devel/llvm-dev)
+ NO_LIBLLVM := 1
+ endif
+endif
+
ifndef NO_DEMANGLE
$(call feature_check,cxa-demangle)
ifeq ($(feature-cxa-demangle), 1)
@@ -1018,17 +1042,6 @@ ifndef NO_LIBZSTD
endif
endif
-ifndef NO_LIBCAP
- ifeq ($(feature-libcap), 1)
- CFLAGS += -DHAVE_LIBCAP_SUPPORT
- EXTLIBS += -lcap
- $(call detected,CONFIG_LIBCAP)
- else
- $(warning No libcap found, disables capability support, please install libcap-devel/libcap-dev)
- NO_LIBCAP := 1
- endif
-endif
-
ifndef NO_BACKTRACE
ifeq ($(feature-backtrace), 1)
CFLAGS += -DHAVE_BACKTRACE_SUPPORT
@@ -1206,6 +1219,8 @@ ifneq ($(NO_LIBTRACEEVENT),1)
LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION)))
LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3))
CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP)
+ else
+ $(warning libtracefs is missing. Please install libtracefs-dev/libtracefs-devel)
endif
endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 175e4c7898f0..9dd2e8d3f3c9 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -163,6 +163,8 @@ ifneq ($(OUTPUT),)
# for flex/bison parsers.
VPATH += $(OUTPUT)
export VPATH
+# create symlink to the original source
+SOURCE := $(shell ln -sf $(srctree)/tools/perf $(OUTPUT)/source)
endif
ifeq ($(V),1)
@@ -193,7 +195,32 @@ HOSTLD ?= ld
HOSTAR ?= ar
CLANG ?= clang
-PKG_CONFIG = $(CROSS_COMPILE)pkg-config
+# Some distros provide the command $(CROSS_COMPILE)pkg-config for
+# searching packges installed with Multiarch. Use it for cross
+# compilation if it is existed.
+ifneq (, $(shell which $(CROSS_COMPILE)pkg-config))
+ PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
+else
+ PKG_CONFIG ?= pkg-config
+
+ # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR
+ # for modified system root, is required for the cross compilation.
+ # If these PKG_CONFIG environment variables are not set, Multiarch library
+ # paths are used instead.
+ ifdef CROSS_COMPILE
+ ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),)
+ CROSS_ARCH = $(shell $(CC) -dumpmachine)
+ PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/
+ export PKG_CONFIG_LIBDIR
+ $(warning Missing PKG_CONFIG_LIBDIR, PKG_CONFIG_PATH and PKG_CONFIG_SYSROOT_DIR for cross compilation,)
+ $(warning set PKG_CONFIG_LIBDIR for using Multiarch libs.)
+ endif
+ endif
+endif
RM = rm -f
LN = ln -f
@@ -1115,6 +1142,8 @@ install-tests: all install-gtk
$(INSTALL) tests/shell/common/*.pl '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/common'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \
$(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \
+ $(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' ; \
$(INSTALL) tests/shell/coresight/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight'
$(Q)$(MAKE) -C tests/shell/coresight install-tests
@@ -1252,6 +1281,8 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(
$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
$(OUTPUT)pmu-events/pmu-events.c \
+ $(OUTPUT)pmu-events/test-empty-pmu-events.c \
+ $(OUTPUT)pmu-events/empty-pmu-events.log \
$(OUTPUT)pmu-events/metric_test.log \
$(OUTPUT)$(fadvise_advice_array) \
$(OUTPUT)$(fsconfig_arrays) \
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index da6231367993..ea891d12f8f4 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -643,7 +643,8 @@ static bool cs_etm_is_ete(struct perf_pmu *cs_etm_pmu, struct perf_cpu cpu)
static __u64 cs_etm_get_legacy_trace_id(struct perf_cpu cpu)
{
- return CORESIGHT_LEGACY_CPU_TRACE_ID(cpu.cpu);
+ /* Wrap at 48 so that invalid trace IDs aren't saved into files. */
+ return CORESIGHT_LEGACY_CPU_TRACE_ID(cpu.cpu % 48);
}
static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, struct perf_cpu cpu)
@@ -654,8 +655,7 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr,
/* Get trace configuration register */
data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr);
/* traceID set to legacy version, in case new perf running on older system */
- data[CS_ETMV4_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) |
- CORESIGHT_TRACE_ID_UNUSED_FLAG;
+ data[CS_ETMV4_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
/* Get read-only information from sysFS */
cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0],
@@ -687,7 +687,7 @@ static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, st
/* Get trace configuration register */
data[CS_ETE_TRCCONFIGR] = cs_etmv4_get_config(itr);
/* traceID set to legacy version, in case new perf running on older system */
- data[CS_ETE_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG;
+ data[CS_ETE_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
/* Get read-only information from sysFS */
cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCIDR0], &data[CS_ETE_TRCIDR0]);
@@ -743,8 +743,7 @@ static void cs_etm_get_metadata(struct perf_cpu cpu, u32 *offset,
/* Get configuration register */
info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
/* traceID set to legacy value in case new perf running on old system */
- info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) |
- CORESIGHT_TRACE_ID_UNUSED_FLAG;
+ info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
/* Get read-only information from sysFS */
cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv3_ro[CS_ETM_ETMCCER],
&info->priv[*offset + CS_ETM_ETMCCER]);
@@ -888,7 +887,6 @@ struct auxtrace_record *cs_etm_record_init(int *err)
}
ptr->cs_etm_pmu = cs_etm_pmu;
- ptr->itr.pmu = cs_etm_pmu;
ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options;
ptr->itr.recording_options = cs_etm_recording_options;
ptr->itr.info_priv_size = cs_etm_info_priv_size;
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index 1c9541d01722..57dc94a6e38c 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -23,16 +23,19 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
/* add ETM default config here */
+ pmu->auxtrace = true;
pmu->selectable = true;
pmu->perf_event_attr_init_default = cs_etm_get_default_config;
#if defined(__aarch64__)
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
+ pmu->auxtrace = true;
pmu->selectable = true;
pmu->is_uncore = false;
pmu->perf_event_attr_init_default = arm_spe_pmu_default_config;
if (strstarts(pmu->name, "arm_spe_"))
pmu->mem_events = perf_mem_events_arm;
} else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
+ pmu->auxtrace = true;
pmu->selectable = true;
#endif
}
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index 4af0c3a0f86e..f86d9f4798bd 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -11,7 +11,8 @@ struct arm64_annotate {
static int arm64_mov__parse(struct arch *arch __maybe_unused,
struct ins_operands *ops,
- struct map_symbol *ms __maybe_unused)
+ struct map_symbol *ms __maybe_unused,
+ struct disasm_line *dl __maybe_unused)
{
char *s = strchr(ops->raw, ','), *target, *endptr;
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 0b52e67edb3b..2be99fdf997d 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -8,6 +8,7 @@
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
+#include <linux/string.h>
#include <linux/zalloc.h>
#include <time.h>
@@ -132,32 +133,66 @@ static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
return sample_period;
}
+static void arm_spe_setup_evsel(struct evsel *evsel, struct perf_cpu_map *cpus)
+{
+ u64 bit;
+
+ evsel->core.attr.freq = 0;
+ evsel->core.attr.sample_period = arm_spe_pmu__sample_period(evsel->pmu);
+ evsel->needs_auxtrace_mmap = true;
+
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace event
+ * must come first.
+ */
+ evlist__to_front(evsel->evlist, evsel);
+
+ /*
+ * In the case of per-cpu mmaps, sample CPU for AUX event;
+ * also enable the timestamp tracing for samples correlation.
+ */
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_config_if_unset(evsel->pmu, evsel, "ts_enable", 1);
+ }
+
+ /*
+ * Set this only so that perf report knows that SPE generates memory info. It has no effect
+ * on the opening of the event or the SPE data produced.
+ */
+ evsel__set_sample_bit(evsel, DATA_SRC);
+
+ /*
+ * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
+ * inform that the resulting output's SPE samples contain physical addresses
+ * where applicable.
+ */
+ bit = perf_pmu__format_bits(evsel->pmu, "pa_enable");
+ if (evsel->core.attr.config & bit)
+ evsel__set_sample_bit(evsel, PHYS_ADDR);
+}
+
static int arm_spe_recording_options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
- struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
- struct evsel *evsel, *arm_spe_evsel = NULL;
+ struct evsel *evsel, *tmp;
struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
bool privileged = perf_event_paranoid_check(-1);
struct evsel *tracking_evsel;
int err;
- u64 bit;
sper->evlist = evlist;
evlist__for_each_entry(evlist, evsel) {
- if (evsel->core.attr.type == arm_spe_pmu->type) {
- if (arm_spe_evsel) {
- pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
+ if (evsel__is_aux_event(evsel)) {
+ if (!strstarts(evsel->pmu_name, ARM_SPE_PMU_NAME)) {
+ pr_err("Found unexpected auxtrace event: %s\n",
+ evsel->pmu_name);
return -EINVAL;
}
- evsel->core.attr.freq = 0;
- evsel->core.attr.sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
- evsel->needs_auxtrace_mmap = true;
- arm_spe_evsel = evsel;
opts->full_auxtrace = true;
}
}
@@ -222,37 +257,11 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
opts->auxtrace_snapshot_size);
- /*
- * To obtain the auxtrace buffer file descriptor, the auxtrace event
- * must come first.
- */
- evlist__to_front(evlist, arm_spe_evsel);
-
- /*
- * In the case of per-cpu mmaps, sample CPU for AUX event;
- * also enable the timestamp tracing for samples correlation.
- */
- if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
- evsel__set_sample_bit(arm_spe_evsel, CPU);
- evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
- "ts_enable", 1);
+ evlist__for_each_entry_safe(evlist, tmp, evsel) {
+ if (evsel__is_aux_event(evsel))
+ arm_spe_setup_evsel(evsel, cpus);
}
- /*
- * Set this only so that perf report knows that SPE generates memory info. It has no effect
- * on the opening of the event or the SPE data produced.
- */
- evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
-
- /*
- * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
- * inform that the resulting output's SPE samples contain physical addresses
- * where applicable.
- */
- bit = perf_pmu__format_bits(arm_spe_pmu, "pa_enable");
- if (arm_spe_evsel->core.attr.config & bit)
- evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
-
/* Add dummy event to keep tracking */
err = parse_event(evlist, "dummy:u");
if (err)
@@ -301,12 +310,16 @@ static int arm_spe_snapshot_start(struct auxtrace_record *itr)
struct arm_spe_recording *ptr =
container_of(itr, struct arm_spe_recording, itr);
struct evsel *evsel;
+ int ret = -EINVAL;
evlist__for_each_entry(ptr->evlist, evsel) {
- if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
- return evsel__disable(evsel);
+ if (evsel__is_aux_event(evsel)) {
+ ret = evsel__disable(evsel);
+ if (ret < 0)
+ return ret;
+ }
}
- return -EINVAL;
+ return ret;
}
static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
@@ -314,12 +327,16 @@ static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
struct arm_spe_recording *ptr =
container_of(itr, struct arm_spe_recording, itr);
struct evsel *evsel;
+ int ret = -EINVAL;
evlist__for_each_entry(ptr->evlist, evsel) {
- if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
- return evsel__enable(evsel);
+ if (evsel__is_aux_event(evsel)) {
+ ret = evsel__enable(evsel);
+ if (ret < 0)
+ return ret;
+ }
}
- return -EINVAL;
+ return ret;
}
static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
@@ -497,7 +514,6 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
}
sper->arm_spe_pmu = arm_spe_pmu;
- sper->itr.pmu = arm_spe_pmu;
sper->itr.snapshot_start = arm_spe_snapshot_start;
sper->itr.snapshot_finish = arm_spe_snapshot_finish;
sper->itr.find_snapshot = arm_spe_find_snapshot;
diff --git a/tools/perf/arch/arm64/util/hisi-ptt.c b/tools/perf/arch/arm64/util/hisi-ptt.c
index ba97c8a562a0..eac9739c87e6 100644
--- a/tools/perf/arch/arm64/util/hisi-ptt.c
+++ b/tools/perf/arch/arm64/util/hisi-ptt.c
@@ -174,7 +174,6 @@ struct auxtrace_record *hisi_ptt_recording_init(int *err,
}
pttr->hisi_ptt_pmu = hisi_ptt_pmu;
- pttr->itr.pmu = hisi_ptt_pmu;
pttr->itr.recording_options = hisi_ptt_recording_options;
pttr->itr.info_priv_size = hisi_ptt_info_priv_size;
pttr->itr.info_fill = hisi_ptt_info_fill;
diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c
index 21cc7e4149f7..ab43b1ab51e3 100644
--- a/tools/perf/arch/loongarch/annotate/instructions.c
+++ b/tools/perf/arch/loongarch/annotate/instructions.c
@@ -5,7 +5,8 @@
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
-static int loongarch_call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int loongarch_call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl __maybe_unused)
{
char *c, *endptr, *tok, *name;
struct map *map = ms->map;
@@ -51,7 +52,8 @@ static struct ins_ops loongarch_call_ops = {
.scnprintf = call__scnprintf,
};
-static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl __maybe_unused)
{
struct map *map = ms->map;
struct symbol *sym = ms->sym;
diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c
index a3f423c27cae..ede9eeade0ab 100644
--- a/tools/perf/arch/powerpc/annotate/instructions.c
+++ b/tools/perf/arch/powerpc/annotate/instructions.c
@@ -49,12 +49,266 @@ static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, con
return ops;
}
+#define PPC_OP(op) (((op) >> 26) & 0x3F)
+#define PPC_21_30(R) (((R) >> 1) & 0x3ff)
+#define PPC_22_30(R) (((R) >> 1) & 0x1ff)
+
+struct insn_offset {
+ const char *name;
+ int value;
+};
+
+/*
+ * There are memory instructions with opcode 31 which are
+ * of X Form, Example:
+ * ldx RT,RA,RB
+ * ______________________________________
+ * | 31 | RT | RA | RB | 21 |/|
+ * --------------------------------------
+ * 0 6 11 16 21 30 31
+ *
+ * But all instructions with opcode 31 are not memory.
+ * Example: add RT,RA,RB
+ *
+ * Use bits 21 to 30 to check memory insns with 31 as opcode.
+ * In ins_array below, for ldx instruction:
+ * name => OP_31_XOP_LDX
+ * value => 21
+ */
+
+static struct insn_offset ins_array[] = {
+ { .name = "OP_31_XOP_LXSIWZX", .value = 12, },
+ { .name = "OP_31_XOP_LWARX", .value = 20, },
+ { .name = "OP_31_XOP_LDX", .value = 21, },
+ { .name = "OP_31_XOP_LWZX", .value = 23, },
+ { .name = "OP_31_XOP_LDUX", .value = 53, },
+ { .name = "OP_31_XOP_LWZUX", .value = 55, },
+ { .name = "OP_31_XOP_LXSIWAX", .value = 76, },
+ { .name = "OP_31_XOP_LDARX", .value = 84, },
+ { .name = "OP_31_XOP_LBZX", .value = 87, },
+ { .name = "OP_31_XOP_LVX", .value = 103, },
+ { .name = "OP_31_XOP_LBZUX", .value = 119, },
+ { .name = "OP_31_XOP_STXSIWX", .value = 140, },
+ { .name = "OP_31_XOP_STDX", .value = 149, },
+ { .name = "OP_31_XOP_STWX", .value = 151, },
+ { .name = "OP_31_XOP_STDUX", .value = 181, },
+ { .name = "OP_31_XOP_STWUX", .value = 183, },
+ { .name = "OP_31_XOP_STBX", .value = 215, },
+ { .name = "OP_31_XOP_STVX", .value = 231, },
+ { .name = "OP_31_XOP_STBUX", .value = 247, },
+ { .name = "OP_31_XOP_LHZX", .value = 279, },
+ { .name = "OP_31_XOP_LHZUX", .value = 311, },
+ { .name = "OP_31_XOP_LXVDSX", .value = 332, },
+ { .name = "OP_31_XOP_LWAX", .value = 341, },
+ { .name = "OP_31_XOP_LHAX", .value = 343, },
+ { .name = "OP_31_XOP_LWAUX", .value = 373, },
+ { .name = "OP_31_XOP_LHAUX", .value = 375, },
+ { .name = "OP_31_XOP_STHX", .value = 407, },
+ { .name = "OP_31_XOP_STHUX", .value = 439, },
+ { .name = "OP_31_XOP_LXSSPX", .value = 524, },
+ { .name = "OP_31_XOP_LDBRX", .value = 532, },
+ { .name = "OP_31_XOP_LSWX", .value = 533, },
+ { .name = "OP_31_XOP_LWBRX", .value = 534, },
+ { .name = "OP_31_XOP_LFSUX", .value = 567, },
+ { .name = "OP_31_XOP_LXSDX", .value = 588, },
+ { .name = "OP_31_XOP_LSWI", .value = 597, },
+ { .name = "OP_31_XOP_LFDX", .value = 599, },
+ { .name = "OP_31_XOP_LFDUX", .value = 631, },
+ { .name = "OP_31_XOP_STXSSPX", .value = 652, },
+ { .name = "OP_31_XOP_STDBRX", .value = 660, },
+ { .name = "OP_31_XOP_STXWX", .value = 661, },
+ { .name = "OP_31_XOP_STWBRX", .value = 662, },
+ { .name = "OP_31_XOP_STFSX", .value = 663, },
+ { .name = "OP_31_XOP_STFSUX", .value = 695, },
+ { .name = "OP_31_XOP_STXSDX", .value = 716, },
+ { .name = "OP_31_XOP_STSWI", .value = 725, },
+ { .name = "OP_31_XOP_STFDX", .value = 727, },
+ { .name = "OP_31_XOP_STFDUX", .value = 759, },
+ { .name = "OP_31_XOP_LXVW4X", .value = 780, },
+ { .name = "OP_31_XOP_LHBRX", .value = 790, },
+ { .name = "OP_31_XOP_LXVD2X", .value = 844, },
+ { .name = "OP_31_XOP_LFIWAX", .value = 855, },
+ { .name = "OP_31_XOP_LFIWZX", .value = 887, },
+ { .name = "OP_31_XOP_STXVW4X", .value = 908, },
+ { .name = "OP_31_XOP_STHBRX", .value = 918, },
+ { .name = "OP_31_XOP_STXVD2X", .value = 972, },
+ { .name = "OP_31_XOP_STFIWX", .value = 983, },
+};
+
+/*
+ * Arithmetic instructions which are having opcode as 31.
+ * These instructions are tracked to save the register state
+ * changes. Example:
+ *
+ * lwz r10,264(r3)
+ * add r31, r3, r3
+ * lwz r9, 0(r31)
+ *
+ * Here instruction tracking needs to identify the "add"
+ * instruction and save data type of r3 to r31. If a sample
+ * is hit at next "lwz r9, 0(r31)", by this instruction tracking,
+ * data type of r31 can be resolved.
+ */
+static struct insn_offset arithmetic_ins_op_31[] = {
+ { .name = "SUB_CARRY_XO_FORM", .value = 8, },
+ { .name = "MUL_HDW_XO_FORM1", .value = 9, },
+ { .name = "ADD_CARRY_XO_FORM", .value = 10, },
+ { .name = "MUL_HW_XO_FORM1", .value = 11, },
+ { .name = "SUB_XO_FORM", .value = 40, },
+ { .name = "MUL_HDW_XO_FORM", .value = 73, },
+ { .name = "MUL_HW_XO_FORM", .value = 75, },
+ { .name = "SUB_EXT_XO_FORM", .value = 136, },
+ { .name = "ADD_EXT_XO_FORM", .value = 138, },
+ { .name = "SUB_ZERO_EXT_XO_FORM", .value = 200, },
+ { .name = "ADD_ZERO_EXT_XO_FORM", .value = 202, },
+ { .name = "SUB_EXT_XO_FORM2", .value = 232, },
+ { .name = "MUL_DW_XO_FORM", .value = 233, },
+ { .name = "ADD_EXT_XO_FORM2", .value = 234, },
+ { .name = "MUL_W_XO_FORM", .value = 235, },
+ { .name = "ADD_XO_FORM", .value = 266, },
+ { .name = "DIV_DW_XO_FORM1", .value = 457, },
+ { .name = "DIV_W_XO_FORM1", .value = 459, },
+ { .name = "DIV_DW_XO_FORM", .value = 489, },
+ { .name = "DIV_W_XO_FORM", .value = 491, },
+};
+
+static struct insn_offset arithmetic_two_ops[] = {
+ { .name = "mulli", .value = 7, },
+ { .name = "subfic", .value = 8, },
+ { .name = "addic", .value = 12, },
+ { .name = "addic.", .value = 13, },
+ { .name = "addi", .value = 14, },
+ { .name = "addis", .value = 15, },
+};
+
+static int cmp_offset(const void *a, const void *b)
+{
+ const struct insn_offset *val1 = a;
+ const struct insn_offset *val2 = b;
+
+ return (val1->value - val2->value);
+}
+
+static struct ins_ops *check_ppc_insn(struct disasm_line *dl)
+{
+ int raw_insn = dl->raw.raw_insn;
+ int opcode = PPC_OP(raw_insn);
+ int mem_insn_31 = PPC_21_30(raw_insn);
+ struct insn_offset *ret;
+ struct insn_offset mem_insns_31_opcode = {
+ "OP_31_INSN",
+ mem_insn_31
+ };
+ char name_insn[32];
+
+ /*
+ * Instructions with opcode 32 to 63 are memory
+ * instructions in powerpc
+ */
+ if ((opcode & 0x20)) {
+ /*
+ * Set name in case of raw instruction to
+ * opcode to be used in insn-stat
+ */
+ if (!strlen(dl->ins.name)) {
+ sprintf(name_insn, "%d", opcode);
+ dl->ins.name = strdup(name_insn);
+ }
+ return &load_store_ops;
+ } else if (opcode == 31) {
+ /* Check for memory instructions with opcode 31 */
+ ret = bsearch(&mem_insns_31_opcode, ins_array, ARRAY_SIZE(ins_array), sizeof(ins_array[0]), cmp_offset);
+ if (ret) {
+ if (!strlen(dl->ins.name))
+ dl->ins.name = strdup(ret->name);
+ return &load_store_ops;
+ } else {
+ mem_insns_31_opcode.value = PPC_22_30(raw_insn);
+ ret = bsearch(&mem_insns_31_opcode, arithmetic_ins_op_31, ARRAY_SIZE(arithmetic_ins_op_31),
+ sizeof(arithmetic_ins_op_31[0]), cmp_offset);
+ if (ret != NULL)
+ return &arithmetic_ops;
+ /* Bits 21 to 30 has value 444 for "mr" insn ie, OR X form */
+ if (PPC_21_30(raw_insn) == 444)
+ return &arithmetic_ops;
+ }
+ } else {
+ mem_insns_31_opcode.value = opcode;
+ ret = bsearch(&mem_insns_31_opcode, arithmetic_two_ops, ARRAY_SIZE(arithmetic_two_ops),
+ sizeof(arithmetic_two_ops[0]), cmp_offset);
+ if (ret != NULL)
+ return &arithmetic_ops;
+ }
+
+ return NULL;
+}
+
+/*
+ * Instruction tracking function to track register state moves.
+ * Example sequence:
+ * ld r10,264(r3)
+ * mr r31,r3
+ * <<after some sequence>
+ * ld r9,312(r31)
+ *
+ * Previous instruction sequence shows that register state of r3
+ * is moved to r31. update_insn_state_powerpc tracks these state
+ * changes
+ */
+#ifdef HAVE_DWARF_SUPPORT
+static void update_insn_state_powerpc(struct type_state *state,
+ struct data_loc_info *dloc, Dwarf_Die * cu_die __maybe_unused,
+ struct disasm_line *dl)
+{
+ struct annotated_insn_loc loc;
+ struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
+ struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
+ struct type_state_reg *tsr;
+ u32 insn_offset = dl->al.offset;
+
+ if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
+ return;
+
+ /*
+ * Value 444 for bits 21:30 is for "mr"
+ * instruction. "mr" is extended OR. So set the
+ * source and destination reg correctly
+ */
+ if (PPC_21_30(dl->raw.raw_insn) == 444) {
+ int src_reg = src->reg1;
+
+ src->reg1 = dst->reg1;
+ dst->reg1 = src_reg;
+ }
+
+ if (!has_reg_type(state, dst->reg1))
+ return;
+
+ tsr = &state->regs[dst->reg1];
+
+ if (!has_reg_type(state, src->reg1) ||
+ !state->regs[src->reg1].ok) {
+ tsr->ok = false;
+ return;
+ }
+
+ tsr->type = state->regs[src->reg1].type;
+ tsr->kind = state->regs[src->reg1].kind;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] reg%d -> reg%d",
+ insn_offset, src->reg1, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+}
+#endif /* HAVE_DWARF_SUPPORT */
+
static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
if (!arch->initialized) {
arch->initialized = true;
arch->associate_instruction_ops = powerpc__associate_instruction_ops;
arch->objdump.comment_char = '#';
+ annotate_opts.show_asm_raw = true;
}
return 0;
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 3656f1ca7a21..ebae8415dfbb 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -230,8 +230,10 @@
178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64
179 64 pread64 sys_pread64
+179 spu pread64 sys_pread64
180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64
180 64 pwrite64 sys_pwrite64
+180 spu pwrite64 sys_pwrite64
181 common chown sys_chown
182 common getcwd sys_getcwd
183 common capget sys_capget
@@ -246,6 +248,7 @@
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead
191 64 readahead sys_readahead
+191 spu readahead sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64
194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64
@@ -293,6 +296,7 @@
232 nospu set_tid_address sys_set_tid_address
233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64
233 64 fadvise64 sys_fadvise64
+233 spu fadvise64 sys_fadvise64
234 nospu exit_group sys_exit_group
235 nospu lookup_dcookie sys_ni_syscall
236 common epoll_create sys_epoll_create
@@ -502,7 +506,7 @@
412 32 utimensat_time64 sys_utimensat sys_utimensat
413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 0c4f4caf53ac..104c7ae5c433 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -98,3 +98,56 @@ int regs_query_register_offset(const char *name)
return roff->ptregs_offset;
return -EINVAL;
}
+
+#define PPC_OP(op) (((op) >> 26) & 0x3F)
+#define PPC_RA(a) (((a) >> 16) & 0x1f)
+#define PPC_RT(t) (((t) >> 21) & 0x1f)
+#define PPC_RB(b) (((b) >> 11) & 0x1f)
+#define PPC_D(D) ((D) & 0xfffe)
+#define PPC_DS(DS) ((DS) & 0xfffc)
+#define OP_LD 58
+#define OP_STD 62
+
+static int get_source_reg(u32 raw_insn)
+{
+ return PPC_RA(raw_insn);
+}
+
+static int get_target_reg(u32 raw_insn)
+{
+ return PPC_RT(raw_insn);
+}
+
+static int get_offset_opcode(u32 raw_insn)
+{
+ int opcode = PPC_OP(raw_insn);
+
+ /* DS- form */
+ if ((opcode == OP_LD) || (opcode == OP_STD))
+ return PPC_DS(raw_insn);
+ else
+ return PPC_D(raw_insn);
+}
+
+/*
+ * Fills the required fields for op_loc depending on if it
+ * is a source or target.
+ * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT
+ * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT
+ * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT
+ */
+void get_powerpc_regs(u32 raw_insn, int is_source,
+ struct annotated_op_loc *op_loc)
+{
+ if (is_source)
+ op_loc->reg1 = get_source_reg(raw_insn);
+ else
+ op_loc->reg1 = get_target_reg(raw_insn);
+
+ if (op_loc->multi_regs)
+ op_loc->reg2 = PPC_RB(raw_insn);
+
+ /* TODO: Implement offset handling for X Form */
+ if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31))
+ op_loc->offset = get_offset_opcode(raw_insn);
+}
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index da5aa3e1f04c..eeac25cca699 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -2,7 +2,7 @@
#include <linux/compiler.h>
static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
- struct map_symbol *ms)
+ struct map_symbol *ms, struct disasm_line *dl __maybe_unused)
{
char *endptr, *tok, *name;
struct map *map = ms->map;
@@ -52,7 +52,8 @@ static struct ins_ops s390_call_ops = {
static int s390_mov__parse(struct arch *arch __maybe_unused,
struct ins_operands *ops,
- struct map_symbol *ms __maybe_unused)
+ struct map_symbol *ms __maybe_unused,
+ struct disasm_line *dl __maybe_unused)
{
char *s = strchr(ops->raw, ','), *target, *endptr;
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index bd0fee24ad10..01071182763e 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -418,7 +418,7 @@
412 32 utimensat_time64 - sys_utimensat
413 32 pselect6_time64 - compat_sys_pselect6_time64
414 32 ppoll_time64 - compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 - sys_io_pgetevents
+416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 - sys_mq_timedsend
419 32 mq_timedreceive_time64 - sys_mq_timedreceive
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 8952e00f9b60..67b4969a6738 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -13,6 +13,7 @@ PERF_HAVE_JITDUMP := 1
generated := $(OUTPUT)arch/x86/include/generated
out := $(generated)/asm
header := $(out)/syscalls_64.c
+header_32 := $(out)/syscalls_32.c
sys := $(srctree)/tools/perf/arch/x86/entry/syscalls
systbl := $(sys)/syscalltbl.sh
@@ -22,7 +23,10 @@ $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
$(header): $(sys)/syscall_64.tbl $(systbl)
$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
+$(header_32): $(sys)/syscall_32.tbl $(systbl)
+ $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_32.tbl 'x86' > $@
+
clean::
$(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated)
-archheaders: $(header)
+archheaders: $(header) $(header_32)
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 5cdf457f5cbe..5caf5a17f03d 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -206,3 +206,392 @@ static int x86__annotate_init(struct arch *arch, char *cpuid)
arch->initialized = true;
return err;
}
+
+#ifdef HAVE_DWARF_SUPPORT
+static void update_insn_state_x86(struct type_state *state,
+ struct data_loc_info *dloc, Dwarf_Die *cu_die,
+ struct disasm_line *dl)
+{
+ struct annotated_insn_loc loc;
+ struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
+ struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
+ struct type_state_reg *tsr;
+ Dwarf_Die type_die;
+ u32 insn_offset = dl->al.offset;
+ int fbreg = dloc->fbreg;
+ int fboff = 0;
+
+ if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
+ return;
+
+ if (ins__is_call(&dl->ins)) {
+ struct symbol *func = dl->ops.target.sym;
+
+ if (func == NULL)
+ return;
+
+ /* __fentry__ will preserve all registers */
+ if (!strcmp(func->name, "__fentry__"))
+ return;
+
+ pr_debug_dtp("call [%x] %s\n", insn_offset, func->name);
+
+ /* Otherwise invalidate caller-saved registers after call */
+ for (unsigned i = 0; i < ARRAY_SIZE(state->regs); i++) {
+ if (state->regs[i].caller_saved)
+ state->regs[i].ok = false;
+ }
+
+ /* Update register with the return type (if any) */
+ if (die_find_func_rettype(cu_die, func->name, &type_die)) {
+ tsr = &state->regs[state->ret_reg];
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("call [%x] return -> reg%d",
+ insn_offset, state->ret_reg);
+ pr_debug_type_name(&type_die, tsr->kind);
+ }
+ return;
+ }
+
+ if (!strncmp(dl->ins.name, "add", 3)) {
+ u64 imm_value = -1ULL;
+ int offset;
+ const char *var_name = NULL;
+ struct map_symbol *ms = dloc->ms;
+ u64 ip = ms->sym->start + dl->al.offset;
+
+ if (!has_reg_type(state, dst->reg1))
+ return;
+
+ tsr = &state->regs[dst->reg1];
+ tsr->copied_from = -1;
+
+ if (src->imm)
+ imm_value = src->offset;
+ else if (has_reg_type(state, src->reg1) &&
+ state->regs[src->reg1].kind == TSR_KIND_CONST)
+ imm_value = state->regs[src->reg1].imm_value;
+ else if (src->reg1 == DWARF_REG_PC) {
+ u64 var_addr = annotate_calc_pcrel(dloc->ms, ip,
+ src->offset, dl);
+
+ if (get_global_var_info(dloc, var_addr,
+ &var_name, &offset) &&
+ !strcmp(var_name, "this_cpu_off") &&
+ tsr->kind == TSR_KIND_CONST) {
+ tsr->kind = TSR_KIND_PERCPU_BASE;
+ tsr->ok = true;
+ imm_value = tsr->imm_value;
+ }
+ }
+ else
+ return;
+
+ if (tsr->kind != TSR_KIND_PERCPU_BASE)
+ return;
+
+ if (get_global_var_type(cu_die, dloc, ip, imm_value, &offset,
+ &type_die) && offset == 0) {
+ /*
+ * This is not a pointer type, but it should be treated
+ * as a pointer.
+ */
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_POINTER;
+ tsr->ok = true;
+
+ pr_debug_dtp("add [%x] percpu %#"PRIx64" -> reg%d",
+ insn_offset, imm_value, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ return;
+ }
+
+ if (strncmp(dl->ins.name, "mov", 3))
+ return;
+
+ if (dloc->fb_cfa) {
+ u64 ip = dloc->ms->sym->start + dl->al.offset;
+ u64 pc = map__rip_2objdump(dloc->ms->map, ip);
+
+ if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
+ fbreg = -1;
+ }
+
+ /* Case 1. register to register or segment:offset to register transfers */
+ if (!src->mem_ref && !dst->mem_ref) {
+ if (!has_reg_type(state, dst->reg1))
+ return;
+
+ tsr = &state->regs[dst->reg1];
+ tsr->copied_from = -1;
+
+ if (dso__kernel(map__dso(dloc->ms->map)) &&
+ src->segment == INSN_SEG_X86_GS && src->imm) {
+ u64 ip = dloc->ms->sym->start + dl->al.offset;
+ u64 var_addr;
+ int offset;
+
+ /*
+ * In kernel, %gs points to a per-cpu region for the
+ * current CPU. Access with a constant offset should
+ * be treated as a global variable access.
+ */
+ var_addr = src->offset;
+
+ if (var_addr == 40) {
+ tsr->kind = TSR_KIND_CANARY;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] stack canary -> reg%d\n",
+ insn_offset, dst->reg1);
+ return;
+ }
+
+ if (!get_global_var_type(cu_die, dloc, ip, var_addr,
+ &offset, &type_die) ||
+ !die_get_member_type(&type_die, offset, &type_die)) {
+ tsr->ok = false;
+ return;
+ }
+
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] this-cpu addr=%#"PRIx64" -> reg%d",
+ insn_offset, var_addr, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ return;
+ }
+
+ if (src->imm) {
+ tsr->kind = TSR_KIND_CONST;
+ tsr->imm_value = src->offset;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] imm=%#x -> reg%d\n",
+ insn_offset, tsr->imm_value, dst->reg1);
+ return;
+ }
+
+ if (!has_reg_type(state, src->reg1) ||
+ !state->regs[src->reg1].ok) {
+ tsr->ok = false;
+ return;
+ }
+
+ tsr->type = state->regs[src->reg1].type;
+ tsr->kind = state->regs[src->reg1].kind;
+ tsr->imm_value = state->regs[src->reg1].imm_value;
+ tsr->ok = true;
+
+ /* To copy back the variable type later (hopefully) */
+ if (tsr->kind == TSR_KIND_TYPE)
+ tsr->copied_from = src->reg1;
+
+ pr_debug_dtp("mov [%x] reg%d -> reg%d",
+ insn_offset, src->reg1, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* Case 2. memory to register transers */
+ if (src->mem_ref && !dst->mem_ref) {
+ int sreg = src->reg1;
+
+ if (!has_reg_type(state, dst->reg1))
+ return;
+
+ tsr = &state->regs[dst->reg1];
+ tsr->copied_from = -1;
+
+retry:
+ /* Check stack variables with offset */
+ if (sreg == fbreg) {
+ struct type_state_stack *stack;
+ int offset = src->offset - fboff;
+
+ stack = find_stack_state(state, offset);
+ if (stack == NULL) {
+ tsr->ok = false;
+ return;
+ } else if (!stack->compound) {
+ tsr->type = stack->type;
+ tsr->kind = stack->kind;
+ tsr->ok = true;
+ } else if (die_get_member_type(&stack->type,
+ offset - stack->offset,
+ &type_die)) {
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+ } else {
+ tsr->ok = false;
+ return;
+ }
+
+ pr_debug_dtp("mov [%x] -%#x(stack) -> reg%d",
+ insn_offset, -offset, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* And then dereference the pointer if it has one */
+ else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
+ state->regs[sreg].kind == TSR_KIND_TYPE &&
+ die_deref_ptr_type(&state->regs[sreg].type,
+ src->offset, &type_die)) {
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* Or check if it's a global variable */
+ else if (sreg == DWARF_REG_PC) {
+ struct map_symbol *ms = dloc->ms;
+ u64 ip = ms->sym->start + dl->al.offset;
+ u64 addr;
+ int offset;
+
+ addr = annotate_calc_pcrel(ms, ip, src->offset, dl);
+
+ if (!get_global_var_type(cu_die, dloc, ip, addr, &offset,
+ &type_die) ||
+ !die_get_member_type(&type_die, offset, &type_die)) {
+ tsr->ok = false;
+ return;
+ }
+
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] global addr=%"PRIx64" -> reg%d",
+ insn_offset, addr, dst->reg1);
+ pr_debug_type_name(&type_die, tsr->kind);
+ }
+ /* And check percpu access with base register */
+ else if (has_reg_type(state, sreg) &&
+ state->regs[sreg].kind == TSR_KIND_PERCPU_BASE) {
+ u64 ip = dloc->ms->sym->start + dl->al.offset;
+ u64 var_addr = src->offset;
+ int offset;
+
+ if (src->multi_regs) {
+ int reg2 = (sreg == src->reg1) ? src->reg2 : src->reg1;
+
+ if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
+ state->regs[reg2].kind == TSR_KIND_CONST)
+ var_addr += state->regs[reg2].imm_value;
+ }
+
+ /*
+ * In kernel, %gs points to a per-cpu region for the
+ * current CPU. Access with a constant offset should
+ * be treated as a global variable access.
+ */
+ if (get_global_var_type(cu_die, dloc, ip, var_addr,
+ &offset, &type_die) &&
+ die_get_member_type(&type_die, offset, &type_die)) {
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ if (src->multi_regs) {
+ pr_debug_dtp("mov [%x] percpu %#x(reg%d,reg%d) -> reg%d",
+ insn_offset, src->offset, src->reg1,
+ src->reg2, dst->reg1);
+ } else {
+ pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ }
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ } else {
+ tsr->ok = false;
+ }
+ }
+ /* And then dereference the calculated pointer if it has one */
+ else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
+ state->regs[sreg].kind == TSR_KIND_POINTER &&
+ die_get_member_type(&state->regs[sreg].type,
+ src->offset, &type_die)) {
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] pointer %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* Or try another register if any */
+ else if (src->multi_regs && sreg == src->reg1 &&
+ src->reg1 != src->reg2) {
+ sreg = src->reg2;
+ goto retry;
+ }
+ else {
+ int offset;
+ const char *var_name = NULL;
+
+ /* it might be per-cpu variable (in kernel) access */
+ if (src->offset < 0) {
+ if (get_global_var_info(dloc, (s64)src->offset,
+ &var_name, &offset) &&
+ !strcmp(var_name, "__per_cpu_offset")) {
+ tsr->kind = TSR_KIND_PERCPU_BASE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] percpu base reg%d\n",
+ insn_offset, dst->reg1);
+ return;
+ }
+ }
+
+ tsr->ok = false;
+ }
+ }
+ /* Case 3. register to memory transfers */
+ if (!src->mem_ref && dst->mem_ref) {
+ if (!has_reg_type(state, src->reg1) ||
+ !state->regs[src->reg1].ok)
+ return;
+
+ /* Check stack variables with offset */
+ if (dst->reg1 == fbreg) {
+ struct type_state_stack *stack;
+ int offset = dst->offset - fboff;
+
+ tsr = &state->regs[src->reg1];
+
+ stack = find_stack_state(state, offset);
+ if (stack) {
+ /*
+ * The source register is likely to hold a type
+ * of member if it's a compound type. Do not
+ * update the stack variable type since we can
+ * get the member type later by using the
+ * die_get_member_type().
+ */
+ if (!stack->compound)
+ set_stack_state(stack, offset, tsr->kind,
+ &tsr->type);
+ } else {
+ findnew_stack_state(state, offset, tsr->kind,
+ &tsr->type);
+ }
+
+ pr_debug_dtp("mov [%x] reg%d -> -%#x(stack)",
+ insn_offset, src->reg1, -offset);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /*
+ * Ignore other transfers since it'd set a value in a struct
+ * and won't change the type.
+ */
+ }
+ /* Case 4. memory to memory transfers (not handled for now) */
+}
+#endif
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
new file mode 100644
index 000000000000..534c74b14fab
--- /dev/null
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
@@ -0,0 +1,470 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# 32-bit system call numbers and entry vectors
+#
+# The format is:
+# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]]
+#
+# The __ia32_sys and __ia32_compat_sys stubs are created on-the-fly for
+# sys_*() system calls and compat_sys_*() compat system calls if
+# IA32_EMULATION is defined, and expect struct pt_regs *regs as their only
+# parameter.
+#
+# The abi is always "i386" for this file.
+#
+0 i386 restart_syscall sys_restart_syscall
+1 i386 exit sys_exit - noreturn
+2 i386 fork sys_fork
+3 i386 read sys_read
+4 i386 write sys_write
+5 i386 open sys_open compat_sys_open
+6 i386 close sys_close
+7 i386 waitpid sys_waitpid
+8 i386 creat sys_creat
+9 i386 link sys_link
+10 i386 unlink sys_unlink
+11 i386 execve sys_execve compat_sys_execve
+12 i386 chdir sys_chdir
+13 i386 time sys_time32
+14 i386 mknod sys_mknod
+15 i386 chmod sys_chmod
+16 i386 lchown sys_lchown16
+17 i386 break
+18 i386 oldstat sys_stat
+19 i386 lseek sys_lseek compat_sys_lseek
+20 i386 getpid sys_getpid
+21 i386 mount sys_mount
+22 i386 umount sys_oldumount
+23 i386 setuid sys_setuid16
+24 i386 getuid sys_getuid16
+25 i386 stime sys_stime32
+26 i386 ptrace sys_ptrace compat_sys_ptrace
+27 i386 alarm sys_alarm
+28 i386 oldfstat sys_fstat
+29 i386 pause sys_pause
+30 i386 utime sys_utime32
+31 i386 stty
+32 i386 gtty
+33 i386 access sys_access
+34 i386 nice sys_nice
+35 i386 ftime
+36 i386 sync sys_sync
+37 i386 kill sys_kill
+38 i386 rename sys_rename
+39 i386 mkdir sys_mkdir
+40 i386 rmdir sys_rmdir
+41 i386 dup sys_dup
+42 i386 pipe sys_pipe
+43 i386 times sys_times compat_sys_times
+44 i386 prof
+45 i386 brk sys_brk
+46 i386 setgid sys_setgid16
+47 i386 getgid sys_getgid16
+48 i386 signal sys_signal
+49 i386 geteuid sys_geteuid16
+50 i386 getegid sys_getegid16
+51 i386 acct sys_acct
+52 i386 umount2 sys_umount
+53 i386 lock
+54 i386 ioctl sys_ioctl compat_sys_ioctl
+55 i386 fcntl sys_fcntl compat_sys_fcntl64
+56 i386 mpx
+57 i386 setpgid sys_setpgid
+58 i386 ulimit
+59 i386 oldolduname sys_olduname
+60 i386 umask sys_umask
+61 i386 chroot sys_chroot
+62 i386 ustat sys_ustat compat_sys_ustat
+63 i386 dup2 sys_dup2
+64 i386 getppid sys_getppid
+65 i386 getpgrp sys_getpgrp
+66 i386 setsid sys_setsid
+67 i386 sigaction sys_sigaction compat_sys_sigaction
+68 i386 sgetmask sys_sgetmask
+69 i386 ssetmask sys_ssetmask
+70 i386 setreuid sys_setreuid16
+71 i386 setregid sys_setregid16
+72 i386 sigsuspend sys_sigsuspend
+73 i386 sigpending sys_sigpending compat_sys_sigpending
+74 i386 sethostname sys_sethostname
+75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
+76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
+77 i386 getrusage sys_getrusage compat_sys_getrusage
+78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 i386 settimeofday sys_settimeofday compat_sys_settimeofday
+80 i386 getgroups sys_getgroups16
+81 i386 setgroups sys_setgroups16
+82 i386 select sys_old_select compat_sys_old_select
+83 i386 symlink sys_symlink
+84 i386 oldlstat sys_lstat
+85 i386 readlink sys_readlink
+86 i386 uselib sys_uselib
+87 i386 swapon sys_swapon
+88 i386 reboot sys_reboot
+89 i386 readdir sys_old_readdir compat_sys_old_readdir
+90 i386 mmap sys_old_mmap compat_sys_ia32_mmap
+91 i386 munmap sys_munmap
+92 i386 truncate sys_truncate compat_sys_truncate
+93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
+94 i386 fchmod sys_fchmod
+95 i386 fchown sys_fchown16
+96 i386 getpriority sys_getpriority
+97 i386 setpriority sys_setpriority
+98 i386 profil
+99 i386 statfs sys_statfs compat_sys_statfs
+100 i386 fstatfs sys_fstatfs compat_sys_fstatfs
+101 i386 ioperm sys_ioperm
+102 i386 socketcall sys_socketcall compat_sys_socketcall
+103 i386 syslog sys_syslog
+104 i386 setitimer sys_setitimer compat_sys_setitimer
+105 i386 getitimer sys_getitimer compat_sys_getitimer
+106 i386 stat sys_newstat compat_sys_newstat
+107 i386 lstat sys_newlstat compat_sys_newlstat
+108 i386 fstat sys_newfstat compat_sys_newfstat
+109 i386 olduname sys_uname
+110 i386 iopl sys_iopl
+111 i386 vhangup sys_vhangup
+112 i386 idle
+113 i386 vm86old sys_vm86old sys_ni_syscall
+114 i386 wait4 sys_wait4 compat_sys_wait4
+115 i386 swapoff sys_swapoff
+116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
+117 i386 ipc sys_ipc compat_sys_ipc
+118 i386 fsync sys_fsync
+119 i386 sigreturn sys_sigreturn compat_sys_sigreturn
+120 i386 clone sys_clone compat_sys_ia32_clone
+121 i386 setdomainname sys_setdomainname
+122 i386 uname sys_newuname
+123 i386 modify_ldt sys_modify_ldt
+124 i386 adjtimex sys_adjtimex_time32
+125 i386 mprotect sys_mprotect
+126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask
+127 i386 create_module
+128 i386 init_module sys_init_module
+129 i386 delete_module sys_delete_module
+130 i386 get_kernel_syms
+131 i386 quotactl sys_quotactl
+132 i386 getpgid sys_getpgid
+133 i386 fchdir sys_fchdir
+134 i386 bdflush sys_ni_syscall
+135 i386 sysfs sys_sysfs
+136 i386 personality sys_personality
+137 i386 afs_syscall
+138 i386 setfsuid sys_setfsuid16
+139 i386 setfsgid sys_setfsgid16
+140 i386 _llseek sys_llseek
+141 i386 getdents sys_getdents compat_sys_getdents
+142 i386 _newselect sys_select compat_sys_select
+143 i386 flock sys_flock
+144 i386 msync sys_msync
+145 i386 readv sys_readv
+146 i386 writev sys_writev
+147 i386 getsid sys_getsid
+148 i386 fdatasync sys_fdatasync
+149 i386 _sysctl sys_ni_syscall
+150 i386 mlock sys_mlock
+151 i386 munlock sys_munlock
+152 i386 mlockall sys_mlockall
+153 i386 munlockall sys_munlockall
+154 i386 sched_setparam sys_sched_setparam
+155 i386 sched_getparam sys_sched_getparam
+156 i386 sched_setscheduler sys_sched_setscheduler
+157 i386 sched_getscheduler sys_sched_getscheduler
+158 i386 sched_yield sys_sched_yield
+159 i386 sched_get_priority_max sys_sched_get_priority_max
+160 i386 sched_get_priority_min sys_sched_get_priority_min
+161 i386 sched_rr_get_interval sys_sched_rr_get_interval_time32
+162 i386 nanosleep sys_nanosleep_time32
+163 i386 mremap sys_mremap
+164 i386 setresuid sys_setresuid16
+165 i386 getresuid sys_getresuid16
+166 i386 vm86 sys_vm86 sys_ni_syscall
+167 i386 query_module
+168 i386 poll sys_poll
+169 i386 nfsservctl
+170 i386 setresgid sys_setresgid16
+171 i386 getresgid sys_getresgid16
+172 i386 prctl sys_prctl
+173 i386 rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+175 i386 rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 compat_sys_rt_sigtimedwait_time32
+178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+179 i386 rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+180 i386 pread64 sys_ia32_pread64
+181 i386 pwrite64 sys_ia32_pwrite64
+182 i386 chown sys_chown16
+183 i386 getcwd sys_getcwd
+184 i386 capget sys_capget
+185 i386 capset sys_capset
+186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack
+187 i386 sendfile sys_sendfile compat_sys_sendfile
+188 i386 getpmsg
+189 i386 putpmsg
+190 i386 vfork sys_vfork
+191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
+192 i386 mmap2 sys_mmap_pgoff
+193 i386 truncate64 sys_ia32_truncate64
+194 i386 ftruncate64 sys_ia32_ftruncate64
+195 i386 stat64 sys_stat64 compat_sys_ia32_stat64
+196 i386 lstat64 sys_lstat64 compat_sys_ia32_lstat64
+197 i386 fstat64 sys_fstat64 compat_sys_ia32_fstat64
+198 i386 lchown32 sys_lchown
+199 i386 getuid32 sys_getuid
+200 i386 getgid32 sys_getgid
+201 i386 geteuid32 sys_geteuid
+202 i386 getegid32 sys_getegid
+203 i386 setreuid32 sys_setreuid
+204 i386 setregid32 sys_setregid
+205 i386 getgroups32 sys_getgroups
+206 i386 setgroups32 sys_setgroups
+207 i386 fchown32 sys_fchown
+208 i386 setresuid32 sys_setresuid
+209 i386 getresuid32 sys_getresuid
+210 i386 setresgid32 sys_setresgid
+211 i386 getresgid32 sys_getresgid
+212 i386 chown32 sys_chown
+213 i386 setuid32 sys_setuid
+214 i386 setgid32 sys_setgid
+215 i386 setfsuid32 sys_setfsuid
+216 i386 setfsgid32 sys_setfsgid
+217 i386 pivot_root sys_pivot_root
+218 i386 mincore sys_mincore
+219 i386 madvise sys_madvise
+220 i386 getdents64 sys_getdents64
+221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
+# 222 is unused
+# 223 is unused
+224 i386 gettid sys_gettid
+225 i386 readahead sys_ia32_readahead
+226 i386 setxattr sys_setxattr
+227 i386 lsetxattr sys_lsetxattr
+228 i386 fsetxattr sys_fsetxattr
+229 i386 getxattr sys_getxattr
+230 i386 lgetxattr sys_lgetxattr
+231 i386 fgetxattr sys_fgetxattr
+232 i386 listxattr sys_listxattr
+233 i386 llistxattr sys_llistxattr
+234 i386 flistxattr sys_flistxattr
+235 i386 removexattr sys_removexattr
+236 i386 lremovexattr sys_lremovexattr
+237 i386 fremovexattr sys_fremovexattr
+238 i386 tkill sys_tkill
+239 i386 sendfile64 sys_sendfile64
+240 i386 futex sys_futex_time32
+241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+243 i386 set_thread_area sys_set_thread_area
+244 i386 get_thread_area sys_get_thread_area
+245 i386 io_setup sys_io_setup compat_sys_io_setup
+246 i386 io_destroy sys_io_destroy
+247 i386 io_getevents sys_io_getevents_time32
+248 i386 io_submit sys_io_submit compat_sys_io_submit
+249 i386 io_cancel sys_io_cancel
+250 i386 fadvise64 sys_ia32_fadvise64
+# 251 is available for reuse (was briefly sys_set_zone_reclaim)
+252 i386 exit_group sys_exit_group - noreturn
+253 i386 lookup_dcookie
+254 i386 epoll_create sys_epoll_create
+255 i386 epoll_ctl sys_epoll_ctl
+256 i386 epoll_wait sys_epoll_wait
+257 i386 remap_file_pages sys_remap_file_pages
+258 i386 set_tid_address sys_set_tid_address
+259 i386 timer_create sys_timer_create compat_sys_timer_create
+260 i386 timer_settime sys_timer_settime32
+261 i386 timer_gettime sys_timer_gettime32
+262 i386 timer_getoverrun sys_timer_getoverrun
+263 i386 timer_delete sys_timer_delete
+264 i386 clock_settime sys_clock_settime32
+265 i386 clock_gettime sys_clock_gettime32
+266 i386 clock_getres sys_clock_getres_time32
+267 i386 clock_nanosleep sys_clock_nanosleep_time32
+268 i386 statfs64 sys_statfs64 compat_sys_statfs64
+269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+270 i386 tgkill sys_tgkill
+271 i386 utimes sys_utimes_time32
+272 i386 fadvise64_64 sys_ia32_fadvise64_64
+273 i386 vserver
+274 i386 mbind sys_mbind
+275 i386 get_mempolicy sys_get_mempolicy
+276 i386 set_mempolicy sys_set_mempolicy
+277 i386 mq_open sys_mq_open compat_sys_mq_open
+278 i386 mq_unlink sys_mq_unlink
+279 i386 mq_timedsend sys_mq_timedsend_time32
+280 i386 mq_timedreceive sys_mq_timedreceive_time32
+281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
+282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
+284 i386 waitid sys_waitid compat_sys_waitid
+# 285 sys_setaltroot
+286 i386 add_key sys_add_key
+287 i386 request_key sys_request_key
+288 i386 keyctl sys_keyctl compat_sys_keyctl
+289 i386 ioprio_set sys_ioprio_set
+290 i386 ioprio_get sys_ioprio_get
+291 i386 inotify_init sys_inotify_init
+292 i386 inotify_add_watch sys_inotify_add_watch
+293 i386 inotify_rm_watch sys_inotify_rm_watch
+294 i386 migrate_pages sys_migrate_pages
+295 i386 openat sys_openat compat_sys_openat
+296 i386 mkdirat sys_mkdirat
+297 i386 mknodat sys_mknodat
+298 i386 fchownat sys_fchownat
+299 i386 futimesat sys_futimesat_time32
+300 i386 fstatat64 sys_fstatat64 compat_sys_ia32_fstatat64
+301 i386 unlinkat sys_unlinkat
+302 i386 renameat sys_renameat
+303 i386 linkat sys_linkat
+304 i386 symlinkat sys_symlinkat
+305 i386 readlinkat sys_readlinkat
+306 i386 fchmodat sys_fchmodat
+307 i386 faccessat sys_faccessat
+308 i386 pselect6 sys_pselect6_time32 compat_sys_pselect6_time32
+309 i386 ppoll sys_ppoll_time32 compat_sys_ppoll_time32
+310 i386 unshare sys_unshare
+311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
+312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
+313 i386 splice sys_splice
+314 i386 sync_file_range sys_ia32_sync_file_range
+315 i386 tee sys_tee
+316 i386 vmsplice sys_vmsplice
+317 i386 move_pages sys_move_pages
+318 i386 getcpu sys_getcpu
+319 i386 epoll_pwait sys_epoll_pwait
+320 i386 utimensat sys_utimensat_time32
+321 i386 signalfd sys_signalfd compat_sys_signalfd
+322 i386 timerfd_create sys_timerfd_create
+323 i386 eventfd sys_eventfd
+324 i386 fallocate sys_ia32_fallocate
+325 i386 timerfd_settime sys_timerfd_settime32
+326 i386 timerfd_gettime sys_timerfd_gettime32
+327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
+328 i386 eventfd2 sys_eventfd2
+329 i386 epoll_create1 sys_epoll_create1
+330 i386 dup3 sys_dup3
+331 i386 pipe2 sys_pipe2
+332 i386 inotify_init1 sys_inotify_init1
+333 i386 preadv sys_preadv compat_sys_preadv
+334 i386 pwritev sys_pwritev compat_sys_pwritev
+335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+336 i386 perf_event_open sys_perf_event_open
+337 i386 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
+338 i386 fanotify_init sys_fanotify_init
+339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+340 i386 prlimit64 sys_prlimit64
+341 i386 name_to_handle_at sys_name_to_handle_at
+342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+343 i386 clock_adjtime sys_clock_adjtime32
+344 i386 syncfs sys_syncfs
+345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
+346 i386 setns sys_setns
+347 i386 process_vm_readv sys_process_vm_readv
+348 i386 process_vm_writev sys_process_vm_writev
+349 i386 kcmp sys_kcmp
+350 i386 finit_module sys_finit_module
+351 i386 sched_setattr sys_sched_setattr
+352 i386 sched_getattr sys_sched_getattr
+353 i386 renameat2 sys_renameat2
+354 i386 seccomp sys_seccomp
+355 i386 getrandom sys_getrandom
+356 i386 memfd_create sys_memfd_create
+357 i386 bpf sys_bpf
+358 i386 execveat sys_execveat compat_sys_execveat
+359 i386 socket sys_socket
+360 i386 socketpair sys_socketpair
+361 i386 bind sys_bind
+362 i386 connect sys_connect
+363 i386 listen sys_listen
+364 i386 accept4 sys_accept4
+365 i386 getsockopt sys_getsockopt sys_getsockopt
+366 i386 setsockopt sys_setsockopt sys_setsockopt
+367 i386 getsockname sys_getsockname
+368 i386 getpeername sys_getpeername
+369 i386 sendto sys_sendto
+370 i386 sendmsg sys_sendmsg compat_sys_sendmsg
+371 i386 recvfrom sys_recvfrom compat_sys_recvfrom
+372 i386 recvmsg sys_recvmsg compat_sys_recvmsg
+373 i386 shutdown sys_shutdown
+374 i386 userfaultfd sys_userfaultfd
+375 i386 membarrier sys_membarrier
+376 i386 mlock2 sys_mlock2
+377 i386 copy_file_range sys_copy_file_range
+378 i386 preadv2 sys_preadv2 compat_sys_preadv2
+379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
+380 i386 pkey_mprotect sys_pkey_mprotect
+381 i386 pkey_alloc sys_pkey_alloc
+382 i386 pkey_free sys_pkey_free
+383 i386 statx sys_statx
+384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
+385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
+386 i386 rseq sys_rseq
+393 i386 semget sys_semget
+394 i386 semctl sys_semctl compat_sys_semctl
+395 i386 shmget sys_shmget
+396 i386 shmctl sys_shmctl compat_sys_shmctl
+397 i386 shmat sys_shmat compat_sys_shmat
+398 i386 shmdt sys_shmdt
+399 i386 msgget sys_msgget
+400 i386 msgsnd sys_msgsnd compat_sys_msgsnd
+401 i386 msgrcv sys_msgrcv compat_sys_msgrcv
+402 i386 msgctl sys_msgctl compat_sys_msgctl
+403 i386 clock_gettime64 sys_clock_gettime
+404 i386 clock_settime64 sys_clock_settime
+405 i386 clock_adjtime64 sys_clock_adjtime
+406 i386 clock_getres_time64 sys_clock_getres
+407 i386 clock_nanosleep_time64 sys_clock_nanosleep
+408 i386 timer_gettime64 sys_timer_gettime
+409 i386 timer_settime64 sys_timer_settime
+410 i386 timerfd_gettime64 sys_timerfd_gettime
+411 i386 timerfd_settime64 sys_timerfd_settime
+412 i386 utimensat_time64 sys_utimensat
+413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
+414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
+416 i386 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
+417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
+418 i386 mq_timedsend_time64 sys_mq_timedsend
+419 i386 mq_timedreceive_time64 sys_mq_timedreceive
+420 i386 semtimedop_time64 sys_semtimedop
+421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+422 i386 futex_time64 sys_futex
+423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 i386 pidfd_send_signal sys_pidfd_send_signal
+425 i386 io_uring_setup sys_io_uring_setup
+426 i386 io_uring_enter sys_io_uring_enter
+427 i386 io_uring_register sys_io_uring_register
+428 i386 open_tree sys_open_tree
+429 i386 move_mount sys_move_mount
+430 i386 fsopen sys_fsopen
+431 i386 fsconfig sys_fsconfig
+432 i386 fsmount sys_fsmount
+433 i386 fspick sys_fspick
+434 i386 pidfd_open sys_pidfd_open
+435 i386 clone3 sys_clone3
+436 i386 close_range sys_close_range
+437 i386 openat2 sys_openat2
+438 i386 pidfd_getfd sys_pidfd_getfd
+439 i386 faccessat2 sys_faccessat2
+440 i386 process_madvise sys_process_madvise
+441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
+442 i386 mount_setattr sys_mount_setattr
+443 i386 quotactl_fd sys_quotactl_fd
+444 i386 landlock_create_ruleset sys_landlock_create_ruleset
+445 i386 landlock_add_rule sys_landlock_add_rule
+446 i386 landlock_restrict_self sys_landlock_restrict_self
+447 i386 memfd_secret sys_memfd_secret
+448 i386 process_mrelease sys_process_mrelease
+449 i386 futex_waitv sys_futex_waitv
+450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node
+451 i386 cachestat sys_cachestat
+452 i386 fchmodat2 sys_fchmodat2
+453 i386 map_shadow_stack sys_map_shadow_stack
+454 i386 futex_wake sys_futex_wake
+455 i386 futex_wait sys_futex_wait
+456 i386 futex_requeue sys_futex_requeue
+457 i386 statmount sys_statmount
+458 i386 listmount sys_listmount
+459 i386 lsm_get_self_attr sys_lsm_get_self_attr
+460 i386 lsm_set_self_attr sys_lsm_set_self_attr
+461 i386 lsm_list_modules sys_lsm_list_modules
+462 i386 mseal sys_mseal
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index a396f6e6ab5b..7093ee21c0d1 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -1,8 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# 64-bit system call numbers and entry vectors
#
# The format is:
-# <number> <abi> <name> <entry point>
+# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]]
#
# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
#
@@ -68,7 +69,7 @@
57 common fork sys_fork
58 common vfork sys_vfork
59 64 execve sys_execve
-60 common exit sys_exit
+60 common exit sys_exit - noreturn
61 common wait4 sys_wait4
62 common kill sys_kill
63 common uname sys_newuname
@@ -239,7 +240,7 @@
228 common clock_gettime sys_clock_gettime
229 common clock_getres sys_clock_getres
230 common clock_nanosleep sys_clock_nanosleep
-231 common exit_group sys_exit_group
+231 common exit_group sys_exit_group - noreturn
232 common epoll_wait sys_epoll_wait
233 common epoll_ctl sys_epoll_ctl
234 common tgkill sys_tgkill
@@ -343,6 +344,7 @@
332 common statx sys_statx
333 common io_pgetevents sys_io_pgetevents
334 common rseq sys_rseq
+335 common uretprobe sys_uretprobe
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
424 common pidfd_send_signal sys_pidfd_send_signal
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index e65b7dbe27fb..a0400707180c 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -15,7 +15,7 @@
#if defined(__x86_64__)
struct perf_event__synthesize_extra_kmaps_cb_args {
- struct perf_tool *tool;
+ const struct perf_tool *tool;
perf_event__handler_t process;
struct machine *machine;
union perf_event *event;
@@ -65,7 +65,7 @@ static int perf_event__synthesize_extra_kmaps_cb(struct map *map, void *data)
return 0;
}
-int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index b1ce0c52d88d..cebdd483149e 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
return 1;
}
+ /* Retire latency event should not be group leader*/
+ if (lhs->retire_lat && !rhs->retire_lat)
+ return 1;
+ if (!lhs->retire_lat && rhs->retire_lat)
+ return -1;
+
/* Default ordering by insertion index. */
return lhs->core.idx - rhs->core.idx;
}
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 34696f3d3d5d..85c8186300c8 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -434,7 +434,6 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
}
btsr->intel_bts_pmu = intel_bts_pmu;
- btsr->itr.pmu = intel_bts_pmu;
btsr->itr.recording_options = intel_bts_recording_options;
btsr->itr.info_priv_size = intel_bts_info_priv_size;
btsr->itr.info_fill = intel_bts_info_fill;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 4b710e875953..ea510a7486b1 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -1197,7 +1197,6 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
}
ptr->intel_pt_pmu = intel_pt_pmu;
- ptr->itr.pmu = intel_pt_pmu;
ptr->itr.recording_options = intel_pt_recording_options;
ptr->itr.info_priv_size = intel_pt_info_priv_size;
ptr->itr.info_fill = intel_pt_info_fill;
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index 7401ebbac100..9b333276cbdb 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -49,7 +49,7 @@ static const char *const bench_usage[] = {
static atomic_t event_count;
-static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+static int process_synthesized_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b10b7f005658..3dc6197ef3fa 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -221,7 +221,8 @@ static int process_branch_callback(struct evsel *evsel,
if (a.map != NULL)
dso__set_hit(map__dso(a.map));
- hist__account_cycles(sample->branch_stack, al, sample, false, NULL);
+ hist__account_cycles(sample->branch_stack, al, sample, false,
+ NULL, evsel);
ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
out:
@@ -279,7 +280,7 @@ static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample,
return ret;
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -396,10 +397,10 @@ static void print_annotate_item_stat(struct list_head *head, const char *title)
printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n\n", total,
total_good, 100.0 * total_good / (total ?: 1),
total_bad, 100.0 * total_bad / (total ?: 1));
- printf(" %-10s: %5s %5s\n", "Name", "Good", "Bad");
+ printf(" %-20s: %5s %5s\n", "Name/opcode", "Good", "Bad");
printf("-----------------------------------------------------------\n");
list_for_each_entry(istat, head, list)
- printf(" %-10s: %5d %5d\n", istat->name, istat->good, istat->bad);
+ printf(" %-20s: %5d %5d\n", istat->name, istat->good, istat->bad);
printf("\n");
}
@@ -632,13 +633,23 @@ static int __cmd_annotate(struct perf_annotate *ann)
evlist__for_each_entry(session->evlist, pos) {
struct hists *hists = evsel__hists(pos);
u32 nr_samples = hists->stats.nr_samples;
+ struct ui_progress prog;
+ struct evsel *evsel;
- if (nr_samples == 0)
+ if (!symbol_conf.event_group || !evsel__is_group_leader(pos))
continue;
- if (!symbol_conf.event_group || !evsel__is_group_leader(pos))
+ for_each_group_member(evsel, pos)
+ nr_samples += evsel__hists(evsel)->stats.nr_samples;
+
+ if (nr_samples == 0)
continue;
+ ui_progress__init(&prog, nr_samples,
+ "Sorting group events for output...");
+ evsel__output_resort(pos, &prog);
+ ui_progress__finish();
+
hists__find_annotations(hists, pos, ann);
}
@@ -686,28 +697,7 @@ static const char * const annotate_usage[] = {
int cmd_annotate(int argc, const char **argv)
{
- struct perf_annotate annotate = {
- .tool = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .exit = perf_event__process_exit,
- .fork = perf_event__process_fork,
- .namespaces = perf_event__process_namespaces,
- .attr = perf_event__process_attr,
- .build_id = perf_event__process_build_id,
-#ifdef HAVE_LIBTRACEEVENT
- .tracing_data = perf_event__process_tracing_data,
-#endif
- .id_index = perf_event__process_id_index,
- .auxtrace_info = perf_event__process_auxtrace_info,
- .auxtrace = perf_event__process_auxtrace,
- .feature = process_feature_event,
- .ordered_events = true,
- .ordering_requires_timestamps = true,
- },
- };
+ struct perf_annotate annotate = {};
struct perf_data data = {
.mode = PERF_DATA_MODE_READ,
};
@@ -795,6 +785,8 @@ int cmd_annotate(int argc, const char **argv)
"Show stats for the data type annotation"),
OPT_BOOLEAN(0, "insn-stat", &annotate.insn_stat,
"Show instruction stats for the data type annotation"),
+ OPT_BOOLEAN(0, "skip-empty", &symbol_conf.skip_empty,
+ "Do not display empty (or dummy) events in the output"),
OPT_END()
};
int ret;
@@ -864,6 +856,25 @@ int cmd_annotate(int argc, const char **argv)
data.path = input_name;
+ perf_tool__init(&annotate.tool, /*ordered_events=*/true);
+ annotate.tool.sample = process_sample_event;
+ annotate.tool.mmap = perf_event__process_mmap;
+ annotate.tool.mmap2 = perf_event__process_mmap2;
+ annotate.tool.comm = perf_event__process_comm;
+ annotate.tool.exit = perf_event__process_exit;
+ annotate.tool.fork = perf_event__process_fork;
+ annotate.tool.namespaces = perf_event__process_namespaces;
+ annotate.tool.attr = perf_event__process_attr;
+ annotate.tool.build_id = perf_event__process_build_id;
+#ifdef HAVE_LIBTRACEEVENT
+ annotate.tool.tracing_data = perf_event__process_tracing_data;
+#endif
+ annotate.tool.id_index = perf_event__process_id_index;
+ annotate.tool.auxtrace_info = perf_event__process_auxtrace_info;
+ annotate.tool.auxtrace = perf_event__process_auxtrace;
+ annotate.tool.feature = process_feature_event;
+ annotate.tool.ordering_requires_timestamps = true;
+
annotate.session = perf_session__new(&data, &annotate.tool);
if (IS_ERR(annotate.session))
return PTR_ERR(annotate.session);
@@ -916,11 +927,15 @@ int cmd_annotate(int argc, const char **argv)
sort_order = "dso,symbol";
/*
- * Set SORT_MODE__BRANCH so that annotate display IPC/Cycle
- * if branch info is in perf data in TUI mode.
+ * Set SORT_MODE__BRANCH so that annotate displays IPC/Cycle and
+ * branch counters, if the corresponding branch info is available
+ * in the perf data in the TUI mode.
*/
- if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack)
+ if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) {
sort__mode = SORT_MODE__BRANCH;
+ if (annotate.session->evlist->nr_br_cntr > 0)
+ annotate_opts.show_br_cntr = true;
+ }
if (setup_sorting(NULL) < 0)
usage_with_options(annotate_usage, options);
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 383d5de36ce4..52dfacaff8e3 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -89,6 +89,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
.mode = PERF_DATA_MODE_READ,
.force = force,
};
+ struct perf_tool build_id__mark_dso_hit_ops;
symbol__elf_init();
/*
@@ -97,6 +98,15 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
if (filename__fprintf_build_id(input_name, stdout) > 0)
goto out;
+ perf_tool__init(&build_id__mark_dso_hit_ops, /*ordered_events=*/true);
+ build_id__mark_dso_hit_ops.sample = build_id__mark_dso_hit;
+ build_id__mark_dso_hit_ops.mmap = perf_event__process_mmap;
+ build_id__mark_dso_hit_ops.mmap2 = perf_event__process_mmap2;
+ build_id__mark_dso_hit_ops.fork = perf_event__process_fork;
+ build_id__mark_dso_hit_ops.exit = perf_event__exit_del_thread;
+ build_id__mark_dso_hit_ops.attr = perf_event__process_attr;
+ build_id__mark_dso_hit_ops.build_id = perf_event__process_build_id;
+
session = perf_session__new(&data, &build_id__mark_dso_hit_ops);
if (IS_ERR(session))
return PTR_ERR(session);
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c157bd31f2e5..15e1fce71c72 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -273,7 +273,7 @@ static void compute_stats(struct c2c_hist_entry *c2c_he,
update_stats(&cstats->load, weight);
}
-static int process_sample_event(struct perf_tool *tool __maybe_unused,
+static int process_sample_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -385,24 +385,6 @@ free_mi:
goto out;
}
-static struct perf_c2c c2c = {
- .tool = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .exit = perf_event__process_exit,
- .fork = perf_event__process_fork,
- .lost = perf_event__process_lost,
- .attr = perf_event__process_attr,
- .auxtrace_info = perf_event__process_auxtrace_info,
- .auxtrace = perf_event__process_auxtrace,
- .auxtrace_error = perf_event__process_auxtrace_error,
- .ordered_events = true,
- .ordering_requires_timestamps = true,
- },
-};
-
static const char * const c2c_usage[] = {
"perf c2c {record|report}",
NULL
@@ -3070,6 +3052,19 @@ static int perf_c2c__report(int argc, const char **argv)
data.path = input_name;
data.force = symbol_conf.force;
+ perf_tool__init(&c2c.tool, /*ordered_events=*/true);
+ c2c.tool.sample = process_sample_event;
+ c2c.tool.mmap = perf_event__process_mmap;
+ c2c.tool.mmap2 = perf_event__process_mmap2;
+ c2c.tool.comm = perf_event__process_comm;
+ c2c.tool.exit = perf_event__process_exit;
+ c2c.tool.fork = perf_event__process_fork;
+ c2c.tool.lost = perf_event__process_lost;
+ c2c.tool.attr = perf_event__process_attr;
+ c2c.tool.auxtrace_info = perf_event__process_auxtrace_info;
+ c2c.tool.auxtrace = perf_event__process_auxtrace;
+ c2c.tool.auxtrace_error = perf_event__process_auxtrace_error;
+ c2c.tool.ordering_requires_timestamps = true;
session = perf_session__new(&data, &c2c.tool);
if (IS_ERR(session)) {
err = PTR_ERR(session);
@@ -3266,7 +3261,7 @@ static int perf_c2c__record(int argc, const char **argv)
return -1;
}
- if (perf_pmu__mem_events_init(pmu)) {
+ if (perf_pmu__mem_events_init()) {
pr_err("failed: memory events not supported\n");
return -1;
}
@@ -3290,19 +3285,15 @@ static int perf_c2c__record(int argc, const char **argv)
* PERF_MEM_EVENTS__LOAD_STORE if it is supported.
*/
if (e->tag) {
- e->record = true;
+ perf_mem_record[PERF_MEM_EVENTS__LOAD_STORE] = true;
rec_argv[i++] = "-W";
} else {
- e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
- e->record = true;
-
- e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__STORE);
- e->record = true;
+ perf_mem_record[PERF_MEM_EVENTS__LOAD] = true;
+ perf_mem_record[PERF_MEM_EVENTS__STORE] = true;
}
}
- e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
- if (e->record)
+ if (perf_mem_record[PERF_MEM_EVENTS__LOAD])
rec_argv[i++] = "-W";
rec_argv[i++] = "-d";
diff --git a/tools/perf/builtin-check.c b/tools/perf/builtin-check.c
new file mode 100644
index 000000000000..0b76b6e42b78
--- /dev/null
+++ b/tools/perf/builtin-check.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+#include "color.h"
+#include "util/debug.h"
+#include "util/header.h"
+#include <tools/config.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <subcmd/parse-options.h>
+
+static const char * const check_subcommands[] = { "feature", NULL };
+static struct option check_options[] = {
+ OPT_BOOLEAN('q', "quiet", &quiet, "do not show any warnings or messages"),
+ OPT_END()
+};
+static struct option check_feature_options[] = { OPT_PARENT(check_options) };
+
+static const char *check_usage[] = { NULL, NULL };
+static const char *check_feature_usage[] = {
+ "perf check feature <feature_list>",
+ NULL
+};
+
+struct feature_status supported_features[] = {
+ FEATURE_STATUS("aio", HAVE_AIO_SUPPORT),
+ FEATURE_STATUS("bpf", HAVE_LIBBPF_SUPPORT),
+ FEATURE_STATUS("bpf_skeletons", HAVE_BPF_SKEL),
+ FEATURE_STATUS("debuginfod", HAVE_DEBUGINFOD_SUPPORT),
+ FEATURE_STATUS("dwarf", HAVE_DWARF_SUPPORT),
+ FEATURE_STATUS("dwarf_getlocations", HAVE_DWARF_GETLOCATIONS_SUPPORT),
+ FEATURE_STATUS("dwarf-unwind", HAVE_DWARF_UNWIND_SUPPORT),
+ FEATURE_STATUS("auxtrace", HAVE_AUXTRACE_SUPPORT),
+ FEATURE_STATUS("libaudit", HAVE_LIBAUDIT_SUPPORT),
+ FEATURE_STATUS("libbfd", HAVE_LIBBFD_SUPPORT),
+ FEATURE_STATUS("libcapstone", HAVE_LIBCAPSTONE_SUPPORT),
+ FEATURE_STATUS("libcrypto", HAVE_LIBCRYPTO_SUPPORT),
+ FEATURE_STATUS("libdw-dwarf-unwind", HAVE_DWARF_SUPPORT),
+ FEATURE_STATUS("libelf", HAVE_LIBELF_SUPPORT),
+ FEATURE_STATUS("libnuma", HAVE_LIBNUMA_SUPPORT),
+ FEATURE_STATUS("libopencsd", HAVE_CSTRACE_SUPPORT),
+ FEATURE_STATUS("libperl", HAVE_LIBPERL_SUPPORT),
+ FEATURE_STATUS("libpfm4", HAVE_LIBPFM),
+ FEATURE_STATUS("libpython", HAVE_LIBPYTHON_SUPPORT),
+ FEATURE_STATUS("libslang", HAVE_SLANG_SUPPORT),
+ FEATURE_STATUS("libtraceevent", HAVE_LIBTRACEEVENT),
+ FEATURE_STATUS("libunwind", HAVE_LIBUNWIND_SUPPORT),
+ FEATURE_STATUS("lzma", HAVE_LZMA_SUPPORT),
+ FEATURE_STATUS("numa_num_possible_cpus", HAVE_LIBNUMA_SUPPORT),
+ FEATURE_STATUS("syscall_table", HAVE_SYSCALL_TABLE_SUPPORT),
+ FEATURE_STATUS("zlib", HAVE_ZLIB_SUPPORT),
+ FEATURE_STATUS("zstd", HAVE_ZSTD_SUPPORT),
+
+ /* this should remain at end, to know the array end */
+ FEATURE_STATUS(NULL, _)
+};
+
+static void on_off_print(const char *status)
+{
+ printf("[ ");
+
+ if (!strcmp(status, "OFF"))
+ color_fprintf(stdout, PERF_COLOR_RED, "%-3s", status);
+ else
+ color_fprintf(stdout, PERF_COLOR_GREEN, "%-3s", status);
+
+ printf(" ]");
+}
+
+/* Helper function to print status of a feature along with name/macro */
+static void status_print(const char *name, const char *macro,
+ const char *status)
+{
+ printf("%22s: ", name);
+ on_off_print(status);
+ printf(" # %s\n", macro);
+}
+
+#define STATUS(feature) \
+do { \
+ if (feature.is_builtin) \
+ status_print(feature.name, feature.macro, "on"); \
+ else \
+ status_print(feature.name, feature.macro, "OFF"); \
+} while (0)
+
+/**
+ * check whether "feature" is built-in with perf
+ *
+ * returns:
+ * 0: NOT built-in or Feature not known
+ * 1: Built-in
+ */
+static int has_support(const char *feature)
+{
+ for (int i = 0; supported_features[i].name; ++i) {
+ if ((strcasecmp(feature, supported_features[i].name) == 0) ||
+ (strcasecmp(feature, supported_features[i].macro) == 0)) {
+ if (!quiet)
+ STATUS(supported_features[i]);
+ return supported_features[i].is_builtin;
+ }
+ }
+
+ if (!quiet)
+ pr_err("Unknown feature '%s', please use 'perf version --build-options' to see which ones are available.\n", feature);
+
+ return 0;
+}
+
+
+/**
+ * Usage: 'perf check feature <feature_list>'
+ *
+ * <feature_list> can be a single feature name/macro, or a comma-separated list
+ * of feature names/macros
+ * eg. argument can be "libtraceevent" or "libtraceevent,bpf" etc
+ *
+ * In case of a comma-separated list, feature_enabled will be 1, only if
+ * all features passed in the string are supported
+ *
+ * Note that argv will get modified
+ */
+static int subcommand_feature(int argc, const char **argv)
+{
+ char *feature_list;
+ char *feature_name;
+ int feature_enabled;
+
+ argc = parse_options(argc, argv, check_feature_options,
+ check_feature_usage, 0);
+
+ if (!argc)
+ usage_with_options(check_feature_usage, check_feature_options);
+
+ if (argc > 1) {
+ pr_err("Too many arguments passed to 'perf check feature'\n");
+ return -1;
+ }
+
+ feature_enabled = 1;
+ /* feature_list is a non-const copy of 'argv[0]' */
+ feature_list = strdup(argv[0]);
+ if (!feature_list) {
+ pr_err("ERROR: failed to allocate memory for feature list\n");
+ return -1;
+ }
+
+ feature_name = strtok(feature_list, ",");
+
+ while (feature_name) {
+ feature_enabled &= has_support(feature_name);
+ feature_name = strtok(NULL, ",");
+ }
+
+ free(feature_list);
+
+ return !feature_enabled;
+}
+
+int cmd_check(int argc, const char **argv)
+{
+ argc = parse_options_subcommand(argc, argv, check_options,
+ check_subcommands, check_usage, 0);
+
+ if (!argc)
+ usage_with_options(check_usage, check_options);
+
+ if (strcmp(argv[0], "feature") == 0)
+ return subcommand_feature(argc, argv);
+
+ /* If no subcommand matched above, print usage help */
+ pr_err("Unknown subcommand: %s\n", argv[0]);
+ usage_with_options(check_usage, check_options);
+
+ /* free usage string allocated by parse_options_subcommand */
+ free((void *)check_usage[0]);
+
+ return 0;
+}
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index de76bbc50bfb..f0568431fbd5 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <internal/lib.h>
+#include <inttypes.h>
#include <subcmd/parse-options.h>
#include <api/fd/array.h>
#include <api/fs/fs.h>
@@ -688,9 +689,9 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
/* lock */
csv_sep, daemon->base, "lock");
- fprintf(out, "%c%lu",
+ fprintf(out, "%c%" PRIu64,
/* session up time */
- csv_sep, (curr - daemon->start) / 60);
+ csv_sep, (uint64_t)((curr - daemon->start) / 60));
fprintf(out, "\n");
} else {
@@ -700,8 +701,8 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
daemon->base, SESSION_OUTPUT);
fprintf(out, " lock: %s/lock\n",
daemon->base);
- fprintf(out, " up: %lu minutes\n",
- (curr - daemon->start) / 60);
+ fprintf(out, " up: %" PRIu64 " minutes\n",
+ (uint64_t)((curr - daemon->start) / 60));
}
}
@@ -727,9 +728,9 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
/* session ack */
csv_sep, session->base, SESSION_ACK);
- fprintf(out, "%c%lu",
+ fprintf(out, "%c%" PRIu64,
/* session up time */
- csv_sep, (curr - session->start) / 60);
+ csv_sep, (uint64_t)((curr - session->start) / 60));
fprintf(out, "\n");
} else {
@@ -745,8 +746,8 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
session->base, SESSION_CONTROL);
fprintf(out, " ack: %s/%s\n",
session->base, SESSION_ACK);
- fprintf(out, " up: %lu minutes\n",
- (curr - session->start) / 60);
+ fprintf(out, " up: %" PRIu64 " minutes\n",
+ (uint64_t)((curr - session->start) / 60));
}
}
@@ -1433,7 +1434,7 @@ static int __cmd_signal(struct daemon *daemon, struct option parent_options[],
}
memset(&cmd, 0, sizeof(cmd));
- cmd.signal.cmd = CMD_SIGNAL,
+ cmd.signal.cmd = CMD_SIGNAL;
cmd.signal.sig = SIGUSR2;
strncpy(cmd.signal.name, name, sizeof(cmd.signal.name) - 1);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 57d300d8e570..23326dd20333 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -388,7 +388,7 @@ struct hist_entry_ops block_hist_ops = {
.free = block_hist_free,
};
-static int diff__process_sample_event(struct perf_tool *tool,
+static int diff__process_sample_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -431,8 +431,8 @@ static int diff__process_sample_event(struct perf_tool *tool,
goto out;
}
- hist__account_cycles(sample->branch_stack, &al, sample, false,
- NULL);
+ hist__account_cycles(sample->branch_stack, &al, sample,
+ false, NULL, evsel);
break;
case COMPUTE_STREAM:
@@ -467,21 +467,7 @@ out:
return ret;
}
-static struct perf_diff pdiff = {
- .tool = {
- .sample = diff__process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .exit = perf_event__process_exit,
- .fork = perf_event__process_fork,
- .lost = perf_event__process_lost,
- .namespaces = perf_event__process_namespaces,
- .cgroup = perf_event__process_cgroup,
- .ordered_events = true,
- .ordering_requires_timestamps = true,
- },
-};
+static struct perf_diff pdiff;
static struct evsel *evsel_match(struct evsel *evsel,
struct evlist *evlist)
@@ -705,7 +691,7 @@ static void hists__precompute(struct hists *hists)
if (compute == COMPUTE_CYCLES) {
bh = container_of(he, struct block_hist, he);
init_block_hist(bh);
- block_info__process_sym(he, bh, NULL, 0);
+ block_info__process_sym(he, bh, NULL, 0, 0);
}
data__for_each_file_new(i, d) {
@@ -728,7 +714,7 @@ static void hists__precompute(struct hists *hists)
pair_bh = container_of(pair, struct block_hist,
he);
init_block_hist(pair_bh);
- block_info__process_sym(pair, pair_bh, NULL, 0);
+ block_info__process_sym(pair, pair_bh, NULL, 0, 0);
bh = container_of(he, struct block_hist, he);
@@ -1959,6 +1945,18 @@ int cmd_diff(int argc, const char **argv)
if (ret < 0)
return ret;
+ perf_tool__init(&pdiff.tool, /*ordered_events=*/true);
+ pdiff.tool.sample = diff__process_sample_event;
+ pdiff.tool.mmap = perf_event__process_mmap;
+ pdiff.tool.mmap2 = perf_event__process_mmap2;
+ pdiff.tool.comm = perf_event__process_comm;
+ pdiff.tool.exit = perf_event__process_exit;
+ pdiff.tool.fork = perf_event__process_fork;
+ pdiff.tool.lost = perf_event__process_lost;
+ pdiff.tool.namespaces = perf_event__process_namespaces;
+ pdiff.tool.cgroup = perf_event__process_cgroup;
+ pdiff.tool.ordering_requires_timestamps = true;
+
perf_config(diff__config, NULL);
argc = parse_options(argc, argv, options, diff_usage, 0);
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 7117656939e7..a9bd7bbef5a9 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -35,13 +35,13 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
.mode = PERF_DATA_MODE_READ,
.force = details->force,
};
- struct perf_tool tool = {
- /* only needed for pipe mode */
- .attr = perf_event__process_attr,
- .feature = process_header_feature,
- };
- bool has_tracepoint = false;
+ struct perf_tool tool;
+ bool has_tracepoint = false, has_group = false;
+ perf_tool__init(&tool, /*ordered_events=*/false);
+ /* only needed for pipe mode */
+ tool.attr = perf_event__process_attr;
+ tool.feature = process_header_feature;
session = perf_session__new(&data, &tool);
if (IS_ERR(session))
return PTR_ERR(session);
@@ -54,11 +54,17 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
if (pos->core.attr.type == PERF_TYPE_TRACEPOINT)
has_tracepoint = true;
+
+ if (!evsel__is_group_leader(pos))
+ has_group = true;
}
if (has_tracepoint && !details->trace_fields)
printf("# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events\n");
+ if (has_group && !details->event_group)
+ printf("# Tip: use 'perf evlist -g' to show group information\n");
+
perf_session__delete(session);
return 0;
}
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index eb30c8eca488..abcdc49b7a98 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -13,6 +13,7 @@
#include <signal.h>
#include <stdlib.h>
#include <fcntl.h>
+#include <inttypes.h>
#include <math.h>
#include <poll.h>
#include <ctype.h>
@@ -22,15 +23,18 @@
#include "debug.h"
#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
+#include <api/io.h>
#include <api/fs/tracing_path.h>
#include "evlist.h"
#include "target.h"
#include "cpumap.h"
+#include "hashmap.h"
#include "thread_map.h"
#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
#include "util/ftrace.h"
+#include "util/stat.h"
#include "util/units.h"
#include "util/parse-sublevel-options.h"
@@ -59,6 +63,41 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
done = true;
}
+static bool check_ftrace_capable(void)
+{
+ bool used_root;
+
+ if (perf_cap__capable(CAP_PERFMON, &used_root))
+ return true;
+
+ if (!used_root && perf_cap__capable(CAP_SYS_ADMIN, &used_root))
+ return true;
+
+ pr_err("ftrace only works for %s!\n",
+ used_root ? "root"
+ : "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
+ );
+ return false;
+}
+
+static bool is_ftrace_supported(void)
+{
+ char *file;
+ bool supported = false;
+
+ file = get_tracing_file("set_ftrace_pid");
+ if (!file) {
+ pr_debug("cannot get tracing file set_ftrace_pid\n");
+ return false;
+ }
+
+ if (!access(file, F_OK))
+ supported = true;
+
+ put_tracing_file(file);
+ return supported;
+}
+
static int __write_tracing_file(const char *name, const char *val, bool append)
{
char *file;
@@ -228,6 +267,7 @@ static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused)
write_tracing_option_file("funcgraph-irqs", "1");
write_tracing_option_file("funcgraph-proc", "0");
write_tracing_option_file("funcgraph-abstime", "0");
+ write_tracing_option_file("funcgraph-tail", "0");
write_tracing_option_file("latency-format", "0");
write_tracing_option_file("irq-info", "0");
}
@@ -464,6 +504,17 @@ static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace)
return 0;
}
+static int set_tracing_funcgraph_tail(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->graph_tail)
+ return 0;
+
+ if (write_tracing_option_file("funcgraph-tail", "1") < 0)
+ return -1;
+
+ return 0;
+}
+
static int set_tracing_thresh(struct perf_ftrace *ftrace)
{
int ret;
@@ -540,6 +591,11 @@ static int set_tracing_options(struct perf_ftrace *ftrace)
return -1;
}
+ if (set_tracing_funcgraph_tail(ftrace) < 0) {
+ pr_err("failed to set tracing option funcgraph-tail\n");
+ return -1;
+ }
+
return 0;
}
@@ -569,18 +625,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
.events = POLLIN,
};
- if (!(perf_cap__capable(CAP_PERFMON) ||
- perf_cap__capable(CAP_SYS_ADMIN))) {
- pr_err("ftrace only works for %s!\n",
-#ifdef HAVE_LIBCAP_SUPPORT
- "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
-#else
- "root"
-#endif
- );
- return -1;
- }
-
select_tracer(ftrace);
if (reset_tracing_files(ftrace) < 0) {
@@ -885,18 +929,6 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
};
int buckets[NUM_BUCKET] = { };
- if (!(perf_cap__capable(CAP_PERFMON) ||
- perf_cap__capable(CAP_SYS_ADMIN))) {
- pr_err("ftrace only works for %s!\n",
-#ifdef HAVE_LIBCAP_SUPPORT
- "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
-#else
- "root"
-#endif
- );
- return -1;
- }
-
trace_fd = prepare_func_latency(ftrace);
if (trace_fd < 0)
goto out;
@@ -950,6 +982,326 @@ out:
return (done && !workload_exec_errno) ? 0 : -1;
}
+static size_t profile_hash(long func, void *ctx __maybe_unused)
+{
+ return str_hash((char *)func);
+}
+
+static bool profile_equal(long func1, long func2, void *ctx __maybe_unused)
+{
+ return !strcmp((char *)func1, (char *)func2);
+}
+
+static int prepare_func_profile(struct perf_ftrace *ftrace)
+{
+ ftrace->tracer = "function_graph";
+ ftrace->graph_tail = 1;
+
+ ftrace->profile_hash = hashmap__new(profile_hash, profile_equal, NULL);
+ if (ftrace->profile_hash == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* This is saved in a hashmap keyed by the function name */
+struct ftrace_profile_data {
+ struct stats st;
+};
+
+static int add_func_duration(struct perf_ftrace *ftrace, char *func, double time_ns)
+{
+ struct ftrace_profile_data *prof = NULL;
+
+ if (!hashmap__find(ftrace->profile_hash, func, &prof)) {
+ char *key = strdup(func);
+
+ if (key == NULL)
+ return -ENOMEM;
+
+ prof = zalloc(sizeof(*prof));
+ if (prof == NULL) {
+ free(key);
+ return -ENOMEM;
+ }
+
+ init_stats(&prof->st);
+ hashmap__add(ftrace->profile_hash, key, prof);
+ }
+
+ update_stats(&prof->st, time_ns);
+ return 0;
+}
+
+/*
+ * The ftrace function_graph text output normally looks like below:
+ *
+ * CPU DURATION FUNCTION
+ *
+ * 0) | syscall_trace_enter.isra.0() {
+ * 0) | __audit_syscall_entry() {
+ * 0) | auditd_test_task() {
+ * 0) 0.271 us | __rcu_read_lock();
+ * 0) 0.275 us | __rcu_read_unlock();
+ * 0) 1.254 us | } /\* auditd_test_task *\/
+ * 0) 0.279 us | ktime_get_coarse_real_ts64();
+ * 0) 2.227 us | } /\* __audit_syscall_entry *\/
+ * 0) 2.713 us | } /\* syscall_trace_enter.isra.0 *\/
+ *
+ * Parse the line and get the duration and function name.
+ */
+static int parse_func_duration(struct perf_ftrace *ftrace, char *line, size_t len)
+{
+ char *p;
+ char *func;
+ double duration;
+
+ /* skip CPU */
+ p = strchr(line, ')');
+ if (p == NULL)
+ return 0;
+
+ /* get duration */
+ p = skip_spaces(p + 1);
+
+ /* no duration? */
+ if (p == NULL || *p == '|')
+ return 0;
+
+ /* skip markers like '*' or '!' for longer than ms */
+ if (!isdigit(*p))
+ p++;
+
+ duration = strtod(p, &p);
+
+ if (strncmp(p, " us", 3)) {
+ pr_debug("non-usec time found.. ignoring\n");
+ return 0;
+ }
+
+ /*
+ * profile stat keeps the max and min values as integer,
+ * convert to nsec time so that we can have accurate max.
+ */
+ duration *= 1000;
+
+ /* skip to the pipe */
+ while (p < line + len && *p != '|')
+ p++;
+
+ if (*p++ != '|')
+ return -EINVAL;
+
+ /* get function name */
+ func = skip_spaces(p);
+
+ /* skip the closing bracket and the start of comment */
+ if (*func == '}')
+ func += 5;
+
+ /* remove semi-colon or end of comment at the end */
+ p = line + len - 1;
+ while (!isalnum(*p) && *p != ']') {
+ *p = '\0';
+ --p;
+ }
+
+ return add_func_duration(ftrace, func, duration);
+}
+
+enum perf_ftrace_profile_sort_key {
+ PFP_SORT_TOTAL = 0,
+ PFP_SORT_AVG,
+ PFP_SORT_MAX,
+ PFP_SORT_COUNT,
+ PFP_SORT_NAME,
+};
+
+static enum perf_ftrace_profile_sort_key profile_sort = PFP_SORT_TOTAL;
+
+static int cmp_profile_data(const void *a, const void *b)
+{
+ const struct hashmap_entry *e1 = *(const struct hashmap_entry **)a;
+ const struct hashmap_entry *e2 = *(const struct hashmap_entry **)b;
+ struct ftrace_profile_data *p1 = e1->pvalue;
+ struct ftrace_profile_data *p2 = e2->pvalue;
+ double v1, v2;
+
+ switch (profile_sort) {
+ case PFP_SORT_NAME:
+ return strcmp(e1->pkey, e2->pkey);
+ case PFP_SORT_AVG:
+ v1 = p1->st.mean;
+ v2 = p2->st.mean;
+ break;
+ case PFP_SORT_MAX:
+ v1 = p1->st.max;
+ v2 = p2->st.max;
+ break;
+ case PFP_SORT_COUNT:
+ v1 = p1->st.n;
+ v2 = p2->st.n;
+ break;
+ case PFP_SORT_TOTAL:
+ default:
+ v1 = p1->st.n * p1->st.mean;
+ v2 = p2->st.n * p2->st.mean;
+ break;
+ }
+
+ if (v1 > v2)
+ return -1;
+ else
+ return 1;
+}
+
+static void print_profile_result(struct perf_ftrace *ftrace)
+{
+ struct hashmap_entry *entry, **profile;
+ size_t i, nr, bkt;
+
+ nr = hashmap__size(ftrace->profile_hash);
+ if (nr == 0)
+ return;
+
+ profile = calloc(nr, sizeof(*profile));
+ if (profile == NULL) {
+ pr_err("failed to allocate memory for the result\n");
+ return;
+ }
+
+ i = 0;
+ hashmap__for_each_entry(ftrace->profile_hash, entry, bkt)
+ profile[i++] = entry;
+
+ assert(i == nr);
+
+ //cmp_profile_data(profile[0], profile[1]);
+ qsort(profile, nr, sizeof(*profile), cmp_profile_data);
+
+ printf("# %10s %10s %10s %10s %s\n",
+ "Total (us)", "Avg (us)", "Max (us)", "Count", "Function");
+
+ for (i = 0; i < nr; i++) {
+ const char *name = profile[i]->pkey;
+ struct ftrace_profile_data *p = profile[i]->pvalue;
+
+ printf("%12.3f %10.3f %6"PRIu64".%03"PRIu64" %10.0f %s\n",
+ p->st.n * p->st.mean / 1000, p->st.mean / 1000,
+ p->st.max / 1000, p->st.max % 1000, p->st.n, name);
+ }
+
+ free(profile);
+
+ hashmap__for_each_entry(ftrace->profile_hash, entry, bkt) {
+ free((char *)entry->pkey);
+ free(entry->pvalue);
+ }
+
+ hashmap__free(ftrace->profile_hash);
+ ftrace->profile_hash = NULL;
+}
+
+static int __cmd_profile(struct perf_ftrace *ftrace)
+{
+ char *trace_file;
+ int trace_fd;
+ char buf[4096];
+ struct io io;
+ char *line = NULL;
+ size_t line_len = 0;
+
+ if (prepare_func_profile(ftrace) < 0) {
+ pr_err("failed to prepare func profiler\n");
+ goto out;
+ }
+
+ if (reset_tracing_files(ftrace) < 0) {
+ pr_err("failed to reset ftrace\n");
+ goto out;
+ }
+
+ /* reset ftrace buffer */
+ if (write_tracing_file("trace", "0") < 0)
+ goto out;
+
+ if (set_tracing_options(ftrace) < 0)
+ return -1;
+
+ if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
+ pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
+ goto out_reset;
+ }
+
+ setup_pager();
+
+ trace_file = get_tracing_file("trace_pipe");
+ if (!trace_file) {
+ pr_err("failed to open trace_pipe\n");
+ goto out_reset;
+ }
+
+ trace_fd = open(trace_file, O_RDONLY);
+
+ put_tracing_file(trace_file);
+
+ if (trace_fd < 0) {
+ pr_err("failed to open trace_pipe\n");
+ goto out_reset;
+ }
+
+ fcntl(trace_fd, F_SETFL, O_NONBLOCK);
+
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ goto out_close_fd;
+ }
+
+ evlist__start_workload(ftrace->evlist);
+
+ io__init(&io, trace_fd, buf, sizeof(buf));
+ io.timeout_ms = -1;
+
+ while (!done && !io.eof) {
+ if (io__getline(&io, &line, &line_len) < 0)
+ break;
+
+ if (parse_func_duration(ftrace, line, line_len) < 0)
+ break;
+ }
+
+ write_tracing_file("tracing_on", "0");
+
+ if (workload_exec_errno) {
+ const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
+ /* flush stdout first so below error msg appears at the end. */
+ fflush(stdout);
+ pr_err("workload failed: %s\n", emsg);
+ goto out_free_line;
+ }
+
+ /* read remaining buffer contents */
+ io.timeout_ms = 0;
+ while (!io.eof) {
+ if (io__getline(&io, &line, &line_len) < 0)
+ break;
+
+ if (parse_func_duration(ftrace, line, line_len) < 0)
+ break;
+ }
+
+ print_profile_result(ftrace);
+
+out_free_line:
+ free(line);
+out_close_fd:
+ close(trace_fd);
+out_reset:
+ reset_tracing_files(ftrace);
+out:
+ return (done && !workload_exec_errno) ? 0 : -1;
+}
+
static int perf_ftrace_config(const char *var, const char *value, void *cb)
{
struct perf_ftrace *ftrace = cb;
@@ -1099,6 +1451,7 @@ static int parse_graph_tracer_opts(const struct option *opt,
{ .name = "verbose", .value_ptr = &ftrace->graph_verbose },
{ .name = "thresh", .value_ptr = &ftrace->graph_thresh },
{ .name = "depth", .value_ptr = &ftrace->graph_depth },
+ { .name = "tail", .value_ptr = &ftrace->graph_tail },
{ .name = NULL, }
};
@@ -1112,10 +1465,35 @@ static int parse_graph_tracer_opts(const struct option *opt,
return 0;
}
+static int parse_sort_key(const struct option *opt, const char *str, int unset)
+{
+ enum perf_ftrace_profile_sort_key *key = (void *)opt->value;
+
+ if (unset)
+ return 0;
+
+ if (!strcmp(str, "total"))
+ *key = PFP_SORT_TOTAL;
+ else if (!strcmp(str, "avg"))
+ *key = PFP_SORT_AVG;
+ else if (!strcmp(str, "max"))
+ *key = PFP_SORT_MAX;
+ else if (!strcmp(str, "count"))
+ *key = PFP_SORT_COUNT;
+ else if (!strcmp(str, "name"))
+ *key = PFP_SORT_NAME;
+ else {
+ pr_err("Unknown sort key: %s\n", str);
+ return -1;
+ }
+ return 0;
+}
+
enum perf_ftrace_subcommand {
PERF_FTRACE_NONE,
PERF_FTRACE_TRACE,
PERF_FTRACE_LATENCY,
+ PERF_FTRACE_PROFILE,
};
int cmd_ftrace(int argc, const char **argv)
@@ -1181,13 +1559,31 @@ int cmd_ftrace(int argc, const char **argv)
"Use nano-second histogram"),
OPT_PARENT(common_options),
};
+ const struct option profile_options[] = {
+ OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+ "Trace given functions using function tracer",
+ parse_filter_func),
+ OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
+ "Do not trace given functions", parse_filter_func),
+ OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
+ "Trace given functions using function_graph tracer",
+ parse_filter_func),
+ OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
+ "Set nograph filter on given functions", parse_filter_func),
+ OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
+ "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
+ OPT_CALLBACK('s', "sort", &profile_sort, "key",
+ "Sort result by key: total (default), avg, max, count, name.",
+ parse_sort_key),
+ OPT_PARENT(common_options),
+ };
const struct option *options = ftrace_options;
const char * const ftrace_usage[] = {
"perf ftrace [<options>] [<command>]",
"perf ftrace [<options>] -- [<command>] [<options>]",
- "perf ftrace {trace|latency} [<options>] [<command>]",
- "perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
+ "perf ftrace {trace|latency|profile} [<options>] [<command>]",
+ "perf ftrace {trace|latency|profile} [<options>] -- [<command>] [<options>]",
NULL
};
enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
@@ -1202,6 +1598,14 @@ int cmd_ftrace(int argc, const char **argv)
signal(SIGCHLD, sig_handler);
signal(SIGPIPE, sig_handler);
+ if (!check_ftrace_capable())
+ return -1;
+
+ if (!is_ftrace_supported()) {
+ pr_err("ftrace is not supported on this system\n");
+ return -ENOTSUP;
+ }
+
ret = perf_config(perf_ftrace_config, &ftrace);
if (ret < 0)
return -1;
@@ -1212,6 +1616,9 @@ int cmd_ftrace(int argc, const char **argv)
} else if (!strcmp(argv[1], "latency")) {
subcmd = PERF_FTRACE_LATENCY;
options = latency_options;
+ } else if (!strcmp(argv[1], "profile")) {
+ subcmd = PERF_FTRACE_PROFILE;
+ options = profile_options;
}
if (subcmd != PERF_FTRACE_NONE) {
@@ -1247,6 +1654,9 @@ int cmd_ftrace(int argc, const char **argv)
}
cmd_func = __cmd_latency;
break;
+ case PERF_FTRACE_PROFILE:
+ cmd_func = __cmd_profile;
+ break;
case PERF_FTRACE_NONE:
default:
pr_err("Invalid subcommand\n");
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index b2a368ae295a..0854d3cd9f6a 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -417,7 +417,7 @@ static void open_html(const char *path)
static int show_html_page(const char *perf_cmd)
{
const char *page = cmd_to_page(perf_cmd);
- char *page_path; /* it leaks but we exec bellow */
+ char *page_path; /* it leaks but we exec below */
if (get_html_page_path(&page_path, page) < 0)
return -1;
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index a212678d47be..d6989195a061 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -103,18 +103,24 @@ struct guest_session {
struct guest_event ev;
};
+enum build_id_rewrite_style {
+ BID_RWS__NONE = 0,
+ BID_RWS__INJECT_HEADER_LAZY,
+ BID_RWS__INJECT_HEADER_ALL,
+ BID_RWS__MMAP2_BUILDID_ALL,
+ BID_RWS__MMAP2_BUILDID_LAZY,
+};
+
struct perf_inject {
struct perf_tool tool;
struct perf_session *session;
- bool build_ids;
- bool build_id_all;
+ enum build_id_rewrite_style build_id_style;
bool sched_stat;
bool have_auxtrace;
bool strip;
bool jit_mode;
bool in_place_update;
bool in_place_update_dry_run;
- bool is_pipe;
bool copy_kcore_dir;
const char *input_name;
struct perf_data output;
@@ -126,6 +132,7 @@ struct perf_inject {
struct perf_file_section secs[HEADER_FEAT_BITS];
struct guest_session guest_session;
struct strlist *known_build_ids;
+ const struct evsel *mmap_evsel;
};
struct event_entry {
@@ -134,8 +141,23 @@ struct event_entry {
union perf_event event[];
};
-static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
- struct machine *machine, u8 cpumode, u32 flags);
+static int tool__inject_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ const struct evsel *evsel,
+ __u16 misc,
+ const char *filename,
+ struct dso *dso, u32 flags);
+static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ const struct evsel *evsel,
+ __u16 misc,
+ __u32 pid, __u32 tid,
+ __u64 start, __u64 len, __u64 pgoff,
+ struct dso *dso,
+ __u32 prot, __u32 flags,
+ const char *filename);
static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
@@ -149,8 +171,9 @@ static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
return 0;
}
-static int perf_event__repipe_synth(struct perf_tool *tool,
+static int perf_event__repipe_synth(const struct perf_tool *tool,
union perf_event *event)
+
{
struct perf_inject *inject = container_of(tool, struct perf_inject,
tool);
@@ -158,7 +181,7 @@ static int perf_event__repipe_synth(struct perf_tool *tool,
return output_bytes(inject, event, event->header.size);
}
-static int perf_event__repipe_oe_synth(struct perf_tool *tool,
+static int perf_event__repipe_oe_synth(const struct perf_tool *tool,
union perf_event *event,
struct ordered_events *oe __maybe_unused)
{
@@ -166,7 +189,7 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool,
}
#ifdef HAVE_JITDUMP
-static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
+static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct ordered_events *oe __maybe_unused)
{
@@ -188,7 +211,7 @@ static int perf_event__repipe_op4_synth(struct perf_session *session,
return perf_event__repipe_synth(session->tool, event);
}
-static int perf_event__repipe_attr(struct perf_tool *tool,
+static int perf_event__repipe_attr(const struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist)
{
@@ -200,13 +223,14 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
if (ret)
return ret;
- if (!inject->is_pipe)
+ /* If the output isn't a pipe then the attributes will be written as part of the header. */
+ if (!inject->output.is_pipe)
return 0;
return perf_event__repipe_synth(tool, event);
}
-static int perf_event__repipe_event_update(struct perf_tool *tool,
+static int perf_event__repipe_event_update(const struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist __maybe_unused)
{
@@ -237,7 +261,7 @@ static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t
static s64 perf_event__repipe_auxtrace(struct perf_session *session,
union perf_event *event)
{
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct perf_inject *inject = container_of(tool, struct perf_inject,
tool);
int ret;
@@ -284,7 +308,7 @@ perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
#endif
-static int perf_event__repipe(struct perf_tool *tool,
+static int perf_event__repipe(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -292,7 +316,7 @@ static int perf_event__repipe(struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
-static int perf_event__drop(struct perf_tool *tool __maybe_unused,
+static int perf_event__drop(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -300,7 +324,7 @@ static int perf_event__drop(struct perf_tool *tool __maybe_unused,
return 0;
}
-static int perf_event__drop_aux(struct perf_tool *tool,
+static int perf_event__drop_aux(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct machine *machine __maybe_unused)
@@ -341,13 +365,13 @@ perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
return ev;
}
-typedef int (*inject_handler)(struct perf_tool *tool,
+typedef int (*inject_handler)(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
struct machine *machine);
-static int perf_event__repipe_sample(struct perf_tool *tool,
+static int perf_event__repipe_sample(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -372,46 +396,8 @@ static int perf_event__repipe_sample(struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
-static int perf_event__repipe_mmap(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
-{
- int err;
-
- err = perf_event__process_mmap(tool, event, sample, machine);
- perf_event__repipe(tool, event, sample, machine);
-
- return err;
-}
-
-#ifdef HAVE_JITDUMP
-static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
-{
- struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
- u64 n = 0;
- int ret;
-
- /*
- * if jit marker, then inject jit mmaps and generate ELF images
- */
- ret = jit_process(inject->session, &inject->output, machine,
- event->mmap.filename, event->mmap.pid, event->mmap.tid, &n);
- if (ret < 0)
- return ret;
- if (ret) {
- inject->bytes_written += n;
- return 0;
- }
- return perf_event__repipe_mmap(tool, event, sample, machine);
-}
-#endif
-
static struct dso *findnew_dso(int pid, int tid, const char *filename,
- struct dso_id *id, struct machine *machine)
+ const struct dso_id *id, struct machine *machine)
{
struct thread *thread;
struct nsinfo *nsi = NULL;
@@ -455,117 +441,173 @@ static struct dso *findnew_dso(int pid, int tid, const char *filename,
return dso;
}
-static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
+/*
+ * The evsel used for the sample ID for mmap events. Typically stashed when
+ * processing mmap events. If not stashed, search the evlist for the first mmap
+ * gathering event.
+ */
+static const struct evsel *inject__mmap_evsel(struct perf_inject *inject)
{
- struct dso *dso;
+ struct evsel *pos;
- dso = findnew_dso(event->mmap.pid, event->mmap.tid,
- event->mmap.filename, NULL, machine);
+ if (inject->mmap_evsel)
+ return inject->mmap_evsel;
- if (dso && !dso__hit(dso)) {
- dso__set_hit(dso);
- dso__inject_build_id(dso, tool, machine, sample->cpumode, 0);
+ evlist__for_each_entry(inject->session->evlist, pos) {
+ if (pos->core.attr.mmap) {
+ inject->mmap_evsel = pos;
+ return pos;
+ }
}
- dso__put(dso);
-
- return perf_event__repipe(tool, event, sample, machine);
+ pr_err("No mmap events found\n");
+ return NULL;
}
-static int perf_event__repipe_mmap2(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
+static int perf_event__repipe_common_mmap(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ __u32 pid, __u32 tid,
+ __u64 start, __u64 len, __u64 pgoff,
+ __u32 flags, __u32 prot,
+ const char *filename,
+ const struct dso_id *dso_id,
+ int (*perf_event_process)(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine))
{
- int err;
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ struct dso *dso = NULL;
+ bool dso_sought = false;
- err = perf_event__process_mmap2(tool, event, sample, machine);
- perf_event__repipe(tool, event, sample, machine);
+#ifdef HAVE_JITDUMP
+ if (inject->jit_mode) {
+ u64 n = 0;
+ int ret;
+ /* If jit marker, then inject jit mmaps and generate ELF images. */
+ ret = jit_process(inject->session, &inject->output, machine,
+ filename, pid, tid, &n);
+ if (ret < 0)
+ return ret;
+ if (ret) {
+ inject->bytes_written += n;
+ return 0;
+ }
+ }
+#endif
if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
- struct dso *dso;
-
- dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
- event->mmap2.filename, NULL, machine);
+ dso = findnew_dso(pid, tid, filename, dso_id, machine);
+ dso_sought = true;
if (dso) {
/* mark it not to inject build-id */
dso__set_hit(dso);
}
- dso__put(dso);
}
+ if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) {
+ if (!dso_sought) {
+ dso = findnew_dso(pid, tid, filename, dso_id, machine);
+ dso_sought = true;
+ }
- return err;
-}
+ if (dso && !dso__hit(dso)) {
+ struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);
-#ifdef HAVE_JITDUMP
-static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
-{
- struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
- u64 n = 0;
- int ret;
+ if (evsel) {
+ dso__set_hit(dso);
+ tool__inject_build_id(tool, sample, machine, evsel,
+ /*misc=*/sample->cpumode,
+ filename, dso, flags);
+ }
+ }
+ } else {
+ int err;
- /*
- * if jit marker, then inject jit mmaps and generate ELF images
- */
- ret = jit_process(inject->session, &inject->output, machine,
- event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n);
- if (ret < 0)
- return ret;
- if (ret) {
- inject->bytes_written += n;
- return 0;
- }
- return perf_event__repipe_mmap2(tool, event, sample, machine);
-}
-#endif
+ /*
+ * Remember the evsel for lazy build id generation. It is used
+ * for the sample id header type.
+ */
+ if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
+ inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) &&
+ !inject->mmap_evsel)
+ inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event);
-static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
-{
- struct dso_id dso_id = {
- .maj = event->mmap2.maj,
- .min = event->mmap2.min,
- .ino = event->mmap2.ino,
- .ino_generation = event->mmap2.ino_generation,
- };
- struct dso *dso;
+ /* Create the thread, map, etc. Not done for the unordered inject all case. */
+ err = perf_event_process(tool, event, sample, machine);
- if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
- /* cannot use dso_id since it'd have invalid info */
- dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
- event->mmap2.filename, NULL, machine);
- if (dso) {
- /* mark it not to inject build-id */
- dso__set_hit(dso);
+ if (err) {
+ dso__put(dso);
+ return err;
}
- dso__put(dso);
- perf_event__repipe(tool, event, sample, machine);
- return 0;
}
+ if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) &&
+ !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
+ struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);
- dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
- event->mmap2.filename, &dso_id, machine);
-
- if (dso && !dso__hit(dso)) {
- dso__set_hit(dso);
- dso__inject_build_id(dso, tool, machine, sample->cpumode,
- event->mmap2.flags);
+ if (evsel && !dso_sought) {
+ dso = findnew_dso(pid, tid, filename, dso_id, machine);
+ dso_sought = true;
+ }
+ if (evsel && dso &&
+ !tool__inject_mmap2_build_id(tool, sample, machine, evsel,
+ sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID,
+ pid, tid, start, len, pgoff,
+ dso,
+ prot, flags,
+ filename)) {
+ /* Injected mmap2 so no need to repipe. */
+ dso__put(dso);
+ return 0;
+ }
}
dso__put(dso);
+ if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY)
+ return 0;
- perf_event__repipe(tool, event, sample, machine);
+ return perf_event__repipe(tool, event, sample, machine);
+}
- return 0;
+static int perf_event__repipe_mmap(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return perf_event__repipe_common_mmap(
+ tool, event, sample, machine,
+ event->mmap.pid, event->mmap.tid,
+ event->mmap.start, event->mmap.len, event->mmap.pgoff,
+ /*flags=*/0, PROT_EXEC,
+ event->mmap.filename, /*dso_id=*/NULL,
+ perf_event__process_mmap);
}
-static int perf_event__repipe_fork(struct perf_tool *tool,
+static int perf_event__repipe_mmap2(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct dso_id id;
+ struct dso_id *dso_id = NULL;
+
+ if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
+ id.maj = event->mmap2.maj;
+ id.min = event->mmap2.min;
+ id.ino = event->mmap2.ino;
+ id.ino_generation = event->mmap2.ino_generation;
+ dso_id = &id;
+ }
+
+ return perf_event__repipe_common_mmap(
+ tool, event, sample, machine,
+ event->mmap2.pid, event->mmap2.tid,
+ event->mmap2.start, event->mmap2.len, event->mmap2.pgoff,
+ event->mmap2.flags, event->mmap2.prot,
+ event->mmap2.filename, dso_id,
+ perf_event__process_mmap2);
+}
+
+static int perf_event__repipe_fork(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -578,7 +620,7 @@ static int perf_event__repipe_fork(struct perf_tool *tool,
return err;
}
-static int perf_event__repipe_comm(struct perf_tool *tool,
+static int perf_event__repipe_comm(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -591,7 +633,7 @@ static int perf_event__repipe_comm(struct perf_tool *tool,
return err;
}
-static int perf_event__repipe_namespaces(struct perf_tool *tool,
+static int perf_event__repipe_namespaces(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -603,7 +645,7 @@ static int perf_event__repipe_namespaces(struct perf_tool *tool,
return err;
}
-static int perf_event__repipe_exit(struct perf_tool *tool,
+static int perf_event__repipe_exit(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -712,16 +754,20 @@ static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
return false;
}
-static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
- struct machine *machine, u8 cpumode, u32 flags)
+static int tool__inject_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ const struct evsel *evsel,
+ __u16 misc,
+ const char *filename,
+ struct dso *dso, u32 flags)
{
- struct perf_inject *inject = container_of(tool, struct perf_inject,
- tool);
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
int err;
- if (is_anon_memory(dso__long_name(dso)) || flags & MAP_HUGETLB)
+ if (is_anon_memory(filename) || flags & MAP_HUGETLB)
return 0;
- if (is_no_dso_memory(dso__long_name(dso)))
+ if (is_no_dso_memory(filename))
return 0;
if (inject->known_build_ids != NULL &&
@@ -729,27 +775,158 @@ static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
return 1;
if (dso__read_build_id(dso) < 0) {
- pr_debug("no build_id found for %s\n", dso__long_name(dso));
+ pr_debug("no build_id found for %s\n", filename);
return -1;
}
- err = perf_event__synthesize_build_id(tool, dso, cpumode,
- perf_event__repipe, machine);
+ err = perf_event__synthesize_build_id(tool, sample, machine,
+ perf_event__repipe,
+ evsel, misc, dso__bid(dso),
+ filename);
if (err) {
- pr_err("Can't synthesize build_id event for %s\n", dso__long_name(dso));
+ pr_err("Can't synthesize build_id event for %s\n", filename);
return -1;
}
return 0;
}
-int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
+static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ const struct evsel *evsel,
+ __u16 misc,
+ __u32 pid, __u32 tid,
+ __u64 start, __u64 len, __u64 pgoff,
+ struct dso *dso,
+ __u32 prot, __u32 flags,
+ const char *filename)
+{
+ int err;
+
+ /* Return to repipe anonymous maps. */
+ if (is_anon_memory(filename) || flags & MAP_HUGETLB)
+ return 1;
+ if (is_no_dso_memory(filename))
+ return 1;
+
+ if (dso__read_build_id(dso)) {
+ pr_debug("no build_id found for %s\n", filename);
+ return -1;
+ }
+
+ err = perf_event__synthesize_mmap2_build_id(tool, sample, machine,
+ perf_event__repipe,
+ evsel,
+ misc, pid, tid,
+ start, len, pgoff,
+ dso__bid(dso),
+ prot, flags,
+ filename);
+ if (err) {
+ pr_err("Can't synthesize build_id event for %s\n", filename);
+ return -1;
+ }
+ return 0;
+}
+
+static int mark_dso_hit(const struct perf_inject *inject,
+ const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ const struct evsel *mmap_evsel,
+ struct map *map, bool sample_in_dso)
+{
+ struct dso *dso;
+ u16 misc = sample->cpumode;
+
+ if (!map)
+ return 0;
+
+ if (!sample_in_dso) {
+ u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL |
+ PERF_RECORD_MISC_GUEST_USER;
+
+ if ((misc & guest_mask) != 0) {
+ misc &= PERF_RECORD_MISC_HYPERVISOR;
+ misc |= __map__is_kernel(map)
+ ? PERF_RECORD_MISC_GUEST_KERNEL
+ : PERF_RECORD_MISC_GUEST_USER;
+ } else {
+ misc &= PERF_RECORD_MISC_HYPERVISOR;
+ misc |= __map__is_kernel(map)
+ ? PERF_RECORD_MISC_KERNEL
+ : PERF_RECORD_MISC_USER;
+ }
+ }
+ dso = map__dso(map);
+ if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) {
+ if (dso && !dso__hit(dso)) {
+ dso__set_hit(dso);
+ tool__inject_build_id(tool, sample, machine,
+ mmap_evsel, misc, dso__long_name(dso), dso,
+ map__flags(map));
+ }
+ } else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
+ if (!map__hit(map)) {
+ const struct build_id null_bid = { .size = 0 };
+ const struct build_id *bid = dso ? dso__bid(dso) : &null_bid;
+ const char *filename = dso ? dso__long_name(dso) : "";
+
+ map__set_hit(map);
+ perf_event__synthesize_mmap2_build_id(tool, sample, machine,
+ perf_event__repipe,
+ mmap_evsel,
+ misc,
+ sample->pid, sample->tid,
+ map__start(map),
+ map__end(map) - map__start(map),
+ map__pgoff(map),
+ bid,
+ map__prot(map),
+ map__flags(map),
+ filename);
+ }
+ }
+ return 0;
+}
+
+struct mark_dso_hit_args {
+ const struct perf_inject *inject;
+ const struct perf_tool *tool;
+ struct perf_sample *sample;
+ struct machine *machine;
+ const struct evsel *mmap_evsel;
+};
+
+static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data)
+{
+ struct mark_dso_hit_args *args = data;
+ struct map *map = node->ms.map;
+
+ return mark_dso_hit(args->inject, args->tool, args->sample, args->machine,
+ args->mmap_evsel, map, /*sample_in_dso=*/false);
+}
+
+int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel __maybe_unused,
struct machine *machine)
{
struct addr_location al;
struct thread *thread;
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ struct mark_dso_hit_args args = {
+ .inject = inject,
+ .tool = tool,
+ /*
+ * Use the parsed sample data of the sample event, which will
+ * have a later timestamp than the mmap event.
+ */
+ .sample = sample,
+ .machine = machine,
+ .mmap_evsel = inject__mmap_evsel(inject),
+ };
addr_location__init(&al);
thread = machine__findnew_thread(machine, sample->pid, sample->tid);
@@ -760,15 +937,13 @@ int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
}
if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
- struct dso *dso = map__dso(al.map);
-
- if (!dso__hit(dso)) {
- dso__set_hit(dso);
- dso__inject_build_id(dso, tool, machine,
- sample->cpumode, map__flags(al.map));
- }
+ mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map,
+ /*sample_in_dso=*/true);
}
+ sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
+ /*symbols=*/false, mark_dso_hit_callback, &args);
+
thread__put(thread);
repipe:
perf_event__repipe(tool, event, sample, machine);
@@ -776,7 +951,7 @@ repipe:
return 0;
}
-static int perf_inject__sched_process_exit(struct perf_tool *tool,
+static int perf_inject__sched_process_exit(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct evsel *evsel __maybe_unused,
@@ -796,7 +971,7 @@ static int perf_inject__sched_process_exit(struct perf_tool *tool,
return 0;
}
-static int perf_inject__sched_switch(struct perf_tool *tool,
+static int perf_inject__sched_switch(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -821,7 +996,7 @@ static int perf_inject__sched_switch(struct perf_tool *tool,
}
#ifdef HAVE_LIBTRACEEVENT
-static int perf_inject__sched_stat(struct perf_tool *tool,
+static int perf_inject__sched_stat(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct evsel *evsel,
@@ -866,7 +1041,7 @@ static int guest_session__output_bytes(struct guest_session *gs, void *buf, size
return ret < 0 ? ret : 0;
}
-static int guest_session__repipe(struct perf_tool *tool,
+static int guest_session__repipe(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -1032,7 +1207,7 @@ static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 i
return NULL;
}
-static int process_attr(struct perf_tool *tool, union perf_event *event,
+static int process_attr(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
{
@@ -1160,7 +1335,7 @@ static u64 evlist__first_id(struct evlist *evlist)
return 0;
}
-static int process_build_id(struct perf_tool *tool,
+static int process_build_id(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -1173,17 +1348,27 @@ static int process_build_id(struct perf_tool *tool,
static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
{
struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
- u8 cpumode = dso__is_in_kernel_space(dso) ?
- PERF_RECORD_MISC_GUEST_KERNEL :
- PERF_RECORD_MISC_GUEST_USER;
+ struct perf_sample synth_sample = {
+ .pid = -1,
+ .tid = -1,
+ .time = -1,
+ .stream_id = -1,
+ .cpu = -1,
+ .period = 1,
+ .cpumode = dso__is_in_kernel_space(dso)
+ ? PERF_RECORD_MISC_GUEST_KERNEL
+ : PERF_RECORD_MISC_GUEST_USER,
+ };
if (!machine)
return -ENOMEM;
dso__set_hit(dso);
- return perf_event__synthesize_build_id(&inject->tool, dso, cpumode,
- process_build_id, machine);
+ return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine,
+ process_build_id, inject__mmap_evsel(inject),
+ /*misc=*/synth_sample.cpumode,
+ dso__bid(dso), dso__long_name(dso));
}
static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
@@ -1210,7 +1395,7 @@ static int guest_session__add_build_ids(struct guest_session *gs)
gs);
}
-static int guest_session__ksymbol_event(struct perf_tool *tool,
+static int guest_session__ksymbol_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -1574,7 +1759,7 @@ static int guest_session__flush_events(struct guest_session *gs)
return guest_session__inject_events(gs, -1);
}
-static int host__repipe(struct perf_tool *tool,
+static int host__repipe(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -1647,7 +1832,7 @@ static int host__finished_init(struct perf_session *session, union perf_event *e
* guest events up to the same time. Finally write out the FINISHED_ROUND event
* itself.
*/
-static int host__finished_round(struct perf_tool *tool,
+static int host__finished_round(const struct perf_tool *tool,
union perf_event *event,
struct ordered_events *oe)
{
@@ -1665,7 +1850,7 @@ static int host__finished_round(struct perf_tool *tool,
return perf_event__repipe_oe_synth(tool, event, oe);
}
-static int host__context_switch(struct perf_tool *tool,
+static int host__context_switch(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -1719,7 +1904,7 @@ static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *
return 0;
}
-static int drop_sample(struct perf_tool *tool __maybe_unused,
+static int drop_sample(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct evsel *evsel __maybe_unused,
@@ -1980,12 +2165,18 @@ static int __cmd_inject(struct perf_inject *inject)
struct guest_session *gs = &inject->guest_session;
struct perf_session *session = inject->session;
int fd = output_fd(inject);
- u64 output_data_offset;
+ u64 output_data_offset = perf_session__data_offset(session->evlist);
+ /*
+ * Pipe input hasn't loaded the attributes and will handle them as
+ * events. So that the attributes don't overlap the data, write the
+ * attributes after the data.
+ */
+ bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe;
signal(SIGINT, sig_handler);
- if (inject->build_ids || inject->sched_stat ||
- inject->itrace_synth_opts.set || inject->build_id_all) {
+ if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat ||
+ inject->itrace_synth_opts.set) {
inject->tool.mmap = perf_event__repipe_mmap;
inject->tool.mmap2 = perf_event__repipe_mmap2;
inject->tool.fork = perf_event__repipe_fork;
@@ -1994,12 +2185,8 @@ static int __cmd_inject(struct perf_inject *inject)
#endif
}
- output_data_offset = perf_session__data_offset(session->evlist);
-
- if (inject->build_id_all) {
- inject->tool.mmap = perf_event__repipe_buildid_mmap;
- inject->tool.mmap2 = perf_event__repipe_buildid_mmap2;
- } else if (inject->build_ids) {
+ if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
+ inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
inject->tool.sample = perf_event__inject_buildid;
} else if (inject->sched_stat) {
struct evsel *evsel;
@@ -2069,7 +2256,7 @@ static int __cmd_inject(struct perf_inject *inject)
*/
inject->tool.finished_init = host__finished_init;
/* Obey finished round ordering */
- inject->tool.finished_round = host__finished_round,
+ inject->tool.finished_round = host__finished_round;
/* Keep track of which CPU a VCPU is runnng on */
inject->tool.context_switch = host__context_switch;
/*
@@ -2092,7 +2279,7 @@ static int __cmd_inject(struct perf_inject *inject)
if (!inject->itrace_synth_opts.set)
auxtrace_index__free(&session->auxtrace_index);
- if (!inject->is_pipe && !inject->in_place_update)
+ if (!inject->output.is_pipe && !inject->in_place_update)
lseek(fd, output_data_offset, SEEK_SET);
ret = perf_session__process_events(session);
@@ -2111,15 +2298,15 @@ static int __cmd_inject(struct perf_inject *inject)
}
}
- if (!inject->is_pipe && !inject->in_place_update) {
+ if (!inject->output.is_pipe && !inject->in_place_update) {
struct inject_fc inj_fc = {
.fc.copy = feat_copy_cb,
.inject = inject,
};
- if (inject->build_ids)
- perf_header__set_feat(&session->header,
- HEADER_BUILD_ID);
+ if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
+ inject->build_id_style == BID_RWS__INJECT_HEADER_ALL)
+ perf_header__set_feat(&session->header, HEADER_BUILD_ID);
/*
* Keep all buildids when there is unprocessed AUX data because
* it is not known which ones the AUX trace hits.
@@ -2141,7 +2328,8 @@ static int __cmd_inject(struct perf_inject *inject)
}
session->header.data_offset = output_data_offset;
session->header.data_size = inject->bytes_written;
- perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc);
+ perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
+ write_attrs_after_data);
if (inject->copy_kcore_dir) {
ret = copy_kcore_dir(inject);
@@ -2165,46 +2353,6 @@ static int __cmd_inject(struct perf_inject *inject)
int cmd_inject(int argc, const char **argv)
{
struct perf_inject inject = {
- .tool = {
- .sample = perf_event__repipe_sample,
- .read = perf_event__repipe_sample,
- .mmap = perf_event__repipe,
- .mmap2 = perf_event__repipe,
- .comm = perf_event__repipe,
- .namespaces = perf_event__repipe,
- .cgroup = perf_event__repipe,
- .fork = perf_event__repipe,
- .exit = perf_event__repipe,
- .lost = perf_event__repipe,
- .lost_samples = perf_event__repipe,
- .aux = perf_event__repipe,
- .itrace_start = perf_event__repipe,
- .aux_output_hw_id = perf_event__repipe,
- .context_switch = perf_event__repipe,
- .throttle = perf_event__repipe,
- .unthrottle = perf_event__repipe,
- .ksymbol = perf_event__repipe,
- .bpf = perf_event__repipe,
- .text_poke = perf_event__repipe,
- .attr = perf_event__repipe_attr,
- .event_update = perf_event__repipe_event_update,
- .tracing_data = perf_event__repipe_op2_synth,
- .finished_round = perf_event__repipe_oe_synth,
- .build_id = perf_event__repipe_op2_synth,
- .id_index = perf_event__repipe_op2_synth,
- .auxtrace_info = perf_event__repipe_op2_synth,
- .auxtrace_error = perf_event__repipe_op2_synth,
- .time_conv = perf_event__repipe_op2_synth,
- .thread_map = perf_event__repipe_op2_synth,
- .cpu_map = perf_event__repipe_op2_synth,
- .stat_config = perf_event__repipe_op2_synth,
- .stat = perf_event__repipe_op2_synth,
- .stat_round = perf_event__repipe_op2_synth,
- .feature = perf_event__repipe_op2_synth,
- .finished_init = perf_event__repipe_op2_synth,
- .compressed = perf_event__repipe_op4_synth,
- .auxtrace = perf_event__repipe_auxtrace,
- },
.input_name = "-",
.samples = LIST_HEAD_INIT(inject.samples),
.output = {
@@ -2218,14 +2366,21 @@ int cmd_inject(int argc, const char **argv)
.use_stdio = true,
};
int ret;
- bool repipe = true;
const char *known_build_ids = NULL;
+ bool build_ids;
+ bool build_id_all;
+ bool mmap2_build_ids;
+ bool mmap2_build_id_all;
struct option options[] = {
- OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
+ OPT_BOOLEAN('b', "build-ids", &build_ids,
"Inject build-ids into the output stream"),
- OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all,
+ OPT_BOOLEAN(0, "buildid-all", &build_id_all,
"Inject build-ids of all DSOs into the output stream"),
+ OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids,
+ "Drop unused mmap events, make others mmap2 with build IDs"),
+ OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all,
+ "Rewrite all mmap events as mmap2 events with build IDs"),
OPT_STRING(0, "known-build-ids", &known_build_ids,
"buildid path [,buildid path...]",
"build-ids to use for given paths"),
@@ -2269,6 +2424,7 @@ int cmd_inject(int argc, const char **argv)
"perf inject [<options>]",
NULL
};
+ bool ordered_events;
if (!inject.itrace_synth_opts.set) {
/* Disable eager loading of kernel symbols that adds overhead to perf inject. */
@@ -2321,22 +2477,63 @@ int cmd_inject(int argc, const char **argv)
return -1;
}
}
+ if (mmap2_build_ids)
+ inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY;
+ if (mmap2_build_id_all)
+ inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL;
+ if (build_ids)
+ inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY;
+ if (build_id_all)
+ inject.build_id_style = BID_RWS__INJECT_HEADER_ALL;
data.path = inject.input_name;
- if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) {
- inject.is_pipe = true;
- /*
- * Do not repipe header when input is a regular file
- * since either it can rewrite the header at the end
- * or write a new pipe header.
- */
- if (strcmp(inject.input_name, "-"))
- repipe = false;
- }
- inject.session = __perf_session__new(&data, repipe,
- output_fd(&inject),
- &inject.tool);
+ ordered_events = inject.jit_mode || inject.sched_stat ||
+ inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
+ inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY;
+ perf_tool__init(&inject.tool, ordered_events);
+ inject.tool.sample = perf_event__repipe_sample;
+ inject.tool.read = perf_event__repipe_sample;
+ inject.tool.mmap = perf_event__repipe;
+ inject.tool.mmap2 = perf_event__repipe;
+ inject.tool.comm = perf_event__repipe;
+ inject.tool.namespaces = perf_event__repipe;
+ inject.tool.cgroup = perf_event__repipe;
+ inject.tool.fork = perf_event__repipe;
+ inject.tool.exit = perf_event__repipe;
+ inject.tool.lost = perf_event__repipe;
+ inject.tool.lost_samples = perf_event__repipe;
+ inject.tool.aux = perf_event__repipe;
+ inject.tool.itrace_start = perf_event__repipe;
+ inject.tool.aux_output_hw_id = perf_event__repipe;
+ inject.tool.context_switch = perf_event__repipe;
+ inject.tool.throttle = perf_event__repipe;
+ inject.tool.unthrottle = perf_event__repipe;
+ inject.tool.ksymbol = perf_event__repipe;
+ inject.tool.bpf = perf_event__repipe;
+ inject.tool.text_poke = perf_event__repipe;
+ inject.tool.attr = perf_event__repipe_attr;
+ inject.tool.event_update = perf_event__repipe_event_update;
+ inject.tool.tracing_data = perf_event__repipe_op2_synth;
+ inject.tool.finished_round = perf_event__repipe_oe_synth;
+ inject.tool.build_id = perf_event__repipe_op2_synth;
+ inject.tool.id_index = perf_event__repipe_op2_synth;
+ inject.tool.auxtrace_info = perf_event__repipe_op2_synth;
+ inject.tool.auxtrace_error = perf_event__repipe_op2_synth;
+ inject.tool.time_conv = perf_event__repipe_op2_synth;
+ inject.tool.thread_map = perf_event__repipe_op2_synth;
+ inject.tool.cpu_map = perf_event__repipe_op2_synth;
+ inject.tool.stat_config = perf_event__repipe_op2_synth;
+ inject.tool.stat = perf_event__repipe_op2_synth;
+ inject.tool.stat_round = perf_event__repipe_op2_synth;
+ inject.tool.feature = perf_event__repipe_op2_synth;
+ inject.tool.finished_init = perf_event__repipe_op2_synth;
+ inject.tool.compressed = perf_event__repipe_op4_synth;
+ inject.tool.auxtrace = perf_event__repipe_auxtrace;
+ inject.tool.dont_split_sample_group = true;
+ inject.session = __perf_session__new(&data, &inject.tool,
+ /*trace_event_repipe=*/inject.output.is_pipe);
+
if (IS_ERR(inject.session)) {
ret = PTR_ERR(inject.session);
goto out_close_output;
@@ -2350,50 +2547,52 @@ int cmd_inject(int argc, const char **argv)
if (ret)
goto out_delete;
- if (!data.is_pipe && inject.output.is_pipe) {
+ if (inject.output.is_pipe) {
ret = perf_header__write_pipe(perf_data__fd(&inject.output));
if (ret < 0) {
pr_err("Couldn't write a new pipe header.\n");
goto out_delete;
}
- ret = perf_event__synthesize_for_pipe(&inject.tool,
- inject.session,
- &inject.output,
- perf_event__repipe);
- if (ret < 0)
- goto out_delete;
+ /*
+ * If the input is already a pipe then the features and
+ * attributes don't need synthesizing, they will be present in
+ * the input.
+ */
+ if (!data.is_pipe) {
+ ret = perf_event__synthesize_for_pipe(&inject.tool,
+ inject.session,
+ &inject.output,
+ perf_event__repipe);
+ if (ret < 0)
+ goto out_delete;
+ }
}
- if (inject.build_ids && !inject.build_id_all) {
+ if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
+ inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
/*
* to make sure the mmap records are ordered correctly
* and so that the correct especially due to jitted code
* mmaps. We cannot generate the buildid hit list and
* inject the jit mmaps at the same time for now.
*/
- inject.tool.ordered_events = true;
inject.tool.ordering_requires_timestamps = true;
- if (known_build_ids != NULL) {
- inject.known_build_ids =
- perf_inject__parse_known_build_ids(known_build_ids);
-
- if (inject.known_build_ids == NULL) {
- pr_err("Couldn't parse known build ids.\n");
- goto out_delete;
- }
- }
}
+ if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) {
+ inject.known_build_ids =
+ perf_inject__parse_known_build_ids(known_build_ids);
- if (inject.sched_stat) {
- inject.tool.ordered_events = true;
+ if (inject.known_build_ids == NULL) {
+ pr_err("Couldn't parse known build ids.\n");
+ goto out_delete;
+ }
}
#ifdef HAVE_JITDUMP
if (inject.jit_mode) {
- inject.tool.mmap2 = perf_event__jit_repipe_mmap2;
- inject.tool.mmap = perf_event__jit_repipe_mmap;
- inject.tool.ordered_events = true;
+ inject.tool.mmap2 = perf_event__repipe_mmap2;
+ inject.tool.mmap = perf_event__repipe_mmap;
inject.tool.ordering_requires_timestamps = true;
/*
* JIT MMAP injection injects all MMAP events in one go, so it
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 6fd95be5032b..a756147e2eec 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -955,7 +955,7 @@ static bool perf_kmem__skip_sample(struct perf_sample *sample)
typedef int (*tracepoint_handler)(struct evsel *evsel,
struct perf_sample *sample);
-static int process_sample_event(struct perf_tool *tool __maybe_unused,
+static int process_sample_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -986,15 +986,6 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return err;
}
-static struct perf_tool perf_kmem = {
- .sample = process_sample_event,
- .comm = perf_event__process_comm,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .namespaces = perf_event__process_namespaces,
- .ordered_events = true,
-};
-
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
if (n_alloc == 0)
@@ -1971,6 +1962,7 @@ int cmd_kmem(int argc, const char **argv)
NULL
};
struct perf_session *session;
+ struct perf_tool perf_kmem;
static const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n";
int ret = perf_config(kmem_config, NULL);
@@ -1998,6 +1990,13 @@ int cmd_kmem(int argc, const char **argv)
data.path = input_name;
+ perf_tool__init(&perf_kmem, /*ordered_events=*/true);
+ perf_kmem.sample = process_sample_event;
+ perf_kmem.comm = perf_event__process_comm;
+ perf_kmem.mmap = perf_event__process_mmap;
+ perf_kmem.mmap2 = perf_event__process_mmap2;
+ perf_kmem.namespaces = perf_event__process_namespaces;
+
kmem_session = session = perf_session__new(&data, &perf_kmem);
if (IS_ERR(session))
return PTR_ERR(session);
@@ -2058,7 +2057,8 @@ int cmd_kmem(int argc, const char **argv)
out_delete:
perf_session__delete(session);
+ /* free usage string allocated by parse_options_subcommand */
+ free((void *)kmem_usage[0]);
return ret;
}
-
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 71165036e4ca..55ea17c5ff02 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1166,7 +1166,7 @@ static void print_result(struct perf_kvm_stat *kvm)
}
#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
-static int process_lost_event(struct perf_tool *tool,
+static int process_lost_event(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -1187,7 +1187,7 @@ static bool skip_sample(struct perf_kvm_stat *kvm,
return false;
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -1603,19 +1603,17 @@ static int read_events(struct perf_kvm_stat *kvm)
{
int ret;
- struct perf_tool eops = {
- .sample = process_sample_event,
- .comm = perf_event__process_comm,
- .namespaces = perf_event__process_namespaces,
- .ordered_events = true,
- };
struct perf_data file = {
.path = kvm->file_name,
.mode = PERF_DATA_MODE_READ,
.force = kvm->force,
};
- kvm->tool = eops;
+ perf_tool__init(&kvm->tool, /*ordered_events=*/true);
+ kvm->tool.sample = process_sample_event;
+ kvm->tool.comm = perf_event__process_comm;
+ kvm->tool.namespaces = perf_event__process_namespaces;
+
kvm->session = perf_session__new(&file, &kvm->tool);
if (IS_ERR(kvm->session)) {
pr_err("Initializing perf session failed\n");
@@ -1919,14 +1917,13 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
/* event handling */
+ perf_tool__init(&kvm->tool, /*ordered_events=*/true);
kvm->tool.sample = process_sample_event;
kvm->tool.comm = perf_event__process_comm;
kvm->tool.exit = perf_event__process_exit;
kvm->tool.fork = perf_event__process_fork;
kvm->tool.lost = process_lost_event;
kvm->tool.namespaces = perf_event__process_namespaces;
- kvm->tool.ordered_events = true;
- perf_tool__fill_defaults(&kvm->tool);
/* set defaults */
kvm->display_time = 1;
@@ -2187,5 +2184,8 @@ int cmd_kvm(int argc, const char **argv)
else
usage_with_options(kvm_usage, kvm_options);
+ /* free usage string allocated by parse_options_subcommand */
+ free((void *)kvm_usage[0]);
+
return 0;
}
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index 56e3f3a5e03a..c1daf82c9b92 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -958,7 +958,7 @@ static int top_sched_switch_event(struct perf_kwork *kwork,
}
static struct kwork_class kwork_irq;
-static int process_irq_handler_entry_event(struct perf_tool *tool,
+static int process_irq_handler_entry_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -971,7 +971,7 @@ static int process_irq_handler_entry_event(struct perf_tool *tool,
return 0;
}
-static int process_irq_handler_exit_event(struct perf_tool *tool,
+static int process_irq_handler_exit_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1037,7 +1037,7 @@ static struct kwork_class kwork_irq = {
};
static struct kwork_class kwork_softirq;
-static int process_softirq_raise_event(struct perf_tool *tool,
+static int process_softirq_raise_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1051,7 +1051,7 @@ static int process_softirq_raise_event(struct perf_tool *tool,
return 0;
}
-static int process_softirq_entry_event(struct perf_tool *tool,
+static int process_softirq_entry_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1065,7 +1065,7 @@ static int process_softirq_entry_event(struct perf_tool *tool,
return 0;
}
-static int process_softirq_exit_event(struct perf_tool *tool,
+static int process_softirq_exit_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1167,7 +1167,7 @@ static struct kwork_class kwork_softirq = {
};
static struct kwork_class kwork_workqueue;
-static int process_workqueue_activate_work_event(struct perf_tool *tool,
+static int process_workqueue_activate_work_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1181,7 +1181,7 @@ static int process_workqueue_activate_work_event(struct perf_tool *tool,
return 0;
}
-static int process_workqueue_execute_start_event(struct perf_tool *tool,
+static int process_workqueue_execute_start_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1195,7 +1195,7 @@ static int process_workqueue_execute_start_event(struct perf_tool *tool,
return 0;
}
-static int process_workqueue_execute_end_event(struct perf_tool *tool,
+static int process_workqueue_execute_end_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1266,7 +1266,7 @@ static struct kwork_class kwork_workqueue = {
};
static struct kwork_class kwork_sched;
-static int process_sched_switch_event(struct perf_tool *tool,
+static int process_sched_switch_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1945,12 +1945,12 @@ static int perf_kwork__report(struct perf_kwork *kwork)
return 0;
}
-typedef int (*tracepoint_handler)(struct perf_tool *tool,
+typedef int (*tracepoint_handler)(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine);
-static int perf_kwork__process_tracepoint_sample(struct perf_tool *tool,
+static int perf_kwork__process_tracepoint_sample(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct evsel *evsel,
@@ -2322,12 +2322,6 @@ int cmd_kwork(int argc, const char **argv)
{
static struct perf_kwork kwork = {
.class_list = LIST_HEAD_INIT(kwork.class_list),
- .tool = {
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .sample = perf_kwork__process_tracepoint_sample,
- .ordered_events = true,
- },
.atom_page_list = LIST_HEAD_INIT(kwork.atom_page_list),
.sort_list = LIST_HEAD_INIT(kwork.sort_list),
.cmp_id = LIST_HEAD_INIT(kwork.cmp_id),
@@ -2462,6 +2456,11 @@ int cmd_kwork(int argc, const char **argv)
"record", "report", "latency", "timehist", "top", NULL
};
+ perf_tool__init(&kwork.tool, /*ordered_events=*/true);
+ kwork.tool.mmap = perf_event__process_mmap;
+ kwork.tool.mmap2 = perf_event__process_mmap2;
+ kwork.tool.sample = perf_kwork__process_tracepoint_sample;
+
argc = parse_options_subcommand(argc, argv, kwork_options,
kwork_subcommands, kwork_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
@@ -2520,5 +2519,8 @@ int cmd_kwork(int argc, const char **argv)
} else
usage_with_options(kwork_usage, kwork_options);
+ /* free usage string allocated by parse_options_subcommand */
+ free((void *)kwork_usage[0]);
+
return 0;
}
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 82cb4b1010aa..65b8cba324be 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -173,7 +173,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
if (pmu_name && strcmp(pmu_name, "default_core")) {
desc_len = strlen(desc);
desc_len = asprintf(&desc_with_unit,
- desc[desc_len - 1] != '.'
+ desc_len > 0 && desc[desc_len - 1] != '.'
? "%s. Unit: %s" : "%s Unit: %s",
desc, pmu_name);
}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 0253184b3b58..062e2b56a2ab 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -1501,7 +1501,7 @@ static const struct evsel_str_handler contention_tracepoints[] = {
{ "lock:contention_end", evsel__process_contention_end, },
};
-static int process_event_update(struct perf_tool *tool,
+static int process_event_update(const struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist)
{
@@ -1520,7 +1520,7 @@ static int process_event_update(struct perf_tool *tool,
typedef int (*tracepoint_handler)(struct evsel *evsel,
struct perf_sample *sample);
-static int process_sample_event(struct perf_tool *tool __maybe_unused,
+static int process_sample_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -1933,22 +1933,21 @@ static bool force;
static int __cmd_report(bool display_info)
{
int err = -EINVAL;
- struct perf_tool eops = {
- .attr = perf_event__process_attr,
- .event_update = process_event_update,
- .sample = process_sample_event,
- .comm = perf_event__process_comm,
- .mmap = perf_event__process_mmap,
- .namespaces = perf_event__process_namespaces,
- .tracing_data = perf_event__process_tracing_data,
- .ordered_events = true,
- };
+ struct perf_tool eops;
struct perf_data data = {
.path = input_name,
.mode = PERF_DATA_MODE_READ,
.force = force,
};
+ perf_tool__init(&eops, /*ordered_events=*/true);
+ eops.attr = perf_event__process_attr;
+ eops.event_update = process_event_update;
+ eops.sample = process_sample_event;
+ eops.comm = perf_event__process_comm;
+ eops.mmap = perf_event__process_mmap;
+ eops.namespaces = perf_event__process_namespaces;
+ eops.tracing_data = perf_event__process_tracing_data;
session = perf_session__new(&data, &eops);
if (IS_ERR(session)) {
pr_err("Initializing perf session failed\n");
@@ -2069,15 +2068,7 @@ static int check_lock_contention_options(const struct option *options,
static int __cmd_contention(int argc, const char **argv)
{
int err = -EINVAL;
- struct perf_tool eops = {
- .attr = perf_event__process_attr,
- .event_update = process_event_update,
- .sample = process_sample_event,
- .comm = perf_event__process_comm,
- .mmap = perf_event__process_mmap,
- .tracing_data = perf_event__process_tracing_data,
- .ordered_events = true,
- };
+ struct perf_tool eops;
struct perf_data data = {
.path = input_name,
.mode = PERF_DATA_MODE_READ,
@@ -2100,6 +2091,14 @@ static int __cmd_contention(int argc, const char **argv)
con.result = &lockhash_table[0];
+ perf_tool__init(&eops, /*ordered_events=*/true);
+ eops.attr = perf_event__process_attr;
+ eops.event_update = process_event_update;
+ eops.sample = process_sample_event;
+ eops.comm = perf_event__process_comm;
+ eops.mmap = perf_event__process_mmap;
+ eops.tracing_data = perf_event__process_tracing_data;
+
session = perf_session__new(use_bpf ? NULL : &data, &eops);
if (IS_ERR(session)) {
pr_err("Initializing perf session failed\n");
@@ -2713,6 +2712,9 @@ int cmd_lock(int argc, const char **argv)
usage_with_options(lock_usage, lock_options);
}
+ /* free usage string allocated by parse_options_subcommand */
+ free((void *)lock_usage[0]);
+
zfree(&lockhash_table);
return rc;
}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 863fcd735dae..651188c1d825 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -19,6 +19,7 @@
#include "util/symbol.h"
#include "util/pmus.h"
#include "util/sample.h"
+#include "util/sort.h"
#include "util/string2.h"
#include "util/util.h"
#include <linux/err.h>
@@ -28,12 +29,16 @@
struct perf_mem {
struct perf_tool tool;
- char const *input_name;
+ const char *input_name;
+ const char *sort_key;
bool hide_unresolved;
bool dump_raw;
bool force;
bool phys_addr;
bool data_page_size;
+ bool all_kernel;
+ bool all_user;
+ bool data_type;
int operation;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -42,7 +47,7 @@ struct perf_mem {
static int parse_record_events(const struct option *opt,
const char *str, int unset __maybe_unused)
{
- struct perf_mem *mem = *(struct perf_mem **)opt->value;
+ struct perf_mem *mem = (struct perf_mem *)opt->value;
struct perf_pmu *pmu;
pmu = perf_mem_events_find_pmu();
@@ -62,33 +67,19 @@ static int parse_record_events(const struct option *opt,
return 0;
}
-static const char * const __usage[] = {
- "perf mem record [<options>] [<command>]",
- "perf mem record [<options>] -- <command> [<options>]",
- NULL
-};
-
-static const char * const *record_mem_usage = __usage;
-
-static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
+static int __cmd_record(int argc, const char **argv, struct perf_mem *mem,
+ const struct option *options)
{
int rec_argc, i = 0, j;
int start, end;
const char **rec_argv;
int ret;
- bool all_user = false, all_kernel = false;
struct perf_mem_event *e;
struct perf_pmu *pmu;
- struct option options[] = {
- OPT_CALLBACK('e', "event", &mem, "event",
- "event selector. use 'perf mem record -e list' to list available events",
- parse_record_events),
- OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"),
- OPT_INCR('v', "verbose", &verbose,
- "be more verbose (show counter open errors, etc)"),
- OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"),
- OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"),
- OPT_END()
+ const char * const record_usage[] = {
+ "perf mem record [<options>] [<command>]",
+ "perf mem record [<options>] -- <command> [<options>]",
+ NULL
};
pmu = perf_mem_events_find_pmu();
@@ -97,12 +88,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
return -1;
}
- if (perf_pmu__mem_events_init(pmu)) {
+ if (perf_pmu__mem_events_init()) {
pr_err("failed: memory events not supported\n");
return -1;
}
- argc = parse_options(argc, argv, options, record_mem_usage,
+ argc = parse_options(argc, argv, options, record_usage,
PARSE_OPT_KEEP_UNKNOWN);
/* Max number of arguments multiplied by number of PMUs that can support them. */
@@ -126,22 +117,17 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
if (e->tag &&
(mem->operation & MEM_OPERATION_LOAD) &&
(mem->operation & MEM_OPERATION_STORE)) {
- e->record = true;
+ perf_mem_record[PERF_MEM_EVENTS__LOAD_STORE] = true;
rec_argv[i++] = "-W";
} else {
- if (mem->operation & MEM_OPERATION_LOAD) {
- e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
- e->record = true;
- }
+ if (mem->operation & MEM_OPERATION_LOAD)
+ perf_mem_record[PERF_MEM_EVENTS__LOAD] = true;
- if (mem->operation & MEM_OPERATION_STORE) {
- e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__STORE);
- e->record = true;
- }
+ if (mem->operation & MEM_OPERATION_STORE)
+ perf_mem_record[PERF_MEM_EVENTS__STORE] = true;
}
- e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
- if (e->record)
+ if (perf_mem_record[PERF_MEM_EVENTS__LOAD])
rec_argv[i++] = "-W";
rec_argv[i++] = "-d";
@@ -158,10 +144,10 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
goto out;
end = i;
- if (all_user)
+ if (mem->all_user)
rec_argv[i++] = "--all-user";
- if (all_kernel)
+ if (mem->all_kernel)
rec_argv[i++] = "--all-kernel";
if (mem->cpu_list) {
@@ -188,7 +174,7 @@ out:
}
static int
-dump_raw_samples(struct perf_tool *tool,
+dump_raw_samples(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -262,7 +248,7 @@ out_put:
return 0;
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel __maybe_unused,
@@ -285,7 +271,23 @@ static int report_raw_events(struct perf_mem *mem)
.force = mem->force,
};
int ret;
- struct perf_session *session = perf_session__new(&data, &mem->tool);
+ struct perf_session *session;
+
+ perf_tool__init(&mem->tool, /*ordered_events=*/true);
+ mem->tool.sample = process_sample_event;
+ mem->tool.mmap = perf_event__process_mmap;
+ mem->tool.mmap2 = perf_event__process_mmap2;
+ mem->tool.comm = perf_event__process_comm;
+ mem->tool.lost = perf_event__process_lost;
+ mem->tool.fork = perf_event__process_fork;
+ mem->tool.attr = perf_event__process_attr;
+ mem->tool.build_id = perf_event__process_build_id;
+ mem->tool.namespaces = perf_event__process_namespaces;
+ mem->tool.auxtrace_info = perf_event__process_auxtrace_info;
+ mem->tool.auxtrace = perf_event__process_auxtrace;
+ mem->tool.auxtrace_error = perf_event__process_auxtrace_error;
+
+ session = perf_session__new(&data, &mem->tool);
if (IS_ERR(session))
return PTR_ERR(session);
@@ -319,16 +321,21 @@ out_delete:
perf_session__delete(session);
return ret;
}
+
static char *get_sort_order(struct perf_mem *mem)
{
bool has_extra_options = (mem->phys_addr | mem->data_page_size) ? true : false;
char sort[128];
+ if (mem->sort_key)
+ scnprintf(sort, sizeof(sort), "--sort=%s", mem->sort_key);
+ else if (mem->data_type)
+ strcpy(sort, "--sort=mem,snoop,tlb,type");
/*
* there is no weight (cost) associated with stores, so don't print
* the column
*/
- if (!(mem->operation & MEM_OPERATION_LOAD)) {
+ else if (!(mem->operation & MEM_OPERATION_LOAD)) {
strcpy(sort, "--sort=mem,sym,dso,symbol_daddr,"
"dso_daddr,tlb,locked");
} else if (has_extra_options) {
@@ -343,14 +350,26 @@ static char *get_sort_order(struct perf_mem *mem)
if (mem->data_page_size)
strcat(sort, ",data_page_size");
+ /* make sure it has 'type' sort key even -s option is used */
+ if (mem->data_type && !strstr(sort, "type"))
+ strcat(sort, ",type");
+
return strdup(sort);
}
-static int report_events(int argc, const char **argv, struct perf_mem *mem)
+static int __cmd_report(int argc, const char **argv, struct perf_mem *mem,
+ const struct option *options)
{
const char **rep_argv;
int ret, i = 0, j, rep_argc;
char *new_sort_order;
+ const char * const report_usage[] = {
+ "perf mem report [<options>]",
+ NULL
+ };
+
+ argc = parse_options(argc, argv, options, report_usage,
+ PARSE_OPT_KEEP_UNKNOWN);
if (mem->dump_raw)
return report_raw_events(mem);
@@ -368,10 +387,11 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
if (new_sort_order)
rep_argv[i++] = new_sort_order;
- for (j = 1; j < argc; j++, i++)
+ for (j = 0; j < argc; j++, i++)
rep_argv[i] = argv[j];
ret = cmd_report(i, rep_argv);
+ free(new_sort_order);
free(rep_argv);
return ret;
}
@@ -449,47 +469,51 @@ int cmd_mem(int argc, const char **argv)
{
struct stat st;
struct perf_mem mem = {
- .tool = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .lost = perf_event__process_lost,
- .fork = perf_event__process_fork,
- .attr = perf_event__process_attr,
- .build_id = perf_event__process_build_id,
- .namespaces = perf_event__process_namespaces,
- .auxtrace_info = perf_event__process_auxtrace_info,
- .auxtrace = perf_event__process_auxtrace,
- .auxtrace_error = perf_event__process_auxtrace_error,
- .ordered_events = true,
- },
.input_name = "perf.data",
/*
* default to both load an store sampling
*/
.operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE,
};
+ char *sort_order_help = sort_help("sort by key(s):", SORT_MODE__MEMORY);
const struct option mem_options[] = {
OPT_CALLBACK('t', "type", &mem.operation,
"type", "memory operations(load,store) Default load,store",
parse_mem_ops),
+ OPT_STRING('C', "cpu", &mem.cpu_list, "cpu",
+ "list of cpus to profile"),
+ OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
+ OPT_BOOLEAN(0, "data-page-size", &mem.data_page_size, "Record/Report sample data address page size"),
+ OPT_END()
+ };
+ const struct option record_options[] = {
+ OPT_CALLBACK('e', "event", &mem, "event",
+ "event selector. use 'perf mem record -e list' to list available events",
+ parse_record_events),
+ OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"),
+ OPT_BOOLEAN('U', "all-user", &mem.all_user, "collect only user level data"),
+ OPT_BOOLEAN('K', "all-kernel", &mem.all_kernel, "collect only kernel level data"),
+ OPT_PARENT(mem_options)
+ };
+ const struct option report_options[] = {
OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
"dump raw samples in ASCII"),
OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
"Only display entries resolved to a symbol"),
OPT_STRING('i', "input", &input_name, "file",
"input file name"),
- OPT_STRING('C', "cpu", &mem.cpu_list, "cpu",
- "list of cpus to profile"),
OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep,
"separator",
"separator for columns, no spaces will be added"
" between columns '.' is reserved."),
- OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
- OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
- OPT_BOOLEAN(0, "data-page-size", &mem.data_page_size, "Record/Report sample data address page size"),
- OPT_END()
+ OPT_STRING('s', "sort", &mem.sort_key, "key[,key2...]",
+ sort_order_help),
+ OPT_BOOLEAN('T', "type-profile", &mem.data_type,
+ "Show data-type profile result"),
+ OPT_PARENT(mem_options)
};
const char *const mem_subcommands[] = { "record", "report", NULL };
const char *mem_usage[] = {
@@ -498,7 +522,7 @@ int cmd_mem(int argc, const char **argv)
};
argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
- mem_usage, PARSE_OPT_KEEP_UNKNOWN);
+ mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
usage_with_options(mem_usage, mem_options);
@@ -511,11 +535,14 @@ int cmd_mem(int argc, const char **argv)
}
if (strlen(argv[0]) > 2 && strstarts("record", argv[0]))
- return __cmd_record(argc, argv, &mem);
+ return __cmd_record(argc, argv, &mem, record_options);
else if (strlen(argv[0]) > 2 && strstarts("report", argv[0]))
- return report_events(argc, argv, &mem);
+ return __cmd_report(argc, argv, &mem, report_options);
else
usage_with_options(mem_usage, mem_options);
+ /* free usage string allocated by parse_options_subcommand */
+ free((void *)mem_usage[0]);
+
return 0;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a94516e8c522..adbaf80b398c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -171,6 +171,7 @@ struct record {
bool timestamp_filename;
bool timestamp_boundary;
bool off_cpu;
+ const char *filter_action;
struct switch_output switch_output;
unsigned long long samples;
unsigned long output_max_size; /* = 0: unlimited */
@@ -193,6 +194,15 @@ static const char *affinity_tags[PERF_AFFINITY_MAX] = {
"SYS", "NODE", "CPU"
};
+static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine);
+static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine);
+static int process_timestamp_boundary(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+
#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
@@ -608,7 +618,7 @@ static int record__comp_enabled(struct record *rec)
return rec->opts.comp_level > 0;
}
-static int process_synthesized_event(struct perf_tool *tool,
+static int process_synthesized_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -619,7 +629,7 @@ static int process_synthesized_event(struct perf_tool *tool,
static struct mutex synth_lock;
-static int process_locked_synthesized_event(struct perf_tool *tool,
+static int process_locked_synthesized_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -704,7 +714,7 @@ static void record__sig_exit(void)
#ifdef HAVE_AUXTRACE_SUPPORT
-static int record__process_auxtrace(struct perf_tool *tool,
+static int record__process_auxtrace(const struct perf_tool *tool,
struct mmap *map,
union perf_event *event, void *data1,
size_t len1, void *data2, size_t len2)
@@ -1389,7 +1399,7 @@ try_again:
"even with a suitable vmlinux or kallsyms file.\n\n");
}
- if (evlist__apply_filters(evlist, &pos)) {
+ if (evlist__apply_filters(evlist, &pos, &opts->target)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
pos->filter ?: "BPF", evsel__name(pos), errno,
str_error_r(errno, msg, sizeof(msg)));
@@ -1416,7 +1426,7 @@ static void set_timestamp_boundary(struct record *rec, u64 sample_time)
rec->evlist->last_sample_time = sample_time;
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -1458,7 +1468,7 @@ static int process_buildids(struct record *rec)
* first/last samples.
*/
if (rec->buildid_all && !rec->timestamp_boundary)
- rec->tool.sample = NULL;
+ rec->tool.sample = process_event_sample_stub;
return perf_session__process_events(session);
}
@@ -2364,13 +2374,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
signal(SIGTERM, sig_handler);
signal(SIGSEGV, sigsegv_handler);
- if (rec->opts.record_namespaces)
- tool->namespace_events = true;
-
if (rec->opts.record_cgroup) {
-#ifdef HAVE_FILE_HANDLE
- tool->cgroup_events = true;
-#else
+#ifndef HAVE_FILE_HANDLE
pr_err("cgroup tracking is not supported\n");
return -1;
#endif
@@ -2386,6 +2391,18 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
signal(SIGUSR2, SIG_IGN);
}
+ perf_tool__init(tool, /*ordered_events=*/true);
+ tool->sample = process_sample_event;
+ tool->fork = perf_event__process_fork;
+ tool->exit = perf_event__process_exit;
+ tool->comm = perf_event__process_comm;
+ tool->namespaces = perf_event__process_namespaces;
+ tool->mmap = build_id__process_mmap;
+ tool->mmap2 = build_id__process_mmap2;
+ tool->itrace_start = process_timestamp_boundary;
+ tool->aux = process_timestamp_boundary;
+ tool->namespace_events = rec->opts.record_namespaces;
+ tool->cgroup_events = rec->opts.record_cgroup;
session = perf_session__new(data, tool);
if (IS_ERR(session)) {
pr_err("Perf session creation failed.\n");
@@ -3243,7 +3260,7 @@ static const char * const __record_usage[] = {
};
const char * const *record_usage = __record_usage;
-static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
+static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine)
{
/*
@@ -3255,7 +3272,7 @@ static int build_id__process_mmap(struct perf_tool *tool, union perf_event *even
return perf_event__process_mmap(tool, event, sample, machine);
}
-static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
+static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine)
{
/*
@@ -3268,7 +3285,7 @@ static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *eve
return perf_event__process_mmap2(tool, event, sample, machine);
}
-static int process_timestamp_boundary(struct perf_tool *tool,
+static int process_timestamp_boundary(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct machine *machine __maybe_unused)
@@ -3326,18 +3343,6 @@ static struct record record = {
.ctl_fd_ack = -1,
.synth = PERF_SYNTH_ALL,
},
- .tool = {
- .sample = process_sample_event,
- .fork = perf_event__process_fork,
- .exit = perf_event__process_exit,
- .comm = perf_event__process_comm,
- .namespaces = perf_event__process_namespaces,
- .mmap = build_id__process_mmap,
- .mmap2 = build_id__process_mmap2,
- .itrace_start = process_timestamp_boundary,
- .aux = process_timestamp_boundary,
- .ordered_events = true,
- },
};
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
@@ -3557,6 +3562,8 @@ static struct option __record_options[] = {
"write collected trace data into several data files using parallel threads",
record__parse_threads),
OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
+ OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
+ "BPF filter action"),
OPT_END()
};
@@ -4086,6 +4093,18 @@ int cmd_record(int argc, const char **argv)
pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
}
+ if (rec->filter_action) {
+ if (!strcmp(rec->filter_action, "pin"))
+ err = perf_bpf_filter__pin();
+ else if (!strcmp(rec->filter_action, "unpin"))
+ err = perf_bpf_filter__unpin();
+ else {
+ pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
+ err = -EINVAL;
+ }
+ goto out_opts;
+ }
+
/*
* Allow aliases to facilitate the lookup of symbols for address
* filters. Refer to auxtrace_parse_filters().
@@ -4242,13 +4261,13 @@ int cmd_record(int argc, const char **argv)
err = __cmd_record(&record, argc, argv);
out:
- evlist__delete(rec->evlist);
+ record__free_thread_masks(rec, rec->nr_threads);
+ rec->nr_threads = 0;
symbol__exit();
auxtrace_record__free(rec->itr);
out_opts:
- record__free_thread_masks(rec, rec->nr_threads);
- rec->nr_threads = 0;
evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
+ evlist__delete(rec->evlist);
return err;
}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 6edc0d4ce6fb..5dc17ffee27a 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -263,7 +263,7 @@ static int process_feature_event(struct perf_session *session,
return 0;
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -328,7 +328,7 @@ static int process_sample_event(struct perf_tool *tool,
if (ui__has_annotation() || rep->symbol_ipc || rep->total_cycles_mode) {
hist__account_cycles(sample->branch_stack, &al, sample,
rep->nonany_branch_mode,
- &rep->total_cycles);
+ &rep->total_cycles, evsel);
}
ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
@@ -339,7 +339,7 @@ out_put:
return ret;
}
-static int process_read_event(struct perf_tool *tool,
+static int process_read_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct evsel *evsel,
@@ -565,6 +565,7 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c
struct hists *hists = evsel__hists(pos);
const char *evname = evsel__name(pos);
+ i++;
if (symbol_conf.event_group && !evsel__is_group_leader(pos))
continue;
@@ -574,7 +575,14 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c
hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
if (rep->total_cycles_mode) {
- report__browse_block_hists(&rep->block_reports[i++].hist,
+ char *buf;
+
+ if (!annotation_br_cntr_abbr_list(&buf, pos, true)) {
+ fprintf(stdout, "%s", buf);
+ fprintf(stdout, "#\n");
+ free(buf);
+ }
+ report__browse_block_hists(&rep->block_reports[i - 1].hist,
rep->min_percent, pos, NULL);
continue;
}
@@ -765,7 +773,7 @@ static void report__output_resort(struct report *rep)
ui_progress__finish();
}
-static int count_sample_event(struct perf_tool *tool __maybe_unused,
+static int count_sample_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct evsel *evsel,
@@ -777,7 +785,7 @@ static int count_sample_event(struct perf_tool *tool __maybe_unused,
return 0;
}
-static int count_lost_samples_event(struct perf_tool *tool,
+static int count_lost_samples_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine __maybe_unused)
@@ -787,22 +795,28 @@ static int count_lost_samples_event(struct perf_tool *tool,
evsel = evlist__id2evsel(rep->session->evlist, sample->id);
if (evsel) {
- hists__inc_nr_lost_samples(evsel__hists(evsel),
- event->lost_samples.lost);
+ struct hists *hists = evsel__hists(evsel);
+ u32 count = event->lost_samples.lost;
+
+ if (event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF)
+ hists__inc_nr_dropped_samples(hists, count);
+ else
+ hists__inc_nr_lost_samples(hists, count);
}
return 0;
}
-static int process_attr(struct perf_tool *tool __maybe_unused,
+static int process_attr(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct evlist **pevlist);
static void stats_setup(struct report *rep)
{
- memset(&rep->tool, 0, sizeof(rep->tool));
+ perf_tool__init(&rep->tool, /*ordered_events=*/false);
rep->tool.attr = process_attr;
rep->tool.sample = count_sample_event;
rep->tool.lost_samples = count_lost_samples_event;
+ rep->tool.event_update = perf_event__process_event_update;
rep->tool.no_warn = true;
}
@@ -817,8 +831,7 @@ static int stats_print(struct report *rep)
static void tasks_setup(struct report *rep)
{
- memset(&rep->tool, 0, sizeof(rep->tool));
- rep->tool.ordered_events = true;
+ perf_tool__init(&rep->tool, /*ordered_events=*/true);
if (rep->mmaps_mode) {
rep->tool.mmap = perf_event__process_mmap;
rep->tool.mmap2 = perf_event__process_mmap2;
@@ -1119,18 +1132,23 @@ static int __cmd_report(struct report *rep)
report__output_resort(rep);
if (rep->total_cycles_mode) {
- int block_hpps[6] = {
+ int nr_hpps = 4;
+ int block_hpps[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT,
PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
- PERF_HPP_REPORT__BLOCK_RANGE,
- PERF_HPP_REPORT__BLOCK_DSO,
};
+ if (session->evlist->nr_br_cntr > 0)
+ block_hpps[nr_hpps++] = PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER;
+
+ block_hpps[nr_hpps++] = PERF_HPP_REPORT__BLOCK_RANGE;
+ block_hpps[nr_hpps++] = PERF_HPP_REPORT__BLOCK_DSO;
+
rep->block_reports = block_info__create_report(session->evlist,
rep->total_cycles,
- block_hpps, 6,
+ block_hpps, nr_hpps,
&rep->nr_block_reports);
if (!rep->block_reports)
return -1;
@@ -1233,7 +1251,7 @@ parse_percent_limit(const struct option *opt, const char *str,
return 0;
}
-static int process_attr(struct perf_tool *tool __maybe_unused,
+static int process_attr(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct evlist **pevlist)
{
@@ -1272,37 +1290,13 @@ int cmd_report(int argc, const char **argv)
NULL
};
struct report report = {
- .tool = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .namespaces = perf_event__process_namespaces,
- .cgroup = perf_event__process_cgroup,
- .exit = perf_event__process_exit,
- .fork = perf_event__process_fork,
- .lost = perf_event__process_lost,
- .read = process_read_event,
- .attr = process_attr,
-#ifdef HAVE_LIBTRACEEVENT
- .tracing_data = perf_event__process_tracing_data,
-#endif
- .build_id = perf_event__process_build_id,
- .id_index = perf_event__process_id_index,
- .auxtrace_info = perf_event__process_auxtrace_info,
- .auxtrace = perf_event__process_auxtrace,
- .event_update = perf_event__process_event_update,
- .feature = process_feature_event,
- .ordered_events = true,
- .ordering_requires_timestamps = true,
- },
.max_stack = PERF_MAX_STACK_DEPTH,
.pretty_printing_style = "normal",
.socket_filter = -1,
.skip_empty = true,
};
- char *sort_order_help = sort_help("sort by key(s):");
- char *field_order_help = sort_help("output field(s): overhead period sample ");
+ char *sort_order_help = sort_help("sort by key(s):", SORT_MODE__NORMAL);
+ char *field_order_help = sort_help("output field(s):", SORT_MODE__NORMAL);
const char *disassembler_style = NULL, *objdump_path = NULL, *addr2line_path = NULL;
const struct option options[] = {
OPT_STRING('i', "input", &input_name, "file",
@@ -1477,6 +1471,7 @@ int cmd_report(int argc, const char **argv)
};
int ret = hists__init();
char sort_tmp[128];
+ bool ordered_events = true;
if (ret < 0)
goto exit;
@@ -1531,7 +1526,7 @@ int cmd_report(int argc, const char **argv)
report.tasks_mode = true;
if (dump_trace && report.disable_order)
- report.tool.ordered_events = false;
+ ordered_events = false;
if (quiet)
perf_quiet_option();
@@ -1562,6 +1557,29 @@ int cmd_report(int argc, const char **argv)
symbol_conf.skip_empty = report.skip_empty;
repeat:
+ perf_tool__init(&report.tool, ordered_events);
+ report.tool.sample = process_sample_event;
+ report.tool.mmap = perf_event__process_mmap;
+ report.tool.mmap2 = perf_event__process_mmap2;
+ report.tool.comm = perf_event__process_comm;
+ report.tool.namespaces = perf_event__process_namespaces;
+ report.tool.cgroup = perf_event__process_cgroup;
+ report.tool.exit = perf_event__process_exit;
+ report.tool.fork = perf_event__process_fork;
+ report.tool.lost = perf_event__process_lost;
+ report.tool.read = process_read_event;
+ report.tool.attr = process_attr;
+#ifdef HAVE_LIBTRACEEVENT
+ report.tool.tracing_data = perf_event__process_tracing_data;
+#endif
+ report.tool.build_id = perf_event__process_build_id;
+ report.tool.id_index = perf_event__process_id_index;
+ report.tool.auxtrace_info = perf_event__process_auxtrace_info;
+ report.tool.auxtrace = perf_event__process_auxtrace;
+ report.tool.event_update = perf_event__process_event_update;
+ report.tool.feature = process_feature_event;
+ report.tool.ordering_requires_timestamps = true;
+
session = perf_session__new(&data, &report.tool);
if (IS_ERR(session)) {
ret = PTR_ERR(session);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 8750b5f2d49b..5981cc51abc8 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -51,6 +51,7 @@
#define COMM_LEN 20
#define SYM_LEN 129
#define MAX_PID 1024000
+#define MAX_PRIO 140
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -228,11 +229,14 @@ struct perf_sched {
bool show_next;
bool show_migrations;
bool show_state;
+ bool show_prio;
u64 skipped_samples;
const char *time_str;
struct perf_time_interval ptime;
struct perf_time_interval hist_time;
volatile bool thread_funcs_exit;
+ const char *prio_str;
+ DECLARE_BITMAP(prio_bitmap, MAX_PRIO);
};
/* per thread run time data */
@@ -258,6 +262,8 @@ struct thread_runtime {
bool comm_changed;
u64 migrations;
+
+ int prio;
};
/* per event run time data */
@@ -920,6 +926,11 @@ struct sort_dimension {
struct list_head list;
};
+static inline void init_prio(struct thread_runtime *r)
+{
+ r->prio = -1;
+}
+
/*
* handle runtime stats saved per thread
*/
@@ -932,6 +943,7 @@ static struct thread_runtime *thread__init_runtime(struct thread *thread)
return NULL;
init_stats(&r->run_stats);
+ init_prio(r);
thread__set_priv(thread, r);
return r;
@@ -1489,7 +1501,7 @@ again:
}
}
-static int process_sched_wakeup_event(struct perf_tool *tool,
+static int process_sched_wakeup_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1502,7 +1514,7 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
return 0;
}
-static int process_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused,
+static int process_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused,
struct evsel *evsel __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -1770,7 +1782,7 @@ out:
return 0;
}
-static int process_sched_switch_event(struct perf_tool *tool,
+static int process_sched_switch_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1796,7 +1808,7 @@ static int process_sched_switch_event(struct perf_tool *tool,
return err;
}
-static int process_sched_runtime_event(struct perf_tool *tool,
+static int process_sched_runtime_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1809,7 +1821,7 @@ static int process_sched_runtime_event(struct perf_tool *tool,
return 0;
}
-static int perf_sched__process_fork_event(struct perf_tool *tool,
+static int perf_sched__process_fork_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -1826,7 +1838,7 @@ static int perf_sched__process_fork_event(struct perf_tool *tool,
return 0;
}
-static int process_sched_migrate_task_event(struct perf_tool *tool,
+static int process_sched_migrate_task_event(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine)
@@ -1839,12 +1851,12 @@ static int process_sched_migrate_task_event(struct perf_tool *tool,
return 0;
}
-typedef int (*tracepoint_handler)(struct perf_tool *tool,
+typedef int (*tracepoint_handler)(const struct perf_tool *tool,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine);
-static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused,
+static int perf_sched__process_tracepoint_sample(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct evsel *evsel,
@@ -1860,7 +1872,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
return err;
}
-static int perf_sched__process_comm(struct perf_tool *tool __maybe_unused,
+static int perf_sched__process_comm(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2036,6 +2048,24 @@ static char *timehist_get_commstr(struct thread *thread)
return str;
}
+/* prio field format: xxx or xxx->yyy */
+#define MAX_PRIO_STR_LEN 8
+static char *timehist_get_priostr(struct evsel *evsel,
+ struct thread *thread,
+ struct perf_sample *sample)
+{
+ static char prio_str[16];
+ int prev_prio = (int)evsel__intval(evsel, sample, "prev_prio");
+ struct thread_runtime *tr = thread__priv(thread);
+
+ if (tr->prio != prev_prio && tr->prio != -1)
+ scnprintf(prio_str, sizeof(prio_str), "%d->%d", tr->prio, prev_prio);
+ else
+ scnprintf(prio_str, sizeof(prio_str), "%d", prev_prio);
+
+ return prio_str;
+}
+
static void timehist_header(struct perf_sched *sched)
{
u32 ncpus = sched->max_cpu.cpu + 1;
@@ -2053,8 +2083,14 @@ static void timehist_header(struct perf_sched *sched)
printf(" ");
}
- printf(" %-*s %9s %9s %9s", comm_width,
- "task name", "wait time", "sch delay", "run time");
+ if (sched->show_prio) {
+ printf(" %-*s %-*s %9s %9s %9s",
+ comm_width, "task name", MAX_PRIO_STR_LEN, "prio",
+ "wait time", "sch delay", "run time");
+ } else {
+ printf(" %-*s %9s %9s %9s", comm_width,
+ "task name", "wait time", "sch delay", "run time");
+ }
if (sched->show_state)
printf(" %s", "state");
@@ -2069,8 +2105,14 @@ static void timehist_header(struct perf_sched *sched)
if (sched->show_cpu_visual)
printf(" %*s ", ncpus, "");
- printf(" %-*s %9s %9s %9s", comm_width,
- "[tid/pid]", "(msec)", "(msec)", "(msec)");
+ if (sched->show_prio) {
+ printf(" %-*s %-*s %9s %9s %9s",
+ comm_width, "[tid/pid]", MAX_PRIO_STR_LEN, "",
+ "(msec)", "(msec)", "(msec)");
+ } else {
+ printf(" %-*s %9s %9s %9s", comm_width,
+ "[tid/pid]", "(msec)", "(msec)", "(msec)");
+ }
if (sched->show_state)
printf(" %5s", "");
@@ -2085,9 +2127,15 @@ static void timehist_header(struct perf_sched *sched)
if (sched->show_cpu_visual)
printf(" %.*s ", ncpus, graph_dotted_line);
- printf(" %.*s %.9s %.9s %.9s", comm_width,
- graph_dotted_line, graph_dotted_line, graph_dotted_line,
- graph_dotted_line);
+ if (sched->show_prio) {
+ printf(" %.*s %.*s %.9s %.9s %.9s",
+ comm_width, graph_dotted_line, MAX_PRIO_STR_LEN, graph_dotted_line,
+ graph_dotted_line, graph_dotted_line, graph_dotted_line);
+ } else {
+ printf(" %.*s %.9s %.9s %.9s", comm_width,
+ graph_dotted_line, graph_dotted_line, graph_dotted_line,
+ graph_dotted_line);
+ }
if (sched->show_state)
printf(" %.5s", graph_dotted_line);
@@ -2134,6 +2182,9 @@ static void timehist_print_sample(struct perf_sched *sched,
printf(" %-*s ", comm_width, timehist_get_commstr(thread));
+ if (sched->show_prio)
+ printf(" %-*s ", MAX_PRIO_STR_LEN, timehist_get_priostr(evsel, thread, sample));
+
wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt;
print_sched_time(wait_time, 6);
@@ -2301,6 +2352,7 @@ static int init_idle_thread(struct thread *thread)
if (itr == NULL)
return -ENOMEM;
+ init_prio(&itr->tr);
init_stats(&itr->tr.run_stats);
callchain_init(&itr->callchain);
callchain_cursor_reset(&itr->cursor);
@@ -2455,12 +2507,33 @@ static bool timehist_skip_sample(struct perf_sched *sched,
struct perf_sample *sample)
{
bool rc = false;
+ int prio = -1;
+ struct thread_runtime *tr = NULL;
if (thread__is_filtered(thread)) {
rc = true;
sched->skipped_samples++;
}
+ if (sched->prio_str) {
+ /*
+ * Because priority may be changed during task execution,
+ * first read priority from prev sched_in event for current task.
+ * If prev sched_in event is not saved, then read priority from
+ * current task sched_out event.
+ */
+ tr = thread__get_runtime(thread);
+ if (tr && tr->prio != -1)
+ prio = tr->prio;
+ else if (evsel__name_is(evsel, "sched:sched_switch"))
+ prio = evsel__intval(evsel, sample, "prev_prio");
+
+ if (prio != -1 && !test_bit(prio, sched->prio_bitmap)) {
+ rc = true;
+ sched->skipped_samples++;
+ }
+ }
+
if (sched->idle_hist) {
if (!evsel__name_is(evsel, "sched:sched_switch"))
rc = true;
@@ -2506,7 +2579,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched,
printf("\n");
}
-static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused,
+static int timehist_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct evsel *evsel __maybe_unused,
struct perf_sample *sample __maybe_unused,
@@ -2515,7 +2588,7 @@ static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused,
return 0;
}
-static int timehist_sched_wakeup_event(struct perf_tool *tool,
+static int timehist_sched_wakeup_event(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct evsel *evsel,
struct perf_sample *sample,
@@ -2599,7 +2672,7 @@ static void timehist_print_migration_event(struct perf_sched *sched,
printf("\n");
}
-static int timehist_migrate_task_event(struct perf_tool *tool,
+static int timehist_migrate_task_event(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct evsel *evsel,
struct perf_sample *sample,
@@ -2627,7 +2700,31 @@ static int timehist_migrate_task_event(struct perf_tool *tool,
return 0;
}
-static int timehist_sched_change_event(struct perf_tool *tool,
+static void timehist_update_task_prio(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct thread_runtime *tr = NULL;
+ const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
+ const u32 next_prio = evsel__intval(evsel, sample, "next_prio");
+
+ if (next_pid == 0)
+ thread = get_idle_thread(sample->cpu);
+ else
+ thread = machine__findnew_thread(machine, -1, next_pid);
+
+ if (thread == NULL)
+ return;
+
+ tr = thread__get_runtime(thread);
+ if (tr == NULL)
+ return;
+
+ tr->prio = next_prio;
+}
+
+static int timehist_sched_change_event(const struct perf_tool *tool,
union perf_event *event,
struct evsel *evsel,
struct perf_sample *sample,
@@ -2650,6 +2747,9 @@ static int timehist_sched_change_event(struct perf_tool *tool,
goto out;
}
+ if (sched->show_prio || sched->prio_str)
+ timehist_update_task_prio(evsel, sample, machine);
+
thread = timehist_get_thread(sched, sample, machine, evsel);
if (thread == NULL) {
rc = -1;
@@ -2683,9 +2783,12 @@ static int timehist_sched_change_event(struct perf_tool *tool,
* - previous sched event is out of window - we are done
* - sample time is beyond window user cares about - reset it
* to close out stats for time window interest
+ * - If tprev is 0, that is, sched_in event for current task is
+ * not recorded, cannot determine whether sched_in event is
+ * within time window interest - ignore it
*/
if (ptime->end) {
- if (tprev > ptime->end)
+ if (!tprev || tprev > ptime->end)
goto out;
if (t > ptime->end)
@@ -2700,8 +2803,6 @@ static int timehist_sched_change_event(struct perf_tool *tool,
struct idle_thread_runtime *itr = (void *)tr;
struct thread_runtime *last_tr;
- BUG_ON(thread__tid(thread) != 0);
-
if (itr->last_thread == NULL)
goto out;
@@ -2727,10 +2828,10 @@ static int timehist_sched_change_event(struct perf_tool *tool,
itr->last_thread = NULL;
}
- }
- if (!sched->summary_only)
- timehist_print_sample(sched, evsel, sample, &al, thread, t, state);
+ if (!sched->summary_only)
+ timehist_print_sample(sched, evsel, sample, &al, thread, t, state);
+ }
out:
if (sched->hist_time.start == 0 && t >= ptime->start)
@@ -2758,7 +2859,7 @@ out:
return rc;
}
-static int timehist_sched_switch_event(struct perf_tool *tool,
+static int timehist_sched_switch_event(const struct perf_tool *tool,
union perf_event *event,
struct evsel *evsel,
struct perf_sample *sample,
@@ -2767,7 +2868,7 @@ static int timehist_sched_switch_event(struct perf_tool *tool,
return timehist_sched_change_event(tool, event, evsel, sample, machine);
}
-static int process_lost(struct perf_tool *tool __maybe_unused,
+static int process_lost(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine __maybe_unused)
@@ -3010,13 +3111,13 @@ static void timehist_print_summary(struct perf_sched *sched,
printf(" (x %d)\n", sched->max_cpu.cpu);
}
-typedef int (*sched_handler)(struct perf_tool *tool,
+typedef int (*sched_handler)(const struct perf_tool *tool,
union perf_event *event,
struct evsel *evsel,
struct perf_sample *sample,
struct machine *machine);
-static int perf_timehist__process_sample(struct perf_tool *tool,
+static int perf_timehist__process_sample(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -3066,6 +3167,47 @@ static int timehist_check_attr(struct perf_sched *sched,
return 0;
}
+static int timehist_parse_prio_str(struct perf_sched *sched)
+{
+ char *p;
+ unsigned long start_prio, end_prio;
+ const char *str = sched->prio_str;
+
+ if (!str)
+ return 0;
+
+ while (isdigit(*str)) {
+ p = NULL;
+ start_prio = strtoul(str, &p, 0);
+ if (start_prio >= MAX_PRIO || (*p != '\0' && *p != ',' && *p != '-'))
+ return -1;
+
+ if (*p == '-') {
+ str = ++p;
+ p = NULL;
+ end_prio = strtoul(str, &p, 0);
+
+ if (end_prio >= MAX_PRIO || (*p != '\0' && *p != ','))
+ return -1;
+
+ if (end_prio < start_prio)
+ return -1;
+ } else {
+ end_prio = start_prio;
+ }
+
+ for (; start_prio <= end_prio; start_prio++)
+ __set_bit(start_prio, sched->prio_bitmap);
+
+ if (*p)
+ ++p;
+
+ str = p;
+ }
+
+ return 0;
+}
+
static int perf_sched__timehist(struct perf_sched *sched)
{
struct evsel_str_handler handlers[] = {
@@ -3100,7 +3242,6 @@ static int perf_sched__timehist(struct perf_sched *sched)
sched->tool.tracing_data = perf_event__process_tracing_data;
sched->tool.build_id = perf_event__process_build_id;
- sched->tool.ordered_events = true;
sched->tool.ordering_requires_timestamps = true;
symbol_conf.use_callchain = sched->show_callchain;
@@ -3121,12 +3262,18 @@ static int perf_sched__timehist(struct perf_sched *sched)
if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) {
pr_err("Invalid time string\n");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
if (timehist_check_attr(sched, evlist) != 0)
goto out;
+ if (timehist_parse_prio_str(sched) != 0) {
+ pr_err("Invalid prio string\n");
+ goto out;
+ }
+
setup_pager();
/* prefer sched_waking if it is captured */
@@ -3605,14 +3752,6 @@ int cmd_sched(int argc, const char **argv)
{
static const char default_sort_order[] = "avg, max, switch, runtime";
struct perf_sched sched = {
- .tool = {
- .sample = perf_sched__process_tracepoint_sample,
- .comm = perf_sched__process_comm,
- .namespaces = perf_event__process_namespaces,
- .lost = perf_event__process_lost,
- .fork = perf_sched__process_fork_event,
- .ordered_events = true,
- },
.cmp_pid = LIST_HEAD_INIT(sched.cmp_pid),
.sort_list = LIST_HEAD_INIT(sched.sort_list),
.sort_order = default_sort_order,
@@ -3691,6 +3830,9 @@ int cmd_sched(int argc, const char **argv)
OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
"analyze events only for given thread id(s)"),
OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
+ OPT_BOOLEAN(0, "show-prio", &sched.show_prio, "Show task priority"),
+ OPT_STRING(0, "prio", &sched.prio_str, "prio",
+ "analyze events only for given task priority(ies)"),
OPT_PARENT(sched_options)
};
@@ -3733,6 +3875,13 @@ int cmd_sched(int argc, const char **argv)
};
int ret;
+ perf_tool__init(&sched.tool, /*ordered_events=*/true);
+ sched.tool.sample = perf_sched__process_tracepoint_sample;
+ sched.tool.comm = perf_sched__process_comm;
+ sched.tool.namespaces = perf_event__process_namespaces;
+ sched.tool.lost = perf_event__process_lost;
+ sched.tool.fork = perf_sched__process_fork_event;
+
argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
@@ -3805,5 +3954,8 @@ int cmd_sched(int argc, const char **argv)
usage_with_options(sched_usage, sched_options);
}
+ /* free usage string allocated by parse_options_subcommand */
+ free((void *)sched_usage[0]);
+
return 0;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c16224b1fef3..a644787fa9e1 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -62,6 +62,7 @@
#include "util/record.h"
#include "util/util.h"
#include "util/cgroup.h"
+#include "util/annotate.h"
#include "perf.h"
#include <linux/ctype.h>
@@ -138,6 +139,7 @@ enum perf_output_field {
PERF_OUTPUT_DSOFF = 1ULL << 41,
PERF_OUTPUT_DISASM = 1ULL << 42,
PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43,
+ PERF_OUTPUT_BRCNTR = 1ULL << 44,
};
struct perf_script {
@@ -213,6 +215,7 @@ struct output_option {
{.str = "cgroup", .field = PERF_OUTPUT_CGROUP},
{.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT},
{.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM},
+ {.str = "brcntr", .field = PERF_OUTPUT_BRCNTR},
};
enum {
@@ -520,6 +523,12 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
"Hint: run 'perf record -b ...'\n");
return -EINVAL;
}
+ if (PRINT_FIELD(BRCNTR) &&
+ !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_COUNTERS)) {
+ pr_err("Display of branch counter requested but it's not enabled\n"
+ "Hint: run 'perf record -j any,counter ...'\n");
+ return -EINVAL;
+ }
if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID))
return -EINVAL;
@@ -789,6 +798,19 @@ static int perf_sample__fprintf_start(struct perf_script *script,
int printed = 0;
char tstr[128];
+ /*
+ * Print the branch counter's abbreviation list,
+ * if the branch counter is available.
+ */
+ if (PRINT_FIELD(BRCNTR) && !verbose) {
+ char *buf;
+
+ if (!annotation_br_cntr_abbr_list(&buf, evsel, true)) {
+ printed += fprintf(stdout, "%s", buf);
+ free(buf);
+ }
+ }
+
if (PRINT_FIELD(MACHINE_PID) && sample->machine_pid)
printed += fprintf(fp, "VM:%5d ", sample->machine_pid);
@@ -1195,7 +1217,9 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
struct perf_insn *x, u8 *inbuf, int len,
int insn, FILE *fp, int *total_cycles,
struct perf_event_attr *attr,
- struct thread *thread)
+ struct thread *thread,
+ struct evsel *evsel,
+ u64 br_cntr)
{
int ilen = 0;
int printed = fprintf(fp, "\t%016" PRIx64 "\t", ip);
@@ -1216,6 +1240,29 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
addr_location__exit(&al);
}
+ if (PRINT_FIELD(BRCNTR)) {
+ struct evsel *pos = evsel__leader(evsel);
+ unsigned int i = 0, j, num, mask, width;
+
+ perf_env__find_br_cntr_info(evsel__env(evsel), NULL, &width);
+ mask = (1L << width) - 1;
+ printed += fprintf(fp, "br_cntr: ");
+ evlist__for_each_entry_from(evsel->evlist, pos) {
+ if (!(pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
+ continue;
+ if (evsel__leader(pos) != evsel__leader(evsel))
+ break;
+
+ num = (br_cntr >> (i++ * width)) & mask;
+ if (!verbose) {
+ for (j = 0; j < num; j++)
+ printed += fprintf(fp, "%s", pos->abbr_name);
+ } else
+ printed += fprintf(fp, "%s %d ", pos->name, num);
+ }
+ printed += fprintf(fp, "\t");
+ }
+
printed += fprintf(fp, "#%s%s%s%s",
en->flags.predicted ? " PRED" : "",
en->flags.mispred ? " MISPRED" : "",
@@ -1272,6 +1319,7 @@ out:
}
static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
+ struct evsel *evsel,
struct thread *thread,
struct perf_event_attr *attr,
struct machine *machine, FILE *fp)
@@ -1285,6 +1333,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
unsigned off;
struct symbol *lastsym = NULL;
int total_cycles = 0;
+ u64 br_cntr = 0;
if (!(br && br->nr))
return 0;
@@ -1296,6 +1345,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
x.machine = machine;
x.cpu = sample->cpu;
+ if (PRINT_FIELD(BRCNTR) && sample->branch_stack_cntr)
+ br_cntr = sample->branch_stack_cntr[nr - 1];
+
printed += fprintf(fp, "%c", '\n');
/* Handle first from jump, of which we don't know the entry. */
@@ -1307,7 +1359,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
x.cpumode, x.cpu, &lastsym, attr, fp);
printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1],
&x, buffer, len, 0, fp, &total_cycles,
- attr, thread);
+ attr, thread, evsel, br_cntr);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, entries[nr - 1].from);
}
@@ -1337,8 +1389,10 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
+ if (PRINT_FIELD(BRCNTR) && sample->branch_stack_cntr)
+ br_cntr = sample->branch_stack_cntr[i];
printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp,
- &total_cycles, attr, thread);
+ &total_cycles, attr, thread, evsel, br_cntr);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);
break;
@@ -1375,7 +1429,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
* Due to pipeline delays the LBRs might be missing a branch
* or two, which can result in very large or negative blocks
* between final branch and sample. When this happens just
- * continue walking after the last TO until we hit a branch.
+ * continue walking after the last TO.
*/
start = entries[0].to;
end = sample->ip;
@@ -1410,7 +1464,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += fprintf(fp, "\n");
if (ilen == 0)
break;
- if (arch_is_branch(buffer + off, len - off, x.is64bit) && start + off != sample->ip) {
+ if ((attr->branch_sample_type == 0 || attr->branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+ && arch_is_uncond_branch(buffer + off, len - off, x.is64bit)
+ && start + off != sample->ip) {
/*
* Hit a missing branch. Just stop.
*/
@@ -1547,6 +1603,7 @@ void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
}
static int perf_sample__fprintf_insn(struct perf_sample *sample,
+ struct evsel *evsel,
struct perf_event_attr *attr,
struct thread *thread,
struct machine *machine, FILE *fp,
@@ -1567,7 +1624,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al);
}
if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM))
- printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
+ printed += perf_sample__fprintf_brstackinsn(sample, evsel, thread, attr, machine, fp);
return printed;
}
@@ -1639,7 +1696,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
if (print_srcline_last)
printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
- printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al);
+ printed += perf_sample__fprintf_insn(sample, evsel, attr, thread, machine, fp, al);
printed += fprintf(fp, "\n");
if (PRINT_FIELD(SRCCODE)) {
int ret = map__fprintf_srccode(al->map, al->addr, stdout,
@@ -2297,7 +2354,7 @@ static void process_event(struct perf_script *script,
if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
perf_sample__fprintf_bpf_output(sample, fp);
- perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al);
+ perf_sample__fprintf_insn(sample, evsel, attr, thread, machine, fp, al);
if (PRINT_FIELD(PHYS_ADDR))
fprintf(fp, "%16" PRIx64, sample->phys_addr);
@@ -2399,7 +2456,7 @@ static bool filter_cpu(struct perf_sample *sample)
return false;
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -2486,7 +2543,7 @@ out_put:
// Used when scr->per_event_dump is not set
static struct evsel_script es_stdout;
-static int process_attr(struct perf_tool *tool, union perf_event *event,
+static int process_attr(const struct perf_tool *tool, union perf_event *event,
struct evlist **pevlist)
{
struct perf_script *scr = container_of(tool, struct perf_script, tool);
@@ -2552,7 +2609,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
return 0;
}
-static int print_event_with_time(struct perf_tool *tool,
+static int print_event_with_time(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine,
@@ -2588,14 +2645,14 @@ static int print_event_with_time(struct perf_tool *tool,
return 0;
}
-static int print_event(struct perf_tool *tool, union perf_event *event,
+static int print_event(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine,
pid_t pid, pid_t tid)
{
return print_event_with_time(tool, event, sample, machine, pid, tid, 0);
}
-static int process_comm_event(struct perf_tool *tool,
+static int process_comm_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2607,7 +2664,7 @@ static int process_comm_event(struct perf_tool *tool,
event->comm.tid);
}
-static int process_namespaces_event(struct perf_tool *tool,
+static int process_namespaces_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2619,7 +2676,7 @@ static int process_namespaces_event(struct perf_tool *tool,
event->namespaces.tid);
}
-static int process_cgroup_event(struct perf_tool *tool,
+static int process_cgroup_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2631,7 +2688,7 @@ static int process_cgroup_event(struct perf_tool *tool,
sample->tid);
}
-static int process_fork_event(struct perf_tool *tool,
+static int process_fork_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2643,7 +2700,7 @@ static int process_fork_event(struct perf_tool *tool,
event->fork.pid, event->fork.tid,
event->fork.time);
}
-static int process_exit_event(struct perf_tool *tool,
+static int process_exit_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2656,7 +2713,7 @@ static int process_exit_event(struct perf_tool *tool,
return perf_event__process_exit(tool, event, sample, machine);
}
-static int process_mmap_event(struct perf_tool *tool,
+static int process_mmap_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2668,7 +2725,7 @@ static int process_mmap_event(struct perf_tool *tool,
event->mmap.tid);
}
-static int process_mmap2_event(struct perf_tool *tool,
+static int process_mmap2_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2680,7 +2737,7 @@ static int process_mmap2_event(struct perf_tool *tool,
event->mmap2.tid);
}
-static int process_switch_event(struct perf_tool *tool,
+static int process_switch_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2712,7 +2769,7 @@ static int process_auxtrace_error(struct perf_session *session,
}
static int
-process_lost_event(struct perf_tool *tool,
+process_lost_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2722,7 +2779,7 @@ process_lost_event(struct perf_tool *tool,
}
static int
-process_throttle_event(struct perf_tool *tool __maybe_unused,
+process_throttle_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2733,7 +2790,7 @@ process_throttle_event(struct perf_tool *tool __maybe_unused,
}
static int
-process_finished_round_event(struct perf_tool *tool __maybe_unused,
+process_finished_round_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct ordered_events *oe __maybe_unused)
@@ -2743,7 +2800,7 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused,
}
static int
-process_bpf_events(struct perf_tool *tool __maybe_unused,
+process_bpf_events(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -2755,7 +2812,7 @@ process_bpf_events(struct perf_tool *tool __maybe_unused,
sample->tid);
}
-static int process_text_poke_events(struct perf_tool *tool,
+static int process_text_poke_events(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -3757,7 +3814,7 @@ static
int process_thread_map_event(struct perf_session *session,
union perf_event *event)
{
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
if (dump_trace)
@@ -3779,7 +3836,7 @@ static
int process_cpu_map_event(struct perf_session *session,
union perf_event *event)
{
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
if (dump_trace)
@@ -3809,11 +3866,10 @@ static int process_feature_event(struct perf_session *session,
static int perf_script__process_auxtrace_info(struct perf_session *session,
union perf_event *event)
{
- struct perf_tool *tool = session->tool;
-
int ret = perf_event__process_auxtrace_info(session, event);
if (ret == 0) {
+ const struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
ret = perf_script__setup_per_event_dump(script);
@@ -3900,38 +3956,7 @@ int cmd_script(int argc, const char **argv)
const char *dlfilter_file = NULL;
const char **__argv;
int i, j, err = 0;
- struct perf_script script = {
- .tool = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .namespaces = perf_event__process_namespaces,
- .cgroup = perf_event__process_cgroup,
- .exit = perf_event__process_exit,
- .fork = perf_event__process_fork,
- .attr = process_attr,
- .event_update = perf_event__process_event_update,
-#ifdef HAVE_LIBTRACEEVENT
- .tracing_data = perf_event__process_tracing_data,
-#endif
- .feature = process_feature_event,
- .build_id = perf_event__process_build_id,
- .id_index = perf_event__process_id_index,
- .auxtrace_info = perf_script__process_auxtrace_info,
- .auxtrace = perf_event__process_auxtrace,
- .auxtrace_error = perf_event__process_auxtrace_error,
- .stat = perf_event__process_stat_event,
- .stat_round = process_stat_round_event,
- .stat_config = process_stat_config_event,
- .thread_map = process_thread_map_event,
- .cpu_map = process_cpu_map_event,
- .throttle = process_throttle_event,
- .unthrottle = process_throttle_event,
- .ordered_events = true,
- .ordering_requires_timestamps = true,
- },
- };
+ struct perf_script script = {};
struct perf_data data = {
.mode = PERF_DATA_MODE_READ,
};
@@ -3979,7 +4004,8 @@ int cmd_script(int argc, const char **argv)
"brstacksym,flags,data_src,weight,bpf-output,brstackinsn,"
"brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth,"
"phys_addr,metric,misc,srccode,ipc,tod,data_page_size,"
- "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat",
+ "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat,"
+ "brcntr",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
@@ -4052,6 +4078,8 @@ int cmd_script(int argc, const char **argv)
"Enable symbol demangling"),
OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
"Enable kernel symbol demangling"),
+ OPT_STRING(0, "addr2line", &symbol_conf.addr2line_path, "path",
+ "addr2line binary to use for line numbers"),
OPT_STRING(0, "time", &script.time_str, "str",
"Time span of interest (start,stop)"),
OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
@@ -4103,10 +4131,8 @@ int cmd_script(int argc, const char **argv)
data.path = input_name;
data.force = symbol_conf.force;
- if (unsorted_dump) {
+ if (unsorted_dump)
dump_trace = true;
- script.tool.ordered_events = false;
- }
if (symbol__validate_sym_arguments())
return -1;
@@ -4297,6 +4323,34 @@ script_found:
use_browser = 0;
}
+ perf_tool__init(&script.tool, !unsorted_dump);
+ script.tool.sample = process_sample_event;
+ script.tool.mmap = perf_event__process_mmap;
+ script.tool.mmap2 = perf_event__process_mmap2;
+ script.tool.comm = perf_event__process_comm;
+ script.tool.namespaces = perf_event__process_namespaces;
+ script.tool.cgroup = perf_event__process_cgroup;
+ script.tool.exit = perf_event__process_exit;
+ script.tool.fork = perf_event__process_fork;
+ script.tool.attr = process_attr;
+ script.tool.event_update = perf_event__process_event_update;
+#ifdef HAVE_LIBTRACEEVENT
+ script.tool.tracing_data = perf_event__process_tracing_data;
+#endif
+ script.tool.feature = process_feature_event;
+ script.tool.build_id = perf_event__process_build_id;
+ script.tool.id_index = perf_event__process_id_index;
+ script.tool.auxtrace_info = perf_script__process_auxtrace_info;
+ script.tool.auxtrace = perf_event__process_auxtrace;
+ script.tool.auxtrace_error = perf_event__process_auxtrace_error;
+ script.tool.stat = perf_event__process_stat_event;
+ script.tool.stat_round = process_stat_round_event;
+ script.tool.stat_config = process_stat_config_event;
+ script.tool.thread_map = process_thread_map_event;
+ script.tool.cpu_map = process_cpu_map_event;
+ script.tool.throttle = process_throttle_event;
+ script.tool.unthrottle = process_throttle_event;
+ script.tool.ordering_requires_timestamps = true;
session = perf_session__new(&data, &script.tool);
if (IS_ERR(session))
return PTR_ERR(session);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 661832756a24..689a3d43c258 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -70,6 +70,7 @@
#include "util/bpf_counter.h"
#include "util/iostat.h"
#include "util/util.h"
+#include "util/intel-tpebs.h"
#include "asm/bug.h"
#include <linux/time64.h>
@@ -248,7 +249,7 @@ static void perf_stat__reset_stats(void)
perf_stat__reset_shadow_stats();
}
-static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+static int process_synthesized_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -293,14 +294,14 @@ static int read_single_counter(struct evsel *counter, int cpu_map_idx, int threa
* terminates. Use the wait4 values in that case.
*/
if (err && cpu_map_idx == 0 &&
- (counter->tool_event == PERF_TOOL_USER_TIME ||
- counter->tool_event == PERF_TOOL_SYSTEM_TIME)) {
+ (evsel__tool_event(counter) == PERF_TOOL_USER_TIME ||
+ evsel__tool_event(counter) == PERF_TOOL_SYSTEM_TIME)) {
u64 val, *start_time;
struct perf_counts_values *count =
perf_counts(counter->counts, cpu_map_idx, thread);
start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread);
- if (counter->tool_event == PERF_TOOL_USER_TIME)
+ if (evsel__tool_event(counter) == PERF_TOOL_USER_TIME)
val = ru_stats.ru_utime_usec_stat.mean;
else
val = ru_stats.ru_stime_usec_stat.mean;
@@ -683,6 +684,9 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
if (child_pid != -1)
kill(child_pid, SIGTERM);
+
+ tpebs_delete();
+
return COUNTER_FATAL;
}
@@ -833,7 +837,7 @@ try_again_reset:
return -1;
}
- if (evlist__apply_filters(evsel_list, &counter)) {
+ if (evlist__apply_filters(evsel_list, &counter, &target)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
counter->filter, evsel__name(counter), errno,
str_error_r(errno, msg, sizeof(msg)));
@@ -2180,7 +2184,7 @@ static
int process_stat_config_event(struct perf_session *session,
union perf_event *event)
{
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
perf_event__read_stat_config(&stat_config, &event->stat_config);
@@ -2229,7 +2233,7 @@ static
int process_thread_map_event(struct perf_session *session,
union perf_event *event)
{
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
if (st->threads) {
@@ -2248,7 +2252,7 @@ static
int process_cpu_map_event(struct perf_session *session,
union perf_event *event)
{
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
struct perf_cpu_map *cpus;
@@ -2271,15 +2275,6 @@ static const char * const stat_report_usage[] = {
};
static struct perf_stat perf_stat = {
- .tool = {
- .attr = perf_event__process_attr,
- .event_update = perf_event__process_event_update,
- .thread_map = process_thread_map_event,
- .cpu_map = process_cpu_map_event,
- .stat_config = process_stat_config_event,
- .stat = perf_event__process_stat_event,
- .stat_round = process_stat_round_event,
- },
.aggr_mode = AGGR_UNSET,
.aggr_level = 0,
};
@@ -2322,6 +2317,15 @@ static int __cmd_report(int argc, const char **argv)
perf_stat.data.path = input_name;
perf_stat.data.mode = PERF_DATA_MODE_READ;
+ perf_tool__init(&perf_stat.tool, /*ordered_events=*/false);
+ perf_stat.tool.attr = perf_event__process_attr;
+ perf_stat.tool.event_update = perf_event__process_event_update;
+ perf_stat.tool.thread_map = process_thread_map_event;
+ perf_stat.tool.cpu_map = process_cpu_map_event;
+ perf_stat.tool.stat_config = process_stat_config_event;
+ perf_stat.tool.stat = perf_event__process_stat_event;
+ perf_stat.tool.stat_round = process_stat_round_event;
+
session = perf_session__new(&perf_stat.data, &perf_stat.tool);
if (IS_ERR(session))
return PTR_ERR(session);
@@ -2471,6 +2475,10 @@ int cmd_stat(int argc, const char **argv)
"disable adding events for the metric threshold calculation"),
OPT_BOOLEAN(0, "topdown", &topdown_run,
"measure top-down statistics"),
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ OPT_BOOLEAN(0, "record-tpebs", &tpebs_recording,
+ "enable recording for tpebs when retire_latency required"),
+#endif
OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
"Set the metrics level for the top-down statistics (0: max level)"),
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 19d4542ea18a..218c8b44d7be 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -320,7 +320,7 @@ static int *cpus_cstate_state;
static u64 *cpus_pstate_start_times;
static u64 *cpus_pstate_state;
-static int process_comm_event(struct perf_tool *tool,
+static int process_comm_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -330,7 +330,7 @@ static int process_comm_event(struct perf_tool *tool,
return 0;
}
-static int process_fork_event(struct perf_tool *tool,
+static int process_fork_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -340,7 +340,7 @@ static int process_fork_event(struct perf_tool *tool,
return 0;
}
-static int process_exit_event(struct perf_tool *tool,
+static int process_exit_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -571,7 +571,7 @@ typedef int (*tracepoint_handler)(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace);
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -1606,10 +1606,16 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name)
.mode = PERF_DATA_MODE_READ,
.force = tchart->force,
};
-
- struct perf_session *session = perf_session__new(&data, &tchart->tool);
+ struct perf_session *session;
int ret = -EINVAL;
+ perf_tool__init(&tchart->tool, /*ordered_events=*/true);
+ tchart->tool.comm = process_comm_event;
+ tchart->tool.fork = process_fork_event;
+ tchart->tool.exit = process_exit_event;
+ tchart->tool.sample = process_sample_event;
+
+ session = perf_session__new(&data, &tchart->tool);
if (IS_ERR(session))
return PTR_ERR(session);
@@ -1924,13 +1930,6 @@ parse_time(const struct option *opt, const char *arg, int __maybe_unused unset)
int cmd_timechart(int argc, const char **argv)
{
struct timechart tchart = {
- .tool = {
- .comm = process_comm_event,
- .fork = process_fork_event,
- .exit = process_exit_event,
- .sample = process_sample_event,
- .ordered_events = true,
- },
.proc_num = 15,
.min_time = NSEC_PER_MSEC,
.merge_dist = 1000,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e8cbbf10d361..724a79386321 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -191,7 +191,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
if (use_browser <= 0)
sleep(5);
- map__set_erange_warned(map, true);
+ map__set_erange_warned(map);
}
static void perf_top__record_precise_ip(struct perf_top *top,
@@ -735,12 +735,12 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
perf_top__record_precise_ip(top, iter->he, iter->sample, evsel, al->addr);
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
- !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY),
- NULL);
+ !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY),
+ NULL, evsel);
return 0;
}
-static void perf_event__process_sample(struct perf_tool *tool,
+static void perf_event__process_sample(const struct perf_tool *tool,
const union perf_event *event,
struct evsel *evsel,
struct perf_sample *sample,
@@ -1055,7 +1055,7 @@ try_again:
}
}
- if (evlist__apply_filters(evlist, &counter)) {
+ if (evlist__apply_filters(evlist, &counter, &opts->target)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
counter->filter ?: "BPF", evsel__name(counter), errno,
str_error_r(errno, msg, sizeof(msg)));
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 8449f2beb54d..f6e847529073 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -19,6 +19,7 @@
#ifdef HAVE_LIBBPF_SUPPORT
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include <bpf/btf.h>
#ifdef HAVE_BPF_SKEL
#include "bpf_skel/augmented_raw_syscalls.skel.h"
#endif
@@ -64,6 +65,7 @@
#include "syscalltbl.h"
#include "rb_resort.h"
#include "../perf.h"
+#include "trace_augment.h"
#include <errno.h>
#include <inttypes.h>
@@ -101,6 +103,12 @@
/*
* strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
+ *
+ * We have to explicitely mark the direction of the flow of data, if from the
+ * kernel to user space or the other way around, since the BPF collector we
+ * have so far copies only from user to kernel space, mark the arguments that
+ * go that direction, so that we don´t end up collecting the previous contents
+ * for syscall args that goes from kernel to user space.
*/
struct syscall_arg_fmt {
size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
@@ -109,7 +117,12 @@ struct syscall_arg_fmt {
void *parm;
const char *name;
u16 nr_entries; // for arrays
+ bool from_user;
bool show_zero;
+#ifdef HAVE_LIBBPF_SUPPORT
+ const struct btf_type *type;
+ int type_id; /* used in btf_dump */
+#endif
};
struct syscall_fmt {
@@ -140,6 +153,9 @@ struct trace {
#ifdef HAVE_BPF_SKEL
struct augmented_raw_syscalls_bpf *skel;
#endif
+#ifdef HAVE_LIBBPF_SUPPORT
+ struct btf *btf;
+#endif
struct record_opts opts;
struct evlist *evlist;
struct machine *host;
@@ -196,6 +212,7 @@ struct trace {
bool show_string_prefix;
bool force;
bool vfs_getname;
+ bool force_btf;
int trace_pgfaults;
char *perfconfig_events;
struct {
@@ -204,6 +221,20 @@ struct trace {
} oe;
};
+static void trace__load_vmlinux_btf(struct trace *trace __maybe_unused)
+{
+#ifdef HAVE_LIBBPF_SUPPORT
+ if (trace->btf != NULL)
+ return;
+
+ trace->btf = btf__load_vmlinux_btf();
+ if (verbose > 0) {
+ fprintf(trace->output, trace->btf ? "vmlinux BTF loaded\n" :
+ "Failed to load vmlinux BTF\n");
+ }
+#endif
+}
+
struct tp_field {
int offset;
union {
@@ -830,6 +861,15 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
#define SCA_FILENAME syscall_arg__scnprintf_filename
+// 'argname' is just documentational at this point, to remove the previous comment with that info
+#define SCA_FILENAME_FROM_USER(argname) \
+ { .scnprintf = SCA_FILENAME, \
+ .from_user = true, }
+
+static size_t syscall_arg__scnprintf_buf(char *bf, size_t size, struct syscall_arg *arg);
+
+#define SCA_BUF syscall_arg__scnprintf_buf
+
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -887,6 +927,177 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
+#ifdef HAVE_LIBBPF_SUPPORT
+static void syscall_arg_fmt__cache_btf_enum(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type)
+{
+ int id;
+
+ type = strstr(type, "enum ");
+ if (type == NULL)
+ return;
+
+ type += 5; // skip "enum " to get the enumeration name
+
+ id = btf__find_by_name(btf, type);
+ if (id < 0)
+ return;
+
+ arg_fmt->type = btf__type_by_id(btf, id);
+}
+
+static bool syscall_arg__strtoul_btf_enum(char *bf, size_t size, struct syscall_arg *arg, u64 *val)
+{
+ const struct btf_type *bt = arg->fmt->type;
+ struct btf *btf = arg->trace->btf;
+ struct btf_enum *be = btf_enum(bt);
+
+ for (int i = 0; i < btf_vlen(bt); ++i, ++be) {
+ const char *name = btf__name_by_offset(btf, be->name_off);
+ int max_len = max(size, strlen(name));
+
+ if (strncmp(name, bf, max_len) == 0) {
+ *val = be->val;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool syscall_arg__strtoul_btf_type(char *bf, size_t size, struct syscall_arg *arg, u64 *val)
+{
+ const struct btf_type *bt;
+ char *type = arg->type_name;
+ struct btf *btf;
+
+ trace__load_vmlinux_btf(arg->trace);
+
+ btf = arg->trace->btf;
+ if (btf == NULL)
+ return false;
+
+ if (arg->fmt->type == NULL) {
+ // See if this is an enum
+ syscall_arg_fmt__cache_btf_enum(arg->fmt, btf, type);
+ }
+
+ // Now let's see if we have a BTF type resolved
+ bt = arg->fmt->type;
+ if (bt == NULL)
+ return false;
+
+ // If it is an enum:
+ if (btf_is_enum(arg->fmt->type))
+ return syscall_arg__strtoul_btf_enum(bf, size, arg, val);
+
+ return false;
+}
+
+static size_t btf_enum_scnprintf(const struct btf_type *type, struct btf *btf, char *bf, size_t size, int val)
+{
+ struct btf_enum *be = btf_enum(type);
+ const int nr_entries = btf_vlen(type);
+
+ for (int i = 0; i < nr_entries; ++i, ++be) {
+ if (be->val == val) {
+ return scnprintf(bf, size, "%s",
+ btf__name_by_offset(btf, be->name_off));
+ }
+ }
+
+ return 0;
+}
+
+struct trace_btf_dump_snprintf_ctx {
+ char *bf;
+ size_t printed, size;
+};
+
+static void trace__btf_dump_snprintf(void *vctx, const char *fmt, va_list args)
+{
+ struct trace_btf_dump_snprintf_ctx *ctx = vctx;
+
+ ctx->printed += vscnprintf(ctx->bf + ctx->printed, ctx->size - ctx->printed, fmt, args);
+}
+
+static size_t btf_struct_scnprintf(const struct btf_type *type, struct btf *btf, char *bf, size_t size, struct syscall_arg *arg)
+{
+ struct trace_btf_dump_snprintf_ctx ctx = {
+ .bf = bf,
+ .size = size,
+ };
+ struct augmented_arg *augmented_arg = arg->augmented.args;
+ int type_id = arg->fmt->type_id, consumed;
+ struct btf_dump *btf_dump;
+
+ LIBBPF_OPTS(btf_dump_opts, dump_opts);
+ LIBBPF_OPTS(btf_dump_type_data_opts, dump_data_opts);
+
+ if (arg == NULL || arg->augmented.args == NULL)
+ return 0;
+
+ dump_data_opts.compact = true;
+ dump_data_opts.skip_names = !arg->trace->show_arg_names;
+
+ btf_dump = btf_dump__new(btf, trace__btf_dump_snprintf, &ctx, &dump_opts);
+ if (btf_dump == NULL)
+ return 0;
+
+ /* pretty print the struct data here */
+ if (btf_dump__dump_type_data(btf_dump, type_id, arg->augmented.args->value, type->size, &dump_data_opts) == 0)
+ return 0;
+
+ consumed = sizeof(*augmented_arg) + augmented_arg->size;
+ arg->augmented.args = ((void *)arg->augmented.args) + consumed;
+ arg->augmented.size -= consumed;
+
+ btf_dump__free(btf_dump);
+
+ return ctx.printed;
+}
+
+static size_t trace__btf_scnprintf(struct trace *trace, struct syscall_arg *arg, char *bf,
+ size_t size, int val, char *type)
+{
+ struct syscall_arg_fmt *arg_fmt = arg->fmt;
+
+ if (trace->btf == NULL)
+ return 0;
+
+ if (arg_fmt->type == NULL) {
+ // Check if this is an enum and if we have the BTF type for it.
+ syscall_arg_fmt__cache_btf_enum(arg_fmt, trace->btf, type);
+ }
+
+ // Did we manage to find a BTF type for the syscall/tracepoint argument?
+ if (arg_fmt->type == NULL)
+ return 0;
+
+ if (btf_is_enum(arg_fmt->type))
+ return btf_enum_scnprintf(arg_fmt->type, trace->btf, bf, size, val);
+ else if (btf_is_struct(arg_fmt->type) || btf_is_union(arg_fmt->type))
+ return btf_struct_scnprintf(arg_fmt->type, trace->btf, bf, size, arg);
+
+ return 0;
+}
+
+#else // HAVE_LIBBPF_SUPPORT
+static size_t trace__btf_scnprintf(struct trace *trace __maybe_unused, struct syscall_arg *arg __maybe_unused,
+ char *bf __maybe_unused, size_t size __maybe_unused, int val __maybe_unused,
+ char *type __maybe_unused)
+{
+ return 0;
+}
+
+static bool syscall_arg__strtoul_btf_type(char *bf __maybe_unused, size_t size __maybe_unused,
+ struct syscall_arg *arg __maybe_unused, u64 *val __maybe_unused)
+{
+ return false;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+#define STUL_BTF_TYPE syscall_arg__strtoul_btf_type
+
#define STRARRAY(name, array) \
{ .scnprintf = SCA_STRARRAY, \
.strtoul = STUL_STRARRAY, \
@@ -921,16 +1132,17 @@ static const struct syscall_fmt syscall_fmts[] = {
[1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
{ .name = "bind",
.arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
- [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
+ [1] = SCA_SOCKADDR_FROM_USER(umyaddr),
[2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
{ .name = "bpf",
- .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
+ .arg = { [0] = STRARRAY(cmd, bpf_cmd),
+ [1] = { .from_user = true /* attr */, }, } },
{ .name = "brk", .hexret = true,
.arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
{ .name = "clock_gettime",
.arg = { [0] = STRARRAY(clk_id, clockid), }, },
{ .name = "clock_nanosleep",
- .arg = { [2] = { .scnprintf = SCA_TIMESPEC, /* rqtp */ }, }, },
+ .arg = { [2] = SCA_TIMESPEC_FROM_USER(req), }, },
{ .name = "clone", .errpid = true, .nr_args = 5,
.arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
[1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
@@ -941,7 +1153,7 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
{ .name = "connect",
.arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
- [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
+ [1] = SCA_SOCKADDR_FROM_USER(servaddr),
[2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
{ .name = "epoll_ctl",
.arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
@@ -949,11 +1161,11 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
{ .name = "faccessat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ },
- [1] = { .scnprintf = SCA_FILENAME, /* pathname */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
[2] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
{ .name = "faccessat2",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ },
- [1] = { .scnprintf = SCA_FILENAME, /* pathname */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
[2] = { .scnprintf = SCA_ACCMODE, /* mode */ },
[3] = { .scnprintf = SCA_FACCESSAT2_FLAGS, /* flags */ }, }, },
{ .name = "fchmodat",
@@ -975,7 +1187,7 @@ static const struct syscall_fmt syscall_fmts[] = {
[2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
{ .name = "fspick",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
- [1] = { .scnprintf = SCA_FILENAME, /* path */ },
+ [1] = SCA_FILENAME_FROM_USER(path),
[2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
{ .name = "fstat", .alias = "newfstat", },
{ .name = "futex",
@@ -1039,29 +1251,29 @@ static const struct syscall_fmt syscall_fmts[] = {
.parm = &strarray__mmap_flags, },
[5] = { .scnprintf = SCA_HEX, /* offset */ }, }, },
{ .name = "mount",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(devname),
[3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
.mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
{ .name = "move_mount",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ },
- [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
[2] = { .scnprintf = SCA_FDAT, /* to_dfd */ },
- [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ },
+ [3] = SCA_FILENAME_FROM_USER(pathname),
[4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
{ .name = "mprotect",
.arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
[2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ }, }, },
{ .name = "mq_unlink",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(u_name), }, },
{ .name = "mremap", .hexret = true,
.arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
{ .name = "name_to_handle_at",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
{ .name = "nanosleep",
- .arg = { [0] = { .scnprintf = SCA_TIMESPEC, /* req */ }, }, },
+ .arg = { [0] = SCA_TIMESPEC_FROM_USER(req), }, },
{ .name = "newfstatat", .alias = "fstatat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ },
- [1] = { .scnprintf = SCA_FILENAME, /* pathname */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
[3] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ }, }, },
{ .name = "open",
.arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
@@ -1072,7 +1284,7 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
[2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
{ .name = "perf_event_open",
- .arg = { [0] = { .scnprintf = SCA_PERF_ATTR, /* attr */ },
+ .arg = { [0] = SCA_PERF_ATTR_FROM_USER(attr),
[2] = { .scnprintf = SCA_INT, /* cpu */ },
[3] = { .scnprintf = SCA_FD, /* group_fd */ },
[4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
@@ -1097,7 +1309,8 @@ static const struct syscall_fmt syscall_fmts[] = {
{ .name = "pread", .alias = "pread64", },
{ .name = "preadv", .alias = "pread", },
{ .name = "prlimit64",
- .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
+ .arg = { [1] = STRARRAY(resource, rlimit_resources),
+ [2] = { .from_user = true /* new_rlim */, }, }, },
{ .name = "pwrite", .alias = "pwrite64", },
{ .name = "readlinkat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
@@ -1114,6 +1327,8 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
[2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
[4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
+ { .name = "rseq", .errpid = true,
+ .arg = { [0] = { .from_user = true /* rseq */, }, }, },
{ .name = "rt_sigaction",
.arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
{ .name = "rt_sigprocmask",
@@ -1135,12 +1350,15 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
{ .name = "sendto",
.arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
- [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
+ [4] = SCA_SOCKADDR_FROM_USER(addr), }, },
+ { .name = "set_robust_list", .errpid = true,
+ .arg = { [0] = { .from_user = true /* head */, }, }, },
{ .name = "set_tid_address", .errpid = true, },
{ .name = "setitimer",
.arg = { [0] = STRARRAY(which, itimers), }, },
{ .name = "setrlimit",
- .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+ .arg = { [0] = STRARRAY(resource, rlimit_resources),
+ [1] = { .from_user = true /* rlim */, }, }, },
{ .name = "setsockopt",
.arg = { [1] = STRARRAY(level, socket_level), }, },
{ .name = "socket",
@@ -1157,9 +1375,9 @@ static const struct syscall_fmt syscall_fmts[] = {
[2] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ } ,
[3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
{ .name = "swapoff",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(specialfile), }, },
{ .name = "swapon",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(specialfile), }, },
{ .name = "symlinkat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
{ .name = "sync_file_range",
@@ -1169,11 +1387,11 @@ static const struct syscall_fmt syscall_fmts[] = {
{ .name = "tkill",
.arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
{ .name = "umount2", .alias = "umount",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(name), }, },
{ .name = "uname", .alias = "newuname", },
{ .name = "unlinkat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
- [1] = { .scnprintf = SCA_FILENAME, /* pathname */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
[2] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ }, }, },
{ .name = "utimensat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
@@ -1181,6 +1399,8 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
{ .name = "waitid", .errpid = true,
.arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
+ { .name = "write", .errpid = true,
+ .arg = { [1] = { .scnprintf = SCA_BUF /* buf */, .from_user = true, }, }, },
};
static int syscall_fmt__cmp(const void *name, const void *fmtp)
@@ -1238,6 +1458,7 @@ struct syscall {
bool is_exit;
bool is_open;
bool nonexistent;
+ bool use_btf;
struct tep_format_field *args;
const char *name;
const struct syscall_fmt *fmt;
@@ -1551,6 +1772,32 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
return 0;
}
+#define MAX_CONTROL_CHAR 31
+#define MAX_ASCII 127
+
+static size_t syscall_arg__scnprintf_buf(char *bf, size_t size, struct syscall_arg *arg)
+{
+ struct augmented_arg *augmented_arg = arg->augmented.args;
+ unsigned char *orig = (unsigned char *)augmented_arg->value;
+ size_t printed = 0;
+ int consumed;
+
+ if (augmented_arg == NULL)
+ return 0;
+
+ for (int j = 0; j < augmented_arg->size; ++j) {
+ bool control_char = orig[j] <= MAX_CONTROL_CHAR || orig[j] >= MAX_ASCII;
+ /* print control characters (0~31 and 127), and non-ascii characters in \(digits) */
+ printed += scnprintf(bf + printed, size - printed, control_char ? "\\%d" : "%c", (int)orig[j]);
+ }
+
+ consumed = sizeof(*augmented_arg) + augmented_arg->size;
+ arg->augmented.args = ((void *)arg->augmented.args) + consumed;
+ arg->augmented.size -= consumed;
+
+ return printed;
+}
+
static bool trace__filter_duration(struct trace *trace, double t)
{
return t < (trace->duration_filter * NSEC_PER_MSEC);
@@ -1637,7 +1884,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
return ret;
}
-static int trace__tool_process(struct perf_tool *tool,
+static int trace__tool_process(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -1744,7 +1991,8 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n
}
static struct tep_format_field *
-syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
+syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
+ bool *use_btf)
{
struct tep_format_field *last_field = NULL;
int len;
@@ -1757,11 +2005,15 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
len = strlen(field->name);
+ // As far as heuristics (or intention) goes this seems to hold true, and makes sense!
+ if ((field->flags & TEP_FIELD_IS_POINTER) && strstarts(field->type, "const "))
+ arg->from_user = true;
+
if (strcmp(field->type, "const char *") == 0 &&
((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
- strstr(field->name, "path") != NULL))
+ strstr(field->name, "path") != NULL)) {
arg->scnprintf = SCA_FILENAME;
- else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
+ } else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
arg->scnprintf = SCA_PTR;
else if (strcmp(field->type, "pid_t") == 0)
arg->scnprintf = SCA_PID;
@@ -1782,6 +2034,9 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
* 7 unsigned long
*/
arg->scnprintf = SCA_FD;
+ } else if (strstr(field->type, "enum") && use_btf != NULL) {
+ *use_btf = true;
+ arg->strtoul = STUL_BTF_TYPE;
} else {
const struct syscall_arg_fmt *fmt =
syscall_arg_fmt__find_by_name(field->name);
@@ -1798,7 +2053,8 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
static int syscall__set_arg_fmts(struct syscall *sc)
{
- struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
+ struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args,
+ &sc->use_btf);
if (last_field)
sc->args_size = last_field->offset + last_field->size;
@@ -1811,6 +2067,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
char tp_name[128];
struct syscall *sc;
const char *name = syscalltbl__name(trace->sctbl, id);
+ int err;
#ifdef HAVE_SYSCALL_TABLE_SUPPORT
if (trace->syscalls.table == NULL) {
@@ -1883,15 +2140,21 @@ static int trace__read_syscall_info(struct trace *trace, int id)
sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
- return syscall__set_arg_fmts(sc);
+ err = syscall__set_arg_fmts(sc);
+
+ /* after calling syscall__set_arg_fmts() we'll know whether use_btf is true */
+ if (sc->use_btf)
+ trace__load_vmlinux_btf(trace);
+
+ return err;
}
-static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
+static int evsel__init_tp_arg_scnprintf(struct evsel *evsel, bool *use_btf)
{
struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
if (fmt != NULL) {
- syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields);
+ syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields, use_btf);
return 0;
}
@@ -2050,7 +2313,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
unsigned char *args, void *augmented_args, int augmented_args_size,
struct trace *trace, struct thread *thread)
{
- size_t printed = 0;
+ size_t printed = 0, btf_printed;
unsigned long val;
u8 bit = 1;
struct syscall_arg arg = {
@@ -2066,6 +2329,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
.show_string_prefix = trace->show_string_prefix,
};
struct thread_trace *ttrace = thread__priv(thread);
+ void *default_scnprintf;
/*
* Things like fcntl will set this in its 'cmd' formatter to pick the
@@ -2093,9 +2357,13 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
/*
* Suppress this argument if its value is zero and show_zero
* property isn't set.
+ *
+ * If it has a BTF type, then override the zero suppression knob
+ * as the common case is for zero in an enum to have an associated entry.
*/
if (val == 0 && !trace->show_zeros &&
- !(sc->arg_fmt && sc->arg_fmt[arg.idx].show_zero))
+ !(sc->arg_fmt && sc->arg_fmt[arg.idx].show_zero) &&
+ !(sc->arg_fmt && sc->arg_fmt[arg.idx].strtoul == STUL_BTF_TYPE))
continue;
printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
@@ -2103,6 +2371,17 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
if (trace->show_arg_names)
printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
+ default_scnprintf = sc->arg_fmt[arg.idx].scnprintf;
+
+ if (trace->force_btf || default_scnprintf == NULL || default_scnprintf == SCA_PTR) {
+ btf_printed = trace__btf_scnprintf(trace, &arg, bf + printed,
+ size - printed, val, field->type);
+ if (btf_printed) {
+ printed += btf_printed;
+ continue;
+ }
+ }
+
printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
bf + printed, size - printed, &arg, val);
}
@@ -2749,7 +3028,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
size_t size = sizeof(bf);
struct tep_format_field *field = evsel->tp_format->format.fields;
struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
- size_t printed = 0;
+ size_t printed = 0, btf_printed;
unsigned long val;
u8 bit = 1;
struct syscall_arg syscall_arg = {
@@ -2791,7 +3070,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
/* Suppress this argument if its value is zero and show_zero property isn't set. */
- if (val == 0 && !trace->show_zeros && !arg->show_zero)
+ if (val == 0 && !trace->show_zeros && !arg->show_zero && arg->strtoul != STUL_BTF_TYPE)
continue;
printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
@@ -2799,6 +3078,12 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
if (trace->show_arg_names)
printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
+ btf_printed = trace__btf_scnprintf(trace, &syscall_arg, bf + printed, size - printed, val, field->type);
+ if (btf_printed) {
+ printed += btf_printed;
+ continue;
+ }
+
printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
}
@@ -3009,7 +3294,7 @@ static void trace__set_base_time(struct trace *trace,
trace->base_time = sample->time;
}
-static int trace__process_sample(struct perf_tool *tool,
+static int trace__process_sample(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -3276,6 +3561,23 @@ out_enomem:
}
#ifdef HAVE_BPF_SKEL
+static int syscall_arg_fmt__cache_btf_struct(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type)
+{
+ int id;
+
+ if (arg_fmt->type != NULL)
+ return -1;
+
+ id = btf__find_by_name(btf, type);
+ if (id < 0)
+ return -1;
+
+ arg_fmt->type = btf__type_by_id(btf, id);
+ arg_fmt->type_id = id;
+
+ return 0;
+}
+
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
{
struct bpf_program *pos, *prog = NULL;
@@ -3351,6 +3653,91 @@ static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
}
+static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array)
+{
+ struct tep_format_field *field;
+ struct syscall *sc = trace__syscall_info(trace, NULL, key);
+ const struct btf_type *bt;
+ char *struct_offset, *tmp, name[32];
+ bool can_augment = false;
+ int i, cnt;
+
+ if (sc == NULL)
+ return -1;
+
+ trace__load_vmlinux_btf(trace);
+ if (trace->btf == NULL)
+ return -1;
+
+ for (i = 0, field = sc->args; field; ++i, field = field->next) {
+ // XXX We're only collecting pointer payloads _from_ user space
+ if (!sc->arg_fmt[i].from_user)
+ continue;
+
+ struct_offset = strstr(field->type, "struct ");
+ if (struct_offset == NULL)
+ struct_offset = strstr(field->type, "union ");
+ else
+ struct_offset++; // "union" is shorter
+
+ if (field->flags & TEP_FIELD_IS_POINTER && struct_offset) { /* struct or union (think BPF's attr arg) */
+ struct_offset += 6;
+
+ /* for 'struct foo *', we only want 'foo' */
+ for (tmp = struct_offset, cnt = 0; *tmp != ' ' && *tmp != '\0'; ++tmp, ++cnt) {
+ }
+
+ strncpy(name, struct_offset, cnt);
+ name[cnt] = '\0';
+
+ /* cache struct's btf_type and type_id */
+ if (syscall_arg_fmt__cache_btf_struct(&sc->arg_fmt[i], trace->btf, name))
+ continue;
+
+ bt = sc->arg_fmt[i].type;
+ beauty_array[i] = bt->size;
+ can_augment = true;
+ } else if (field->flags & TEP_FIELD_IS_POINTER && /* string */
+ strcmp(field->type, "const char *") == 0 &&
+ (strstr(field->name, "name") ||
+ strstr(field->name, "path") ||
+ strstr(field->name, "file") ||
+ strstr(field->name, "root") ||
+ strstr(field->name, "key") ||
+ strstr(field->name, "special") ||
+ strstr(field->name, "type") ||
+ strstr(field->name, "description"))) {
+ beauty_array[i] = 1;
+ can_augment = true;
+ } else if (field->flags & TEP_FIELD_IS_POINTER && /* buffer */
+ strstr(field->type, "char *") &&
+ (strstr(field->name, "buf") ||
+ strstr(field->name, "val") ||
+ strstr(field->name, "msg"))) {
+ int j;
+ struct tep_format_field *field_tmp;
+
+ /* find the size of the buffer that appears in pairs with buf */
+ for (j = 0, field_tmp = sc->args; field_tmp; ++j, field_tmp = field_tmp->next) {
+ if (!(field_tmp->flags & TEP_FIELD_IS_POINTER) && /* only integers */
+ (strstr(field_tmp->name, "count") ||
+ strstr(field_tmp->name, "siz") || /* size, bufsiz */
+ (strstr(field_tmp->name, "len") && strcmp(field_tmp->name, "filename")))) {
+ /* filename's got 'len' in it, we don't want that */
+ beauty_array[i] = -(j + 1);
+ can_augment = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (can_augment)
+ return 0;
+
+ return -1;
+}
+
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
struct tep_format_field *field, *candidate_field;
@@ -3455,7 +3842,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
{
int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter);
int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit);
+ int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter);
int err = 0;
+ unsigned int beauty_array[6];
for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
int prog_fd, key = syscalltbl__id_at_idx(trace->sctbl, i);
@@ -3474,6 +3863,15 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
if (err)
break;
+
+ /* use beauty_map to tell BPF how many bytes to collect, set beauty_map's value here */
+ memset(beauty_array, 0, sizeof(beauty_array));
+ err = trace__bpf_sys_enter_beauty_map(trace, key, (unsigned int *)beauty_array);
+ if (err)
+ continue;
+ err = bpf_map_update_elem(beauty_map_fd, &key, beauty_array, BPF_ANY);
+ if (err)
+ break;
}
/*
@@ -3680,7 +4078,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
return __trace__deliver_event(trace, event->event);
}
-static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
+static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg,
+ char **type)
{
struct tep_format_field *field;
struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
@@ -3689,13 +4088,15 @@ static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel
return NULL;
for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
- if (strcmp(field->name, arg) == 0)
+ if (strcmp(field->name, arg) == 0) {
+ *type = field->type;
return fmt;
+ }
return NULL;
}
-static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
+static int trace__expand_filter(struct trace *trace, struct evsel *evsel)
{
char *tok, *left = evsel->filter, *new_filter = evsel->filter;
@@ -3728,14 +4129,14 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
struct syscall_arg_fmt *fmt;
int left_size = tok - left,
right_size = right_end - right;
- char arg[128];
+ char arg[128], *type;
while (isspace(left[left_size - 1]))
--left_size;
scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
- fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
+ fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg, &type);
if (fmt == NULL) {
pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
arg, evsel->name, evsel->filter);
@@ -3748,6 +4149,9 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
if (fmt->strtoul) {
u64 val;
struct syscall_arg syscall_arg = {
+ .trace = trace,
+ .fmt = fmt,
+ .type_name = type,
.parm = fmt->parm,
};
@@ -3959,7 +4363,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
err = trace__expand_filters(trace, &evsel);
if (err)
goto out_delete_evlist;
- err = evlist__apply_filters(evlist, &evsel);
+ err = evlist__apply_filters(evlist, &evsel, &trace->opts.target);
if (err < 0)
goto out_error_apply_filters;
@@ -4451,7 +4855,7 @@ static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
}
}
-static int evlist__set_syscall_tp_fields(struct evlist *evlist)
+static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf)
{
struct evsel *evsel;
@@ -4460,7 +4864,7 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist)
continue;
if (strcmp(evsel->tp_format->system, "syscalls")) {
- evsel__init_tp_arg_scnprintf(evsel);
+ evsel__init_tp_arg_scnprintf(evsel, use_btf);
continue;
}
@@ -4781,6 +5185,8 @@ int cmd_trace(int argc, const char **argv)
OPT_INTEGER('D', "delay", &trace.opts.target.initial_delay,
"ms to wait before starting measurement after program "
"start"),
+ OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer"
+ "to customized ones"),
OPTS_EVSWITCH(&trace.evswitch),
OPT_END()
};
@@ -4938,11 +5344,16 @@ skip_augmentation:
}
if (trace.evlist->core.nr_entries > 0) {
+ bool use_btf = false;
+
evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
- if (evlist__set_syscall_tp_fields(trace.evlist)) {
+ if (evlist__set_syscall_tp_fields(trace.evlist, &use_btf)) {
perror("failed to set syscalls:* tracepoint fields");
goto out;
}
+
+ if (use_btf)
+ trace__load_vmlinux_btf(&trace);
}
if (trace.sort_events) {
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index 398aa53e9e2e..e149d96c6dc5 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -46,45 +46,18 @@ static void status_print(const char *name, const char *macro,
printf(" # %s\n", macro);
}
-#define STATUS(__d, __m) \
-do { \
- if (IS_BUILTIN(__d)) \
- status_print(#__m, #__d, "on"); \
- else \
- status_print(#__m, #__d, "OFF"); \
+#define STATUS(feature) \
+do { \
+ if (feature.is_builtin) \
+ status_print(feature.name, feature.macro, "on"); \
+ else \
+ status_print(feature.name, feature.macro, "OFF"); \
} while (0)
static void library_status(void)
{
- STATUS(HAVE_DWARF_SUPPORT, dwarf);
- STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations);
-#ifndef HAVE_SYSCALL_TABLE_SUPPORT
- STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit);
-#endif
- STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table);
- STATUS(HAVE_LIBBFD_SUPPORT, libbfd);
- STATUS(HAVE_DEBUGINFOD_SUPPORT, debuginfod);
- STATUS(HAVE_LIBELF_SUPPORT, libelf);
- STATUS(HAVE_LIBNUMA_SUPPORT, libnuma);
- STATUS(HAVE_LIBNUMA_SUPPORT, numa_num_possible_cpus);
- STATUS(HAVE_LIBPERL_SUPPORT, libperl);
- STATUS(HAVE_LIBPYTHON_SUPPORT, libpython);
- STATUS(HAVE_SLANG_SUPPORT, libslang);
- STATUS(HAVE_LIBCRYPTO_SUPPORT, libcrypto);
- STATUS(HAVE_LIBUNWIND_SUPPORT, libunwind);
- STATUS(HAVE_DWARF_SUPPORT, libdw-dwarf-unwind);
- STATUS(HAVE_LIBCAPSTONE_SUPPORT, libcapstone);
- STATUS(HAVE_ZLIB_SUPPORT, zlib);
- STATUS(HAVE_LZMA_SUPPORT, lzma);
- STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid);
- STATUS(HAVE_LIBBPF_SUPPORT, bpf);
- STATUS(HAVE_AIO_SUPPORT, aio);
- STATUS(HAVE_ZSTD_SUPPORT, zstd);
- STATUS(HAVE_LIBPFM, libpfm4);
- STATUS(HAVE_LIBTRACEEVENT, libtraceevent);
- STATUS(HAVE_BPF_SKEL, bpf_skeletons);
- STATUS(HAVE_DWARF_UNWIND_SUPPORT, dwarf-unwind-support);
- STATUS(HAVE_CSTRACE_SUPPORT, libopencsd);
+ for (int i = 0; supported_features[i].name; ++i)
+ STATUS(supported_features[i]);
}
int cmd_version(int argc, const char **argv)
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index f4375deabfa3..94f4b3769bf7 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -2,6 +2,22 @@
#ifndef BUILTIN_H
#define BUILTIN_H
+#include <stddef.h>
+#include <linux/compiler.h>
+#include <tools/config.h>
+
+struct feature_status {
+ const char *name;
+ const char *macro;
+ int is_builtin;
+};
+
+#define FEATURE_STATUS(name_, macro_) { \
+ .name = name_, \
+ .macro = #macro_, \
+ .is_builtin = IS_BUILTIN(macro_) }
+
+extern struct feature_status supported_features[];
struct cmdnames;
void list_common_cmds_help(void);
@@ -11,6 +27,7 @@ int cmd_annotate(int argc, const char **argv);
int cmd_bench(int argc, const char **argv);
int cmd_buildid_cache(int argc, const char **argv);
int cmd_buildid_list(int argc, const char **argv);
+int cmd_check(int argc, const char **argv);
int cmd_config(int argc, const char **argv);
int cmd_c2c(int argc, const char **argv);
int cmd_diff(int argc, const char **argv);
diff --git a/tools/perf/check-header_ignore_hunks/lib/list_sort.c b/tools/perf/check-header_ignore_hunks/lib/list_sort.c
new file mode 100644
index 000000000000..32d98cb34f80
--- /dev/null
+++ b/tools/perf/check-header_ignore_hunks/lib/list_sort.c
@@ -0,0 +1,31 @@
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ #include <linux/kernel.h>
++#include <linux/bug.h>
+ #include <linux/compiler.h>
+ #include <linux/export.h>
+ #include <linux/string.h>
+@@ -52,6 +53,7 @@
+ struct list_head *a, struct list_head *b)
+ {
+ struct list_head *tail = head;
++ u8 count = 0;
+
+ for (;;) {
+ /* if equal, take 'a' -- important for sort stability */
+@@ -77,6 +79,15 @@
+ /* Finish linking remainder of list b on to tail */
+ tail->next = b;
+ do {
++ /*
++ * If the merge is highly unbalanced (e.g. the input is
++ * already sorted), this loop may run many iterations.
++ * Continue callbacks to the client even though no
++ * element comparison is needed, so the client's cmp()
++ * routine can invoke cond_resched() periodically.
++ */
++ if (unlikely(!++count))
++ cmp(priv, b, b);
+ b->prev = tail;
+ tail = b;
+ b = b->next;
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 672421b858ac..29adbb423327 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -136,6 +136,30 @@ beauty_check () {
check_2 "tools/perf/trace/beauty/$file" "$file" "$@"
}
+check_ignore_some_hunks () {
+ orig_file="$1"
+ tools_file="tools/$orig_file"
+ hunks_to_ignore="tools/perf/check-header_ignore_hunks/$orig_file"
+
+ if [ ! -f "$hunks_to_ignore" ]; then
+ echo "$hunks_to_ignore not found. Skipping $orig_file check."
+ FAILURES+=(
+ "$tools_file $orig_file"
+ )
+ return
+ fi
+
+ cmd="diff -u \"$tools_file\" \"$orig_file\" | grep -vf \"$hunks_to_ignore\" | wc -l | grep -qw 0"
+
+ if [ -f "$orig_file" ] && ! eval "$cmd"
+ then
+ FAILURES+=(
+ "$tools_file $orig_file"
+ )
+ fi
+}
+
+
# Check if we have the kernel headers (tools/perf/../../include), else
# we're probably on a detached tarball, so no point in trying to check
# differences.
@@ -163,15 +187,15 @@ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/ex
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"'
-check include/asm-generic/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
+check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
check include/linux/ctype.h '-I "isdigit("'
check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
-check lib/list_sort.c '-I "^#include <linux/bug.h>"'
# diff non-symmetric files
+check_2 tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl
check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
@@ -186,6 +210,10 @@ done
check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
+# Files with larger differences
+
+check_ignore_some_hunks lib/list_sort.c
+
cd tools/perf || exit
if [ ${#FAILURES[@]} -gt 0 ]
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index bd3f80b5bb46..4def800f4089 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -52,6 +52,7 @@ static struct cmd_struct commands[] = {
{ "archive", NULL, 0 },
{ "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
+ { "check", cmd_check, 0 },
{ "config", cmd_config, 0 },
{ "c2c", cmd_c2c, 0 },
{ "diff", cmd_diff, 0 },
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 1d18bb89402e..d941bc9d16e9 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -11,6 +11,8 @@ METRIC_TEST_PY = pmu-events/metric_test.py
EMPTY_PMU_EVENTS_C = pmu-events/empty-pmu-events.c
PMU_EVENTS_C = $(OUTPUT)pmu-events/pmu-events.c
METRIC_TEST_LOG = $(OUTPUT)pmu-events/metric_test.log
+TEST_EMPTY_PMU_EVENTS_C = $(OUTPUT)pmu-events/test-empty-pmu-events.c
+EMPTY_PMU_EVENTS_TEST_LOG = $(OUTPUT)pmu-events/empty-pmu-events.log
ifeq ($(JEVENTS_ARCH),)
JEVENTS_ARCH=$(SRCARCH)
@@ -31,7 +33,15 @@ $(METRIC_TEST_LOG): $(METRIC_TEST_PY) $(METRIC_PY)
$(call rule_mkdir)
$(Q)$(call echo-cmd,test)$(PYTHON) $< 2> $@ || (cat $@ && false)
-$(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG)
+$(TEST_EMPTY_PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG)
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) none none pmu-events/arch $@
+
+$(EMPTY_PMU_EVENTS_TEST_LOG): $(EMPTY_PMU_EVENTS_C) $(TEST_EMPTY_PMU_EVENTS_C)
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)diff -u $^ 2> $@ || (cat $@ && false)
+
+$(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG) $(EMPTY_PMU_EVENTS_TEST_LOG)
$(call rule_mkdir)
$(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@
endif
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json
index 18d1f2f76a23..9fe697d12fe0 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json
@@ -78,9 +78,6 @@
"ArchStdEvent": "OP_RETIRED"
},
{
- "ArchStdEvent": "OP_SPEC"
- },
- {
"PublicDescription": "Operation speculatively executed, NOP",
"EventCode": "0x100",
"EventName": "NOP_SPEC",
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json b/tools/perf/pmu-events/arch/arm64/thead/yitian710/sys/ali_drw.json
index e21c469a8ef0..e21c469a8ef0 100644
--- a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json
+++ b/tools/perf/pmu-events/arch/arm64/thead/yitian710/sys/ali_drw.json
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/thead/yitian710/sys/metrics.json
index bc865b374b6a..bc865b374b6a 100644
--- a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/thead/yitian710/sys/metrics.json
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
index 839ae26945fb..b7e0be09ff57 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
@@ -1,12 +1,22 @@
[
{
+ "EventCode": "0x1002C",
+ "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
+ "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
+ },
+ {
+ "EventCode": "0x200FD",
+ "EventName": "PM_L1_ICACHE_MISS",
+ "BriefDescription": "Demand instruction cache miss."
+ },
+ {
+ "EventCode": "0x30068",
+ "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+ "BriefDescription": "Counts all instruction cache prefetch reloads (includes demand turned into prefetch)."
+ },
+ {
"EventCode": "0x300F4",
"EventName": "PM_RUN_INST_CMPL_CONC",
"BriefDescription": "PowerPC instruction completed by this thread when all threads in the core had the run-latch set."
- },
- {
- "EventCode": "0x400F6",
- "EventName": "PM_BR_MPRED_CMPL",
- "BriefDescription": "A mispredicted branch completed. Includes direction and target."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json
index 0eeaaf1a95b8..a5d5be35b5e6 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json
@@ -1,5 +1,15 @@
[
{
+ "EventCode": "0x1505E",
+ "EventName": "PM_LD_HIT_L1",
+ "BriefDescription": "Load finished without experiencing an L1 miss."
+ },
+ {
+ "EventCode": "0x100FC",
+ "EventName": "PM_LD_REF_L1",
+ "BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included."
+ },
+ {
"EventCode": "0x200FE",
"EventName": "PM_DATA_FROM_L2MISS",
"BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss."
@@ -10,11 +20,41 @@
"BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss."
},
{
+ "EventCode": "0x400F0",
+ "EventName": "PM_LD_DEMAND_MISS_L1_FIN",
+ "BriefDescription": "Load missed L1, counted at finish time."
+ },
+ {
"EventCode": "0x400FE",
"EventName": "PM_DATA_FROM_MEMORY",
"BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
},
{
+ "EventCode": "0x0000004080",
+ "EventName": "PM_INST_FROM_L1",
+ "BriefDescription": "An instruction fetch hit in the L1. Each fetch group contains 8 instructions. The same line can hit 4 times if 32 sequential instructions are fetched."
+ },
+ {
+ "EventCode": "0x000000026080",
+ "EventName": "PM_L2_LD_MISS",
+ "BriefDescription": "All successful D-Side Load dispatches for this thread that missed in the L2. Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2."
+ },
+ {
+ "EventCode": "0x000000026880",
+ "EventName": "PM_L2_ST_MISS",
+ "BriefDescription": "All successful D-Side Store dispatches for this thread that missed in the L2. Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2."
+ },
+ {
+ "EventCode": "0x010000046880",
+ "EventName": "PM_L2_ST_HIT",
+ "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits. Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2."
+ },
+ {
+ "EventCode": "0x000000036880",
+ "EventName": "PM_L2_INST_MISS",
+ "BriefDescription": "All successful instruction (demand and prefetch) dispatches for this thread that missed in the L2. Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2."
+ },
+ {
"EventCode": "0x000300000000C040",
"EventName": "PM_INST_FROM_L2",
"BriefDescription": "The processor's instruction cache was reloaded from the local core's L2 due to a demand miss."
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
index 5977f5e64212..b6998987ab75 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
@@ -75,18 +75,48 @@
"BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group."
},
{
+ "EventCode": "0x44054",
+ "EventName": "PM_VECTOR_LD_CMPL",
+ "BriefDescription": "Vector load instruction completed."
+ },
+ {
"EventCode": "0x44056",
"EventName": "PM_VECTOR_ST_CMPL",
"BriefDescription": "Vector store instruction completed."
},
{
+ "EventCode": "0x4D05E",
+ "EventName": "PM_BR_CMPL",
+ "BriefDescription": "A branch completed. All branches are included."
+ },
+ {
"EventCode": "0x4E054",
"EventName": "PM_DTLB_HIT_1G",
"BriefDescription": "Data TLB hit (DERAT reload) page size 1G. Implies radix translation. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
+ "EventCode": "0x400F6",
+ "EventName": "PM_BR_MPRED_CMPL",
+ "BriefDescription": "A mispredicted branch completed. Includes direction and target."
+ },
+ {
"EventCode": "0x400FC",
"EventName": "PM_ITLB_MISS",
"BriefDescription": "Instruction TLB reload (after a miss), all page sizes. Includes only demand misses."
+ },
+ {
+ "EventCode": "0x00000048B4",
+ "EventName": "PM_BR_TKN_UNCOND_FIN",
+ "BriefDescription": "An unconditional branch finished. All unconditional branches are taken."
+ },
+ {
+ "EventCode": "0x00000040B8",
+ "EventName": "PM_PRED_BR_TKN_COND_DIR",
+ "BriefDescription": "A conditional branch finished with correctly predicted direction. Resolved taken."
+ },
+ {
+ "EventCode": "0x00000048B8",
+ "EventName": "PM_PRED_BR_NTKN_COND_DIR",
+ "BriefDescription": "A conditional branch finished with correctly predicted direction. Resolved not taken."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/locks.json b/tools/perf/pmu-events/arch/powerpc/power10/locks.json
index b5a0d6521963..a8ea4d0def1a 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/locks.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/locks.json
@@ -5,8 +5,18 @@
"BriefDescription": "Conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock."
},
{
+ "EventCode": "0x2E014",
+ "EventName": "PM_STCX_FIN",
+ "BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
+ },
+ {
"EventCode": "0x4E050",
"EventName": "PM_STCX_PASS_FIN",
"BriefDescription": "Conditional store instruction (STCX) passed. LARX and STCX are instructions used to acquire a lock."
+ },
+ {
+ "EventCode": "0x000000C8B8",
+ "EventName": "PM_STCX_SUCCESS_CMPL",
+ "BriefDescription": "STCX instructions that completed successfully. Specifically, counts only when a pass status is returned from the nest."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
index 885262957beb..0d7191b3f2c6 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
@@ -70,6 +70,11 @@
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
+ "EventCode": "0x3F04A",
+ "EventName": "PM_LSU_ST5_FIN",
+ "BriefDescription": "LSU Finished an internal operation in ST2 port."
+ },
+ {
"EventCode": "0x3C054",
"EventName": "PM_DERAT_MISS_16M",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
@@ -108,5 +113,30 @@
"EventCode": "0x4C05A",
"EventName": "PM_DTLB_MISS_1G",
"BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+ },
+ {
+ "EventCode": "0x000000F880",
+ "EventName": "PM_SNOOP_TLBIE_CYC",
+ "BriefDescription": "Cycles in which TLBIE snoops are executed in the LSU."
+ },
+ {
+ "EventCode": "0x000000F084",
+ "EventName": "PM_SNOOP_TLBIE_CACHE_WALK_CYC",
+ "BriefDescription": "TLBIE snoop cycles in which the data cache is being walked."
+ },
+ {
+ "EventCode": "0x000000F884",
+ "EventName": "PM_SNOOP_TLBIE_WAIT_ST_CYC",
+ "BriefDescription": "TLBIE snoop cycles in which older stores are still draining."
+ },
+ {
+ "EventCode": "0x000000F088",
+ "EventName": "PM_SNOOP_TLBIE_WAIT_LD_CYC",
+ "BriefDescription": "TLBIE snoop cycles in which older loads are still draining."
+ },
+ {
+ "EventCode": "0x000000F08C",
+ "EventName": "PM_SNOOP_TLBIE_WAIT_MMU_CYC",
+ "BriefDescription": "TLBIE snoop cycles in which the Load-Store unit is waiting for the MMU to finish invalidation."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
index fcf8a8ebe7bd..1bf802076ee0 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
@@ -1,112 +1,72 @@
[
{
- "EventCode": "0x1002C",
- "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
- "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
- },
- {
- "EventCode": "0x1505E",
- "EventName": "PM_LD_HIT_L1",
- "BriefDescription": "Load finished without experiencing an L1 miss."
- },
- {
- "EventCode": "0x1F056",
- "EventName": "PM_DISP_SS0_2_INSTR_CYC",
- "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
- },
- {
- "EventCode": "0x1F05A",
- "EventName": "PM_DISP_HELD_SYNC_CYC",
- "BriefDescription": "Cycles dispatch is held because of a synchronizing instruction that requires the ICT to be empty before dispatch."
- },
- {
"EventCode": "0x10066",
"EventName": "PM_ADJUNCT_CYC",
"BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011."
},
{
- "EventCode": "0x100FC",
- "EventName": "PM_LD_REF_L1",
- "BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included."
- },
- {
"EventCode": "0x2E010",
"EventName": "PM_ADJUNCT_INST_CMPL",
"BriefDescription": "PowerPC instruction completed while the thread was in Adjunct state."
},
{
- "EventCode": "0x2E014",
- "EventName": "PM_STCX_FIN",
- "BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
- },
- {
- "EventCode": "0x2F054",
- "EventName": "PM_DISP_SS1_2_INSTR_CYC",
- "BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions."
- },
- {
- "EventCode": "0x2F056",
- "EventName": "PM_DISP_SS1_4_INSTR_CYC",
- "BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions."
- },
- {
"EventCode": "0x200F2",
"EventName": "PM_INST_DISP",
"BriefDescription": "PowerPC instruction dispatched."
},
{
- "EventCode": "0x200FD",
- "EventName": "PM_L1_ICACHE_MISS",
- "BriefDescription": "Demand instruction cache miss."
+ "EventCode": "0x300F6",
+ "EventName": "PM_LD_DEMAND_MISS_L1",
+ "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
},
{
- "EventCode": "0x3F04A",
- "EventName": "PM_LSU_ST5_FIN",
- "BriefDescription": "LSU Finished an internal operation in ST2 port."
+ "EventCode": "0x40012",
+ "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+ "BriefDescription": "Counts all instruction cache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
},
{
- "EventCode": "0x3405A",
- "EventName": "PM_PRIVILEGED_INST_CMPL",
- "BriefDescription": "PowerPC instruction completed while the thread was in Privileged state."
+ "EventCode": "0x00000038BC",
+ "EventName": "PM_ISYNC_CMPL",
+ "BriefDescription": "Isync completion count per thread."
},
{
- "EventCode": "0x3F054",
- "EventName": "PM_DISP_SS0_4_INSTR_CYC",
- "BriefDescription": "Cycles in which Superslice 0 dispatches either 3 or 4 instructions."
+ "EventCode": "0x000000C088",
+ "EventName": "PM_LD0_32B_FIN",
+ "BriefDescription": "256-bit load finished in the LD0 load execution unit."
},
{
- "EventCode": "0x3F056",
- "EventName": "PM_DISP_SS0_8_INSTR_CYC",
- "BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions."
+ "EventCode": "0x000000C888",
+ "EventName": "PM_LD1_32B_FIN",
+ "BriefDescription": "256-bit load finished in the LD1 load execution unit."
},
{
- "EventCode": "0x30068",
- "EventName": "PM_L1_ICACHE_RELOADED_PREF",
- "BriefDescription": "Counts all instruction cache prefetch reloads (includes demand turned into prefetch)."
+ "EventCode": "0x000000C090",
+ "EventName": "PM_LD0_UNALIGNED_FIN",
+ "BriefDescription": "Load instructions in LD0 port that are either unaligned, or treated as unaligned and require an additional recycle through the pipeline using the load gather buffer. This typically adds about 10 cycles to the latency of the instruction. This includes loads that cross the 128 byte boundary, octword loads that are not aligned, and a special forward progress case of a load that does not hit in the L1 and crosses the 32 byte boundary and is launched NTC. Counted at finish time."
},
{
- "EventCode": "0x300F6",
- "EventName": "PM_LD_DEMAND_MISS_L1",
- "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
+ "EventCode": "0x000000C890",
+ "EventName": "PM_LD1_UNALIGNED_FIN",
+ "BriefDescription": "Load instructions in LD1 port that are either unaligned, or treated as unaligned and require an additional recycle through the pipeline using the load gather buffer. This typically adds about 10 cycles to the latency of the instruction. This includes loads that cross the 128 byte boundary, octword loads that are not aligned, and a special forward progress case of a load that does not hit in the L1 and crosses the 32 byte boundary and is launched NTC. Counted at finish time."
},
{
- "EventCode": "0x40012",
- "EventName": "PM_L1_ICACHE_RELOADED_ALL",
- "BriefDescription": "Counts all instruction cache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
+ "EventCode": "0x000000C0A4",
+ "EventName": "PM_ST0_UNALIGNED_FIN",
+ "BriefDescription": "Store instructions in ST0 port that are either unaligned, or treated as unaligned and require an additional recycle through the pipeline. This typically adds about 10 cycles to the latency of the instruction. This only includes stores that cross the 128 byte boundary. Counted at finish time."
},
{
- "EventCode": "0x44054",
- "EventName": "PM_VECTOR_LD_CMPL",
- "BriefDescription": "Vector load instruction completed."
+ "EventCode": "0x000000C8A4",
+ "EventName": "PM_ST1_UNALIGNED_FIN",
+ "BriefDescription": "Store instructions in ST1 port that are either unaligned, or treated as unaligned and require an additional recycle through the pipeline. This typically adds about 10 cycles to the latency of the instruction. This only includes stores that cross the 128 byte boundary. Counted at finish time."
},
{
- "EventCode": "0x4D05E",
- "EventName": "PM_BR_CMPL",
- "BriefDescription": "A branch completed. All branches are included."
+ "EventCode": "0x000000D0B4",
+ "EventName": "PM_DC_PREF_STRIDED_CONF",
+ "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software."
},
{
- "EventCode": "0x400F0",
- "EventName": "PM_LD_DEMAND_MISS_L1_FIN",
- "BriefDescription": "Load missed L1, counted at finish time."
+ "EventCode": "0x0000004884",
+ "EventName": "PM_NO_FETCH_IBUF_FULL_CYC",
+ "BriefDescription": "Cycles in which no instructions are fetched because there is no room in the instruction buffers."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
index 21b23bb55d0d..940375d251cb 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
@@ -95,11 +95,21 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
},
{
+ "EventCode": "0x1F056",
+ "EventName": "PM_DISP_SS0_2_INSTR_CYC",
+ "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
+ },
+ {
"EventCode": "0x1F058",
"EventName": "PM_DISP_HELD_CYC",
"BriefDescription": "Cycles dispatch is held."
},
{
+ "EventCode": "0x1F05A",
+ "EventName": "PM_DISP_HELD_SYNC_CYC",
+ "BriefDescription": "Cycles dispatch is held because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+ },
+ {
"EventCode": "0x10064",
"EventName": "PM_DISP_STALL_IC_L2",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
@@ -230,6 +240,16 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status."
},
{
+ "EventCode": "0x2F054",
+ "EventName": "PM_DISP_SS1_2_INSTR_CYC",
+ "BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions."
+ },
+ {
+ "EventCode": "0x2F056",
+ "EventName": "PM_DISP_SS1_4_INSTR_CYC",
+ "BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions."
+ },
+ {
"EventCode": "0x20066",
"EventName": "PM_DISP_HELD_OTHER_CYC",
"BriefDescription": "Cycles dispatch is held for any other reason."
@@ -330,6 +350,16 @@
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
},
{
+ "EventCode": "0x3F054",
+ "EventName": "PM_DISP_SS0_4_INSTR_CYC",
+ "BriefDescription": "Cycles in which Superslice 0 dispatches either 3 or 4 instructions."
+ },
+ {
+ "EventCode": "0x3F056",
+ "EventName": "PM_DISP_SS0_8_INSTR_CYC",
+ "BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions."
+ },
+ {
"EventCode": "0x30060",
"EventName": "PM_DISP_HELD_XVFC_MAPPER_CYC",
"BriefDescription": "Cycles dispatch is held because the XVFC mapper/SRB was full."
@@ -458,5 +488,20 @@
"EventCode": "0x400F8",
"EventName": "PM_FLUSH",
"BriefDescription": "Flush (any type)."
+ },
+ {
+ "EventCode": "0x0B0000016080",
+ "EventName": "PM_L2_TLBIE_SLBIE_START",
+ "BriefDescription": "NCU Master received a TLBIE/SLBIEG/SLBIAG operation from the core. Event count should be multiplied by 2 since the data is coming from a 2:1 clock domain and the data is time sliced across all 4 threads."
+ },
+ {
+ "EventCode": "0x0B0000016880",
+ "EventName": "PM_L2_TLBIE_SLBIE_DELAY",
+ "BriefDescription": "Cycles when a TLBIE/SLBIEG/SLBIAG command was held in a hottemp condition by the NCU Master. Multiply this count by 1000 to obtain the total number of cycles. This can be divided by PM_L2_TLBIE_SLBIE_SENT to obtain the average time a TLBIE/SLBIEG/SLBIAG command was held. Event count should be multiplied by 2 since the data is coming from a 2:1 clock domain and the data is time sliced across all 4 threads."
+ },
+ {
+ "EventCode": "0x0B0000026880",
+ "EventName": "PM_L2_SNP_TLBIE_SLBIE_DELAY",
+ "BriefDescription": "Cycles when a TLBIE/SLBIEG/SLBIAG that targets this thread's LPAR was in flight while in a hottemp condition. Multiply this count by 1000 to obtain the total number of cycles. This can be divided by PM_L2_SNP_TLBIE_SLBIE_START to obtain the overall efficiency. Note: 'inflight' means SnpTLB has been sent to core(ie doesn't include when SnpTLB is in NCU waiting to be launched serially behind different SnpTLB). The NCU Snooper gets in a 'hottemp' delay window when it detects it is above its TLBIE/SLBIE threshold for process SnpTLBIE/SLBIE with this core. Event count should be multiplied by 2 since the data is coming from a 2:1 clock domain and the data is time sliced across all 4 threads."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
index 0e0253d0e757..6f5b0e8fde12 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
@@ -105,6 +105,11 @@
"BriefDescription": "Processor cycles gated by the run latch."
},
{
+ "EventCode": "0x200F8",
+ "EventName": "PM_EXT_INT",
+ "BriefDescription": "Cycles an external interrupt was active."
+ },
+ {
"EventCode": "0x30010",
"EventName": "PM_PMC2_OVERFLOW",
"BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
@@ -125,6 +130,11 @@
"BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
},
{
+ "EventCode": "0x3405A",
+ "EventName": "PM_PRIVILEGED_INST_CMPL",
+ "BriefDescription": "PowerPC instruction completed while the thread was in Privileged state."
+ },
+ {
"EventCode": "0x3006C",
"EventName": "PM_RUN_CYC_SMT2_MODE",
"BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
diff --git a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json
index 9b4a032186a7..7149caec4f80 100644
--- a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json
@@ -36,7 +36,7 @@
"ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
},
{
- "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
},
{
"ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
index a9939823b14b..0c9b9a2d2958 100644
--- a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
@@ -74,7 +74,7 @@
{
"PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event",
"ConfigCode": "0x800000000000000c",
- "EventName": "FW_SFENCE_VMA_RECEIVED",
+ "EventName": "FW_SFENCE_VMA_ASID_SENT",
"BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event"
},
{
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
index 9b4a032186a7..7149caec4f80 100644
--- a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
@@ -36,7 +36,7 @@
"ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
},
{
- "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
},
{
"ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json
index 9b4a032186a7..7149caec4f80 100644
--- a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json
@@ -36,7 +36,7 @@
"ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
},
{
- "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
},
{
"ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json
index 9b4a032186a7..7149caec4f80 100644
--- a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json
@@ -36,7 +36,7 @@
"ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
},
{
- "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
},
{
"ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json
index c9596e18ec09..6347eba48810 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json
@@ -4577,7 +4577,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CRD",
- "Filter": "config1=0x40233",
+ "Filter": "config1=0x4023300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : CRds issued by iA Cores that Hit the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4588,7 +4588,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD",
- "Filter": "config1=0x40433",
+ "Filter": "config1=0x4043300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : DRds issued by iA Cores that Hit the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4599,7 +4599,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LlcPrefCRD",
- "Filter": "config1=0x4b233",
+ "Filter": "config1=0x4b23300000000",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
@@ -4609,7 +4609,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LlcPrefDRD",
- "Filter": "config1=0x4b433",
+ "Filter": "config1=0x4b43300000000",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
@@ -4619,7 +4619,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LlcPrefRFO",
- "Filter": "config1=0x4b033",
+ "Filter": "config1=0x4b03300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that hit the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4630,7 +4630,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_RFO",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Hit the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4651,7 +4651,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD",
- "Filter": "config1=0x40233",
+ "Filter": "config1=0x4023300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : CRds issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4662,7 +4662,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD",
- "Filter": "config1=0x40433",
+ "Filter": "config1=0x4043300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4673,7 +4673,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LlcPrefCRD",
- "Filter": "config1=0x4b233",
+ "Filter": "config1=0x4b23300000000",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
@@ -4683,7 +4683,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LlcPrefDRD",
- "Filter": "config1=0x4b433",
+ "Filter": "config1=0x4b43300000000",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
@@ -4693,7 +4693,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LlcPrefRFO",
- "Filter": "config1=0x4b033",
+ "Filter": "config1=0x4b03300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4704,7 +4704,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4747,7 +4747,7 @@
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM",
"Experimental": "1",
- "Filter": "config1=0x49033",
+ "Filter": "config1=0x4903300000000",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that are generated from local IO ItoM requests that miss the LLC. An ItoM request is used by IIO to request a data write without first reading the data for ownership.",
"UMask": "0x24",
@@ -4759,7 +4759,7 @@
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_RDCUR",
"Experimental": "1",
- "Filter": "config1=0x43C33",
+ "Filter": "config1=0x43c3300000000",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that are generated from local IO RdCur requests and miss the LLC. A RdCur request is used by IIO to read data without changing state.",
"UMask": "0x24",
@@ -4771,7 +4771,7 @@
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_RFO",
"Experimental": "1",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that are generated from local IO RFO requests that miss the LLC. A read for ownership (RFO) requests a cache line to be cached in E state with the intent to modify.",
"UMask": "0x24",
@@ -4999,7 +4999,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CRD",
- "Filter": "config1=0x40233",
+ "Filter": "config1=0x4023300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : CRds issued by iA Cores that Hit the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -5010,7 +5010,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD",
- "Filter": "config1=0x40433",
+ "Filter": "config1=0x4043300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : DRds issued by iA Cores that Hit the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -5021,7 +5021,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LlcPrefCRD",
- "Filter": "config1=0x4b233",
+ "Filter": "config1=0x4b23300000000",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
@@ -5031,7 +5031,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LlcPrefDRD",
- "Filter": "config1=0x4b433",
+ "Filter": "config1=0x4b43300000000",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
@@ -5041,7 +5041,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LlcPrefRFO",
- "Filter": "config1=0x4b033",
+ "Filter": "config1=0x4b03300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores that hit the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -5052,7 +5052,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_RFO",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Hit the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -5073,7 +5073,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD",
- "Filter": "config1=0x40233",
+ "Filter": "config1=0x4023300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : CRds issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -5084,7 +5084,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD",
- "Filter": "config1=0x40433",
+ "Filter": "config1=0x4043300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -5095,7 +5095,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LlcPrefCRD",
- "Filter": "config1=0x4b233",
+ "Filter": "config1=0x4b23300000000",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
@@ -5105,7 +5105,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LlcPrefDRD",
- "Filter": "config1=0x4b433",
+ "Filter": "config1=0x4b43300000000",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
@@ -5115,7 +5115,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LlcPrefRFO",
- "Filter": "config1=0x4b033",
+ "Filter": "config1=0x4b03300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores that missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -5126,7 +5126,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -5171,7 +5171,7 @@
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM",
"Experimental": "1",
- "Filter": "config1=0x49033",
+ "Filter": "config1=0x4903300000000",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that are generated from local IO ItoM requests that miss the LLC. An ItoM is used by IIO to request a data write without first reading the data for ownership.",
"UMask": "0x24",
@@ -5183,7 +5183,7 @@
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_RDCUR",
"Experimental": "1",
- "Filter": "config1=0x43C33",
+ "Filter": "config1=0x43c3300000000",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that are generated from local IO RdCur requests that miss the LLC. A RdCur request is used by IIO to read data without changing state.",
"UMask": "0x24",
@@ -5195,7 +5195,7 @@
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_RFO",
"Experimental": "1",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that are generated from local IO RFO requests that miss the LLC. A read for ownership (RFO) requests data to be cached in E state with the intent to modify.",
"UMask": "0x24",
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/metricgroups.json b/tools/perf/pmu-events/arch/x86/meteorlake/metricgroups.json
new file mode 100644
index 000000000000..b54a5fc0861f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/metricgroups.json
@@ -0,0 +1,142 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvBC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvBO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvCB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvFB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvIO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvMB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvML": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvMP": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvMS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvOB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BvUW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "C0Wait": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ifetch": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IntVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Load_Store_Miss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem_Exec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "load_store_bound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_assists_group": "Metrics contributing to tma_assists category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_ifetch_bandwidth_group": "Metrics contributing to tma_ifetch_bandwidth category",
+ "tma_ifetch_latency_group": "Metrics contributing to tma_ifetch_latency category",
+ "tma_int_operations_group": "Metrics contributing to tma_int_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueComp": "Metrics related by the issue $issueComp",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_resource_bound_group": "Metrics contributing to tma_resource_bound category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json b/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json
new file mode 100644
index 000000000000..1a7392f0da86
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json
@@ -0,0 +1,2535 @@
+[
+ {
+ "BriefDescription": "C10 residency percent per package",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C10_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C1 residency percent per core",
+ "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C1_Core_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C8 residency percent per package",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C8_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C9 residency percent per package",
+ "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C9_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
+ "MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
+ "MetricGroup": "smi",
+ "MetricName": "smi_cycles",
+ "MetricThreshold": "smi_cycles > 0.1",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Number of SMI interrupts.",
+ "MetricExpr": "msr@smi@",
+ "MetricGroup": "smi",
+ "MetricName": "smi_num",
+ "ScaleUnit": "1SMI#"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions",
+ "MetricExpr": "tma_core_bound",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
+ "MetricName": "tma_allocation_restriction",
+ "MetricThreshold": "tma_allocation_restriction > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.1)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL_P@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL1;tma_L1_group",
+ "MetricName": "tma_backend_bound",
+ "MetricThreshold": "tma_backend_bound > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.ALL_P@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL1;tma_L1_group",
+ "MetricName": "tma_bad_speculation",
+ "MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group",
+ "MetricName": "tma_branch_detect",
+ "MetricThreshold": "tma_branch_detect > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)",
+ "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
+ "MetricName": "tma_branch_mispredicts",
+ "MetricThreshold": "tma_branch_mispredicts > 0.05 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group",
+ "MetricName": "tma_branch_resteer",
+ "MetricThreshold": "tma_branch_resteer > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group",
+ "MetricName": "tma_cisc",
+ "MetricThreshold": "tma_cisc > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles due to backend bound stalls that are bounded by core restrictions and not attributed to an outstanding load or stores, or resource limitation",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
+ "MetricName": "tma_core_bound",
+ "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group",
+ "MetricName": "tma_decode",
+ "MetricThreshold": "tma_decode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that does not require the use of microcode, classified as a fast nuke, due to memory ordering, memory disambiguation and memory renaming",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
+ "MetricName": "tma_fast_nuke",
+ "MetricThreshold": "tma_fast_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL_P@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL1;tma_L1_group",
+ "MetricName": "tma_frontend_bound",
+ "MetricThreshold": "tma_frontend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group",
+ "MetricName": "tma_icache_misses",
+ "MetricThreshold": "tma_icache_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
+ "MetricName": "tma_ifetch_bandwidth",
+ "MetricThreshold": "tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend latency restrictions due to icache misses, itlb misses, branch detection, and resteer limitations.",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
+ "MetricName": "tma_ifetch_latency",
+ "MetricThreshold": "tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per Floating Point (FP) Operation",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@FP_FLOPS_RETIRED.ALL@",
+ "MetricGroup": "Flops",
+ "MetricName": "tma_info_arith_inst_mix_ipflop",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@FP_INST_RETIRED.128B_DP@ + cpu_atom@FP_INST_RETIRED.128B_SP@)",
+ "MetricGroup": "Flops",
+ "MetricName": "tma_info_arith_inst_mix_ipfparith_avx128",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@FP_INST_RETIRED.64B_DP@",
+ "MetricGroup": "Flops",
+ "MetricName": "tma_info_arith_inst_mix_ipfparith_scalar_dp",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@FP_INST_RETIRED.32B_SP@",
+ "MetricGroup": "Flops",
+ "MetricName": "tma_info_arith_inst_mix_ipfparith_scalar_sp",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of time that retirement is stalled due to a first level data TLB miss",
+ "MetricExpr": "100 * (cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ + cpu_atom@LD_HEAD.PGWALK_AT_RET@) / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricName": "tma_info_bottleneck_%_dtlb_miss_bound_cycles",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS_IFETCH.ALL@ / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricGroup": "Ifetch",
+ "MetricName": "tma_info_bottleneck_%_ifetch_miss_bound_cycles",
+ "PublicDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss. See Info.Ifetch_Bound",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of time that retirement is stalled due to an L1 miss",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS_LOAD.ALL@ / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricGroup": "Load_Store_Miss",
+ "MetricName": "tma_info_bottleneck_%_load_miss_bound_cycles",
+ "PublicDescription": "Percentage of time that retirement is stalled due to an L1 miss. See Info.Load_Miss_Bound",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall",
+ "MetricExpr": "100 * cpu_atom@LD_HEAD.ANY_AT_RET@ / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricGroup": "Mem_Exec",
+ "MetricName": "tma_info_bottleneck_%_mem_exec_bound_cycles",
+ "PublicDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall. See Info.Mem_Exec_Bound",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@BR_INST_RETIRED.ALL_BRANCHES@",
+ "MetricName": "tma_info_br_inst_mix_ipbranch",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@BR_INST_RETIRED.NEAR_CALL@",
+ "MetricName": "tma_info_br_inst_mix_ipcall",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@BR_INST_RETIRED.FAR_BRANCH@u",
+ "MetricName": "tma_info_br_inst_mix_ipfarbranch",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)",
+ "MetricName": "tma_info_br_inst_mix_ipmisp_cond_ntaken",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@BR_MISP_RETIRED.COND_TAKEN@",
+ "MetricName": "tma_info_br_inst_mix_ipmisp_cond_taken",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@BR_MISP_RETIRED.INDIRECT@",
+ "MetricName": "tma_info_br_inst_mix_ipmisp_indirect",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per retired return Branch Misprediction",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@BR_MISP_RETIRED.RETURN@",
+ "MetricName": "tma_info_br_inst_mix_ipmisp_ret",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per retired Branch Misprediction",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@",
+ "MetricName": "tma_info_br_inst_mix_ipmispredict",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Ratio of all branches which mispredict",
+ "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / cpu_atom@BR_INST_RETIRED.ALL_BRANCHES@",
+ "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Ratio between Mispredicted branches and unknown branches",
+ "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / cpu_atom@BACLEARS.ANY@",
+ "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_to_unknown_branch_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of time that allocation is stalled due to load buffer full",
+ "MetricExpr": "100 * cpu_atom@MEM_SCHEDULER_BLOCK.LD_BUF@ / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricName": "tma_info_buffer_stalls_%_load_buffer_stall_cycles",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of time that allocation is stalled due to memory reservation stations full",
+ "MetricExpr": "100 * cpu_atom@MEM_SCHEDULER_BLOCK.RSV@ / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricName": "tma_info_buffer_stalls_%_mem_rsv_stall_cycles",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of time that allocation is stalled due to store buffer full",
+ "MetricExpr": "100 * cpu_atom@MEM_SCHEDULER_BLOCK.ST_BUF@ / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricName": "tma_info_buffer_stalls_%_store_buffer_stall_cycles",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@ / cpu_atom@INST_RETIRED.ANY@",
+ "MetricName": "tma_info_core_cpi",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "cpu_atom@FP_FLOPS_RETIRED.ALL@ / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricGroup": "Flops",
+ "MetricName": "tma_info_core_flopc",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricName": "tma_info_core_ipc",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL_P@ / cpu_atom@INST_RETIRED.ANY@",
+ "MetricName": "tma_info_core_upi",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L2",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS_IFETCH.L2_HIT@ / cpu_atom@MEM_BOUND_STALLS_IFETCH.ALL@",
+ "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l2hit",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L3",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS_IFETCH.LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS_IFETCH.ALL@",
+ "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3hit",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss subsequently misses in the L3",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS_IFETCH.LLC_MISS@ / cpu_atom@MEM_BOUND_STALLS_IFETCH.ALL@",
+ "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3miss",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L2",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS_LOAD.L2_HIT@ / cpu_atom@MEM_BOUND_STALLS_LOAD.ALL@",
+ "MetricGroup": "load_store_bound",
+ "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l2hit",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L3",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS_LOAD.LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS_LOAD.ALL@",
+ "MetricGroup": "load_store_bound",
+ "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3hit",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that subsequently misses the L3",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS_LOAD.LLC_MISS@ / cpu_atom@MEM_BOUND_STALLS_LOAD.ALL@",
+ "MetricGroup": "load_store_bound",
+ "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3miss",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a pipeline block",
+ "MetricExpr": "100 * cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricGroup": "load_store_bound",
+ "MetricName": "tma_info_load_store_bound_l1_bound",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement",
+ "MetricExpr": "100 * (cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ + cpu_atom@MEM_BOUND_STALLS_LOAD.ALL@) / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricGroup": "load_store_bound",
+ "MetricName": "tma_info_load_store_bound_load_bound",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full",
+ "MetricExpr": "100 * (cpu_atom@MEM_SCHEDULER_BLOCK.ST_BUF@ / cpu_atom@MEM_SCHEDULER_BLOCK.ALL@) * tma_mem_scheduler",
+ "MetricGroup": "load_store_bound",
+ "MetricName": "tma_info_load_store_bound_store_bound",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to floating point assists",
+ "MetricExpr": "1e3 * cpu_atom@MACHINE_CLEARS.FP_ASSIST@ / cpu_atom@INST_RETIRED.ANY@",
+ "MetricName": "tma_info_machine_clear_bound_machine_clears_fp_assist_pki",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to page faults",
+ "MetricExpr": "1e3 * cpu_atom@MACHINE_CLEARS.PAGE_FAULT@ / cpu_atom@INST_RETIRED.ANY@",
+ "MetricName": "tma_info_machine_clear_bound_machine_clears_page_fault_pki",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to self-modifying code",
+ "MetricExpr": "1e3 * cpu_atom@MACHINE_CLEARS.SMC@ / cpu_atom@INST_RETIRED.ANY@",
+ "MetricName": "tma_info_machine_clear_bound_machine_clears_smc_pki",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of total non-speculative loads with an address aliasing block",
+ "MetricExpr": "100 * cpu_atom@LD_BLOCKS.ADDRESS_ALIAS@ / cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@",
+ "MetricName": "tma_info_mem_exec_blocks_%_loads_with_adressaliasing",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block",
+ "MetricExpr": "100 * cpu_atom@LD_BLOCKS.DATA_UNKNOWN@ / cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@",
+ "MetricName": "tma_info_mem_exec_blocks_%_loads_with_storefwdblk",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of Memory Execution Bound due to a first level data cache miss",
+ "MetricExpr": "100 * cpu_atom@LD_HEAD.L1_MISS_AT_RET@ / cpu_atom@LD_HEAD.ANY_AT_RET@",
+ "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_l1miss",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of Memory Execution Bound due to other block cases, such as pipeline conflicts, fences, etc",
+ "MetricExpr": "100 * cpu_atom@LD_HEAD.OTHER_AT_RET@ / cpu_atom@LD_HEAD.ANY_AT_RET@",
+ "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_otherpipelineblks",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of Memory Execution Bound due to a pagewalk",
+ "MetricExpr": "100 * cpu_atom@LD_HEAD.PGWALK_AT_RET@ / cpu_atom@LD_HEAD.ANY_AT_RET@",
+ "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_pagewalk",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of Memory Execution Bound due to a second level TLB miss",
+ "MetricExpr": "100 * cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / cpu_atom@LD_HEAD.ANY_AT_RET@",
+ "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_stlbhit",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of Memory Execution Bound due to a store forward address match",
+ "MetricExpr": "100 * cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / cpu_atom@LD_HEAD.ANY_AT_RET@",
+ "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_storefwding",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per Load",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@",
+ "MetricName": "tma_info_mem_mix_ipload",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instructions per Store",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@MEM_UOPS_RETIRED.ALL_STORES@",
+ "MetricName": "tma_info_mem_mix_ipstore",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of total non-speculative loads that perform one or more locks",
+ "MetricExpr": "100 * cpu_atom@MEM_UOPS_RETIRED.LOCK_LOADS@ / cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@",
+ "MetricName": "tma_info_mem_mix_load_locks_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of total non-speculative loads that are splits",
+ "MetricExpr": "100 * cpu_atom@MEM_UOPS_RETIRED.SPLIT_LOADS@ / cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@",
+ "MetricName": "tma_info_mem_mix_load_splits_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Ratio of mem load uops to all uops",
+ "MetricExpr": "1e3 * cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@ / cpu_atom@TOPDOWN_RETIRING.ALL_P@",
+ "MetricName": "tma_info_mem_mix_memload_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of time that the core is stalled due to a TPAUSE or UMWAIT instruction",
+ "MetricExpr": "100 * cpu_atom@SERIALIZATION.C01_MS_SCB@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricName": "tma_info_serialization _%_tpause_cycles",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricName": "tma_info_system_cpu_utilization",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "cpu_atom@FP_FLOPS_RETIRED.ALL@ / (duration_time * 1e9)",
+ "MetricGroup": "Flops",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE_P@k / cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricGroup": "Summary",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@ / cpu_atom@CPU_CLK_UNHALTED.REF_TSC@",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_turbo_utilization",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are FPDiv uops",
+ "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.FPDIV@ / cpu_atom@TOPDOWN_RETIRING.ALL_P@",
+ "MetricName": "tma_info_uop_mix_fpdiv_uop_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are IDiv uops",
+ "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.IDIV@ / cpu_atom@TOPDOWN_RETIRING.ALL_P@",
+ "MetricName": "tma_info_uop_mix_idiv_uop_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are microcode ops",
+ "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.MS@ / cpu_atom@TOPDOWN_RETIRING.ALL_P@",
+ "MetricName": "tma_info_uop_mix_microcode_uop_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are x87 uops",
+ "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.X87@ / cpu_atom@TOPDOWN_RETIRING.ALL_P@",
+ "MetricName": "tma_info_uop_mix_x87_uop_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB_MISS@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group",
+ "MetricName": "tma_itlb_misses",
+ "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
+ "MetricName": "tma_machine_clears",
+ "MetricThreshold": "tma_machine_clears > 0.05 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
+ "MetricName": "tma_mem_scheduler",
+ "MetricThreshold": "tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
+ "MetricName": "tma_non_mem_scheduler",
+ "MetricThreshold": "tma_non_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that requires the use of microcode (slow nuke)",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
+ "MetricName": "tma_nuke",
+ "MetricThreshold": "tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group",
+ "MetricName": "tma_other_fb",
+ "MetricThreshold": "tma_other_fb > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group",
+ "MetricName": "tma_predecode",
+ "MetricThreshold": "tma_predecode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls)",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
+ "MetricName": "tma_register",
+ "MetricThreshold": "tma_register > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls)",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
+ "MetricName": "tma_reorder_buffer",
+ "MetricThreshold": "tma_reorder_buffer > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a resource limitation",
+ "MetricExpr": "tma_backend_bound - tma_core_bound",
+ "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
+ "MetricName": "tma_resource_bound",
+ "MetricThreshold": "tma_resource_bound > 0.2 & tma_backend_bound > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that result in retirement slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL_P@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL1;tma_L1_group",
+ "MetricName": "tma_retiring",
+ "MetricThreshold": "tma_retiring > 0.75",
+ "MetricgroupNoGroup": "TopdownL1",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS)",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / (6 * cpu_atom@CPU_CLK_UNHALTED.CORE@)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
+ "MetricName": "tma_serialization",
+ "MetricThreshold": "tma_serialization > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Uncore frequency per die [GHZ]",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricGroup": "SoC",
+ "MetricName": "UNCORE_FREQ",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
+ "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_0@ + cpu_core@UOPS_DISPATCHED.PORT_1@ + cpu_core@UOPS_DISPATCHED.PORT_5_11@ + cpu_core@UOPS_DISPATCHED.PORT_6@) / (5 * tma_info_core_core_clks)",
+ "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
+ "MetricName": "tma_alu_op_utilization",
+ "MetricThreshold": "tma_alu_op_utilization > 0.4",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
+ "MetricExpr": "78 * cpu_core@ASSISTS.ANY@ / tma_info_thread_slots",
+ "MetricGroup": "BvIO;TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
+ "MetricName": "tma_assists",
+ "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
+ "PublicDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists. Assists are long sequences of uops that are required in certain corner-cases for operations that cannot be handled natively by the execution pipeline. For example; when working with very small floating point values (so-called Denormals); the FP units are not set up to perform these operations natively. Instead; a sequence of instructions to perform the computation on the Denormals is injected into the pipeline. Since these microcode sequences might be dozens of uops long; Assists can be extremely deleterious to performance and they can be avoided in many cases. Sample with: ASSISTS.ANY",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of slots the CPU retired uops as a result of handing SSE to AVX* or AVX* to SSE transition Assists.",
+ "MetricExpr": "63 * cpu_core@ASSISTS.SSE_AVX_MIX@ / tma_info_thread_slots",
+ "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
+ "MetricName": "tma_avx_assists",
+ "MetricThreshold": "tma_avx_assists > 0.1",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
+ "MetricName": "tma_backend_bound",
+ "MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "MetricExpr": "max(1 - (tma_frontend_bound + tma_backend_bound + tma_retiring), 0)",
+ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "MetricName": "tma_bad_speculation",
+ "MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
+ "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricGroup": "BadSpec;BrMispredicts;BvMP;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
+ "MetricName": "tma_branch_mispredicts",
+ "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
+ "MetricExpr": "cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks + tma_unknown_branches",
+ "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+ "MetricName": "tma_branch_resteers",
+ "MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers. Branch Resteers estimates the Frontend delay in fetching operations from corrected path; following all sorts of miss-predicted branches. For example; branchy code with lots of miss-predictions might get categorized under Branch Resteers. Note the value of this node may overlap with its siblings. Sample with: BR_MISP_RETIRED.ALL_BRANCHES",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due staying in C0.1 power-performance optimized state (Faster wakeup time; Smaller power savings).",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.C01@ / tma_info_thread_clks",
+ "MetricGroup": "C0Wait;TopdownL4;tma_L4_group;tma_serializing_operation_group",
+ "MetricName": "tma_c01_wait",
+ "MetricThreshold": "tma_c01_wait > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due staying in C0.2 power-performance optimized state (Slower wakeup time; Larger power savings).",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.C02@ / tma_info_thread_clks",
+ "MetricGroup": "C0Wait;TopdownL4;tma_L4_group;tma_serializing_operation_group",
+ "MetricName": "tma_c02_wait",
+ "MetricThreshold": "tma_c02_wait > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
+ "MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
+ "MetricName": "tma_cisc",
+ "MetricThreshold": "tma_cisc > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
+ "PublicDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction. A CISC instruction has multiple uops that are required to perform the instruction's functionality as in the case of read-modify-write as an example. Since these instructions require multiple uops they may or may not imply sub-optimal use of machine resources. Sample with: FRONTEND_RETIRED.MS_FLOWS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
+ "MetricExpr": "(1 - tma_branch_mispredicts / tma_bad_speculation) * cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks",
+ "MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
+ "MetricName": "tma_clears_resteers",
+ "MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
+ "MetricExpr": "(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@R, 24 * tma_info_system_core_frequency) + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@R, 25 * tma_info_system_core_frequency) * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
+ "MetricGroup": "BvMS;DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
+ "MetricName": "tma_contested_accesses",
+ "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses. Contested accesses occur when data written by one Logical Processor are read by another Logical Processor on a different Physical Core. Examples of contested accesses include synchronizations such as locks; true data sharing such as modified locked variables; and false sharing. Sample with: MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD;MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS. Related metrics: tma_data_sharing, tma_false_sharing, tma_machine_clears, tma_remote_cache",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck",
+ "MetricExpr": "max(0, tma_backend_bound - tma_memory_bound)",
+ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
+ "MetricName": "tma_core_bound",
+ "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
+ "MetricExpr": "(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@R, 24 * tma_info_system_core_frequency) + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@R, 24 * tma_info_system_core_frequency) * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
+ "MetricGroup": "BvMS;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
+ "MetricName": "tma_data_sharing",
+ "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses. Data shared by multiple Logical Processors (even just read shared) may cause increased access latency due to cache coherency. Excessive data sharing can drastically harm multithreaded performance. Sample with: MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD. Related metrics: tma_contested_accesses, tma_false_sharing, tma_machine_clears, tma_remote_cache",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
+ "MetricExpr": "(cpu_core@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu_core@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
+ "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
+ "MetricName": "tma_decoder0_alone",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
+ "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
+ "MetricExpr": "cpu_core@ARITH.DIV_ACTIVE@ / tma_info_thread_clks",
+ "MetricGroup": "BvCB;TopdownL3;tma_L3_group;tma_core_bound_group",
+ "MetricName": "tma_divider",
+ "MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric represents fraction of cycles where the Divider unit was active. Divide and square root instructions are performed by the Divider unit and can take considerably longer latency than integer or Floating Point addition; subtraction; or multiplication. Sample with: ARITH.DIVIDER_UOPS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
+ "MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_thread_clks",
+ "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_dram_bound",
+ "MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads. Better caching can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_MISS_PS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
+ "MetricExpr": "(cpu_core@IDQ.DSB_CYCLES_ANY@ - cpu_core@IDQ.DSB_CYCLES_OK@) / tma_info_core_core_clks / 2",
+ "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
+ "MetricName": "tma_dsb",
+ "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
+ "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
+ "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / tma_info_thread_clks",
+ "MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
+ "MetricName": "tma_dsb_switches",
+ "MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
+ "MetricExpr": "cpu_core@MEM_INST_RETIRED.STLB_HIT_LOADS@ * min(cpu_core@MEM_INST_RETIRED.STLB_HIT_LOADS@R, 7) / tma_info_thread_clks + tma_load_stlb_miss",
+ "MetricGroup": "BvMT;MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
+ "MetricName": "tma_dtlb_load",
+ "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
+ "MetricExpr": "cpu_core@MEM_INST_RETIRED.STLB_HIT_STORES@ * min(cpu_core@MEM_INST_RETIRED.STLB_HIT_STORES@R, 7) / tma_info_thread_clks + tma_store_stlb_miss",
+ "MetricGroup": "BvMT;MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
+ "MetricName": "tma_dtlb_store",
+ "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
+ "MetricExpr": "28 * tma_info_system_core_frequency * cpu_core@OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM@ / tma_info_thread_clks",
+ "MetricGroup": "BvMS;DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
+ "MetricName": "tma_false_sharing",
+ "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing. False Sharing is a multithreading hiccup; where multiple Logical Processors contend on different data-elements mapped into the same cache line. Sample with: OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM. Related metrics: tma_contested_accesses, tma_data_sharing, tma_machine_clears, tma_remote_cache",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
+ "MetricExpr": "cpu_core@L1D_PEND_MISS.FB_FULL@ / tma_info_thread_clks",
+ "MetricGroup": "BvMS;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
+ "MetricName": "tma_fb_full",
+ "MetricThreshold": "tma_fb_full > 0.3",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues",
+ "MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
+ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
+ "MetricName": "tma_fetch_bandwidth",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
+ "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) - cpu_core@INT_MISC.UOP_DROPPING@ / tma_info_thread_slots",
+ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
+ "MetricName": "tma_fetch_latency",
+ "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring instructions that that are decoder into two or up to ([SNB+] four; [ADL+] five) uops",
+ "MetricExpr": "max(0, tma_heavy_operations - tma_microcode_sequencer)",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueD0",
+ "MetricName": "tma_few_uops_instructions",
+ "MetricThreshold": "tma_few_uops_instructions > 0.05 & tma_heavy_operations > 0.1",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring instructions that that are decoder into two or up to ([SNB+] four; [ADL+] five) uops. This highly-correlates with the number of uops in such instructions. Related metrics: tma_decoder0_alone",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
+ "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
+ "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_fp_arith",
+ "MetricThreshold": "tma_fp_arith > 0.2 & tma_light_operations > 0.6",
+ "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
+ "MetricExpr": "30 * cpu_core@ASSISTS.FP@ / tma_info_thread_slots",
+ "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
+ "MetricName": "tma_fp_assists",
+ "MetricThreshold": "tma_fp_assists > 0.1",
+ "PublicDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists. FP Assist may apply when working with very small floating point values (so-called Denormals).",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
+ "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
+ "MetricName": "tma_fp_scalar",
+ "MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
+ "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
+ "MetricName": "tma_fp_vector",
+ "MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
+ "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+ "MetricName": "tma_fp_vector_128b",
+ "MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+ "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
+ "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+ "MetricName": "tma_fp_vector_256b",
+ "MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+ "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
+ "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) - cpu_core@INT_MISC.UOP_DROPPING@ / tma_info_thread_slots",
+ "MetricGroup": "BvFB;BvIO;PGO;TmaL1;TopdownL1;tma_L1_group",
+ "MetricName": "tma_frontend_bound",
+ "MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions",
+ "MetricExpr": "tma_light_operations * cpu_core@INST_RETIRED.MACRO_FUSED@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Branches;BvBO;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_fused_instructions",
+ "MetricThreshold": "tma_fused_instructions > 0.1 & tma_light_operations > 0.6",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. CMP+JCC or DEC+JCC are common examples of legacy fusions. {([MTL] Note new MOV+OP and Load+OP fusions appear under Other_Light_Ops in MTL!)}",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
+ "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
+ "MetricName": "tma_heavy_operations",
+ "MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .). Sample with: UOPS_RETIRED.HEAVY",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
+ "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / tma_info_thread_clks",
+ "MetricGroup": "BigFootprint;BvBC;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+ "MetricName": "tma_icache_misses",
+ "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / cpu_core@BR_MISP_RETIRED.ALL_BRANCHES@ / 100",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_MISP_RETIRED.COND_NTAKEN@",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_MISP_RETIRED.COND_TAKEN@",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_MISP_RETIRED.INDIRECT@",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_MISP_RETIRED.RET@",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_ret",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_MISP_RETIRED.ALL_BRANCHES@",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+ "MetricExpr": "cpu_core@INT_MISC.CLEARS_COUNT@ / (cpu_core@BR_MISP_RETIRED.ALL_BRANCHES@ + cpu_core@MACHINE_CLEARS.COUNT@)",
+ "MetricGroup": "BrMispredicts",
+ "MetricName": "tma_info_bad_spec_spec_clears_ratio",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_lsd + tma_mite)))",
+ "MetricGroup": "DSB;FetchBW;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_bandwidth > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: ",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
+ "MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA",
+ "MetricExpr": "100 * ((cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ + 2 * cpu_core@BR_INST_RETIRED.NEAR_CALL@ + cpu_core@INST_RETIRED.NOP@) / tma_info_thread_slots)",
+ "MetricGroup": "BvBO;Ret",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5",
+ "PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_hit_latency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_hit_latency / (tma_dtlb_load + tma_fb_full + tma_l1_hit_latency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_cache_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_info_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))) - tma_info_bottleneck_big_code",
+ "MetricGroup": "BvFB;Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+ "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY\\,umask\\=1@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
+ "MetricName": "tma_info_bottleneck_irregular_overhead",
+ "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+ "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_hit_latency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+ "MetricGroup": "BvMS;Mem;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_synchronization",
+ "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+ "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
+ "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_useful_work)",
+ "MetricGroup": "BvOB;Cor;Offcore",
+ "MetricName": "tma_info_bottleneck_other_bottlenecks",
+ "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+ "PublicDescription": "Total pipeline cost of remaining bottlenecks in the back-end. Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls.",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricExpr": "100 * (tma_retiring - (cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ + 2 * cpu_core@BR_INST_RETIRED.NEAR_CALL@ + cpu_core@INST_RETIRED.NOP@) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricGroup": "BvUW;Ret",
+ "MetricName": "tma_info_bottleneck_useful_work",
+ "MetricThreshold": "tma_info_bottleneck_useful_work > 20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(cpu_core@BR_INST_RETIRED.NEAR_CALL@ + cpu_core@BR_INST_RETIRED.NEAR_RETURN@) / cpu_core@BR_INST_RETIRED.ALL_BRANCHES@",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are non-taken conditionals",
+ "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_NTAKEN@ / cpu_core@BR_INST_RETIRED.ALL_BRANCHES@",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "tma_info_branches_cond_nt",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are taken conditionals",
+ "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_TAKEN@ / cpu_core@BR_INST_RETIRED.ALL_BRANCHES@",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "tma_info_branches_cond_tk",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
+ "MetricExpr": "(cpu_core@BR_INST_RETIRED.NEAR_TAKEN@ - cpu_core@BR_INST_RETIRED.COND_TAKEN@ - 2 * cpu_core@BR_INST_RETIRED.NEAR_CALL@) / cpu_core@BR_INST_RETIRED.ALL_BRANCHES@",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
+ "MetricExpr": "1 - (tma_info_branches_cond_nt + tma_info_branches_cond_tk + tma_info_branches_callret + tma_info_branches_jump)",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_other_branches",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
+ "MetricExpr": "(cpu_core@CPU_CLK_UNHALTED.DISTRIBUTED@ if #SMT_on else tma_info_thread_clks)",
+ "MetricGroup": "SMT",
+ "MetricName": "tma_info_core_core_clks",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_core_core_clks",
+ "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_core_coreipc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "uops Executed per Cycle",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / tma_info_thread_clks",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_core_epc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricExpr": "(cpu_core@FP_ARITH_DISPATCHED.PORT_0@ + cpu_core@FP_ARITH_DISPATCHED.PORT_1@ + cpu_core@FP_ARITH_DISPATCHED.PORT_5@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common).",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricExpr": "cpu_core@IDQ.DSB_UOPS@ / cpu_core@UOPS_ISSUED.ANY@",
+ "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
+ "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
+ "MetricGroup": "DSBmiss",
+ "MetricName": "tma_info_frontend_dsb_switch_cost",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of Uops issued by front-end when it issued something",
+ "MetricExpr": "cpu_core@UOPS_ISSUED.ANY@ / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchBW",
+ "MetricName": "tma_info_frontend_fetch_upc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average Latency for L1 instruction cache misses",
+ "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_frontend_icache_miss_latency",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FRONTEND_RETIRED.ANY_DSB_MISS@",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / cpu_core@BACLEARS.ANY@",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * cpu_core@FRONTEND_RETIRED.L2_MISS@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.CODE_RD_MISS@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+ "MetricExpr": "cpu_core@LSD.UOPS@ / cpu_core@UOPS_ISSUED.ANY@",
+ "MetricGroup": "Fed;LSD",
+ "MetricName": "tma_info_frontend_lsd_coverage",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection",
+ "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=1\\,edge@",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_unknown_branch_cost",
+ "PublicDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection. See Unknown_Branches node.",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ / cpu_core@BR_INST_RETIRED.NEAR_TAKEN@",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "Summary;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_inst_mix_instructions",
+ "PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + cpu_core@FP_ARITH_INST_RETIRED.VECTOR@)",
+ "MetricGroup": "Flops;InsType",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
+ "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW.",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
+ "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting.",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
+ "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting.",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@",
+ "MetricGroup": "Flops;FpScalar;InsType",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
+ "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting.",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@",
+ "MetricGroup": "Flops;FpScalar;InsType",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
+ "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting.",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.ALL_BRANCHES@",
+ "MetricGroup": "Branches;Fed;InsType",
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.NEAR_CALL@",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricGroup": "Flops;InsType",
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@MEM_INST_RETIRED.ALL_LOADS@",
+ "MetricGroup": "InsType",
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per PAUSE (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / cpu_core@CPU_CLK_UNHALTED.PAUSE_INST@",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "tma_info_inst_mix_ippause",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@",
+ "MetricGroup": "InsType",
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
+ "MetricGroup": "Prefetches",
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per taken branch",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.NEAR_TAKEN@",
+ "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 13",
+ "PublicDescription": "Instructions per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_fb_hpki",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * cpu_core@L1D.REPLACEMENT@ / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l1d_cache_fill_bw",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_l1mpki",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.ALL_DEMAND_DATA_RD@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_l1mpki_load",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * cpu_core@L2_LINES_IN.ALL@ / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l2_cache_fill_bw",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
+ "MetricExpr": "1e3 * (cpu_core@L2_RQSTS.REFERENCES@ - cpu_core@L2_RQSTS.MISS@) / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_l2hpki_all",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.DEMAND_DATA_RD_HIT@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_l2hpki_load",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L2_MISS@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "Backend;CacheHits;Mem",
+ "MetricName": "tma_info_memory_l2mpki",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.MISS@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem;Offcore",
+ "MetricName": "tma_info_memory_l2mpki_all",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.DEMAND_DATA_RD_MISS@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_l2mpki_load",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Offcore requests (L2 cache miss) per kilo instruction for demand RFOs",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.RFO_MISS@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheMisses;Offcore",
+ "MetricName": "tma_info_memory_l2mpki_rfo",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * cpu_core@OFFCORE_REQUESTS.ALL_REQUESTS@ / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_l3_cache_access_bw",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * cpu_core@LONGEST_LAT_CACHE.MISS@ / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l3_cache_fill_bw",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L3_MISS@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "Mem",
+ "MetricName": "tma_info_memory_l3mpki",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD@",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_latency_data_l2_mlp",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average Latency for L2 cache miss demand Loads",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS.DEMAND_DATA_RD@",
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_latency_load_l2_miss_latency",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average Parallel L2 cache miss demand Loads",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_latency_load_l2_mlp",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average Latency for L3 cache miss demand Loads",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD@",
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_latency_load_l3_miss_latency",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / cpu_core@MEM_LOAD_COMPLETED.L1_MISS_ANY@",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "\"Bus lock\" per kilo instruction",
+ "MetricExpr": "1e3 * cpu_core@SQ_MISC.BUS_LOCK@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "Mem",
+ "MetricName": "tma_info_memory_mix_bus_lock_pki",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Un-cacheable retired load per kilo instruction",
+ "MetricExpr": "1e3 * cpu_core@MEM_LOAD_MISC_RETIRED.UC@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "Mem",
+ "MetricName": "tma_info_memory_mix_uc_load_pki",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / cpu_core@L1D_PEND_MISS.PENDING_CYCLES@",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * cpu_core@ITLB_MISSES.WALK_COMPLETED@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * cpu_core@DTLB_LOAD_MISSES.WALK_COMPLETED@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "(cpu_core@ITLB_MISSES.WALK_PENDING@ + cpu_core@DTLB_LOAD_MISSES.WALK_PENDING@ + cpu_core@DTLB_STORE_MISSES.WALK_PENDING@) / (4 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * cpu_core@DTLB_STORE_MISSES.WALK_COMPLETED@ / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per core",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of uops fetched from DSB per cycle",
+ "MetricExpr": "cpu_core@IDQ.DSB_UOPS@ / cpu_core@IDQ.DSB_CYCLES_ANY@",
+ "MetricGroup": "Fed;FetchBW",
+ "MetricName": "tma_info_pipeline_fetch_dsb",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of uops fetched from LSD per cycle",
+ "MetricExpr": "cpu_core@LSD.UOPS@ / cpu_core@LSD.CYCLES_ACTIVE@",
+ "MetricGroup": "Fed;FetchBW",
+ "MetricName": "tma_info_pipeline_fetch_lsd",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of uops fetched from MITE per cycle",
+ "MetricExpr": "cpu_core@IDQ.MITE_UOPS@ / cpu_core@IDQ.MITE_CYCLES_ANY@",
+ "MetricGroup": "Fed;FetchBW",
+ "MetricName": "tma_info_pipeline_fetch_mite",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per a microcode Assist invocation",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY@",
+ "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
+ "MetricName": "tma_info_pipeline_ipassist",
+ "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+ "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
+ "MetricExpr": "cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "MicroSeq;Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_strings_cycles",
+ "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of cycles the processor is waiting yet unhalted; covering legacy PAUSE instruction, as well as C0.1 / C0.2 power-performance optimized states",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.C0_WAIT@ / tma_info_thread_clks",
+ "MetricGroup": "C0Wait",
+ "MetricName": "tma_info_system_c0_wait",
+ "MetricThreshold": "tma_info_system_c0_wait > 0.05",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_core_frequency",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization (percentage)",
+ "MetricExpr": "tma_info_system_cpus_utilized / #num_cpus_online",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of utilized CPUs",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "tma_info_system_cpus_utilized",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_HAC_ARB_TRK_REQUESTS.ALL + UNC_HAC_ARB_COH_TRK_REQUESTS.ALL) / 1e9 / duration_time",
+ "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD_P@k / cpu_core@INST_RETIRED.ANY_P@k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD_P@k / cpu_core@CPU_CLK_UNHALTED.THREAD@",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@",
+ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "tma_info_system_mem_parallel_reads",
+ "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
+ "MetricExpr": "(1 - cpu_core@CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE@ / cpu_core@CPU_CLK_UNHALTED.REF_DISTRIBUTED@ if #SMT_on else 0)",
+ "MetricGroup": "SMT",
+ "MetricName": "tma_info_system_smt_2t_utilization",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Socket actual clocks when any core is active on that socket",
+ "MetricExpr": "UNC_CLOCK.SOCKET",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_socket_clks",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "tma_info_thread_clks / cpu_core@CPU_CLK_UNHALTED.REF_TSC@",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_turbo_utilization",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_ISSUED.ANY@",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage.",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "cpu_core@TOPDOWN.SLOTS@",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
+ "MetricExpr": "(tma_info_thread_slots / (cpu_core@TOPDOWN.SLOTS@ / 2) if #SMT_on else 1)",
+ "MetricGroup": "SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots_utilization",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "Pipeline;Ret;Retire",
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops per taken branch",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu_core@BR_INST_RETIRED.NEAR_TAKEN@",
+ "MetricGroup": "Branches;Fed;FetchBW",
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 9",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents overall Integer (Int) select operations fraction the CPU has executed (retired)",
+ "MetricExpr": "tma_int_vector_128b + tma_int_vector_256b",
+ "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_int_operations",
+ "MetricThreshold": "tma_int_operations > 0.1 & tma_light_operations > 0.6",
+ "PublicDescription": "This metric represents overall Integer (Int) select operations fraction the CPU has executed (retired). Vector/Matrix Int operations and shuffles are counted. Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents 128-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
+ "MetricExpr": "(cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@) / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P",
+ "MetricName": "tma_int_vector_128b",
+ "MetricThreshold": "tma_int_vector_128b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric represents 128-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD/MUL or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
+ "MetricExpr": "(cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P",
+ "MetricName": "tma_int_vector_256b",
+ "MetricThreshold": "tma_int_vector_256b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD/MUL or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
+ "MetricExpr": "cpu_core@ICACHE_TAG.STALLS@ / tma_info_thread_clks",
+ "MetricGroup": "BigFootprint;BvBC;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+ "MetricName": "tma_itlb_misses",
+ "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
+ "MetricExpr": "max((cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@) / tma_info_thread_clks, 0)",
+ "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+ "MetricName": "tma_l1_bound",
+ "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache. The L1 data cache typically has the shortest latency. However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates fraction of cycles with demand load accesses that hit the L1 cache",
+ "MetricExpr": "min(2 * (cpu_core@MEM_INST_RETIRED.ALL_LOADS@ - cpu_core@MEM_LOAD_RETIRED.FB_HIT@ - cpu_core@MEM_LOAD_RETIRED.L1_MISS@) * 20 / 100, max(cpu_core@CYCLE_ACTIVITY.CYCLES_MEM_ANY@ - cpu_core@MEMORY_ACTIVITY.CYCLES_L1D_MISS@, 0)) / tma_info_thread_clks",
+ "MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_l1_hit_latency",
+ "MetricThreshold": "tma_l1_hit_latency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric roughly estimates fraction of cycles with demand load accesses that hit the L1 cache. The short latency of the L1 data cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
+ "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks",
+ "MetricGroup": "BvML;CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_l2_bound",
+ "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads. Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+ "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks",
+ "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_l3_bound",
+ "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core. Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+ "MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L3_HIT@R, 9 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
+ "MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
+ "MetricName": "tma_l3_hit_latency",
+ "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
+ "MetricExpr": "cpu_core@DECODE.LCP@ / tma_info_thread_clks",
+ "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
+ "MetricName": "tma_lcp",
+ "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation)",
+ "MetricExpr": "max(0, tma_retiring - tma_heavy_operations)",
+ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
+ "MetricName": "tma_light_operations",
+ "MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_2_3_10@ / (3 * tma_info_core_core_clks)",
+ "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
+ "MetricName": "tma_load_op_utilization",
+ "MetricThreshold": "tma_load_op_utilization > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations. Sample with: UOPS_DISPATCHED.PORT_2_3_10",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates the fraction of cycles where the (first level) DTLB was missed by load accesses, that later on hit in second-level TLB (STLB)",
+ "MetricExpr": "max(0, tma_dtlb_load - tma_load_stlb_miss)",
+ "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
+ "MetricName": "tma_load_stlb_hit",
+ "MetricThreshold": "tma_load_stlb_hit > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
+ "MetricExpr": "cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@ / tma_info_thread_clks",
+ "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
+ "MetricName": "tma_load_stlb_miss",
+ "MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricExpr": "cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ * cpu_core@MEM_INST_RETIRED.LOCK_LOADS@R / tma_info_thread_clks",
+ "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
+ "MetricName": "tma_lock_latency",
+ "MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations. Due to the microarchitecture handling of locks; they are classified as L1_Bound regardless of what memory source satisfied them. Sample with: MEM_INST_RETIRED.LOCK_LOADS. Related metrics: tma_store_latency",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit",
+ "MetricExpr": "(cpu_core@LSD.CYCLES_ACTIVE@ - cpu_core@LSD.CYCLES_OK@) / tma_info_core_core_clks / 2",
+ "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
+ "MetricName": "tma_lsd",
+ "MetricThreshold": "tma_lsd > 0.15 & tma_fetch_bandwidth > 0.2",
+ "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit. LSD typically does well sustaining Uop supply. However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears",
+ "MetricExpr": "max(0, tma_bad_speculation - tma_branch_mispredicts)",
+ "MetricGroup": "BadSpec;BvMS;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
+ "MetricName": "tma_machine_clears",
+ "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
+ "MetricExpr": "min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
+ "MetricGroup": "BvMS;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
+ "MetricName": "tma_mem_bandwidth",
+ "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
+ "MetricExpr": "min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD@) / tma_info_thread_clks - tma_mem_bandwidth",
+ "MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
+ "MetricName": "tma_mem_latency",
+ "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
+ "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
+ "MetricName": "tma_memory_bound",
+ "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
+ "MetricName": "tma_memory_fence",
+ "MetricThreshold": "tma_memory_fence > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
+ "MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_memory_operations",
+ "MetricThreshold": "tma_memory_operations > 0.1 & tma_light_operations > 0.6",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
+ "MetricExpr": "cpu_core@UOPS_RETIRED.MS@ / tma_info_thread_slots",
+ "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
+ "MetricName": "tma_microcode_sequencer",
+ "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
+ "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit. The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: UOPS_RETIRED.MS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
+ "MetricExpr": "tma_branch_mispredicts / tma_bad_speculation * cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks",
+ "MetricGroup": "BadSpec;BrMispredicts;BvMP;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
+ "MetricName": "tma_mispredicts_resteers",
+ "MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
+ "MetricExpr": "(cpu_core@IDQ.MITE_CYCLES_ANY@ - cpu_core@IDQ.MITE_CYCLES_OK@) / tma_info_core_core_clks / 2",
+ "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
+ "MetricName": "tma_mite",
+ "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
+ "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
+ "MetricExpr": "160 * cpu_core@ASSISTS.SSE_AVX_MIX@ / tma_info_thread_clks",
+ "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
+ "MetricName": "tma_mixing_vectors",
+ "MetricThreshold": "tma_mixing_vectors > 0.05",
+ "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
+ "MetricExpr": "3 * cpu_core@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (cpu_core@UOPS_RETIRED.SLOTS@ / cpu_core@UOPS_ISSUED.ANY@) / tma_info_thread_clks",
+ "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
+ "MetricName": "tma_ms_switches",
+ "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: FRONTEND_RETIRED.MS_FLOWS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused",
+ "MetricExpr": "tma_light_operations * (cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ - cpu_core@INST_RETIRED.MACRO_FUSED@) / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Branches;BvBO;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_non_fused_branches",
+ "MetricThreshold": "tma_non_fused_branches > 0.1 & tma_light_operations > 0.6",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused. Non-conditional branches like direct JMP or CALL would count here. Can be used to examine fusible conditional jumps that were not fused.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
+ "MetricExpr": "tma_light_operations * cpu_core@INST_RETIRED.NOP@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "BvBO;Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
+ "MetricName": "tma_nop_instructions",
+ "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
+ "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_other_light_ops",
+ "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
+ "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+ "MetricExpr": "max(tma_branch_mispredicts * (1 - cpu_core@BR_MISP_RETIRED.ALL_BRANCHES@ / (cpu_core@INT_MISC.CLEARS_COUNT@ - cpu_core@MACHINE_CLEARS.COUNT@)), 0.0001)",
+ "MetricGroup": "BrMispredicts;BvIO;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+ "MetricName": "tma_other_mispredicts",
+ "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+ "MetricExpr": "max(tma_machine_clears * (1 - cpu_core@MACHINE_CLEARS.MEMORY_ORDERING@ / cpu_core@MACHINE_CLEARS.COUNT@), 0.0001)",
+ "MetricGroup": "BvIO;Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+ "MetricName": "tma_other_nukes",
+ "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Page Faults",
+ "MetricExpr": "99 * cpu_core@ASSISTS.PAGE_FAULT@ / tma_info_thread_slots",
+ "MetricGroup": "TopdownL5;tma_L5_group;tma_assists_group",
+ "MetricName": "tma_page_faults",
+ "MetricThreshold": "tma_page_faults > 0.05",
+ "PublicDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Page Faults. A Page Fault may apply on first application access to a memory page. Note operating system handling of page faults accounts for the majority of its cost.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_0@ / tma_info_core_core_clks",
+ "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
+ "MetricName": "tma_port_0",
+ "MetricThreshold": "tma_port_0 > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_1@ / tma_info_core_core_clks",
+ "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
+ "MetricName": "tma_port_1",
+ "MetricThreshold": "tma_port_1 > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_6@ / tma_info_core_core_clks",
+ "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
+ "MetricName": "tma_port_6",
+ "MetricThreshold": "tma_port_6 > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_thread_clks)",
+ "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
+ "MetricName": "tma_ports_utilization",
+ "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related). Two distinct categories can be attributed into this metric: (1) heavy data-dependency among contiguous instructions would manifest in this metric - such cases are often referred to as low Instruction Level Parallelism (ILP). (2) Contention on some hardware execution unit other than Divider. For example; when there are too many multiply operations.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricExpr": "max((cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ + max(cpu_core@RS.EMPTY_RESOURCE@ - cpu_core@RESOURCE_STALLS.SCOREBOARD@, 0)) / tma_info_thread_clks, 1) * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) / tma_info_thread_clks",
+ "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
+ "MetricName": "tma_ports_utilized_0",
+ "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise). Long-latency instructions like divides may contribute to this metric.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
+ "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
+ "MetricName": "tma_ports_utilized_1",
+ "MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). This can be due to heavy data-dependency among software instructions; or over oversubscribing a particular hardware resource. In some other cases with high 1_Port_Utilized and L1_Bound; this metric can point to L1 data-cache latency bottleneck that may not necessarily manifest with complete execution starvation (due to the short L1 latency e.g. walking a linked list) - looking at the assembly can be helpful. Sample with: EXE_ACTIVITY.1_PORTS_UTIL. Related metrics: tma_l1_bound",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
+ "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
+ "MetricName": "tma_ports_utilized_2",
+ "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Sample with: EXE_ACTIVITY.2_PORTS_UTIL. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
+ "MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
+ "MetricName": "tma_ports_utilized_3m",
+ "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
+ "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricGroup": "BvUW;TmaL1;TopdownL1;tma_L1_group",
+ "MetricName": "tma_retiring",
+ "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
+ "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks + tma_c02_wait",
+ "MetricGroup": "BvIO;PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
+ "MetricName": "tma_serializing_operation",
+ "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring Shuffle operations of 256-bit vector size (FP or Integer)",
+ "MetricExpr": "tma_light_operations * cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
+ "MetricName": "tma_shuffles_256b",
+ "MetricThreshold": "tma_shuffles_256b > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring Shuffle operations of 256-bit vector size (FP or Integer). Shuffles may incur slow cross \"vector lane\" data transfers.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
+ "MetricName": "tma_slow_pause",
+ "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: CPU_CLK_UNHALTED.PAUSE_INST",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
+ "MetricExpr": "cpu_core@MEM_INST_RETIRED.SPLIT_LOADS@ * min(cpu_core@MEM_INST_RETIRED.SPLIT_LOADS@R, tma_info_memory_load_miss_real_latency) / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_split_loads",
+ "MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary. Sample with: MEM_INST_RETIRED.SPLIT_LOADS_PS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents rate of split store accesses",
+ "MetricExpr": "cpu_core@MEM_INST_RETIRED.SPLIT_STORES@ * min(cpu_core@MEM_INST_RETIRED.SPLIT_STORES@R, 1) / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
+ "MetricName": "tma_split_stores",
+ "MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents rate of split store accesses. Consider aligning your data to the 64-byte cache line granularity. Sample with: MEM_INST_RETIRED.SPLIT_STORES_PS. Related metrics: tma_port_4",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
+ "MetricExpr": "(cpu_core@XQ.FULL_CYCLES@ + cpu_core@L1D_PEND_MISS.L2_STALLS@) / tma_info_thread_clks",
+ "MetricGroup": "BvMS;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
+ "MetricName": "tma_sq_full",
+ "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.BOUND_ON_STORES@ / tma_info_thread_clks",
+ "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_store_bound",
+ "MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write. Even though store accesses do not typically stall out-of-order CPUs; there are few cases where stores can lead to actual stalls. This metric will be flagged should RFO stores be a bottleneck. Sample with: MEM_INST_RETIRED.ALL_STORES_PS",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+ "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_store_fwd_blk",
+ "MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores. To streamline memory operations in the pipeline; a load can avoid waiting for memory if a prior in-flight store is writing the data that the load wants to read (store forwarding process). However; in some cases the load may be blocked for a significant time pending the store forward. For example; when the prior store is writing a smaller region than the load is reading.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
+ "MetricExpr": "(cpu_core@MEM_STORE_RETIRED.L2_HIT@ * 10 * (1 - cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@) + (1 - cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@) * min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@)) / tma_info_thread_clks",
+ "MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
+ "MetricName": "tma_store_latency",
+ "MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses. Store accesses usually less impact out-of-order core performance; however; holding resources for longer time can lead into undesired implications (e.g. contention on L1D fill-buffer entries - see FB_Full). Related metrics: tma_fb_full, tma_lock_latency",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
+ "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_4_9@ + cpu_core@UOPS_DISPATCHED.PORT_7_8@) / (4 * tma_info_core_core_clks)",
+ "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
+ "MetricName": "tma_store_op_utilization",
+ "MetricThreshold": "tma_store_op_utilization > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations. Sample with: UOPS_DISPATCHED.PORT_7_8",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates the fraction of cycles where the TLB was missed by store accesses, hitting in the second-level TLB (STLB)",
+ "MetricExpr": "max(0, tma_dtlb_store - tma_store_stlb_miss)",
+ "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
+ "MetricName": "tma_store_stlb_hit",
+ "MetricThreshold": "tma_store_stlb_hit > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
+ "MetricExpr": "cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@ / tma_info_core_core_clks",
+ "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
+ "MetricName": "tma_store_stlb_miss",
+ "MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores",
+ "MetricExpr": "9 * cpu_core@OCR.STREAMING_WR.ANY_RESPONSE@ / tma_info_thread_clks",
+ "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueSmSt;tma_store_bound_group",
+ "MetricName": "tma_streaming_stores",
+ "MetricThreshold": "tma_streaming_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores. Even though store accesses do not typically stall out-of-order CPUs; there are few cases where stores can lead to actual stalls. This metric will be flagged should Streaming stores be a bottleneck. Sample with: OCR.STREAMING_WR.ANY_RESPONSE. Related metrics: tma_fb_full",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
+ "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / tma_info_thread_clks",
+ "MetricGroup": "BigFootprint;BvBC;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+ "MetricName": "tma_unknown_branches",
+ "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: FRONTEND_RETIRED.UNKNOWN_BRANCH",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This metric serves as an approximation of legacy x87 usage",
+ "MetricExpr": "tma_retiring * cpu_core@UOPS_EXECUTED.X87@ / cpu_core@UOPS_EXECUTED.THREAD@",
+ "MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
+ "MetricName": "tma_x87_use",
+ "MetricThreshold": "tma_x87_use > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric serves as an approximation of legacy x87 usage. It accounts for instructions beyond X87 FP arithmetic operations; hence may be used as a thermometer to avoid X87 high usage and preferably upgrade to modern ISA. See Tip under Tuning Hint.",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-cache.json
index da46a3aeb58c..4fc818626491 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-cache.json
@@ -4454,7 +4454,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CRD",
- "Filter": "config1=0x40233",
+ "Filter": "config1=0x4023300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : CRds issued by iA Cores that Hit the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4465,7 +4465,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD",
- "Filter": "config1=0x40433",
+ "Filter": "config1=0x4043300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : DRds issued by iA Cores that Hit the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4476,7 +4476,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LlcPrefCRD",
- "Filter": "config1=0x4b233",
+ "Filter": "config1=0x4b23300000000",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
@@ -4486,7 +4486,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LlcPrefDRD",
- "Filter": "config1=0x4b433",
+ "Filter": "config1=0x4b43300000000",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
@@ -4496,7 +4496,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LlcPrefRFO",
- "Filter": "config1=0x4b033",
+ "Filter": "config1=0x4b03300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that hit the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4507,7 +4507,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_RFO",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Hit the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4528,7 +4528,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD",
- "Filter": "config1=0x40233",
+ "Filter": "config1=0x4023300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : CRds issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4539,7 +4539,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD",
- "Filter": "config1=0x40433",
+ "Filter": "config1=0x4043300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4550,7 +4550,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LlcPrefCRD",
- "Filter": "config1=0x4b233",
+ "Filter": "config1=0x4b23300000000",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
@@ -4560,7 +4560,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LlcPrefDRD",
- "Filter": "config1=0x4b433",
+ "Filter": "config1=0x4b43300000000",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
@@ -4570,7 +4570,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LlcPrefRFO",
- "Filter": "config1=0x4b033",
+ "Filter": "config1=0x4b03300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4581,7 +4581,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4624,7 +4624,7 @@
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM",
"Experimental": "1",
- "Filter": "config1=0x49033",
+ "Filter": "config1=0x4903300000000",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that are generated from local IO ItoM requests that miss the LLC. An ItoM request is used by IIO to request a data write without first reading the data for ownership.",
"UMask": "0x24",
@@ -4636,7 +4636,7 @@
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_RDCUR",
"Experimental": "1",
- "Filter": "config1=0x43C33",
+ "Filter": "config1=0x43c3300000000",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that are generated from local IO RdCur requests and miss the LLC. A RdCur request is used by IIO to read data without changing state.",
"UMask": "0x24",
@@ -4648,7 +4648,7 @@
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_RFO",
"Experimental": "1",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that are generated from local IO RFO requests that miss the LLC. A read for ownership (RFO) requests a cache line to be cached in E state with the intent to modify.",
"UMask": "0x24",
@@ -4865,7 +4865,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CRD",
- "Filter": "config1=0x40233",
+ "Filter": "config1=0x4023300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : CRds issued by iA Cores that Hit the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4876,7 +4876,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD",
- "Filter": "config1=0x40433",
+ "Filter": "config1=0x4043300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : DRds issued by iA Cores that Hit the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4887,7 +4887,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LlcPrefCRD",
- "Filter": "config1=0x4b233",
+ "Filter": "config1=0x4b23300000000",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
@@ -4897,7 +4897,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LlcPrefDRD",
- "Filter": "config1=0x4b433",
+ "Filter": "config1=0x4b43300000000",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
@@ -4907,7 +4907,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LlcPrefRFO",
- "Filter": "config1=0x4b033",
+ "Filter": "config1=0x4b03300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores that hit the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4918,7 +4918,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_RFO",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Hit the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x11",
@@ -4939,7 +4939,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD",
- "Filter": "config1=0x40233",
+ "Filter": "config1=0x4023300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : CRds issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4950,7 +4950,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD",
- "Filter": "config1=0x40433",
+ "Filter": "config1=0x4043300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4961,7 +4961,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LlcPrefCRD",
- "Filter": "config1=0x4b233",
+ "Filter": "config1=0x4b23300000000",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
@@ -4971,7 +4971,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LlcPrefDRD",
- "Filter": "config1=0x4b433",
+ "Filter": "config1=0x4b43300000000",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
@@ -4981,7 +4981,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LlcPrefRFO",
- "Filter": "config1=0x4b033",
+ "Filter": "config1=0x4b03300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores that missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -4992,7 +4992,7 @@
"Counter": "0",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
"UMask": "0x21",
@@ -5037,7 +5037,7 @@
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM",
"Experimental": "1",
- "Filter": "config1=0x49033",
+ "Filter": "config1=0x4903300000000",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that are generated from local IO ItoM requests that miss the LLC. An ItoM is used by IIO to request a data write without first reading the data for ownership.",
"UMask": "0x24",
@@ -5049,7 +5049,7 @@
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_RDCUR",
"Experimental": "1",
- "Filter": "config1=0x43C33",
+ "Filter": "config1=0x43c3300000000",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that are generated from local IO RdCur requests that miss the LLC. A RdCur request is used by IIO to read data without changing state.",
"UMask": "0x24",
@@ -5061,7 +5061,7 @@
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_RFO",
"Experimental": "1",
- "Filter": "config1=0x40033",
+ "Filter": "config1=0x4003300000000",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that are generated from local IO RFO requests that miss the LLC. A read for ownership (RFO) requests data to be cached in E state with the intent to modify.",
"UMask": "0x24",
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json
index 7551fb91a9d7..a81776deb2e6 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json
@@ -1,62 +1,5 @@
[
{
- "BriefDescription": "MMIO reads. Derived from unc_cha_tor_inserts.ia_miss",
- "Counter": "0,1,2,3",
- "EventCode": "0x35",
- "EventName": "LLC_MISSES.MMIO_READ",
- "Filter": "config1=0x40040e33",
- "PerPkg": "1",
- "PublicDescription": "TOR Inserts : All requests from iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
- "UMask": "0xc001fe01",
- "Unit": "CHA"
- },
- {
- "BriefDescription": "MMIO writes. Derived from unc_cha_tor_inserts.ia_miss",
- "Counter": "0,1,2,3",
- "EventCode": "0x35",
- "EventName": "LLC_MISSES.MMIO_WRITE",
- "Filter": "config1=0x40041e33",
- "PerPkg": "1",
- "PublicDescription": "TOR Inserts : All requests from iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
- "UMask": "0xc001fe01",
- "Unit": "CHA"
- },
- {
- "BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_cha_tor_inserts.ia_miss",
- "Counter": "0,1,2,3",
- "EventCode": "0x35",
- "EventName": "LLC_MISSES.UNCACHEABLE",
- "Filter": "config1=0x40e33",
- "PerPkg": "1",
- "PublicDescription": "TOR Inserts : All requests from iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
- "UMask": "0xc001fe01",
- "Unit": "CHA"
- },
- {
- "BriefDescription": "Streaming stores (full cache line). Derived from unc_cha_tor_inserts.ia_miss",
- "Counter": "0,1,2,3",
- "EventCode": "0x35",
- "EventName": "LLC_REFERENCES.STREAMING_FULL",
- "Filter": "config1=0x41833",
- "PerPkg": "1",
- "PublicDescription": "TOR Inserts : All requests from iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
- "ScaleUnit": "64Bytes",
- "UMask": "0xc001fe01",
- "Unit": "CHA"
- },
- {
- "BriefDescription": "Streaming stores (partial cache line). Derived from unc_cha_tor_inserts.ia_miss",
- "Counter": "0,1,2,3",
- "EventCode": "0x35",
- "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
- "Filter": "config1=0x41a33",
- "PerPkg": "1",
- "PublicDescription": "TOR Inserts : All requests from iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
- "ScaleUnit": "64Bytes",
- "UMask": "0xc001fe01",
- "Unit": "CHA"
- },
- {
"BriefDescription": "CMS Agent0 AD Credits Acquired : For Transgress 0",
"Counter": "0,1,2,3",
"EventCode": "0x80",
diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c
index 13727421d424..c592079982fb 100644
--- a/tools/perf/pmu-events/empty-pmu-events.c
+++ b/tools/perf/pmu-events/empty-pmu-events.c
@@ -1,196 +1,193 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * An empty pmu-events.c file used when there is no architecture json files in
- * arch or when the jevents.py script cannot be run.
- *
- * The test cpu/soc is provided for testing.
- */
-#include "pmu-events/pmu-events.h"
+
+/* SPDX-License-Identifier: GPL-2.0 */
+/* THIS FILE WAS AUTOGENERATED BY jevents.py arch=none model=none ! */
+
+#include <pmu-events/pmu-events.h>
#include "util/header.h"
#include "util/pmu.h"
#include <string.h>
#include <stddef.h>
-static const struct pmu_event pmu_events__test_soc_cpu[] = {
- {
- .name = "l3_cache_rd",
- .event = "event=0x40",
- .desc = "L3 cache access, read",
- .topic = "cache",
- .long_desc = "Attributable Level 3 cache access, read",
- },
- {
- .name = "segment_reg_loads.any",
- .event = "event=0x6,period=200000,umask=0x80",
- .desc = "Number of segment register loads",
- .topic = "other",
- },
- {
- .name = "dispatch_blocked.any",
- .event = "event=0x9,period=200000,umask=0x20",
- .desc = "Memory cluster signals to block micro-op dispatch for any reason",
- .topic = "other",
- },
- {
- .name = "eist_trans",
- .event = "event=0x3a,period=200000,umask=0x0",
- .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
- .topic = "other",
- },
- {
- .name = "uncore_hisi_ddrc.flux_wcmd",
- .event = "event=0x2",
- .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ",
- .topic = "uncore",
- .long_desc = "DDRC write commands",
- .pmu = "hisi_sccl,ddrc",
- },
- {
- .name = "unc_cbo_xsnp_response.miss_eviction",
- .event = "event=0x22,umask=0x81",
- .desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core. Unit: uncore_cbox ",
- .topic = "uncore",
- .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
- .pmu = "uncore_cbox",
- },
- {
- .name = "event-hyphen",
- .event = "event=0xe0,umask=0x00",
- .desc = "UNC_CBO_HYPHEN. Unit: uncore_cbox ",
- .topic = "uncore",
- .long_desc = "UNC_CBO_HYPHEN",
- .pmu = "uncore_cbox",
- },
- {
- .name = "event-two-hyph",
- .event = "event=0xc0,umask=0x00",
- .desc = "UNC_CBO_TWO_HYPH. Unit: uncore_cbox ",
- .topic = "uncore",
- .long_desc = "UNC_CBO_TWO_HYPH",
- .pmu = "uncore_cbox",
- },
- {
- .name = "uncore_hisi_l3c.rd_hit_cpipe",
- .event = "event=0x7",
- .desc = "Total read hits. Unit: hisi_sccl,l3c ",
- .topic = "uncore",
- .long_desc = "Total read hits",
- .pmu = "hisi_sccl,l3c",
- },
- {
- .name = "uncore_imc_free_running.cache_miss",
- .event = "event=0x12",
- .desc = "Total cache misses. Unit: uncore_imc_free_running ",
- .topic = "uncore",
- .long_desc = "Total cache misses",
- .pmu = "uncore_imc_free_running",
- },
- {
- .name = "uncore_imc.cache_hits",
- .event = "event=0x34",
- .desc = "Total cache hits. Unit: uncore_imc ",
- .topic = "uncore",
- .long_desc = "Total cache hits",
- .pmu = "uncore_imc",
- },
- {
- .name = "bp_l1_btb_correct",
- .event = "event=0x8a",
- .desc = "L1 BTB Correction",
- .topic = "branch",
- },
- {
- .name = "bp_l2_btb_correct",
- .event = "event=0x8b",
- .desc = "L2 BTB Correction",
- .topic = "branch",
- },
- {
- .name = 0,
- .event = 0,
- .desc = 0,
- },
+struct compact_pmu_event {
+ int offset;
};
-static const struct pmu_metric pmu_metrics__test_soc_cpu[] = {
- {
- .metric_expr = "1 / IPC",
- .metric_name = "CPI",
- },
- {
- .metric_expr = "inst_retired.any / cpu_clk_unhalted.thread",
- .metric_name = "IPC",
- .metric_group = "group1",
- },
- {
- .metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * "
- "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
- .metric_name = "Frontend_Bound_SMT",
- },
- {
- .metric_expr = "l1d\\-loads\\-misses / inst_retired.any",
- .metric_name = "dcache_miss_cpi",
- },
- {
- .metric_expr = "l1i\\-loads\\-misses / inst_retired.any",
- .metric_name = "icache_miss_cycles",
- },
- {
- .metric_expr = "(dcache_miss_cpi + icache_miss_cycles)",
- .metric_name = "cache_miss_cycles",
- .metric_group = "group1",
- },
- {
- .metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
- .metric_name = "DCache_L2_All_Hits",
- },
- {
- .metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + "
- "l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
- .metric_name = "DCache_L2_All_Miss",
- },
- {
- .metric_expr = "DCache_L2_All_Hits + DCache_L2_All_Miss",
- .metric_name = "DCache_L2_All",
- },
- {
- .metric_expr = "d_ratio(DCache_L2_All_Hits, DCache_L2_All)",
- .metric_name = "DCache_L2_Hits",
- },
- {
- .metric_expr = "d_ratio(DCache_L2_All_Miss, DCache_L2_All)",
- .metric_name = "DCache_L2_Misses",
- },
- {
- .metric_expr = "ipc + M2",
- .metric_name = "M1",
- },
- {
- .metric_expr = "ipc + M1",
- .metric_name = "M2",
- },
- {
- .metric_expr = "1/M3",
- .metric_name = "M3",
- },
- {
- .metric_expr = "64 * l1d.replacement / 1000000000 / duration_time",
- .metric_name = "L1D_Cache_Fill_BW",
- },
- {
- .metric_expr = 0,
- .metric_name = 0,
- },
+struct pmu_table_entry {
+ const struct compact_pmu_event *entries;
+ uint32_t num_entries;
+ struct compact_pmu_event pmu_name;
+};
+
+static const char *const big_c_string =
+/* offset=0 */ "default_core\000"
+/* offset=13 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000"
+/* offset=72 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000"
+/* offset=131 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000"
+/* offset=226 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000"
+/* offset=325 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000"
+/* offset=455 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000"
+/* offset=570 */ "hisi_sccl,ddrc\000"
+/* offset=585 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000"
+/* offset=671 */ "uncore_cbox\000"
+/* offset=683 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000"
+/* offset=914 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000"
+/* offset=979 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000"
+/* offset=1050 */ "hisi_sccl,l3c\000"
+/* offset=1064 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000"
+/* offset=1144 */ "uncore_imc_free_running\000"
+/* offset=1168 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000"
+/* offset=1263 */ "uncore_imc\000"
+/* offset=1274 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000"
+/* offset=1352 */ "uncore_sys_ddr_pmu\000"
+/* offset=1371 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000"
+/* offset=1444 */ "uncore_sys_ccn_pmu\000"
+/* offset=1463 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000"
+/* offset=1537 */ "uncore_sys_cmn_pmu\000"
+/* offset=1556 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000"
+/* offset=1696 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000"
+/* offset=1718 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000"
+/* offset=1781 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000"
+/* offset=1947 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000"
+/* offset=2011 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000"
+/* offset=2078 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000"
+/* offset=2149 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000"
+/* offset=2243 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000"
+/* offset=2377 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000"
+/* offset=2441 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000"
+/* offset=2509 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000"
+/* offset=2579 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000"
+/* offset=2601 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000"
+/* offset=2623 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000"
+/* offset=2643 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000"
+;
+
+static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = {
+{ 13 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000 */
+{ 72 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000 */
+{ 325 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000 */
+{ 455 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000 */
+{ 131 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000 */
+{ 226 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000 */
+};
+static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_ddrc[] = {
+{ 585 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000 */
+};
+static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_l3c[] = {
+{ 1064 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000 */
+};
+static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_cbox[] = {
+{ 914 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000 */
+{ 979 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000 */
+{ 683 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */
+};
+static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc[] = {
+{ 1274 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000 */
+};
+static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc_free_running[] = {
+{ 1168 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000 */
+
+};
+
+const struct pmu_table_entry pmu_events__test_soc_cpu[] = {
+{
+ .entries = pmu_events__test_soc_cpu_default_core,
+ .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_default_core),
+ .pmu_name = { 0 /* default_core\000 */ },
+},
+{
+ .entries = pmu_events__test_soc_cpu_hisi_sccl_ddrc,
+ .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_ddrc),
+ .pmu_name = { 570 /* hisi_sccl,ddrc\000 */ },
+},
+{
+ .entries = pmu_events__test_soc_cpu_hisi_sccl_l3c,
+ .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_l3c),
+ .pmu_name = { 1050 /* hisi_sccl,l3c\000 */ },
+},
+{
+ .entries = pmu_events__test_soc_cpu_uncore_cbox,
+ .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_cbox),
+ .pmu_name = { 671 /* uncore_cbox\000 */ },
+},
+{
+ .entries = pmu_events__test_soc_cpu_uncore_imc,
+ .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc),
+ .pmu_name = { 1263 /* uncore_imc\000 */ },
+},
+{
+ .entries = pmu_events__test_soc_cpu_uncore_imc_free_running,
+ .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc_free_running),
+ .pmu_name = { 1144 /* uncore_imc_free_running\000 */ },
+},
};
+static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = {
+{ 1696 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */
+{ 2377 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */
+{ 2149 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */
+{ 2243 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */
+{ 2441 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */
+{ 2509 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */
+{ 1781 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */
+{ 1718 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */
+{ 2643 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */
+{ 2579 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */
+{ 2601 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */
+{ 2623 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */
+{ 2078 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */
+{ 1947 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */
+{ 2011 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */
+
+};
+
+const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = {
+{
+ .entries = pmu_metrics__test_soc_cpu_default_core,
+ .num_entries = ARRAY_SIZE(pmu_metrics__test_soc_cpu_default_core),
+ .pmu_name = { 0 /* default_core\000 */ },
+},
+};
+
+static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ccn_pmu[] = {
+{ 1463 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000 */
+};
+static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_cmn_pmu[] = {
+{ 1556 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000 */
+};
+static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ddr_pmu[] = {
+{ 1371 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000 */
+
+};
+
+const struct pmu_table_entry pmu_events__test_soc_sys[] = {
+{
+ .entries = pmu_events__test_soc_sys_uncore_sys_ccn_pmu,
+ .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ccn_pmu),
+ .pmu_name = { 1444 /* uncore_sys_ccn_pmu\000 */ },
+},
+{
+ .entries = pmu_events__test_soc_sys_uncore_sys_cmn_pmu,
+ .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_cmn_pmu),
+ .pmu_name = { 1537 /* uncore_sys_cmn_pmu\000 */ },
+},
+{
+ .entries = pmu_events__test_soc_sys_uncore_sys_ddr_pmu,
+ .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ddr_pmu),
+ .pmu_name = { 1352 /* uncore_sys_ddr_pmu\000 */ },
+},
+};
+
+
/* Struct used to make the PMU event table implementation opaque to callers. */
struct pmu_events_table {
- const struct pmu_event *entries;
+ const struct pmu_table_entry *pmus;
+ uint32_t num_pmus;
};
/* Struct used to make the PMU metric table implementation opaque to callers. */
struct pmu_metrics_table {
- const struct pmu_metric *entries;
+ const struct pmu_table_entry *pmus;
+ uint32_t num_pmus;
};
/*
@@ -202,92 +199,191 @@ struct pmu_metrics_table {
* The cpuid can contain any character other than the comma.
*/
struct pmu_events_map {
- const char *arch;
- const char *cpuid;
- const struct pmu_events_table event_table;
- const struct pmu_metrics_table metric_table;
+ const char *arch;
+ const char *cpuid;
+ struct pmu_events_table event_table;
+ struct pmu_metrics_table metric_table;
};
/*
* Global table mapping each known CPU for the architecture to its
* table of PMU events.
*/
-static const struct pmu_events_map pmu_events_map[] = {
- {
- .arch = "testarch",
- .cpuid = "testcpu",
- .event_table = { pmu_events__test_soc_cpu },
- .metric_table = { pmu_metrics__test_soc_cpu },
- },
- {
- .arch = 0,
- .cpuid = 0,
- .event_table = { 0 },
- .metric_table = { 0 },
- },
-};
-
-static const struct pmu_event pmu_events__test_soc_sys[] = {
- {
- .name = "sys_ddr_pmu.write_cycles",
- .event = "event=0x2b",
- .desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ",
- .compat = "v8",
- .topic = "uncore",
- .pmu = "uncore_sys_ddr_pmu",
- },
- {
- .name = "sys_ccn_pmu.read_cycles",
- .event = "config=0x2c",
- .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ",
- .compat = "0x01",
- .topic = "uncore",
- .pmu = "uncore_sys_ccn_pmu",
- },
- {
- .name = "sys_cmn_pmu.hnf_cache_miss",
- .event = "eventid=0x1,type=0x5",
- .desc = "Counts total cache misses in first lookup result (high priority). Unit: uncore_sys_cmn_pmu ",
- .compat = "(434|436|43c|43a).*",
- .topic = "uncore",
- .pmu = "uncore_sys_cmn_pmu",
- },
- {
- .name = 0,
- .event = 0,
- .desc = 0,
- },
+const struct pmu_events_map pmu_events_map[] = {
+{
+ .arch = "testarch",
+ .cpuid = "testcpu",
+ .event_table = {
+ .pmus = pmu_events__test_soc_cpu,
+ .num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu),
+ },
+ .metric_table = {
+ .pmus = pmu_metrics__test_soc_cpu,
+ .num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
+ }
+},
+{
+ .arch = 0,
+ .cpuid = 0,
+ .event_table = { 0, 0 },
+ .metric_table = { 0, 0 },
+}
};
struct pmu_sys_events {
const char *name;
- const struct pmu_events_table table;
+ struct pmu_events_table event_table;
+ struct pmu_metrics_table metric_table;
};
static const struct pmu_sys_events pmu_sys_event_tables[] = {
{
- .table = { pmu_events__test_soc_sys },
+ .event_table = {
+ .pmus = pmu_events__test_soc_sys,
+ .num_pmus = ARRAY_SIZE(pmu_events__test_soc_sys)
+ },
.name = "pmu_events__test_soc_sys",
},
{
- .table = { 0 }
+ .event_table = { 0, 0 },
+ .metric_table = { 0, 0 },
},
};
-int pmu_events_table__for_each_event(const struct pmu_events_table *table, struct perf_pmu *pmu,
- pmu_event_iter_fn fn, void *data)
+static void decompress_event(int offset, struct pmu_event *pe)
+{
+ const char *p = &big_c_string[offset];
+
+ pe->name = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pe->topic = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pe->desc = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pe->event = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pe->compat = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pe->deprecated = *p - '0';
+ p++;
+ pe->perpkg = *p - '0';
+ p++;
+ pe->unit = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pe->long_desc = (*p == '\0' ? NULL : p);
+}
+
+static void decompress_metric(int offset, struct pmu_metric *pm)
{
- for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
- int ret;
+ const char *p = &big_c_string[offset];
+
+ pm->metric_name = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->metric_group = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->metric_expr = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->metric_threshold = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->desc = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->long_desc = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->unit = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->compat = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->metricgroup_no_group = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->default_metricgroup_name = (*p == '\0' ? NULL : p);
+ while (*p++);
+ pm->aggr_mode = *p - '0';
+ p++;
+ pm->event_grouping = *p - '0';
+}
- if (pmu && !pmu__name_match(pmu, pe->pmu))
+static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
+ const struct pmu_table_entry *pmu,
+ pmu_event_iter_fn fn,
+ void *data)
+{
+ int ret;
+ struct pmu_event pe = {
+ .pmu = &big_c_string[pmu->pmu_name.offset],
+ };
+
+ for (uint32_t i = 0; i < pmu->num_entries; i++) {
+ decompress_event(pmu->entries[i].offset, &pe);
+ if (!pe.name)
continue;
+ ret = fn(&pe, table, data);
+ if (ret)
+ return ret;
+ }
+ return 0;
+ }
+
+static int pmu_events_table__find_event_pmu(const struct pmu_events_table *table,
+ const struct pmu_table_entry *pmu,
+ const char *name,
+ pmu_event_iter_fn fn,
+ void *data)
+{
+ struct pmu_event pe = {
+ .pmu = &big_c_string[pmu->pmu_name.offset],
+ };
+ int low = 0, high = pmu->num_entries - 1;
- ret = fn(pe, table, data);
- if (ret)
- return ret;
- }
- return 0;
+ while (low <= high) {
+ int cmp, mid = (low + high) / 2;
+
+ decompress_event(pmu->entries[mid].offset, &pe);
+
+ if (!pe.name && !name)
+ goto do_call;
+
+ if (!pe.name && name) {
+ low = mid + 1;
+ continue;
+ }
+ if (pe.name && !name) {
+ high = mid - 1;
+ continue;
+ }
+
+ cmp = strcasecmp(pe.name, name);
+ if (cmp < 0) {
+ low = mid + 1;
+ continue;
+ }
+ if (cmp > 0) {
+ high = mid - 1;
+ continue;
+ }
+ do_call:
+ return fn ? fn(&pe, table, data) : 0;
+ }
+ return PMU_EVENTS__NOT_FOUND;
+}
+
+int pmu_events_table__for_each_event(const struct pmu_events_table *table,
+ struct perf_pmu *pmu,
+ pmu_event_iter_fn fn,
+ void *data)
+{
+ for (size_t i = 0; i < table->num_pmus; i++) {
+ const struct pmu_table_entry *table_pmu = &table->pmus[i];
+ const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
+ int ret;
+
+ if (pmu && !pmu__name_match(pmu, pmu_name))
+ continue;
+
+ ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data);
+ if (pmu || ret)
+ return ret;
+ }
+ return 0;
}
int pmu_events_table__find_event(const struct pmu_events_table *table,
@@ -296,14 +392,19 @@ int pmu_events_table__find_event(const struct pmu_events_table *table,
pmu_event_iter_fn fn,
void *data)
{
- for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
- if (pmu && !pmu__name_match(pmu, pe->pmu))
+ for (size_t i = 0; i < table->num_pmus; i++) {
+ const struct pmu_table_entry *table_pmu = &table->pmus[i];
+ const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
+ int ret;
+
+ if (!pmu__name_match(pmu, pmu_name))
continue;
- if (!strcasecmp(pe->name, name))
- return fn(pe, table, data);
- }
- return -1000;
+ ret = pmu_events_table__find_event_pmu(table, table_pmu, name, fn, data);
+ if (ret != PMU_EVENTS__NOT_FOUND)
+ return ret;
+ }
+ return PMU_EVENTS__NOT_FOUND;
}
size_t pmu_events_table__num_events(const struct pmu_events_table *table,
@@ -311,160 +412,253 @@ size_t pmu_events_table__num_events(const struct pmu_events_table *table,
{
size_t count = 0;
- for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
- if (pmu && !pmu__name_match(pmu, pe->pmu))
- continue;
+ for (size_t i = 0; i < table->num_pmus; i++) {
+ const struct pmu_table_entry *table_pmu = &table->pmus[i];
+ const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
- count++;
- }
+ if (pmu__name_match(pmu, pmu_name))
+ count += table_pmu->num_entries;
+ }
return count;
}
-int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
- void *data)
+static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table,
+ const struct pmu_table_entry *pmu,
+ pmu_metric_iter_fn fn,
+ void *data)
+{
+ int ret;
+ struct pmu_metric pm = {
+ .pmu = &big_c_string[pmu->pmu_name.offset],
+ };
+
+ for (uint32_t i = 0; i < pmu->num_entries; i++) {
+ decompress_metric(pmu->entries[i].offset, &pm);
+ if (!pm.metric_expr)
+ continue;
+ ret = fn(&pm, table, data);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
+ pmu_metric_iter_fn fn,
+ void *data)
{
- for (const struct pmu_metric *pm = &table->entries[0]; pm->metric_expr; pm++) {
- int ret = fn(pm, table, data);
+ for (size_t i = 0; i < table->num_pmus; i++) {
+ int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
+ fn, data);
+
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
- if (ret)
- return ret;
- }
- return 0;
+static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
+{
+ static struct {
+ const struct pmu_events_map *map;
+ struct perf_pmu *pmu;
+ } last_result;
+ static struct {
+ const struct pmu_events_map *map;
+ char *cpuid;
+ } last_map_search;
+ static bool has_last_result, has_last_map_search;
+ const struct pmu_events_map *map = NULL;
+ char *cpuid = NULL;
+ size_t i;
+
+ if (has_last_result && last_result.pmu == pmu)
+ return last_result.map;
+
+ cpuid = perf_pmu__getcpuid(pmu);
+
+ /*
+ * On some platforms which uses cpus map, cpuid can be NULL for
+ * PMUs other than CORE PMUs.
+ */
+ if (!cpuid)
+ goto out_update_last_result;
+
+ if (has_last_map_search && !strcmp(last_map_search.cpuid, cpuid)) {
+ map = last_map_search.map;
+ free(cpuid);
+ } else {
+ i = 0;
+ for (;;) {
+ map = &pmu_events_map[i++];
+
+ if (!map->arch) {
+ map = NULL;
+ break;
+ }
+
+ if (!strcmp_cpuid_str(map->cpuid, cpuid))
+ break;
+ }
+ free(last_map_search.cpuid);
+ last_map_search.cpuid = cpuid;
+ last_map_search.map = map;
+ has_last_map_search = true;
+ }
+out_update_last_result:
+ last_result.pmu = pmu;
+ last_result.map = map;
+ has_last_result = true;
+ return map;
}
const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
{
- const struct pmu_events_table *table = NULL;
- char *cpuid = perf_pmu__getcpuid(pmu);
- int i;
+ const struct pmu_events_map *map = map_for_pmu(pmu);
- /* on some platforms which uses cpus map, cpuid can be NULL for
- * PMUs other than CORE PMUs.
- */
- if (!cpuid)
- return NULL;
+ if (!map)
+ return NULL;
- i = 0;
- for (;;) {
- const struct pmu_events_map *map = &pmu_events_map[i++];
+ if (!pmu)
+ return &map->event_table;
- if (!map->cpuid)
- break;
+ for (size_t i = 0; i < map->event_table.num_pmus; i++) {
+ const struct pmu_table_entry *table_pmu = &map->event_table.pmus[i];
+ const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
- if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
- table = &map->event_table;
- break;
- }
- }
- free(cpuid);
- return table;
+ if (pmu__name_match(pmu, pmu_name))
+ return &map->event_table;
+ }
+ return NULL;
}
const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
{
- const struct pmu_metrics_table *table = NULL;
- char *cpuid = perf_pmu__getcpuid(pmu);
- int i;
+ const struct pmu_events_map *map = map_for_pmu(pmu);
- /* on some platforms which uses cpus map, cpuid can be NULL for
- * PMUs other than CORE PMUs.
- */
- if (!cpuid)
- return NULL;
+ if (!map)
+ return NULL;
- i = 0;
- for (;;) {
- const struct pmu_events_map *map = &pmu_events_map[i++];
+ if (!pmu)
+ return &map->metric_table;
- if (!map->cpuid)
- break;
+ for (size_t i = 0; i < map->metric_table.num_pmus; i++) {
+ const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i];
+ const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
- if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
- table = &map->metric_table;
- break;
- }
- }
- free(cpuid);
- return table;
+ if (pmu__name_match(pmu, pmu_name))
+ return &map->metric_table;
+ }
+ return NULL;
}
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
{
- for (const struct pmu_events_map *tables = &pmu_events_map[0];
- tables->arch;
- tables++) {
- if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
- return &tables->event_table;
- }
- return NULL;
+ for (const struct pmu_events_map *tables = &pmu_events_map[0];
+ tables->arch;
+ tables++) {
+ if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
+ return &tables->event_table;
+ }
+ return NULL;
}
const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid)
{
- for (const struct pmu_events_map *tables = &pmu_events_map[0];
- tables->arch;
- tables++) {
- if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
- return &tables->metric_table;
- }
- return NULL;
+ for (const struct pmu_events_map *tables = &pmu_events_map[0];
+ tables->arch;
+ tables++) {
+ if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
+ return &tables->metric_table;
+ }
+ return NULL;
}
int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
{
- for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) {
- int ret = pmu_events_table__for_each_event(&tables->event_table,
- /*pmu=*/ NULL, fn, data);
-
- if (ret)
- return ret;
- }
- return 0;
+ for (const struct pmu_events_map *tables = &pmu_events_map[0];
+ tables->arch;
+ tables++) {
+ int ret = pmu_events_table__for_each_event(&tables->event_table,
+ /*pmu=*/ NULL, fn, data);
+
+ if (ret)
+ return ret;
+ }
+ return 0;
}
int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
{
- for (const struct pmu_events_map *tables = &pmu_events_map[0];
- tables->arch;
- tables++) {
- int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
-
- if (ret)
- return ret;
- }
- return 0;
+ for (const struct pmu_events_map *tables = &pmu_events_map[0];
+ tables->arch;
+ tables++) {
+ int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
+
+ if (ret)
+ return ret;
+ }
+ return 0;
}
const struct pmu_events_table *find_sys_events_table(const char *name)
{
- for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
- tables->name;
- tables++) {
- if (!strcmp(tables->name, name))
- return &tables->table;
- }
- return NULL;
+ for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
+ tables->name;
+ tables++) {
+ if (!strcmp(tables->name, name))
+ return &tables->event_table;
+ }
+ return NULL;
}
int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
{
- for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
- tables->name;
- tables++) {
- int ret = pmu_events_table__for_each_event(&tables->table, /*pmu=*/ NULL, fn, data);
-
- if (ret)
- return ret;
- }
- return 0;
+ for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
+ tables->name;
+ tables++) {
+ int ret = pmu_events_table__for_each_event(&tables->event_table,
+ /*pmu=*/ NULL, fn, data);
+
+ if (ret)
+ return ret;
+ }
+ return 0;
}
-int pmu_for_each_sys_metric(pmu_metric_iter_fn fn __maybe_unused, void *data __maybe_unused)
+int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
{
- return 0;
+ for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
+ tables->name;
+ tables++) {
+ int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
+
+ if (ret)
+ return ret;
+ }
+ return 0;
}
-const char *describe_metricgroup(const char *group __maybe_unused)
+static const int metricgroups[][2] = {
+
+};
+
+const char *describe_metricgroup(const char *group)
{
- return NULL;
+ int low = 0, high = (int)ARRAY_SIZE(metricgroups) - 1;
+
+ while (low <= high) {
+ int mid = (low + high) / 2;
+ const char *mgroup = &big_c_string[metricgroups[mid][0]];
+ int cmp = strcmp(mgroup, group);
+
+ if (cmp == 0) {
+ return &big_c_string[metricgroups[mid][1]];
+ } else if (cmp < 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ }
+ }
+ return NULL;
}
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index ac9b7ca41856..bb0a5d92df4a 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -503,8 +503,11 @@ def print_pending_events() -> None:
first = True
last_pmu = None
+ last_name = None
pmus = set()
for event in sorted(_pending_events, key=event_cmp_key):
+ if last_pmu and last_pmu == event.pmu:
+ assert event.name != last_name, f"Duplicate event: {last_pmu}/{last_name}/ in {_pending_events_tblname}"
if event.pmu != last_pmu:
if not first:
_args.output_file.write('};\n')
@@ -516,6 +519,7 @@ def print_pending_events() -> None:
pmus.add((event.pmu, pmu_name))
_args.output_file.write(event.to_c_string(metric=False))
+ last_name = event.name
_pending_events = []
_args.output_file.write(f"""
@@ -631,14 +635,17 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
"""Process a JSON file during the main walk."""
- def is_leaf_dir(path: str) -> bool:
+ def is_leaf_dir_ignoring_sys(path: str) -> bool:
for item in os.scandir(path):
- if item.is_dir():
+ if item.is_dir() and item.name != 'sys':
return False
return True
- # model directory, reset topic
- if item.is_dir() and is_leaf_dir(item.path):
+ # Model directories are leaves (ignoring possible sys
+ # directories). The FTW will walk into the directory next. Flush
+ # pending events and metrics and update the table names for the new
+ # model directory.
+ if item.is_dir() and is_leaf_dir_ignoring_sys(item.path):
print_pending_events()
print_pending_metrics()
@@ -906,7 +913,7 @@ static int pmu_events_table__find_event_pmu(const struct pmu_events_table *table
do_call:
return fn ? fn(&pe, table, data) : 0;
}
- return -1000;
+ return PMU_EVENTS__NOT_FOUND;
}
int pmu_events_table__for_each_event(const struct pmu_events_table *table,
@@ -944,10 +951,10 @@ int pmu_events_table__find_event(const struct pmu_events_table *table,
continue;
ret = pmu_events_table__find_event_pmu(table, table_pmu, name, fn, data);
- if (ret != -1000)
+ if (ret != PMU_EVENTS__NOT_FOUND)
return ret;
}
- return -1000;
+ return PMU_EVENTS__NOT_FOUND;
}
size_t pmu_events_table__num_events(const struct pmu_events_table *table,
@@ -1256,6 +1263,10 @@ such as "arm/cortex-a34".''',
'output_file', type=argparse.FileType('w', encoding='utf-8'), nargs='?', default=sys.stdout)
_args = ap.parse_args()
+ _args.output_file.write(f"""
+/* SPDX-License-Identifier: GPL-2.0 */
+/* THIS FILE WAS AUTOGENERATED BY jevents.py arch={_args.arch} model={_args.model} ! */
+""")
_args.output_file.write("""
#include <pmu-events/pmu-events.h>
#include "util/header.h"
@@ -1281,7 +1292,7 @@ struct pmu_table_entry {
if item.name == _args.arch or _args.arch == 'all' or item.name == 'test':
archs.append(item.name)
- if len(archs) < 2:
+ if len(archs) < 2 and _args.arch != 'none':
raise IOError(f'Missing architecture directory \'{_args.arch}\'')
archs.sort()
diff --git a/tools/perf/pmu-events/models.py b/tools/perf/pmu-events/models.py
new file mode 100755
index 000000000000..8f727d29c952
--- /dev/null
+++ b/tools/perf/pmu-events/models.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+"""List model names from mapfile.csv files."""
+import argparse
+import csv
+import os
+import re
+from typing import List
+
+def main() -> None:
+ def dir_path(path: str) -> str:
+ """Validate path is a directory for argparse."""
+ if os.path.isdir(path):
+ return path
+ raise argparse.ArgumentTypeError(f'\'{path}\' is not a valid directory')
+
+ def find_archs(start_dir: str, arch: str) -> List[str]:
+ archs = []
+ for item in os.scandir(start_dir):
+ if not item.is_dir():
+ continue
+ if arch in (item.name, 'all'):
+ archs.append(item.name)
+
+ if len(archs) < 1:
+ raise IOError(f'Missing architecture directory \'{arch}\'')
+
+ return archs
+
+ def find_mapfiles(start_dir: str, archs: List[str]) -> List[str]:
+ result = []
+ for arch in archs:
+ for item in os.scandir(f'{start_dir}/{arch}'):
+ if item.is_dir():
+ continue
+ if item.name == 'mapfile.csv':
+ result.append(f'{start_dir}/{arch}/mapfile.csv')
+ return result
+
+ def find_cpuids(mapfiles: List[str], cpuids: str) -> List[str]:
+ result = []
+ for mapfile in mapfiles:
+ with open(mapfile, encoding='utf-8') as csvfile:
+ first = False
+ table = csv.reader(csvfile)
+ for row in table:
+ if not first or len(row) == 0 or row[0].startswith('#'):
+ first = True
+ continue
+ # Python regular expressions don't handle xdigit.
+ regex = row[0].replace('[[:xdigit:]]', '[0-9a-fA-F]')
+ for cpuid in cpuids.split(','):
+ if re.match(regex, cpuid):
+ result.append(row[2])
+ return result
+
+ ap = argparse.ArgumentParser()
+ ap.add_argument('arch', help='Architecture name like x86')
+ ap.add_argument('cpuid', default='all', help='List of cpuids to convert to model names')
+ ap.add_argument(
+ 'starting_dir',
+ type=dir_path,
+ help='Root of tree containing architecture directories containing json files'
+ )
+ args = ap.parse_args()
+
+ archs = find_archs(args.starting_dir, args.arch)
+ mapfiles = find_mapfiles(args.starting_dir, archs)
+ models = find_cpuids(mapfiles, args.cpuid)
+ print(','.join(models))
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index f5aa96f1685c..5435ad92180c 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -70,6 +70,8 @@ struct pmu_metric {
struct pmu_events_table;
struct pmu_metrics_table;
+#define PMU_EVENTS__NOT_FOUND -1000
+
typedef int (*pmu_event_iter_fn)(const struct pmu_event *pe,
const struct pmu_events_table *table,
void *data);
@@ -82,6 +84,13 @@ int pmu_events_table__for_each_event(const struct pmu_events_table *table,
struct perf_pmu *pmu,
pmu_event_iter_fn fn,
void *data);
+/*
+ * Search for table and entry matching with pmu__name_match. Each matching event
+ * has fn called on it. 0 implies to success/continue the search while non-zero
+ * means to terminate. The special value PMU_EVENTS__NOT_FOUND is used to
+ * indicate no event was found in one of the tables which doesn't terminate the
+ * search of all tables.
+ */
int pmu_events_table__find_event(const struct pmu_events_table *table,
struct perf_pmu *pmu,
const char *name,
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index d973c2baed1c..7aff02d84ffb 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -192,17 +192,16 @@ def process_event(param_dict):
ip = sample["ip"]
addr = sample["addr"]
+ if (options.verbose == True):
+ print("Event type: %s" % name)
+ print_sample(sample)
+
# Initialize CPU data if it's empty, and directly return back
# if this is the first tracing event for this CPU.
if (cpu_data.get(str(cpu) + 'addr') == None):
cpu_data[str(cpu) + 'addr'] = addr
return
-
- if (options.verbose == True):
- print("Event type: %s" % name)
- print_sample(sample)
-
# If cannot find dso so cannot dump assembler, bail out
if (dso == '[unknown]'):
return
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index 6f921db33cf9..4cb7d486b5c1 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -16,6 +16,7 @@
#include "tests.h"
#include "debug.h"
#include "event.h"
+#include "parse-events.h"
#include "../perf-sys.h"
#include "cloexec.h"
@@ -50,7 +51,7 @@ static int __event(bool is_x, void *addr, struct perf_event_attr *attr)
attr->config = 0;
attr->bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W;
attr->bp_addr = (unsigned long) addr;
- attr->bp_len = sizeof(long);
+ attr->bp_len = is_x ? default_breakpoint_len() : sizeof(long);
attr->sample_period = 1;
attr->sample_type = PERF_SAMPLE_IP;
@@ -92,6 +93,7 @@ static int bp_accounting(int wp_cnt, int share)
attr_mod = attr;
attr_mod.bp_type = HW_BREAKPOINT_X;
attr_mod.bp_addr = (unsigned long) test_function;
+ attr_mod.bp_len = default_breakpoint_len();
ret = ioctl(fd[0], PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr_mod);
TEST_ASSERT_VAL("failed to modify wp\n", ret == 0);
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index 1f2908f02389..3faeb5b6fe0b 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -26,6 +26,7 @@
#include "tests.h"
#include "debug.h"
#include "event.h"
+#include "parse-events.h"
#include "perf-sys.h"
#include "cloexec.h"
@@ -111,7 +112,7 @@ static int __event(bool is_x, void *addr, int sig)
pe.config = 0;
pe.bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W;
pe.bp_addr = (unsigned long) addr;
- pe.bp_len = sizeof(long);
+ pe.bp_len = is_x ? default_breakpoint_len() : sizeof(long);
pe.sample_period = 1;
pe.sample_type = PERF_SAMPLE_IP;
diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c
index 4e897c2cf26b..ee560e156be6 100644
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -25,6 +25,7 @@
#include "tests.h"
#include "debug.h"
#include "event.h"
+#include "parse-events.h"
#include "../perf-sys.h"
#include "cloexec.h"
@@ -88,7 +89,7 @@ static int test__bp_signal_overflow(struct test_suite *test __maybe_unused, int
pe.config = 0;
pe.bp_type = HW_BREAKPOINT_X;
pe.bp_addr = (unsigned long) test_function;
- pe.bp_len = sizeof(long);
+ pe.bp_len = default_breakpoint_len();
pe.sample_period = THRESHOLD;
pe.sample_type = PERF_SAMPLE_IP;
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index c3d84b67ca8e..470a9709427d 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -152,6 +152,7 @@ static struct test_workload *workloads[] = {
&workload__sqrtloop,
&workload__brstack,
&workload__datasym,
+ &workload__landlock,
};
static int num_subtests(const struct test_suite *t)
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index bd8e396f3e57..2f0168b2a5a9 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -11,7 +11,7 @@
struct machine;
-static int process_event_mask(struct perf_tool *tool __maybe_unused,
+static int process_event_mask(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -47,7 +47,7 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused,
return 0;
}
-static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+static int process_event_cpus(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -73,7 +73,7 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
return 0;
}
-static int process_event_range_cpus(struct perf_tool *tool __maybe_unused,
+static int process_event_range_cpus(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c
index da3a9b50b1b1..54f59d1246bc 100644
--- a/tools/perf/tests/dlfilter-test.c
+++ b/tools/perf/tests/dlfilter-test.c
@@ -62,7 +62,7 @@ static int test_result(const char *msg, int ret)
return ret;
}
-static int process(struct perf_tool *tool, union perf_event *event,
+static int process(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
{
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index d01aa931fe81..f85d391ced98 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -37,7 +37,7 @@
#define NO_TAIL_CALL_BARRIER __asm__ __volatile__("" : : : "memory");
#endif
-static int mmap_handler(struct perf_tool *tool __maybe_unused,
+static int mmap_handler(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index d093a9b878d1..d6b4ce3ef4ee 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -12,7 +12,7 @@
#include "tests.h"
#include "debug.h"
-static int process_event_unit(struct perf_tool *tool __maybe_unused,
+static int process_event_unit(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -25,7 +25,7 @@ static int process_event_unit(struct perf_tool *tool __maybe_unused,
return 0;
}
-static int process_event_scale(struct perf_tool *tool __maybe_unused,
+static int process_event_scale(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -43,7 +43,7 @@ struct event_name {
const char *name;
};
-static int process_event_name(struct perf_tool *tool,
+static int process_event_name(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -57,7 +57,7 @@ static int process_event_name(struct perf_tool *tool,
return 0;
}
-static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+static int process_event_cpus(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -103,6 +103,7 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes
TEST_ASSERT_VAL("failed to synthesize attr update scale",
!perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale));
+ perf_tool__init(&tmp.tool, /*ordered_events=*/false);
tmp.name = evsel__name(evsel);
TEST_ASSERT_VAL("failed to synthesize attr update name",
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index a1f8adf85367..a5040772043f 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -70,6 +70,7 @@ make_python_perf_so := $(python_perf_so)
make_debug := DEBUG=1
make_nondistro := BUILD_NONDISTRO=1
make_extra_tests := EXTRA_TESTS=1
+make_jevents_all := JEVENTS_ARCH=all
make_no_bpf_skel := BUILD_BPF_SKEL=0
make_gen_vmlinux_h := GEN_VMLINUX_H=1
make_no_libperl := NO_LIBPERL=1
@@ -92,6 +93,7 @@ make_no_libbpf := NO_LIBBPF=1
make_libbpf_dynamic := LIBBPF_DYNAMIC=1
make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1
make_no_libcrypto := NO_LIBCRYPTO=1
+make_no_libllvm := NO_LIBLLVM=1
make_with_babeltrace:= LIBBABELTRACE=1
make_with_coresight := CORESIGHT=1
make_no_sdt := NO_SDT=1
@@ -140,6 +142,7 @@ run += make_python_perf_so
run += make_debug
run += make_nondistro
run += make_extra_tests
+run += make_jevents_all
run += make_no_bpf_skel
run += make_gen_vmlinux_h
run += make_no_libperl
@@ -161,6 +164,7 @@ run += make_no_auxtrace
run += make_no_libbpf
run += make_no_libbpf_DEBUG
run += make_no_libcrypto
+run += make_no_libllvm
run += make_no_sdt
run += make_no_syscall_tbl
run += make_with_babeltrace
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index edc2adcf1bae..9e3086d02150 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -262,7 +262,7 @@ static int test__checkevent_breakpoint_x(struct evlist *evlist)
TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
TEST_ASSERT_VAL("wrong bp_type",
HW_BREAKPOINT_X == evsel->core.attr.bp_type);
- TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->core.attr.bp_len);
+ TEST_ASSERT_VAL("wrong bp_len", default_breakpoint_len() == evsel->core.attr.bp_len);
return TEST_OK;
}
@@ -2500,7 +2500,7 @@ static int test_event(const struct evlist_test *e)
return TEST_FAIL;
}
parse_events_error__init(&err);
- ret = __parse_events(evlist, e->name, /*pmu_filter=*/NULL, &err, /*fake_pmu=*/NULL,
+ ret = __parse_events(evlist, e->name, /*pmu_filter=*/NULL, &err, /*fake_pmu=*/false,
/*warn_if_reordered=*/true, /*fake_tp=*/true);
if (ret) {
pr_debug("failed to parse event '%s', err %d\n", e->name, ret);
@@ -2529,7 +2529,7 @@ static int test_event_fake_pmu(const char *str)
parse_events_error__init(&err);
ret = __parse_events(evlist, str, /*pmu_filter=*/NULL, &err,
- &perf_pmu__fake, /*warn_if_reordered=*/true,
+ /*fake_pmu=*/true, /*warn_if_reordered=*/true,
/*fake_tp=*/true);
if (ret) {
pr_debug("failed to parse event '%s', err %d\n",
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index ff3e7bc0a77f..db004d26fcb0 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -819,8 +819,7 @@ static bool is_number(const char *str)
return errno == 0 && end_ptr != str;
}
-static int check_parse_id(const char *id, struct parse_events_error *error,
- struct perf_pmu *fake_pmu)
+static int check_parse_id(const char *id, struct parse_events_error *error)
{
struct evlist *evlist;
int ret;
@@ -841,7 +840,7 @@ static int check_parse_id(const char *id, struct parse_events_error *error,
for (cur = strchr(dup, '@') ; cur; cur = strchr(++cur, '@'))
*cur = '/';
- ret = __parse_events(evlist, dup, /*pmu_filter=*/NULL, error, fake_pmu,
+ ret = __parse_events(evlist, dup, /*pmu_filter=*/NULL, error, /*fake_pmu=*/true,
/*warn_if_reordered=*/true, /*fake_tp=*/false);
free(dup);
@@ -855,7 +854,7 @@ static int check_parse_fake(const char *id)
int ret;
parse_events_error__init(&error);
- ret = check_parse_id(id, &error, &perf_pmu__fake);
+ ret = check_parse_id(id, &error);
parse_events_error__exit(&error);
return ret;
}
@@ -1051,9 +1050,8 @@ static int test__parsing_fake_callback(const struct pmu_metric *pm,
}
/*
- * Parse all the metrics for current architecture,
- * or all defined cpus via the 'fake_pmu'
- * in parse_events.
+ * Parse all the metrics for current architecture, or all defined cpus via the
+ * 'fake_pmu' in parse_events.
*/
static int test__parsing_fake(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 40132655ccd1..be18506f6a24 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -18,9 +18,6 @@
#include <sys/stat.h>
#include <sys/types.h>
-/* Fake PMUs created in temp directory. */
-static LIST_HEAD(test_pmus);
-
/* Cleanup test PMU directory. */
static int test_pmu_put(const char *dir, struct perf_pmu *pmu)
{
@@ -456,13 +453,15 @@ static int test__name_cmp(struct test_suite *test __maybe_unused, int subtest __
/**
* Test perf_pmu__match() that's used to search for a PMU given a name passed
* on the command line. The name that's passed may also be a filename type glob
- * match.
+ * match. If the name does not match, perf_pmu__match() attempts to match the
+ * alias of the PMU, if provided.
*/
static int test__pmu_match(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
- struct perf_pmu test_pmu;
+ struct perf_pmu test_pmu = {
+ .name = "pmuname",
+ };
- test_pmu.name = "pmuname";
TEST_ASSERT_EQUAL("Exact match", perf_pmu__match(&test_pmu, "pmuname"), true);
TEST_ASSERT_EQUAL("Longer token", perf_pmu__match(&test_pmu, "longertoken"), false);
TEST_ASSERT_EQUAL("Shorter token", perf_pmu__match(&test_pmu, "pmu"), false);
diff --git a/tools/perf/tests/shell/annotate.sh b/tools/perf/tests/shell/annotate.sh
index b072d9b97387..2ccf4f1d46b6 100755
--- a/tools/perf/tests/shell/annotate.sh
+++ b/tools/perf/tests/shell/annotate.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# perf annotate basic tests
# SPDX-License-Identifier: GPL-2.0
@@ -28,6 +28,7 @@ cleanup() {
}
trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
cleanup
exit 1
}
diff --git a/tools/perf/tests/shell/base_probe/settings.sh b/tools/perf/tests/shell/base_probe/settings.sh
deleted file mode 100644
index 123621c7f95e..000000000000
--- a/tools/perf/tests/shell/base_probe/settings.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# settings.sh of perf_probe test
-# Author: Michael Petlan <mpetlan@redhat.com>
-# Author: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
-#
-
-export TEST_NAME="perf_probe"
-
-export MY_ARCH=`arch`
-
-if [ -n "$PERFSUITE_RUN_DIR" ]; then
- # when $PERFSUITE_RUN_DIR is set to something, all the logs and temp files will be placed there
- # --> the $PERFSUITE_RUN_DIR/perf_something/examples and $PERFSUITE_RUN_DIR/perf_something/logs
- # dirs will be used for that
- export PERFSUITE_RUN_DIR=`readlink -f $PERFSUITE_RUN_DIR`
- export CURRENT_TEST_DIR="$PERFSUITE_RUN_DIR/$TEST_NAME"
- export MAKE_TARGET_DIR="$CURRENT_TEST_DIR/examples"
- test -d "$MAKE_TARGET_DIR" || mkdir -p "$MAKE_TARGET_DIR"
- export LOGS_DIR="$PERFSUITE_RUN_DIR/$TEST_NAME/logs"
- test -d "$LOGS_DIR" || mkdir -p "$LOGS_DIR"
-else
- # when $PERFSUITE_RUN_DIR is not set, logs will be placed here
- export CURRENT_TEST_DIR="."
- export LOGS_DIR="."
-fi
-
-check_kprobes_available()
-{
- test -e /sys/kernel/debug/tracing/kprobe_events
-}
-
-check_uprobes_available()
-{
- test -e /sys/kernel/debug/tracing/uprobe_events
-}
-
-clear_all_probes()
-{
- echo 0 > /sys/kernel/debug/tracing/events/enable
- check_kprobes_available && echo > /sys/kernel/debug/tracing/kprobe_events
- check_uprobes_available && echo > /sys/kernel/debug/tracing/uprobe_events
-}
-
-check_sdt_support()
-{
- $CMD_PERF list sdt | grep sdt > /dev/null 2> /dev/null
-}
diff --git a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh
new file mode 100755
index 000000000000..b5dc10b2a738
--- /dev/null
+++ b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# test_adding_blacklisted of perf_probe test
+# Author: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+# Author: Michael Petlan <mpetlan@redhat.com>
+#
+# Description:
+#
+# Blacklisted functions should not be added successfully as probes,
+# they must be skipped.
+#
+
+# include working environment
+. ../common/init.sh
+
+TEST_RESULT=0
+
+# skip if not supported
+BLACKFUNC=`head -n 1 /sys/kernel/debug/kprobes/blacklist 2> /dev/null | cut -f2`
+if [ -z "$BLACKFUNC" ]; then
+ print_overall_skipped
+ exit 0
+fi
+
+# remove all previously added probes
+clear_all_probes
+
+
+### adding blacklisted function
+
+# functions from blacklist should be skipped by perf probe
+! $CMD_PERF probe $BLACKFUNC > $LOGS_DIR/adding_blacklisted.log 2> $LOGS_DIR/adding_blacklisted.err
+PERF_EXIT_CODE=$?
+
+REGEX_SCOPE_FAIL="Failed to find scope of probe point"
+REGEX_SKIP_MESSAGE=" is blacklisted function, skip it\."
+REGEX_NOT_FOUND_MESSAGE="Probe point \'$BLACKFUNC\' not found."
+REGEX_ERROR_MESSAGE="Error: Failed to add events."
+REGEX_INVALID_ARGUMENT="Failed to write event: Invalid argument"
+REGEX_SYMBOL_FAIL="Failed to find symbol at $RE_ADDRESS"
+REGEX_OUT_SECTION="$BLACKFUNC is out of \.\w+, skip it"
+../common/check_all_lines_matched.pl "$REGEX_SKIP_MESSAGE" "$REGEX_NOT_FOUND_MESSAGE" "$REGEX_ERROR_MESSAGE" "$REGEX_SCOPE_FAIL" "$REGEX_INVALID_ARGUMENT" "$REGEX_SYMBOL_FAIL" "$REGEX_OUT_SECTION" < $LOGS_DIR/adding_blacklisted.err
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "adding blacklisted function $BLACKFUNC"
+(( TEST_RESULT += $? ))
+
+
+### listing not-added probe
+
+# blacklisted probes should NOT appear in perf-list output
+$CMD_PERF list probe:\* > $LOGS_DIR/adding_blacklisted_list.log
+PERF_EXIT_CODE=$?
+
+../common/check_all_lines_matched.pl "$RE_LINE_EMPTY" "List of pre-defined events" "Metric Groups:" < $LOGS_DIR/adding_blacklisted_list.log
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "listing blacklisted probe (should NOT be listed)"
+(( TEST_RESULT += $? ))
+
+
+# print overall results
+print_overall_results "$TEST_RESULT"
+exit $?
diff --git a/tools/perf/tests/shell/base_probe/test_adding_kernel.sh b/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
index 187dc8d4b163..d541ffd44a93 100755
--- a/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
+++ b/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
@@ -15,10 +15,7 @@
# include working environment
. ../common/init.sh
-. ./settings.sh
-# shellcheck disable=SC2034 # the variable is later used after the working environment is included
-THIS_TEST_NAME=`basename $0 .sh`
TEST_RESULT=0
# shellcheck source=lib/probe_vfs_getname.sh
diff --git a/tools/perf/tests/shell/base_probe/test_basic.sh b/tools/perf/tests/shell/base_probe/test_basic.sh
new file mode 100755
index 000000000000..09669ec479f2
--- /dev/null
+++ b/tools/perf/tests/shell/base_probe/test_basic.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# test_basic of perf_probe test
+# Author: Michael Petlan <mpetlan@redhat.com>
+# Author: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+#
+# Description:
+#
+# This test tests basic functionality of perf probe command.
+#
+
+# include working environment
+. ../common/init.sh
+
+TEST_RESULT=0
+
+if ! check_kprobes_available; then
+ print_overall_skipped
+ exit 0
+fi
+
+
+### help message
+
+if [ "$PARAM_GENERAL_HELP_TEXT_CHECK" = "y" ]; then
+ # test that a help message is shown and looks reasonable
+ $CMD_PERF probe --help > $LOGS_DIR/basic_helpmsg.log 2> $LOGS_DIR/basic_helpmsg.err
+ PERF_EXIT_CODE=$?
+
+ ../common/check_all_patterns_found.pl "PERF-PROBE" "NAME" "SYNOPSIS" "DESCRIPTION" "OPTIONS" "PROBE\s+SYNTAX" "PROBE\s+ARGUMENT" "LINE\s+SYNTAX" < $LOGS_DIR/basic_helpmsg.log
+ CHECK_EXIT_CODE=$?
+ ../common/check_all_patterns_found.pl "LAZY\s+MATCHING" "FILTER\s+PATTERN" "EXAMPLES" "SEE\s+ALSO" < $LOGS_DIR/basic_helpmsg.log
+ (( CHECK_EXIT_CODE += $? ))
+ ../common/check_all_patterns_found.pl "vmlinux" "module=" "source=" "verbose" "quiet" "add=" "del=" "list.*EVENT" "line=" "vars=" "externs" < $LOGS_DIR/basic_helpmsg.log
+ (( CHECK_EXIT_CODE += $? ))
+ ../common/check_all_patterns_found.pl "no-inlines" "funcs.*FILTER" "filter=FILTER" "force" "dry-run" "max-probes" "exec=" "demangle-kernel" < $LOGS_DIR/basic_helpmsg.log
+ (( CHECK_EXIT_CODE += $? ))
+ ../common/check_no_patterns_found.pl "No manual entry for" < $LOGS_DIR/basic_helpmsg.err
+ (( CHECK_EXIT_CODE += $? ))
+
+ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "help message"
+ (( TEST_RESULT += $? ))
+else
+ print_testcase_skipped "help message"
+fi
+
+
+### usage message
+
+# without any args perf-probe should print usage
+$CMD_PERF probe 2> $LOGS_DIR/basic_usage.log > /dev/null
+
+../common/check_all_patterns_found.pl "[Uu]sage" "perf probe" "verbose" "quiet" "add" "del" "force" "line" "vars" "externs" "range" < $LOGS_DIR/basic_usage.log
+CHECK_EXIT_CODE=$?
+
+print_results 0 $CHECK_EXIT_CODE "usage message"
+(( TEST_RESULT += $? ))
+
+
+### quiet switch
+
+# '--quiet' should mute all output
+$CMD_PERF probe --quiet --add vfs_read > $LOGS_DIR/basic_quiet01.log 2> $LOGS_DIR/basic_quiet01.err
+PERF_EXIT_CODE=$?
+$CMD_PERF probe --quiet --del vfs_read > $LOGS_DIR/basic_quiet03.log 2> $LOGS_DIR/basic_quiet02.err
+(( PERF_EXIT_CODE += $? ))
+
+test "`cat $LOGS_DIR/basic_quiet*log $LOGS_DIR/basic_quiet*err | wc -l`" -eq 0
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "quiet switch"
+(( TEST_RESULT += $? ))
+
+
+# print overall results
+print_overall_results "$TEST_RESULT"
+exit $?
diff --git a/tools/perf/tests/shell/base_probe/test_invalid_options.sh b/tools/perf/tests/shell/base_probe/test_invalid_options.sh
new file mode 100755
index 000000000000..1fedfd8b0d0d
--- /dev/null
+++ b/tools/perf/tests/shell/base_probe/test_invalid_options.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# test_invalid_options of perf_probe test
+# Author: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+# Author: Michael Petlan <mpetlan@redhat.com>
+#
+# Description:
+#
+# This test checks whether the invalid and incompatible options are reported
+#
+
+# include working environment
+. ../common/init.sh
+
+TEST_RESULT=0
+
+if ! check_kprobes_available; then
+ print_overall_skipped
+ exit 0
+fi
+
+
+### missing argument
+
+# some options require an argument
+for opt in '-a' '-d' '-L' '-V'; do
+ ! $CMD_PERF probe $opt 2> $LOGS_DIR/invalid_options_missing_argument$opt.err
+ PERF_EXIT_CODE=$?
+
+ ../common/check_all_patterns_found.pl "Error: switch .* requires a value" < $LOGS_DIR/invalid_options_missing_argument$opt.err
+ CHECK_EXIT_CODE=$?
+
+ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "missing argument for $opt"
+ (( TEST_RESULT += $? ))
+done
+
+
+### unnecessary argument
+
+# some options may omit the argument
+for opt in '-F' '-l'; do
+ $CMD_PERF probe -F > /dev/null 2> $LOGS_DIR/invalid_options_unnecessary_argument$opt.err
+ PERF_EXIT_CODE=$?
+
+ test ! -s $LOGS_DIR/invalid_options_unnecessary_argument$opt.err
+ CHECK_EXIT_CODE=$?
+
+ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "unnecessary argument for $opt"
+ (( TEST_RESULT += $? ))
+done
+
+
+### mutually exclusive options
+
+# some options are mutually exclusive
+test -e $LOGS_DIR/invalid_options_mutually_exclusive.log && rm -f $LOGS_DIR/invalid_options_mutually_exclusive.log
+for opt in '-a xxx -d xxx' '-a xxx -L foo' '-a xxx -V foo' '-a xxx -l' '-a xxx -F' \
+ '-d xxx -L foo' '-d xxx -V foo' '-d xxx -l' '-d xxx -F' \
+ '-L foo -V bar' '-L foo -l' '-L foo -F' '-V foo -l' '-V foo -F' '-l -F'; do
+ ! $CMD_PERF probe $opt > /dev/null 2> $LOGS_DIR/aux.log
+ PERF_EXIT_CODE=$?
+
+ ../common/check_all_patterns_found.pl "Error: switch .+ cannot be used with switch .+" < $LOGS_DIR/aux.log
+ CHECK_EXIT_CODE=$?
+
+ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "mutually exclusive options :: $opt"
+ (( TEST_RESULT += $? ))
+
+ # gather the logs
+ cat $LOGS_DIR/aux.log | grep "Error" >> $LOGS_DIR/invalid_options_mutually_exclusive.log
+done
+
+
+# print overall results
+print_overall_results "$TEST_RESULT"
+exit $?
diff --git a/tools/perf/tests/shell/base_probe/test_line_semantics.sh b/tools/perf/tests/shell/base_probe/test_line_semantics.sh
new file mode 100755
index 000000000000..d8f4bde0f585
--- /dev/null
+++ b/tools/perf/tests/shell/base_probe/test_line_semantics.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# test_line_semantics of perf_probe test
+# Author: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+# Author: Michael Petlan <mpetlan@redhat.com>
+#
+# Description:
+#
+# This test checks whether the semantic errors of line option's
+# arguments are properly reported.
+#
+
+# include working environment
+. ../common/init.sh
+
+TEST_RESULT=0
+
+if ! check_kprobes_available; then
+ print_overall_skipped
+ exit 0
+fi
+
+
+### acceptable --line descriptions
+
+# testing acceptance of valid patterns for the '--line' option
+VALID_PATTERNS="func func:10 func:0-10 func:2+10 func@source.c func@source.c:1 source.c:1 source.c:1+1 source.c:1-10"
+for desc in $VALID_PATTERNS; do
+ ! ( $CMD_PERF probe --line $desc 2>&1 | grep -q "Semantic error" )
+ CHECK_EXIT_CODE=$?
+
+ print_results 0 $CHECK_EXIT_CODE "acceptable descriptions :: $desc"
+ (( TEST_RESULT += $? ))
+done
+
+
+### unacceptable --line descriptions
+
+# testing handling of invalid patterns for the '--line' option
+INVALID_PATTERNS="func:foo func:1-foo func:1+foo func;lazy\*pattern"
+for desc in $INVALID_PATTERNS; do
+ $CMD_PERF probe --line $desc 2>&1 | grep -q "Semantic error"
+ CHECK_EXIT_CODE=$?
+
+ print_results 0 $CHECK_EXIT_CODE "unacceptable descriptions :: $desc"
+ (( TEST_RESULT += $? ))
+done
+
+
+# print overall results
+print_overall_results "$TEST_RESULT"
+exit $?
diff --git a/tools/perf/tests/shell/base_report/setup.sh b/tools/perf/tests/shell/base_report/setup.sh
new file mode 100755
index 000000000000..4caa496660c6
--- /dev/null
+++ b/tools/perf/tests/shell/base_report/setup.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# setup.sh of perf report test
+# Author: Michael Petlan <mpetlan@redhat.com>
+#
+# Description:
+#
+# We need some sample data for perf-report testing
+#
+#
+
+# include working environment
+. ../common/init.sh
+
+test -d "$HEADER_TAR_DIR" || mkdir -p "$HEADER_TAR_DIR"
+
+SW_EVENT="cpu-clock"
+
+$CMD_PERF record -asdg -e $SW_EVENT -o $CURRENT_TEST_DIR/perf.data -- $CMD_LONGER_SLEEP 2> $LOGS_DIR/setup.log
+PERF_EXIT_CODE=$?
+
+../common/check_all_patterns_found.pl "$RE_LINE_RECORD1" "$RE_LINE_RECORD2" < $LOGS_DIR/setup.log
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "prepare the perf.data file"
+TEST_RESULT=$?
+
+print_overall_results $TEST_RESULT
+exit $?
diff --git a/tools/perf/tests/shell/base_report/stderr-whitelist.txt b/tools/perf/tests/shell/base_report/stderr-whitelist.txt
new file mode 100644
index 000000000000..e3341401b47c
--- /dev/null
+++ b/tools/perf/tests/shell/base_report/stderr-whitelist.txt
@@ -0,0 +1,5 @@
+no symbols found in .*, maybe install a debug package
+was updated .*is prelink enabled.+ Restart the long running apps that use it
+Warning:
+\d+ out of order events recorded.
+detected invalid bpf_prog_info
diff --git a/tools/perf/tests/shell/base_report/test_basic.sh b/tools/perf/tests/shell/base_report/test_basic.sh
new file mode 100755
index 000000000000..47677cbd4df3
--- /dev/null
+++ b/tools/perf/tests/shell/base_report/test_basic.sh
@@ -0,0 +1,190 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# test_basic of perf_report test
+# Author: Michael Petlan <mpetlan@redhat.com>
+#
+# Description:
+#
+# This test tests basic functionality of perf report command.
+#
+#
+
+# include working environment
+. ../common/init.sh
+
+TEST_RESULT=0
+
+
+### help message
+
+if [ "$PARAM_GENERAL_HELP_TEXT_CHECK" = "y" ]; then
+ # test that a help message is shown and looks reasonable
+ $CMD_PERF report --help > $LOGS_DIR/basic_helpmsg.log 2> $LOGS_DIR/basic_helpmsg.err
+ PERF_EXIT_CODE=$?
+
+ ../common/check_all_patterns_found.pl "PERF-REPORT" "NAME" "SYNOPSIS" "DESCRIPTION" "OPTIONS" "OVERHEAD\s+CALCULATION" "SEE ALSO" < $LOGS_DIR/basic_helpmsg.log
+ CHECK_EXIT_CODE=$?
+ ../common/check_all_patterns_found.pl "input" "verbose" "show-nr-samples" "show-cpu-utilization" "threads" "comms" "pid" "tid" "dsos" "symbols" "symbol-filter" < $LOGS_DIR/basic_helpmsg.log
+ (( CHECK_EXIT_CODE += $? ))
+ ../common/check_all_patterns_found.pl "hide-unresolved" "sort" "fields" "parent" "exclude-other" "column-widths" "field-separator" "dump-raw-trace" "children" < $LOGS_DIR/basic_helpmsg.log
+ (( CHECK_EXIT_CODE += $? ))
+ ../common/check_all_patterns_found.pl "call-graph" "max-stack" "inverted" "ignore-callees" "pretty" "stdio" "tui" "gtk" "vmlinux" "kallsyms" "modules" < $LOGS_DIR/basic_helpmsg.log
+ (( CHECK_EXIT_CODE += $? ))
+ ../common/check_all_patterns_found.pl "force" "symfs" "cpu" "disassembler-style" "source" "asm-raw" "show-total-period" "show-info" "branch-stack" "group" < $LOGS_DIR/basic_helpmsg.log
+ (( CHECK_EXIT_CODE += $? ))
+ ../common/check_all_patterns_found.pl "branch-history" "objdump" "demangle" "percent-limit" "percentage" "header" "itrace" "full-source-path" "show-ref-call-graph" < $LOGS_DIR/basic_helpmsg.log
+ (( CHECK_EXIT_CODE += $? ))
+ ../common/check_no_patterns_found.pl "No manual entry for" < $LOGS_DIR/basic_helpmsg.err
+ (( CHECK_EXIT_CODE += $? ))
+
+ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "help message"
+ (( TEST_RESULT += $? ))
+else
+ print_testcase_skipped "help message"
+fi
+
+
+### basic execution
+
+# test that perf report is even working
+$CMD_PERF report -i $CURRENT_TEST_DIR/perf.data --stdio > $LOGS_DIR/basic_basic.log 2> $LOGS_DIR/basic_basic.err
+PERF_EXIT_CODE=$?
+
+REGEX_LOST_SAMPLES_INFO="#\s*Total Lost Samples:\s+$RE_NUMBER"
+REGEX_SAMPLES_INFO="#\s*Samples:\s+(?:$RE_NUMBER)\w?\s+of\s+event\s+'$RE_EVENT_ANY'"
+REGEX_LINES_HEADER="#\s*Children\s+Self\s+Command\s+Shared Object\s+Symbol"
+REGEX_LINES="\s*$RE_NUMBER%\s+$RE_NUMBER%\s+\S+\s+\[kernel\.(?:vmlinux)|(?:kallsyms)\]\s+\[[k\.]\]\s+\w+"
+../common/check_all_patterns_found.pl "$REGEX_LOST_SAMPLES_INFO" "$REGEX_SAMPLES_INFO" "$REGEX_LINES_HEADER" "$REGEX_LINES" < $LOGS_DIR/basic_basic.log
+CHECK_EXIT_CODE=$?
+../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_basic.err
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "basic execution"
+(( TEST_RESULT += $? ))
+
+
+### number of samples
+
+# '--show-nr-samples' should show number of samples for each symbol
+$CMD_PERF report --stdio -i $CURRENT_TEST_DIR/perf.data --show-nr-samples > $LOGS_DIR/basic_nrsamples.log 2> $LOGS_DIR/basic_nrsamples.err
+PERF_EXIT_CODE=$?
+
+REGEX_LINES_HEADER="#\s*Children\s+Self\s+Samples\s+Command\s+Shared Object\s+Symbol"
+REGEX_LINES="\s*$RE_NUMBER%\s+$RE_NUMBER%\s+$RE_NUMBER\s+\S+\s+\[kernel\.(?:vmlinux)|(?:kallsyms)\]\s+\[[k\.]\]\s+\w+"
+../common/check_all_patterns_found.pl "$REGEX_LINES_HEADER" "$REGEX_LINES" < $LOGS_DIR/basic_nrsamples.log
+CHECK_EXIT_CODE=$?
+../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_nrsamples.err
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "number of samples"
+(( TEST_RESULT += $? ))
+
+
+### header
+
+# '--header' and '--header-only' should show perf report header
+$CMD_PERF report -i $CURRENT_TEST_DIR/perf.data --stdio --header-only > $LOGS_DIR/basic_header.log
+PERF_EXIT_CODE=$?
+
+REGEX_LINE_TIMESTAMP="#\s+captured on\s*:\s*$RE_DATE_TIME"
+REGEX_LINE_HOSTNAME="#\s+hostname\s*:\s*$MY_HOSTNAME"
+REGEX_LINE_KERNEL="#\s+os release\s*:\s*${MY_KERNEL_VERSION//+/\\+}"
+REGEX_LINE_PERF="#\s+perf version\s*:\s*"
+REGEX_LINE_ARCH="#\s+arch\s*:\s*$MY_ARCH"
+REGEX_LINE_CPUS_ONLINE="#\s+nrcpus online\s*:\s*$MY_CPUS_ONLINE"
+REGEX_LINE_CPUS_AVAIL="#\s+nrcpus avail\s*:\s*$MY_CPUS_AVAILABLE"
+# disable precise check for "nrcpus avail" in BASIC runmode
+test $PERFTOOL_TESTSUITE_RUNMODE -lt $RUNMODE_STANDARD && REGEX_LINE_CPUS_AVAIL="#\s+nrcpus avail\s*:\s*$RE_NUMBER"
+../common/check_all_patterns_found.pl "$REGEX_LINE_TIMESTAMP" "$REGEX_LINE_HOSTNAME" "$REGEX_LINE_KERNEL" "$REGEX_LINE_PERF" "$REGEX_LINE_ARCH" "$REGEX_LINE_CPUS_ONLINE" "$REGEX_LINE_CPUS_AVAIL" < $LOGS_DIR/basic_header.log
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "header"
+(( TEST_RESULT += $? ))
+
+# '--header' and '--header-only' should use creation time
+OLD_TIMESTAMP=`$CMD_PERF report --stdio --header-only -i $CURRENT_TEST_DIR/perf.data | grep "captured on"`
+PERF_EXIT_CODE=$?
+
+( tar -C $CURRENT_TEST_DIR -c perf.data | xz > $CURRENT_TEST_DIR/perf.data.tar.xz ; xzcat $CURRENT_TEST_DIR/perf.data.tar.xz | tar x -C $HEADER_TAR_DIR )
+(( PERF_EXIT_CODE += $? ))
+
+NEW_TIMESTAMP=`$CMD_PERF report --stdio --header-only -i $HEADER_TAR_DIR/perf.data | grep "captured on"`
+(( PERF_EXIT_CODE += $? ))
+
+test "$OLD_TIMESTAMP" = "$NEW_TIMESTAMP"
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "header timestamp"
+(( TEST_RESULT += $? ))
+
+
+### show CPU utilization
+
+# '--showcpuutilization' should show percentage for both system and userspace mode
+$CMD_PERF report -i $CURRENT_TEST_DIR/perf.data --stdio --showcpuutilization > $LOGS_DIR/basic_cpuut.log 2> $LOGS_DIR/basic_cpuut.err
+PERF_EXIT_CODE=$?
+
+REGEX_LINES_HEADER="#\s*Children\s+Self\s+sys\s+usr\s+Command\s+Shared Object\s+Symbol"
+REGEX_LINES="\s*$RE_NUMBER%\s+$RE_NUMBER%\s+$RE_NUMBER%\s+$RE_NUMBER%\s+\S+\s+\[kernel\.(?:vmlinux)|(?:kallsyms)\]\s+\[[k\.]\]\s+\w+"
+../common/check_all_patterns_found.pl "$REGEX_LINES_HEADER" "$REGEX_LINES" < $LOGS_DIR/basic_cpuut.log
+CHECK_EXIT_CODE=$?
+../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_cpuut.err
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "show CPU utilization"
+(( TEST_RESULT += $? ))
+
+
+### pid
+
+# '--pid=' should limit the output for a process with the given pid only
+$CMD_PERF report --stdio -i $CURRENT_TEST_DIR/perf.data --pid=1 > $LOGS_DIR/basic_pid.log 2> $LOGS_DIR/basic_pid.err
+PERF_EXIT_CODE=$?
+
+grep -P -v '^#' $LOGS_DIR/basic_pid.log | grep -P '\s+[\d\.]+%' | ../common/check_all_lines_matched.pl "systemd|init"
+CHECK_EXIT_CODE=$?
+../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_pid.err
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "pid"
+(( TEST_RESULT += $? ))
+
+
+### non-existing symbol
+
+# '--symbols' should show only the given symbols
+$CMD_PERF report --stdio -i $CURRENT_TEST_DIR/perf.data --symbols=dummynonexistingsymbol > $LOGS_DIR/basic_symbols.log 2> $LOGS_DIR/basic_symbols.err
+PERF_EXIT_CODE=$?
+
+../common/check_all_lines_matched.pl "$RE_LINE_EMPTY" "$RE_LINE_COMMENT" < $LOGS_DIR/basic_symbols.log
+CHECK_EXIT_CODE=$?
+../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_symbols.err
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "non-existing symbol"
+(( TEST_RESULT += $? ))
+
+
+### symbol filter
+
+# '--symbol-filter' should filter symbols based on substrings
+$CMD_PERF report --stdio -i $CURRENT_TEST_DIR/perf.data --symbol-filter=map > $LOGS_DIR/basic_symbolfilter.log 2> $LOGS_DIR/basic_symbolfilter.err
+PERF_EXIT_CODE=$?
+
+grep -P -v '^#' $LOGS_DIR/basic_symbolfilter.log | grep -P '\s+[\d\.]+%' | ../common/check_all_lines_matched.pl "\[[k\.]\]\s+.*map"
+CHECK_EXIT_CODE=$?
+../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_symbolfilter.err
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "symbol filter"
+(( TEST_RESULT += $? ))
+
+
+# TODO: $CMD_PERF report -n --showcpuutilization -TUxDg 2> 01.log
+
+# print overall results
+print_overall_results "$TEST_RESULT"
+exit $?
diff --git a/tools/perf/tests/shell/common/check_errors_whitelisted.pl b/tools/perf/tests/shell/common/check_errors_whitelisted.pl
new file mode 100755
index 000000000000..c57d355dd76e
--- /dev/null
+++ b/tools/perf/tests/shell/common/check_errors_whitelisted.pl
@@ -0,0 +1,51 @@
+#!/usr/bin/perl
+# SPDX-License-Identifier: GPL-2.0
+
+$whitelist_file = shift;
+
+if (defined $whitelist_file)
+{
+ open (INFILE, $whitelist_file) or die "Checker error: Unable to open the whitelist file: $whitelist_file\n";
+ @regexps = <INFILE>;
+ close INFILE or die "Checker error: Unable to close the whitelist file: $whitelist_file\n";
+}
+else
+{
+ @regexps = ();
+}
+
+$max_printed_lines = 20;
+$max_printed_lines = $ENV{TESTLOG_ERR_MSG_MAX_LINES} if (defined $ENV{TESTLOG_ERR_MSG_MAX_LINES});
+
+$quiet = 1;
+$quiet = 0 if (defined $ENV{TESTLOG_VERBOSITY} && $ENV{TESTLOG_VERBOSITY} ge 2);
+
+$passed = 1;
+$lines_printed = 0;
+
+while (<STDIN>)
+{
+ s/\n//;
+
+ $line_matched = 0;
+ for $r (@regexps)
+ {
+ chomp $r;
+ if (/$r/)
+ {
+ $line_matched = 1;
+ last;
+ }
+ }
+
+ unless ($line_matched)
+ {
+ if ($lines_printed++ < $max_printed_lines)
+ {
+ print "Line did not match any pattern: \"$_\"\n" unless $quiet;
+ }
+ $passed = 0;
+ }
+}
+
+exit ($passed == 0);
diff --git a/tools/perf/tests/shell/common/init.sh b/tools/perf/tests/shell/common/init.sh
index aadeaf782e03..075f17623c8e 100644
--- a/tools/perf/tests/shell/common/init.sh
+++ b/tools/perf/tests/shell/common/init.sh
@@ -26,8 +26,8 @@ print_results()
PERF_RETVAL="$1"; shift
CHECK_RETVAL="$1"; shift
FAILURE_REASON=""
- TASK_COMMENT="$@"
- if [ $PERF_RETVAL -eq 0 -a $CHECK_RETVAL -eq 0 ]; then
+ TASK_COMMENT="$*"
+ if [ $PERF_RETVAL -eq 0 ] && [ $CHECK_RETVAL -eq 0 ]; then
_echo "$MPASS-- [ PASS ] --$MEND $TEST_NAME :: $THIS_TEST_NAME :: $TASK_COMMENT"
return 0
else
@@ -56,7 +56,7 @@ print_overall_results()
print_testcase_skipped()
{
- TASK_COMMENT="$@"
+ TASK_COMMENT="$*"
_echo "$MSKIP-- [ SKIP ] --$MEND $TEST_NAME :: $THIS_TEST_NAME :: $TASK_COMMENT :: testcase skipped"
return 0
}
@@ -69,7 +69,7 @@ print_overall_skipped()
print_warning()
{
- WARN_COMMENT="$@"
+ WARN_COMMENT="$*"
_echo "$MWARN-- [ WARN ] --$MEND $TEST_NAME :: $THIS_TEST_NAME :: $WARN_COMMENT"
return 0
}
@@ -115,3 +115,26 @@ detect_amd()
# 1 = is not AMD or unknown
grep "vendor_id" < /proc/cpuinfo | grep -q "AMD"
}
+
+# base probe utility
+check_kprobes_available()
+{
+ test -e /sys/kernel/debug/tracing/kprobe_events
+}
+
+check_uprobes_available()
+{
+ test -e /sys/kernel/debug/tracing/uprobe_events
+}
+
+clear_all_probes()
+{
+ echo 0 > /sys/kernel/debug/tracing/events/enable
+ check_kprobes_available && echo > /sys/kernel/debug/tracing/kprobe_events
+ check_uprobes_available && echo > /sys/kernel/debug/tracing/uprobe_events
+}
+
+check_sdt_support()
+{
+ $CMD_PERF list sdt | grep sdt > /dev/null 2> /dev/null
+}
diff --git a/tools/perf/tests/shell/common/settings.sh b/tools/perf/tests/shell/common/settings.sh
index 361641dbaaad..cba1b338f96f 100644
--- a/tools/perf/tests/shell/common/settings.sh
+++ b/tools/perf/tests/shell/common/settings.sh
@@ -45,7 +45,7 @@ export TEST_IGNORE_MISSING_PMU=${TEST_IGNORE_MISSING_PMU:-n}
export LC_ALL=C
#### colors
-if [ -t 1 -o "$TESTLOG_FORCE_COLOR" = "yes" ]; then
+if [ -t 1 ] || [ "$TESTLOG_FORCE_COLOR" = "yes" ]; then
export MPASS="\e[32m"
export MALLPASS="\e[1;32m"
export MFAIL="\e[31m"
@@ -65,11 +65,37 @@ else
export MEND=""
fi
+### general info
+DIR_PATH=`dirname "$(readlink -e "$0")"`
+
+TEST_NAME=`basename $DIR_PATH | sed 's/base/perf/'`; export TEST_NAME
+MY_ARCH=`arch`; export MY_ARCH
+
+# storing logs and temporary files variables
+if [ -n "$PERFSUITE_RUN_DIR" ]; then
+ # when $PERFSUITE_RUN_DIR is set to something, all the logs and temp files will be placed there
+ # --> the $PERFSUITE_RUN_DIR/perf_something/examples and $PERFSUITE_RUN_DIR/perf_something/logs
+ # dirs will be used for that
+ PERFSUITE_RUN_DIR=`readlink -f $PERFSUITE_RUN_DIR`; export PERFSUITE_RUN_DIR
+ export CURRENT_TEST_DIR="$PERFSUITE_RUN_DIR/$TEST_NAME"
+ export MAKE_TARGET_DIR="$CURRENT_TEST_DIR/examples"
+ export LOGS_DIR="$CURRENT_TEST_DIR/logs"
+ export HEADER_TAR_DIR="$CURRENT_TEST_DIR/header_tar"
+ test -d "$CURRENT_TEST_DIR" || mkdir -p "$CURRENT_TEST_DIR"
+ test -d "$LOGS_DIR" || mkdir -p "$LOGS_DIR"
+else
+ # when $PERFSUITE_RUN_DIR is not set, logs will be placed here
+ export CURRENT_TEST_DIR="."
+ export LOGS_DIR="."
+ export HEADER_TAR_DIR="./header_tar"
+fi
+
#### test parametrization
if [ ! -d ./common ]; then
# set parameters based on runmode
if [ -f ../common/parametrization.$PERFTOOL_TESTSUITE_RUNMODE.sh ]; then
+ # shellcheck source=/dev/null
. ../common/parametrization.$PERFTOOL_TESTSUITE_RUNMODE.sh
fi
# if some parameters haven't been set until now, set them to default
diff --git a/tools/perf/tests/shell/ftrace.sh b/tools/perf/tests/shell/ftrace.sh
new file mode 100755
index 000000000000..a6ee740f0d7e
--- /dev/null
+++ b/tools/perf/tests/shell/ftrace.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+# perf ftrace tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# perf ftrace commands only works for root
+if [ "$(id -u)" != 0 ]; then
+ echo "perf ftrace test [Skipped: no permission]"
+ exit 2
+fi
+
+output=$(mktemp /tmp/__perf_test.ftrace.XXXXXX)
+
+cleanup() {
+ rm -f "${output}"
+
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+# this will be set in test_ftrace_trace()
+target_function=
+
+test_ftrace_list() {
+ echo "perf ftrace list test"
+ perf ftrace -F > "${output}"
+ # this will be used in test_ftrace_trace()
+ sleep_functions=$(grep 'sys_.*sleep$' "${output}")
+ echo "syscalls for sleep:"
+ echo "${sleep_functions}"
+ echo "perf ftrace list test [Success]"
+}
+
+test_ftrace_trace() {
+ echo "perf ftrace trace test"
+ perf ftrace trace --graph-opts depth=5 sleep 0.1 > "${output}"
+ # it should have some function name contains 'sleep'
+ grep "^#" "${output}"
+ grep -F 'sleep()' "${output}"
+ # find actual syscall function name
+ for FN in ${sleep_functions}; do
+ if grep -q "${FN}" "${output}"; then
+ target_function="${FN}"
+ echo "perf ftrace trace test [Success]"
+ return
+ fi
+ done
+
+ echo "perf ftrace trace test [Failure: sleep syscall not found]"
+ exit 1
+}
+
+test_ftrace_latency() {
+ echo "perf ftrace latency test"
+ echo "target function: ${target_function}"
+ perf ftrace latency -T "${target_function}" sleep 0.1 > "${output}"
+ grep "^#" "${output}"
+ grep "###" "${output}"
+ echo "perf ftrace latency test [Success]"
+}
+
+test_ftrace_profile() {
+ echo "perf ftrace profile test"
+ perf ftrace profile sleep 0.1 > "${output}"
+ grep ^# "${output}"
+ grep sleep "${output}"
+ grep schedule "${output}"
+ grep execve "${output}"
+ time_re="[[:space:]]+10[[:digit:]]{4}\.[[:digit:]]{3}"
+ # 100283.000 100283.000 100283.000 1 __x64_sys_clock_nanosleep
+ # Check for one *clock_nanosleep line with a Count of just 1 that takes a bit more than 0.1 seconds
+ # Strip the _x64_sys part to work with other architectures
+ grep -E "^${time_re}${time_re}${time_re}[[:space:]]+1[[:space:]]+.*clock_nanosleep" "${output}"
+ echo "perf ftrace profile test [Success]"
+}
+
+test_ftrace_list
+test_ftrace_trace
+test_ftrace_latency
+test_ftrace_profile
+
+cleanup
+exit 0
diff --git a/tools/perf/tests/shell/lib/perf_metric_validation.py b/tools/perf/tests/shell/lib/perf_metric_validation.py
index a2d235252183..0b94216c9c46 100644
--- a/tools/perf/tests/shell/lib/perf_metric_validation.py
+++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
@@ -95,7 +95,7 @@ class Validator:
indent=4)
def get_results(self, idx: int = 0):
- return self.results[idx]
+ return self.results.get(idx)
def get_bounds(self, lb, ub, error, alias={}, ridx: int = 0) -> list:
"""
@@ -173,7 +173,10 @@ class Validator:
pcnt = 0
tcnt = 0
rerun = list()
- for name, val in self.get_results().items():
+ results = self.get_results()
+ if not results:
+ return
+ for name, val in results.items():
if val < 0:
negmetric[name] = val
rerun.append(name)
@@ -532,6 +535,9 @@ class Validator:
'''
if not self.collectlist:
self.parse_perf_metrics()
+ if not self.metrics:
+ print("No metric found for testing")
+ return 0
self.create_rules()
for i in range(0, len(self.workloads)):
self.wlidx = i
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index bf4c1fb71c4b..5c33ec7a5a63 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -13,7 +13,12 @@ cleanup_probe_vfs_getname() {
add_probe_vfs_getname() {
add_probe_verbose=$1
if [ $had_vfs_getname -eq 1 ] ; then
- line=$(perf probe -L getname_flags 2>&1 | grep -E 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
+ result_filename_re="[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*"
+ line=$(perf probe -L getname_flags 2>&1 | grep -E "$result_filename_re" | sed -r "s/$result_filename_re/\1/")
+ if [ -z "$line" ] ; then
+ result_aname_re="[[:space:]]+([[:digit:]]+)[[:space:]]+result->aname = NULL;"
+ line=$(perf probe -L getname_flags 2>&1 | grep -E "$result_aname_re" | sed -r "s/$result_aname_re/\1/")
+ fi
perf probe -q "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
perf probe $add_probe_verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
fi
@@ -27,7 +32,7 @@ skip_if_no_debuginfo() {
# check if perf is compiled with libtraceevent support
skip_no_probe_record_support() {
if [ $had_vfs_getname -eq 1 ] ; then
- perf record --dry-run -e $1 2>&1 | grep "libtraceevent is necessary for tracepoint support" && return 2
- return 1
+ perf check feature -q libtraceevent && return 1
+ return 2
fi
}
diff --git a/tools/perf/tests/shell/perftool-testsuite_report.sh b/tools/perf/tests/shell/perftool-testsuite_report.sh
new file mode 100755
index 000000000000..973012ce92a7
--- /dev/null
+++ b/tools/perf/tests/shell/perftool-testsuite_report.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# perftool-testsuite_report
+# SPDX-License-Identifier: GPL-2.0
+
+test -d "$(dirname "$0")/base_report" || exit 2
+cd "$(dirname "$0")/base_report" || exit 2
+status=0
+
+PERFSUITE_RUN_DIR=$(mktemp -d /tmp/"$(basename "$0" .sh)".XXX)
+export PERFSUITE_RUN_DIR
+
+for testcase in setup.sh test_*; do # skip setup.sh if not present or not executable
+ test -x "$testcase" || continue
+ ./"$testcase"
+ (( status += $? ))
+done
+
+if ! [ "$PERFTEST_KEEP_LOGS" = "y" ]; then
+ rm -rf "$PERFSUITE_RUN_DIR"
+fi
+
+test $status -ne 0 && exit 1
+exit 0
diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh
index a78d35d2cff0..d4c8005ce9b9 100755
--- a/tools/perf/tests/shell/pipe_test.sh
+++ b/tools/perf/tests/shell/pipe_test.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# perf pipe recording and injection test
# SPDX-License-Identifier: GPL-2.0
@@ -11,31 +11,116 @@ sym="noploop"
skip_test_missing_symbol ${sym}
data=$(mktemp /tmp/perf.data.XXXXXX)
+data2=$(mktemp /tmp/perf.data2.XXXXXX)
prog="perf test -w noploop"
-task="perf"
+err=0
-if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then
- echo "cannot find the test file in the perf report"
- exit 1
-fi
+set -e
-if ! perf record -e task-clock:u -o - ${prog} | perf inject -b | perf report -i - | grep ${sym}; then
- echo "cannot find noploop function in pipe #1"
- exit 1
-fi
+cleanup() {
+ rm -rf "${data}"
+ rm -rf "${data}".old
+ rm -rf "${data2}"
+ rm -rf "${data2}".old
-perf record -e task-clock:u -o - ${prog} | perf inject -b -o ${data}
-if ! perf report -i ${data} | grep ${sym}; then
- echo "cannot find noploop function in pipe #2"
- exit 1
-fi
+ trap - EXIT TERM INT
+}
-perf record -e task-clock:u -o ${data} ${prog}
-if ! perf inject -b -i ${data} | perf report -i - | grep ${sym}; then
- echo "cannot find noploop function in pipe #3"
- exit 1
-fi
+trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+test_record_report() {
+ echo
+ echo "Record+report pipe test"
+
+ task="perf"
+ if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep -q ${task}
+ then
+ echo "Record+report pipe test [Failed - cannot find the test file in the perf report #1]"
+ err=1
+ return
+ fi
+
+ if ! perf record -g -e task-clock:u -o - ${prog} | perf report -i - --task | grep -q ${task}
+ then
+ echo "Record+report pipe test [Failed - cannot find the test file in the perf report #2]"
+ err=1
+ return
+ fi
+
+ perf record -g -e task-clock:u -o - ${prog} > ${data}
+ if ! perf report -i ${data} --task | grep -q ${task}
+ then
+ echo "Record+report pipe test [Failed - cannot find the test file in the perf report #3]"
+ err=1
+ return
+ fi
+
+ echo "Record+report pipe test [Success]"
+}
+
+test_inject_bids() {
+ inject_opt=$1
+
+ echo
+ echo "Inject ${inject_opt} build-ids test"
+
+ if ! perf record -e task-clock:u -o - ${prog} | perf inject ${inject_opt}| perf report -i - | grep -q ${sym}
+ then
+ echo "Inject build-ids test [Failed - cannot find noploop function in pipe #1]"
+ err=1
+ return
+ fi
+
+ if ! perf record -g -e task-clock:u -o - ${prog} | perf inject ${inject_opt} | perf report -i - | grep -q ${sym}
+ then
+ echo "Inject ${inject_opt} build-ids test [Failed - cannot find noploop function in pipe #2]"
+ err=1
+ return
+ fi
+
+ perf record -e task-clock:u -o - ${prog} | perf inject ${inject_opt} -o ${data}
+ if ! perf report -i ${data} | grep -q ${sym}; then
+ echo "Inject ${inject_opt} build-ids test [Failed - cannot find noploop function in pipe #3]"
+ err=1
+ return
+ fi
+
+ perf record -e task-clock:u -o ${data} ${prog}
+ if ! perf inject ${inject_opt} -i ${data} | perf report -i - | grep -q ${sym}; then
+ echo "Inject ${inject_opt} build-ids test [Failed - cannot find noploop function in pipe #4]"
+ err=1
+ return
+ fi
+
+ perf record -e task-clock:u -o - ${prog} > ${data}
+ if ! perf inject ${inject_opt} -i ${data} | perf report -i - | grep -q ${sym}; then
+ echo "Inject ${inject_opt} build-ids test [Failed - cannot find noploop function in pipe #5]"
+ err=1
+ return
+ fi
+
+ perf record -e task-clock:u -o - ${prog} > ${data}
+ perf inject ${inject_opt} -i ${data} -o ${data2}
+ if ! perf report -i ${data2} | grep -q ${sym}; then
+ echo "Inject ${inject_opt} build-ids test [Failed - cannot find noploop function in pipe #6]"
+ err=1
+ return
+ fi
+
+ echo "Inject ${inject_opt} build-ids test [Success]"
+}
+
+test_record_report
+test_inject_bids -B
+test_inject_bids -b
+test_inject_bids --buildid-all
+test_inject_bids --mmap2-buildid-all
+
+cleanup
+exit $err
-rm -f ${data} ${data}.old
-exit 0
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 72c65570db37..f38c8ead0b03 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -63,7 +63,10 @@ trace_libc_inet_pton_backtrace() {
# Check presence of libtraceevent support to run perf record
skip_no_probe_record_support "$event_name/$eventattr/"
- [ $? -eq 2 ] && return 2
+ if [ $? -eq 2 ]; then
+ echo "WARN: Skipping test trace_libc_inet_pton_backtrace. No libtraceevent support."
+ return 2
+ fi
perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1
# check if perf data file got created in above step.
diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
index 5eedbe29bba1..9a61928e3c9a 100755
--- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
@@ -21,7 +21,10 @@ record_open_file() {
echo "Recording open file:"
# Check presence of libtraceevent support to run perf record
skip_no_probe_record_support "probe:vfs_getname*"
- [ $? -eq 2 ] && return 2
+ if [ $? -eq 2 ]; then
+ echo "WARN: Skipping test record_open_file. No libtraceevent support"
+ return 2
+ fi
perf record -o ${perfdata} -e probe:vfs_getname\* touch $file
}
diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 3d1a7759a7b2..048078ee2eca 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# perf record tests
# SPDX-License-Identifier: GPL-2.0
@@ -21,6 +21,16 @@ testprog="perf test -w thloop"
cpu_pmu_dir="/sys/bus/event_source/devices/cpu*"
br_cntr_file="/caps/branch_counter_nr"
br_cntr_output="branch stack counters"
+br_cntr_script_output="br_cntr: A"
+
+default_fd_limit=$(ulimit -Sn)
+# With option --threads=cpu the number of open file descriptors should be
+# equal to sum of: nmb_cpus * nmb_events (2+dummy),
+# nmb_threads for perf.data.n (equal to nmb_cpus) and
+# 2*nmb_cpus of pipes = 4*nmb_cpus (each pipe has 2 ends)
+# All together it needs 8*nmb_cpus file descriptors plus some are also used
+# outside of testing, thus raising the limit to 16*nmb_cpus
+min_fd_limit=$(($(getconf _NPROCESSORS_ONLN) * 16))
cleanup() {
rm -rf "${perfdata}"
@@ -165,7 +175,7 @@ test_workload() {
}
test_branch_counter() {
- echo "Basic branch counter test"
+ echo "Branch counter test"
# Check if the branch counter feature is supported
for dir in $cpu_pmu_dir
do
@@ -175,26 +185,63 @@ test_branch_counter() {
return
fi
done
- if ! perf record -o "${perfdata}" -j any,counter ${testprog} 2> /dev/null
+ if ! perf record -o "${perfdata}" -e "{branches:p,instructions}" -j any,counter ${testprog} 2> /dev/null
then
- echo "Basic branch counter test [Failed record]"
+ echo "Branch counter record test [Failed record]"
err=1
return
fi
if ! perf report -i "${perfdata}" -D -q | grep -q "$br_cntr_output"
then
- echo "Basic branch record test [Failed missing output]"
+ echo "Branch counter report test [Failed missing output]"
+ err=1
+ return
+ fi
+ if ! perf script -i "${perfdata}" -F +brstackinsn,+brcntr | grep -q "$br_cntr_script_output"
+ then
+ echo " Branch counter script test [Failed missing output]"
+ err=1
+ return
+ fi
+ echo "Branch counter test [Success]"
+}
+
+test_cgroup() {
+ echo "Cgroup sampling test"
+ if ! perf record -aB --synth=cgroup --all-cgroups -o "${perfdata}" ${testprog} 2> /dev/null
+ then
+ echo "Cgroup sampling [Skipped not supported]"
+ return
+ fi
+ if ! perf report -i "${perfdata}" -D | grep -q "CGROUP"
+ then
+ echo "Cgroup sampling [Failed missing output]"
+ err=1
+ return
+ fi
+ if ! perf script -i "${perfdata}" -F cgroup | grep -q -v "unknown"
+ then
+ echo "Cgroup sampling [Failed cannot resolve cgroup names]"
err=1
return
fi
- echo "Basic branch counter test [Success]"
+ echo "Cgroup sampling test [Success]"
}
+# raise the limit of file descriptors to minimum
+if [[ $default_fd_limit -lt $min_fd_limit ]]; then
+ ulimit -Sn $min_fd_limit
+fi
+
test_per_thread
test_register_capture
test_system_wide
test_workload
test_branch_counter
+test_cgroup
+
+# restore the default value
+ulimit -Sn $default_fd_limit
cleanup
exit $err
diff --git a/tools/perf/tests/shell/record_bpf_filter.sh b/tools/perf/tests/shell/record_bpf_filter.sh
index 31c593966e8c..1b58ccc1fd88 100755
--- a/tools/perf/tests/shell/record_bpf_filter.sh
+++ b/tools/perf/tests/shell/record_bpf_filter.sh
@@ -22,15 +22,16 @@ trap trap_cleanup EXIT TERM INT
test_bpf_filter_priv() {
echo "Checking BPF-filter privilege"
- if [ "$(id -u)" != 0 ]
- then
- echo "bpf-filter test [Skipped permission]"
- err=2
- return
- fi
if ! perf record -e task-clock --filter 'period > 1' \
-o /dev/null --quiet true 2>&1
then
+ if [ "$(id -u)" != 0 ]
+ then
+ echo "try 'sudo perf record --setup-filter pin' first."
+ echo "bpf-filter test [Skipped permission]"
+ err=2
+ return
+ fi
echo "bpf-filter test [Skipped missing BPF support]"
err=2
return
@@ -67,7 +68,7 @@ test_bpf_filter_fail() {
# 'cpu' requires PERF_SAMPLE_CPU flag
if ! perf record -e task-clock --filter 'cpu > 0' \
- -o /dev/null true 2>&1 | grep PERF_SAMPLE_CPU
+ -o /dev/null true 2>&1 | grep -q PERF_SAMPLE_CPU
then
echo "Failing bpf-filter test [Failed forbidden CPU]"
err=1
@@ -97,7 +98,7 @@ test_bpf_filter_group() {
fi
if ! perf record -e task-clock --filter 'cpu > 0 || ip > 0' \
- -o /dev/null true 2>&1 | grep PERF_SAMPLE_CPU
+ -o /dev/null true 2>&1 | grep -q PERF_SAMPLE_CPU
then
echo "Group bpf-filter test [Failed forbidden CPU]"
err=1
@@ -105,7 +106,7 @@ test_bpf_filter_group() {
fi
if ! perf record -e task-clock --filter 'period > 0 || code_pgsz > 4096' \
- -o /dev/null true 2>&1 | grep PERF_SAMPLE_CODE_PAGE_SIZE
+ -o /dev/null true 2>&1 | grep -q PERF_SAMPLE_CODE_PAGE_SIZE
then
echo "Group bpf-filter test [Failed forbidden CODE_PAGE_SIZE]"
err=1
@@ -115,6 +116,65 @@ test_bpf_filter_group() {
echo "Group bpf-filter test [Success]"
}
+test_bpf_filter_multi() {
+ echo "Multiple bpf-filter test"
+
+ if ! perf record -e task-clock --filter 'period > 100000' \
+ -e page-faults --filter 'ip < 0xffffffff00000000' \
+ -o "${perfdata}" true 2> /dev/null
+ then
+ echo "Multiple bpf-filter test [Failed record]"
+ err=1
+ return
+ fi
+
+ if ! perf script -i "${perfdata}" -F period,event | grep task-clock | \
+ awk '{ if (int($1) <= 100000) { print $0; exit(1); } }'
+ then
+ echo "Multiple bpf-filter test [Failed task-clock period]"
+ err=1
+ return
+ fi
+
+ if perf script -i "${perfdata}" -F event,ip | grep page-fault | \
+ grep 'ffffffff[0-9a-f]*'
+ then
+ echo "Multiple bpf-filter test [Failed page-faults ip]"
+ err=1
+ return
+ fi
+
+ echo "Multiple bpf-filter test [Success]"
+}
+
+test_bpf_filter_cgroup() {
+ echo "Cgroup bpf-filter test"
+
+ if ! perf record -e task-clock --filter 'cgroup == /' \
+ -a --all-cgroups --synth=cgroup -o "${perfdata}" true 2> /dev/null
+ then
+ echo "Cgroup bpf-filter test [Skipped cgroup not supported]"
+ return
+ fi
+
+ # 'cgroup' requires PERF_SAMPLE_CGROUP flag
+ if ! perf record -e task-clock --filter 'cgroup == /' \
+ -o /dev/null true 2>&1 | grep -q PERF_SAMPLE_CGROUP
+ then
+ echo "Cgroup bpf-filter test [Failed CGROUP requires --all-cgroups]"
+ err=1
+ return
+ fi
+
+ if ! perf report -i "${perfdata}" -s cgroup -q | grep -q -F '100.00%'
+ then
+ echo "Cgroup bpf-filter test [Failed root cgroup does not have 100%]"
+ err=1
+ return
+ fi
+
+ echo "Cgroup bpf-filter test [Success]"
+}
test_bpf_filter_priv
@@ -130,5 +190,13 @@ if [ $err = 0 ]; then
test_bpf_filter_group
fi
+if [ $err = 0 ]; then
+ test_bpf_filter_multi
+fi
+
+if [ $err = 0 ]; then
+ test_bpf_filter_cgroup
+fi
+
cleanup
exit $err
diff --git a/tools/perf/tests/shell/record_lbr.sh b/tools/perf/tests/shell/record_lbr.sh
new file mode 100755
index 000000000000..32314641217e
--- /dev/null
+++ b/tools/perf/tests/shell/record_lbr.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+# perf record LBR tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+if [ ! -f /sys/devices/cpu/caps/branches ] && [ ! -f /sys/devices/cpu_core/caps/branches ]
+then
+ echo "Skip: only x86 CPUs support LBR"
+ exit 2
+fi
+
+err=0
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+
+cleanup() {
+ rm -rf "${perfdata}"
+ rm -rf "${perfdata}".old
+ rm -rf "${perfdata}".txt
+
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+
+lbr_callgraph_test() {
+ test="LBR callgraph"
+
+ echo "$test"
+ if ! perf record -e cycles --call-graph lbr -o "${perfdata}" perf test -w thloop
+ then
+ echo "$test [Failed support missing]"
+ if [ $err -eq 0 ]
+ then
+ err=2
+ fi
+ return
+ fi
+
+ if ! perf report --stitch-lbr -i "${perfdata}" > "${perfdata}".txt
+ then
+ cat "${perfdata}".txt
+ echo "$test [Failed in perf report]"
+ err=1
+ return
+ fi
+
+ echo "$test [Success]"
+}
+
+lbr_test() {
+ local branch_flags=$1
+ local test="LBR $2 test"
+ local threshold=$3
+ local out
+ local sam_nr
+ local bs_nr
+ local zero_nr
+ local r
+
+ echo "$test"
+ if ! perf record -e cycles $branch_flags -o "${perfdata}" perf test -w thloop
+ then
+ echo "$test [Failed support missing]"
+ perf record -e cycles $branch_flags -o "${perfdata}" perf test -w thloop || true
+ if [ $err -eq 0 ]
+ then
+ err=2
+ fi
+ return
+ fi
+
+ out=$(perf report -D -i "${perfdata}" 2> /dev/null | grep -A1 'PERF_RECORD_SAMPLE')
+ sam_nr=$(echo "$out" | grep -c 'PERF_RECORD_SAMPLE' || true)
+ if [ $sam_nr -eq 0 ]
+ then
+ echo "$test [Failed no samples captured]"
+ err=1
+ return
+ fi
+ echo "$test: $sam_nr samples"
+
+ bs_nr=$(echo "$out" | grep -c 'branch stack: nr:' || true)
+ if [ $sam_nr -ne $bs_nr ]
+ then
+ echo "$test [Failed samples missing branch stacks]"
+ err=1
+ return
+ fi
+
+ zero_nr=$(echo "$out" | grep -c 'branch stack: nr:0' || true)
+ r=$(($zero_nr * 100 / $bs_nr))
+ if [ $r -gt $threshold ]; then
+ echo "$test [Failed empty br stack ratio exceed $threshold%: $r%]"
+ err=1
+ return
+ fi
+
+ echo "$test [Success]"
+}
+
+parallel_lbr_test() {
+ err=0
+ perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+ lbr_test "$1" "$2" "$3"
+ cleanup
+ exit $err
+}
+
+lbr_callgraph_test
+
+# Sequential
+lbr_test "-b" "any branch" 2
+lbr_test "-j any_call" "any call" 2
+lbr_test "-j any_ret" "any ret" 2
+lbr_test "-j ind_call" "any indirect call" 2
+lbr_test "-j ind_jmp" "any indirect jump" 100
+lbr_test "-j call" "direct calls" 2
+lbr_test "-j ind_call,u" "any indirect user call" 100
+lbr_test "-a -b" "system wide any branch" 2
+lbr_test "-a -j any_call" "system wide any call" 2
+
+# Parallel
+parallel_lbr_test "-b" "parallel any branch" 100 &
+pid1=$!
+parallel_lbr_test "-j any_call" "parallel any call" 100 &
+pid2=$!
+parallel_lbr_test "-j any_ret" "parallel any ret" 100 &
+pid3=$!
+parallel_lbr_test "-j ind_call" "parallel any indirect call" 100 &
+pid4=$!
+parallel_lbr_test "-j ind_jmp" "parallel any indirect jump" 100 &
+pid5=$!
+parallel_lbr_test "-j call" "parallel direct calls" 100 &
+pid6=$!
+parallel_lbr_test "-j ind_call,u" "parallel any indirect user call" 100 &
+pid7=$!
+parallel_lbr_test "-a -b" "parallel system wide any branch" 100 &
+pid8=$!
+parallel_lbr_test "-a -j any_call" "parallel system wide any call" 100 &
+pid9=$!
+
+for pid in $pid1 $pid2 $pid3 $pid4 $pid5 $pid6 $pid7 $pid8 $pid9
+do
+ set +e
+ wait $pid
+ child_err=$?
+ set -e
+ if ([ $err -eq 2 ] && [ $child_err -eq 1 ]) || [ $err -eq 0 ]
+ then
+ err=$child_err
+ fi
+done
+
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh
index c1a603653662..d3e2958d2242 100755
--- a/tools/perf/tests/shell/script.sh
+++ b/tools/perf/tests/shell/script.sh
@@ -61,7 +61,10 @@ _end_of_file_
esac
perf record $cmd_flags -o "${perfdatafile}" true
+ # Disable lsan to avoid warnings about python memory leaks.
+ export ASAN_OPTIONS=detect_leaks=0
perf script -i "${perfdatafile}" -s "${db_test}"
+ export ASAN_OPTIONS=
echo "DB test [Success]"
}
diff --git a/tools/perf/tests/shell/test_stat_intel_tpebs.sh b/tools/perf/tests/shell/test_stat_intel_tpebs.sh
new file mode 100755
index 000000000000..c60b29add980
--- /dev/null
+++ b/tools/perf/tests/shell/test_stat_intel_tpebs.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# test Intel TPEBS counting mode
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; }
+
+# Use this event for testing because it should exist in all platforms
+event=cache-misses:R
+
+# Without this cmd option, default value or zero is returned
+echo "Testing without --record-tpebs"
+result=$(perf stat -e "$event" true 2>&1)
+[[ "$result" =~ $event ]] || exit 1
+
+# In platforms that do not support TPEBS, it should execute without error.
+echo "Testing with --record-tpebs"
+result=$(perf stat -e "$event" --record-tpebs -a sleep 0.01 2>&1)
+[[ "$result" =~ "perf record" && "$result" =~ $event ]] || exit 1
diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh
index 92d15154ba79..7d76fc63d995 100755
--- a/tools/perf/tests/shell/test_task_analyzer.sh
+++ b/tools/perf/tests/shell/test_task_analyzer.sh
@@ -11,6 +11,9 @@ if [ -e "$perfdir/scripts/python/Perf-Trace-Util" ]; then
export PERF_EXEC_PATH=$perfdir
fi
+# Disable lsan to avoid warnings about python memory leaks.
+export ASAN_OPTIONS=detect_leaks=0
+
cleanup() {
rm -f perf.data
rm -f perf.data.old
@@ -52,8 +55,8 @@ find_str_or_fail() {
# check if perf is compiled with libtraceevent support
skip_no_probe_record_support() {
- perf version --build-options | grep -q " OFF .* HAVE_LIBTRACEEVENT" && return 2
- return 0
+ perf check feature -q libtraceevent && return 0
+ return 2
}
prepare_perf_data() {
diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
index 82bc774a078a..33387c329f92 100755
--- a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
@@ -4,6 +4,13 @@
set -e
+# Skip if there's no probe command.
+if ! perf | grep probe
+then
+ echo "Skip: probe command isn't present"
+ exit 2
+fi
+
# skip if there's no gcc
if ! [ -x "$(command -v gcc)" ]; then
echo "failed: no gcc compiler"
diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh
new file mode 100755
index 000000000000..5a3b8a5a9b5c
--- /dev/null
+++ b/tools/perf/tests/shell/trace_btf_enum.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+# perf trace enum augmentation tests
+# SPDX-License-Identifier: GPL-2.0
+
+err=0
+set -e
+
+syscall="landlock_add_rule"
+non_syscall="timer:hrtimer_init,timer:hrtimer_start"
+
+TESTPROG="perf test -w landlock"
+
+# shellcheck source=lib/probe.sh
+. "$(dirname $0)"/lib/probe.sh
+skip_if_no_perf_trace || exit 2
+
+check_vmlinux() {
+ echo "Checking if vmlinux exists"
+ if ! ls /sys/kernel/btf/vmlinux 1>/dev/null 2>&1
+ then
+ echo "trace+enum test [Skipped missing vmlinux BTF support]"
+ err=2
+ fi
+}
+
+trace_landlock() {
+ echo "Tracing syscall ${syscall}"
+
+ # test flight just to see if landlock_add_rule and libbpf are available
+ $TESTPROG
+
+ if perf trace -e $syscall $TESTPROG 2>&1 | \
+ grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*"
+ then
+ err=0
+ else
+ err=1
+ fi
+}
+
+trace_non_syscall() {
+ echo "Tracing non-syscall tracepoint ${non-syscall}"
+ if perf trace -e $non_syscall --max-events=1 2>&1 | \
+ grep -q -E '.*timer:hrtimer_.*\(.*mode: HRTIMER_MODE_.*\)$'
+ then
+ err=0
+ else
+ err=1
+ fi
+}
+
+check_vmlinux
+
+if [ $err = 0 ]; then
+ trace_landlock
+fi
+
+if [ $err = 0 ]; then
+ trace_non_syscall
+fi
+
+exit $err
diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c
index 706780fb5695..6468cc0d0204 100644
--- a/tools/perf/tests/stat.c
+++ b/tools/perf/tests/stat.c
@@ -21,7 +21,7 @@ static bool has_term(struct perf_record_stat_config *config,
return false;
}
-static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+static int process_stat_config_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -62,7 +62,7 @@ static int test__synthesize_stat_config(struct test_suite *test __maybe_unused,
return 0;
}
-static int process_stat_event(struct perf_tool *tool __maybe_unused,
+static int process_stat_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -93,7 +93,7 @@ static int test__synthesize_stat(struct test_suite *test __maybe_unused, int sub
return 0;
}
-static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+static int process_stat_round_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c
index e2042b368269..ed114b044293 100644
--- a/tools/perf/tests/tests-scripts.c
+++ b/tools/perf/tests/tests-scripts.c
@@ -29,16 +29,45 @@
static int shell_tests__dir_fd(void)
{
- char path[PATH_MAX], *exec_path;
- static const char * const devel_dirs[] = { "./tools/perf/tests/shell", "./tests/shell", };
+ struct stat st;
+ char path[PATH_MAX], path2[PATH_MAX], *exec_path;
+ static const char * const devel_dirs[] = {
+ "./tools/perf/tests/shell",
+ "./tests/shell",
+ "./source/tests/shell"
+ };
+ int fd;
+ char *p;
for (size_t i = 0; i < ARRAY_SIZE(devel_dirs); ++i) {
- int fd = open(devel_dirs[i], O_PATH);
+ fd = open(devel_dirs[i], O_PATH);
if (fd >= 0)
return fd;
}
+ /* Use directory of executable */
+ if (readlink("/proc/self/exe", path2, sizeof path2) < 0)
+ return -1;
+ /* Follow another level of symlink if there */
+ if (lstat(path2, &st) == 0 && (st.st_mode & S_IFMT) == S_IFLNK) {
+ scnprintf(path, sizeof(path), path2);
+ if (readlink(path, path2, sizeof path2) < 0)
+ return -1;
+ }
+ /* Get directory */
+ p = strrchr(path2, '/');
+ if (p)
+ *p = 0;
+ scnprintf(path, sizeof(path), "%s/tests/shell", path2);
+ fd = open(path, O_PATH);
+ if (fd >= 0)
+ return fd;
+ scnprintf(path, sizeof(path), "%s/source/tests/shell", path2);
+ fd = open(path, O_PATH);
+ if (fd >= 0)
+ return fd;
+
/* Then installed path. */
exec_path = get_argv_exec_path();
scnprintf(path, sizeof(path), "%s/tests/shell", exec_path);
@@ -222,6 +251,8 @@ static void append_scripts_in_dir(int dir_fd,
if (!S_ISDIR(st.st_mode))
continue;
}
+ if (strncmp(ent->d_name, "base_", 5) == 0)
+ continue; /* Skip scripts that have a separate driver. */
fd = openat(dir_fd, ent->d_name, O_PATH);
append_scripts_in_dir(fd, result, result_sz);
}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 3aa7701ee0e9..6ea2be86b7bf 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -205,6 +205,7 @@ DECLARE_WORKLOAD(leafloop);
DECLARE_WORKLOAD(sqrtloop);
DECLARE_WORKLOAD(brstack);
DECLARE_WORKLOAD(datasym);
+DECLARE_WORKLOAD(landlock);
extern const char *dso_to_test;
extern const char *test_objdump_path;
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 74308c1368fe..1fe521466bf4 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -60,7 +60,7 @@ static int test__thread_map(struct test_suite *test __maybe_unused, int subtest
return 0;
}
-static int process_event(struct perf_tool *tool __maybe_unused,
+static int process_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index e30fd55f8e51..74cdbd2ce9d0 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -26,7 +26,6 @@ static bool is_ignored_symbol(const char *name, char type)
* when --all-symbols is specified so exclude them to get a
* stable symbol list.
*/
- "kallsyms_addresses",
"kallsyms_offsets",
"kallsyms_relative_base",
"kallsyms_num_syms",
@@ -132,7 +131,7 @@ static int test__vmlinux_matches_kallsyms_cb1(struct map *map, void *data)
(dso__kernel(dso) ? dso__short_name(dso) : dso__name(dso)));
if (pair) {
- map__set_priv(pair, 1);
+ map__set_priv(pair);
map__put(pair);
} else {
if (!args->header_printed) {
@@ -167,7 +166,7 @@ static int test__vmlinux_matches_kallsyms_cb2(struct map *map, void *data)
pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64,
map__start(pair), map__end(pair), map__pgoff(pair));
pr_info(" %s\n", dso__name(dso));
- map__set_priv(pair, 1);
+ map__set_priv(pair);
}
map__put(pair);
return 0;
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
index 48bf0d3b0f3d..5af17206f04d 100644
--- a/tools/perf/tests/workloads/Build
+++ b/tools/perf/tests/workloads/Build
@@ -6,6 +6,7 @@ perf-test-y += leafloop.o
perf-test-y += sqrtloop.o
perf-test-y += brstack.o
perf-test-y += datasym.o
+perf-test-y += landlock.o
CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE
diff --git a/tools/perf/tests/workloads/landlock.c b/tools/perf/tests/workloads/landlock.c
new file mode 100644
index 000000000000..e2b5ef647c09
--- /dev/null
+++ b/tools/perf/tests/workloads/landlock.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <unistd.h>
+#include "../tests.h"
+
+/* This workload was initially added to test enum augmentation with BTF in perf
+ * trace because its the only syscall that has an enum argument. Since it is
+ * a recent addition to the Linux kernel (at the time of the introduction of this
+ * 'perf test' workload) we just add the required types and defines here instead
+ * of including linux/landlock, that isn't available in older systems.
+ *
+ * We are not interested in the the result of the syscall, just in intercepting
+ * its arguments.
+ */
+
+#ifndef __NR_landlock_add_rule
+#define __NR_landlock_add_rule 445
+#endif
+
+#ifndef LANDLOCK_ACCESS_FS_READ_FILE
+#define LANDLOCK_ACCESS_FS_READ_FILE (1ULL << 2)
+
+#define LANDLOCK_RULE_PATH_BENEATH 1
+
+struct landlock_path_beneath_attr {
+ __u64 allowed_access;
+ __s32 parent_fd;
+};
+#endif
+
+#ifndef LANDLOCK_ACCESS_NET_CONNECT_TCP
+#define LANDLOCK_ACCESS_NET_CONNECT_TCP (1ULL << 1)
+
+#define LANDLOCK_RULE_NET_PORT 2
+
+struct landlock_net_port_attr {
+ __u64 allowed_access;
+ __u64 port;
+};
+#endif
+
+static int landlock(int argc __maybe_unused, const char **argv __maybe_unused)
+{
+ int fd = 11, flags = 45;
+
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE,
+ .parent_fd = 14,
+ };
+
+ struct landlock_net_port_attr net_port_attr = {
+ .port = 19,
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+
+ syscall(__NR_landlock_add_rule, fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, flags);
+
+ syscall(__NR_landlock_add_rule, fd, LANDLOCK_RULE_NET_PORT,
+ &net_port_attr, flags);
+
+ return 0;
+}
+
+DEFINE_WORKLOAD(landlock);
diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c
index cc8719609b19..6c178985e37f 100644
--- a/tools/perf/tests/wp.c
+++ b/tools/perf/tests/wp.c
@@ -20,7 +20,12 @@ do { \
TEST_ASSERT_VAL(text, count == val); \
} while (0)
+#ifdef __i386__
+/* Only breakpoint length less-than 8 has hardware support on i386. */
+volatile u32 data1;
+#else
volatile u64 data1;
+#endif
volatile u8 data2[3];
#ifndef __s390x__
diff --git a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
index 13aea8fc3d45..47051871b436 100644
--- a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
+++ b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
@@ -18,8 +18,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... LOCAL_TIMER_VECTOR-1
- * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
+ * Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts
+ * Vectors FIRST_SYSTEM_VECTOR ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 78d10d92d351..0a07ad158f87 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -113,6 +113,7 @@ struct syscall_arg {
struct thread *thread;
struct trace *trace;
void *parm;
+ char *type_name;
u16 len;
u8 idx;
u8 mask;
@@ -228,6 +229,11 @@ size_t syscall_arg__scnprintf_renameat2_flags(char *bf, size_t size, struct sysc
size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_SOCKADDR syscall_arg__scnprintf_sockaddr
+// 'argname' is just documentational at this point, to remove the previous comment with that info
+#define SCA_SOCKADDR_FROM_USER(argname) \
+ { .scnprintf = SCA_SOCKADDR, \
+ .from_user = true, }
+
size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol
@@ -249,6 +255,11 @@ size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struc
size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_TIMESPEC syscall_arg__scnprintf_timespec
+// 'argname' is just documentational at this point, to remove the previous comment with that info
+#define SCA_TIMESPEC_FROM_USER(argname) \
+ { .scnprintf = SCA_TIMESPEC, \
+ .from_user = true, }
+
size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix);
void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
diff --git a/tools/perf/trace/beauty/fs_at_flags.sh b/tools/perf/trace/beauty/fs_at_flags.sh
index 456f59addf74..e3f13f96a27c 100755
--- a/tools/perf/trace/beauty/fs_at_flags.sh
+++ b/tools/perf/trace/beauty/fs_at_flags.sh
@@ -13,9 +13,14 @@ printf "static const char *fs_at_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+AT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
# AT_EACCESS is only meaningful to faccessat, so we will special case it there...
# AT_STATX_SYNC_TYPE is not a bit, its a mask of AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC and AT_STATX_DONT_SYNC
+# AT_HANDLE_FID and AT_HANDLE_MNT_ID_UNIQUE are reusing values and are valid only for name_to_handle_at()
+# AT_RENAME_NOREPLACE reuses 0x1 and is valid only for renameat2()
grep -E $regex ${linux_fcntl} | \
grep -v AT_EACCESS | \
grep -v AT_STATX_SYNC_TYPE | \
+ grep -v AT_HANDLE_FID | \
+ grep -v AT_HANDLE_MNT_ID_UNIQUE | \
+ grep -v AT_RENAME_NOREPLACE | \
sed -r "s/$regex/\2 \1/g" | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
printf "};\n"
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 89d16b90370b..d18cc47e89bd 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -76,7 +76,7 @@ struct msghdr {
__kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
struct ubuf_info *msg_ubuf;
- int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb,
+ int (*sg_from_iter)(struct sk_buff *skb,
struct iov_iter *from, size_t length);
};
@@ -327,6 +327,7 @@ struct ucred {
* plain text and require encryption
*/
+#define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */
#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
#define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
@@ -442,11 +443,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
extern int __sys_socket(int family, int type, int protocol);
extern struct file *__sys_socket_file(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
+extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address,
+ int addrlen);
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
int addrlen, int file_flags);
extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
int addrlen);
extern int __sys_listen(int fd, int backlog);
+extern int __sys_listen_socket(struct socket *sock, int backlog);
extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
int __user *usockaddr_len);
extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index c0bcc185fa48..87e2dec79fea 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
@@ -16,6 +16,9 @@
#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
+/* Was the file just created? */
+#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4)
+
/*
* Cancel a blocking posix lock; internal use only until we expose an
* asynchronous lock api to userspace:
@@ -87,37 +90,70 @@
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
+#define AT_FDCWD -100 /* Special value for dirfd used to
+ indicate openat should use the
+ current working directory. */
+
+
+/* Generic flags for the *at(2) family of syscalls. */
+
+/* Reserved for per-syscall flags 0xff. */
+#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic
+ links. */
+/* Reserved for per-syscall flags 0x200 */
+#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
+#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount
+ traversal. */
+#define AT_EMPTY_PATH 0x1000 /* Allow empty relative
+ pathname to operate on dirfd
+ directly. */
+/*
+ * These flags are currently statx(2)-specific, but they could be made generic
+ * in the future and so they should not be used for other per-syscall flags.
+ */
+#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
+#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
+#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
+#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
+
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+
/*
- * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
- * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
- * unlinkat. The two functions do completely different things and therefore,
- * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
- * faccessat would be undefined behavior and thus treating it equivalent to
- * AT_EACCESS is valid undefined behavior.
+ * Per-syscall flags for the *at(2) family of syscalls.
+ *
+ * These are flags that are so syscall-specific that a user passing these flags
+ * to the wrong syscall is so "clearly wrong" that we can safely call such
+ * usage "undefined behaviour".
+ *
+ * For example, the constants AT_REMOVEDIR and AT_EACCESS have the same value.
+ * AT_EACCESS is meaningful only to faccessat, while AT_REMOVEDIR is meaningful
+ * only to unlinkat. The two functions do completely different things and
+ * therefore, the flags can be allowed to overlap. For example, passing
+ * AT_REMOVEDIR to faccessat would be undefined behavior and thus treating it
+ * equivalent to AT_EACCESS is valid undefined behavior.
+ *
+ * Note for implementers: When picking a new per-syscall AT_* flag, try to
+ * reuse already existing flags first. This leaves us with as many unused bits
+ * as possible, so we can use them for generic bits in the future if necessary.
*/
-#define AT_FDCWD -100 /* Special value used to indicate
- openat should use the current
- working directory. */
-#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
+
+/* Flags for renameat2(2) (must match legacy RENAME_* flags). */
+#define AT_RENAME_NOREPLACE 0x0001
+#define AT_RENAME_EXCHANGE 0x0002
+#define AT_RENAME_WHITEOUT 0x0004
+
+/* Flag for faccessat(2). */
#define AT_EACCESS 0x200 /* Test access permitted for
effective IDs, not real IDs. */
+/* Flag for unlinkat(2). */
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
unlinking file. */
-#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
-#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
-#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
-
-#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
-#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
-#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
-#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
-
-#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+/* Flags for name_to_handle_at(2). */
+#define AT_HANDLE_FID 0x200 /* File handle is needed to compare
+ object identity and may not be
+ usable with open_by_handle_at(2). */
+#define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */
-/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
-#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
- compare object identity and may not
- be usable to open_by_handle_at(2) */
#if defined(__KERNEL__)
#define AT_GETATTR_NOSEC 0x80000000
#endif
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 45e4e64fd664..753971770733 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -329,12 +329,17 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO negation of O_APPEND */
#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+/* Atomic Write */
+#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND | RWF_NOAPPEND)
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
+
+#define PROCFS_IOCTL_MAGIC 'f'
/* Pagemap ioctl */
-#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
+#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
#define PAGE_IS_WPALLOWED (1 << 0)
@@ -393,4 +398,158 @@ struct pm_scan_arg {
__u64 return_mask;
};
+/* /proc/<pid>/maps ioctl */
+#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
+
+enum procmap_query_flags {
+ /*
+ * VMA permission flags.
+ *
+ * Can be used as part of procmap_query.query_flags field to look up
+ * only VMAs satisfying specified subset of permissions. E.g., specifying
+ * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs,
+ * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only
+ * return read/write VMAs, though both executable/non-executable and
+ * private/shared will be ignored.
+ *
+ * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags
+ * field to specify actual VMA permissions.
+ */
+ PROCMAP_QUERY_VMA_READABLE = 0x01,
+ PROCMAP_QUERY_VMA_WRITABLE = 0x02,
+ PROCMAP_QUERY_VMA_EXECUTABLE = 0x04,
+ PROCMAP_QUERY_VMA_SHARED = 0x08,
+ /*
+ * Query modifier flags.
+ *
+ * By default VMA that covers provided address is returned, or -ENOENT
+ * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest
+ * VMA with vma_start > addr will be returned if no covering VMA is
+ * found.
+ *
+ * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that
+ * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
+ * to iterate all VMAs with file backing.
+ */
+ PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10,
+ PROCMAP_QUERY_FILE_BACKED_VMA = 0x20,
+};
+
+/*
+ * Input/output argument structured passed into ioctl() call. It can be used
+ * to query a set of VMAs (Virtual Memory Areas) of a process.
+ *
+ * Each field can be one of three kinds, marked in a short comment to the
+ * right of the field:
+ * - "in", input argument, user has to provide this value, kernel doesn't modify it;
+ * - "out", output argument, kernel sets this field with VMA data;
+ * - "in/out", input and output argument; user provides initial value (used
+ * to specify maximum allowable buffer size), and kernel sets it to actual
+ * amount of data written (or zero, if there is no data).
+ *
+ * If matching VMA is found (according to criterias specified by
+ * query_addr/query_flags, all the out fields are filled out, and ioctl()
+ * returns 0. If there is no matching VMA, -ENOENT will be returned.
+ * In case of any other error, negative error code other than -ENOENT is
+ * returned.
+ *
+ * Most of the data is similar to the one returned as text in /proc/<pid>/maps
+ * file, but procmap_query provides more querying flexibility. There are no
+ * consistency guarantees between subsequent ioctl() calls, but data returned
+ * for matched VMA is self-consistent.
+ */
+struct procmap_query {
+ /* Query struct size, for backwards/forward compatibility */
+ __u64 size;
+ /*
+ * Query flags, a combination of enum procmap_query_flags values.
+ * Defines query filtering and behavior, see enum procmap_query_flags.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_flags; /* in */
+ /*
+ * Query address. By default, VMA that covers this address will
+ * be looked up. PROCMAP_QUERY_* flags above modify this default
+ * behavior further.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_addr; /* in */
+ /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */
+ __u64 vma_start; /* out */
+ __u64 vma_end; /* out */
+ /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */
+ __u64 vma_flags; /* out */
+ /* VMA backing page size granularity. */
+ __u64 vma_page_size; /* out */
+ /*
+ * VMA file offset. If VMA has file backing, this specifies offset
+ * within the file that VMA's start address corresponds to.
+ * Is set to zero if VMA has no backing file.
+ */
+ __u64 vma_offset; /* out */
+ /* Backing file's inode number, or zero, if VMA has no backing file. */
+ __u64 inode; /* out */
+ /* Backing file's device major/minor number, or zero, if VMA has no backing file. */
+ __u32 dev_major; /* out */
+ __u32 dev_minor; /* out */
+ /*
+ * If set to non-zero value, signals the request to return VMA name
+ * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix
+ * appended, if file was unlinked from FS) for matched VMA. VMA name
+ * can also be some special name (e.g., "[heap]", "[stack]") or could
+ * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
+ *
+ * Kernel will set this field to zero, if VMA has no associated name.
+ * Otherwise kernel will return actual amount of bytes filled in
+ * user-supplied buffer (see vma_name_addr field below), including the
+ * terminating zero.
+ *
+ * If VMA name is longer that user-supplied maximum buffer size,
+ * -E2BIG error is returned.
+ *
+ * If this field is set to non-zero value, vma_name_addr should point
+ * to valid user space memory buffer of at least vma_name_size bytes.
+ * If set to zero, vma_name_addr should be set to zero as well
+ */
+ __u32 vma_name_size; /* in/out */
+ /*
+ * If set to non-zero value, signals the request to extract and return
+ * VMA's backing file's build ID, if the backing file is an ELF file
+ * and it contains embedded build ID.
+ *
+ * Kernel will set this field to zero, if VMA has no backing file,
+ * backing file is not an ELF file, or ELF file has no build ID
+ * embedded.
+ *
+ * Build ID is a binary value (not a string). Kernel will set
+ * build_id_size field to exact number of bytes used for build ID.
+ * If build ID is requested and present, but needs more bytes than
+ * user-supplied maximum buffer size (see build_id_addr field below),
+ * -E2BIG error will be returned.
+ *
+ * If this field is set to non-zero value, build_id_addr should point
+ * to valid user space memory buffer of at least build_id_size bytes.
+ * If set to zero, build_id_addr should be set to zero as well
+ */
+ __u32 build_id_size; /* in/out */
+ /*
+ * User-supplied address of a buffer of at least vma_name_size bytes
+ * for kernel to fill with matched VMA's name (see vma_name_size field
+ * description above for details).
+ *
+ * Should be set to zero if VMA name should not be returned.
+ */
+ __u64 vma_name_addr; /* in */
+ /*
+ * User-supplied address of a buffer of at least build_id_size bytes
+ * for kernel to fill with matched VMA's ELF build ID, if available
+ * (see build_id_size field description above for details).
+ *
+ * Should be set to zero if build ID should not be returned.
+ */
+ __u64 build_id_addr; /* in */
+};
+
#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h
index ad5478dbad00..225bc366ffcb 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/mount.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h
@@ -154,7 +154,7 @@ struct mount_attr {
*/
struct statmount {
__u32 size; /* Total size, including strings */
- __u32 __spare1;
+ __u32 mnt_opts; /* [str] Mount options of the mount */
__u64 mask; /* What results were written */
__u32 sb_dev_major; /* Device ID */
__u32 sb_dev_minor;
@@ -172,7 +172,8 @@ struct statmount {
__u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */
- __u64 __spare2[50];
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u64 __spare2[49];
char str[]; /* Variable size part containing strings */
};
@@ -188,10 +189,12 @@ struct mnt_id_req {
__u32 spare;
__u64 mnt_id;
__u64 param;
+ __u64 mnt_ns_id;
};
/* List of all mnt_id_req versions. */
#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
/*
* @mask bits for statmount(2)
@@ -202,10 +205,13 @@ struct mnt_id_req {
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
/*
* Special @mnt_id values that can be passed to listmount
*/
#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/sched.h b/tools/perf/trace/beauty/include/uapi/linux/sched.h
index 3bac0a8ceab2..359a14cc76a4 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/sched.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/sched.h
@@ -118,6 +118,7 @@ struct clone_args {
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5
#define SCHED_DEADLINE 6
+#define SCHED_EXT 7
/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
#define SCHED_RESET_ON_FORK 0x40000000
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index 67626d535316..887a25286441 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -126,9 +126,15 @@ struct statx {
__u64 stx_mnt_id;
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
- __u64 stx_subvol; /* Subvolume identifier */
/* 0xa0 */
- __u64 __spare3[11]; /* Spare space for future expansion */
+ __u64 stx_subvol; /* Subvolume identifier */
+ __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* 0xb0 */
+ __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
+ __u32 __spare1[1];
+ /* 0xb8 */
+ __u64 __spare3[9]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -157,6 +163,7 @@ struct statx {
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -192,6 +199,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h
index 628d46a0da92..4cd513215bcd 100644
--- a/tools/perf/trace/beauty/include/uapi/sound/asound.h
+++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h
@@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image {
* *
*****************************************************************************/
-#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17)
+#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18)
typedef unsigned long snd_pcm_uframes_t;
typedef signed long snd_pcm_sframes_t;
@@ -334,7 +334,7 @@ union snd_pcm_sync_id {
unsigned char id[16];
unsigned short id16[8];
unsigned int id32[4];
-};
+} __attribute__((deprecated));
struct snd_pcm_info {
unsigned int device; /* RO/WR (control): device number */
@@ -348,7 +348,7 @@ struct snd_pcm_info {
int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */
unsigned int subdevices_count;
unsigned int subdevices_avail;
- union snd_pcm_sync_id sync; /* hardware synchronization ID */
+ unsigned char pad1[16]; /* was: hardware synchronization ID */
unsigned char reserved[64]; /* reserved for future... */
};
@@ -420,7 +420,8 @@ struct snd_pcm_hw_params {
unsigned int rate_num; /* R: rate numerator */
unsigned int rate_den; /* R: rate denominator */
snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */
- unsigned char reserved[64]; /* reserved for future */
+ unsigned char sync[16]; /* R: synchronization ID (perfect sync - one clock source) */
+ unsigned char reserved[48]; /* reserved for future */
};
enum {
@@ -868,7 +869,7 @@ struct snd_ump_block_info {
* Timer section - /dev/snd/timer
*/
-#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7)
+#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8)
enum {
SNDRV_TIMER_CLASS_NONE = -1,
@@ -893,6 +894,7 @@ enum {
#define SNDRV_TIMER_GLOBAL_RTC 1 /* unused */
#define SNDRV_TIMER_GLOBAL_HPET 2
#define SNDRV_TIMER_GLOBAL_HRTIMER 3
+#define SNDRV_TIMER_GLOBAL_UDRIVEN 4
/* info flags */
#define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */
@@ -973,6 +975,18 @@ struct snd_timer_status {
};
#endif
+/*
+ * This structure describes the userspace-driven timer. Such timers are purely virtual,
+ * and can only be triggered from software (for instance, by userspace application).
+ */
+struct snd_timer_uinfo {
+ /* To pretend being a normal timer, we need to know the resolution in ns. */
+ __u64 resolution;
+ int fd;
+ unsigned int id;
+ unsigned char reserved[16];
+};
+
#define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int)
#define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id)
#define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int)
@@ -989,6 +1003,8 @@ struct snd_timer_status {
#define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2)
#define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3)
#define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int)
+#define SNDRV_TIMER_IOCTL_CREATE _IOWR('T', 0xa5, struct snd_timer_uinfo)
+#define SNDRV_TIMER_IOCTL_TRIGGER _IO('T', 0xa6)
#if __BITS_PER_LONG == 64
#define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
index ed3ff969b546..2da581ff0c80 100644
--- a/tools/perf/trace/beauty/msg_flags.c
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -11,6 +11,9 @@
#ifndef MSG_BATCH
#define MSG_BATCH 0x40000
#endif
+#ifndef MSG_SOCK_DEVMEM
+#define MSG_SOCK_DEVMEM 0x2000000
+#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif
@@ -57,6 +60,7 @@ static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
P_MSG_FLAG(MORE);
P_MSG_FLAG(WAITFORONE);
P_MSG_FLAG(BATCH);
+ P_MSG_FLAG(SOCK_DEVMEM);
P_MSG_FLAG(ZEROCOPY);
P_MSG_FLAG(SPLICE_PAGES);
P_MSG_FLAG(FASTOPEN);
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
index 01ee15fe9d0c..9f1ed989c775 100644
--- a/tools/perf/trace/beauty/perf_event_open.c
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -76,7 +76,7 @@ static size_t perf_event_attr___scnprintf(struct perf_event_attr *attr, char *bf
static size_t syscall_arg__scnprintf_augmented_perf_event_attr(struct syscall_arg *arg, char *bf, size_t size)
{
- return perf_event_attr___scnprintf((void *)arg->augmented.args, bf, size, arg->trace->show_zeros);
+ return perf_event_attr___scnprintf((void *)arg->augmented.args->value, bf, size, arg->trace->show_zeros);
}
static size_t syscall_arg__scnprintf_perf_event_attr(char *bf, size_t size, struct syscall_arg *arg)
@@ -88,3 +88,7 @@ static size_t syscall_arg__scnprintf_perf_event_attr(char *bf, size_t size, stru
}
#define SCA_PERF_ATTR syscall_arg__scnprintf_perf_event_attr
+// 'argname' is just documentational at this point, to remove the previous comment with that info
+#define SCA_PERF_ATTR_FROM_USER(argname) \
+ { .scnprintf = SCA_PERF_ATTR, \
+ .from_user = true, }
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
index 2e0e867c0c1b..a17a27ac2a6f 100644
--- a/tools/perf/trace/beauty/sockaddr.c
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -47,7 +47,7 @@ static size_t (*af_scnprintfs[])(struct sockaddr *sa, char *bf, size_t size) = {
static size_t syscall_arg__scnprintf_augmented_sockaddr(struct syscall_arg *arg, char *bf, size_t size)
{
- struct sockaddr *sa = (struct sockaddr *)arg->augmented.args;
+ struct sockaddr *sa = (struct sockaddr *)&arg->augmented.args->value;
char family[32];
size_t printed;
diff --git a/tools/perf/trace/beauty/timespec.c b/tools/perf/trace/beauty/timespec.c
index e1a61f092aad..b14ab72a2738 100644
--- a/tools/perf/trace/beauty/timespec.c
+++ b/tools/perf/trace/beauty/timespec.c
@@ -7,7 +7,7 @@
static size_t syscall_arg__scnprintf_augmented_timespec(struct syscall_arg *arg, char *bf, size_t size)
{
- struct timespec *ts = (struct timespec *)arg->augmented.args;
+ struct timespec *ts = (struct timespec *)arg->augmented.args->value;
return scnprintf(bf, size, "{ .tv_sec: %" PRIu64 ", .tv_nsec: %" PRIu64 " }", ts->tv_sec, ts->tv_nsec);
}
diff --git a/tools/perf/ui/browsers/annotate-data.c b/tools/perf/ui/browsers/annotate-data.c
index 8d6bf08d371d..cd562a8822b7 100644
--- a/tools/perf/ui/browsers/annotate-data.c
+++ b/tools/perf/ui/browsers/annotate-data.c
@@ -14,17 +14,26 @@
#include "util/evlist.h"
#include "util/sort.h"
-struct annotated_data_browser {
- struct ui_browser b;
- struct list_head entries;
- int nr_events;
-};
+#define FOLDED_SIGN '+'
+#define UNFOLD_SIGN '-'
+#define NOCHLD_SIGN ' '
struct browser_entry {
struct list_head node;
struct annotated_member *data;
struct type_hist_entry *hists;
- int indent;
+ struct browser_entry *parent;
+ struct list_head children;
+ int indent; /*indentation level, starts from 0 */
+ int nr_entries; /* # of visible entries: self + descendents */
+ bool folded; /* only can be false when it has children */
+};
+
+struct annotated_data_browser {
+ struct ui_browser b;
+ struct list_head entries;
+ struct browser_entry *curr;
+ int nr_events;
};
static struct annotated_data_browser *get_browser(struct ui_browser *uib)
@@ -51,23 +60,28 @@ static int get_member_overhead(struct annotated_data_type *adt,
struct evsel *evsel;
int offset = member->offset + i;
+ k = 0;
for_each_group_evsel(evsel, leader) {
+ if (symbol_conf.skip_empty &&
+ evsel__hists(evsel)->stats.nr_samples == 0)
+ continue;
+
h = adt->histograms[evsel->core.idx];
- k = evsel__group_idx(evsel);
- update_hist_entry(&entry->hists[k], &h->addr[offset]);
+ update_hist_entry(&entry->hists[k++], &h->addr[offset]);
}
}
return 0;
}
static int add_child_entries(struct annotated_data_browser *browser,
+ struct browser_entry *parent,
struct annotated_data_type *adt,
struct annotated_member *member,
struct evsel *evsel, int indent)
{
struct annotated_member *pos;
struct browser_entry *entry;
- int nr_entries = 0;
+ struct list_head *parent_list;
entry = zalloc(sizeof(*entry));
if (entry == NULL)
@@ -80,36 +94,60 @@ static int add_child_entries(struct annotated_data_browser *browser,
}
entry->data = member;
+ entry->parent = parent;
entry->indent = indent;
if (get_member_overhead(adt, entry, evsel) < 0) {
free(entry);
return -1;
}
- list_add_tail(&entry->node, &browser->entries);
- nr_entries++;
+ INIT_LIST_HEAD(&entry->children);
+ if (parent)
+ parent_list = &parent->children;
+ else
+ parent_list = &browser->entries;
- list_for_each_entry(pos, &member->children, node) {
- int nr = add_child_entries(browser, adt, pos, evsel, indent + 1);
+ list_add_tail(&entry->node, parent_list);
+ list_for_each_entry(pos, &member->children, node) {
+ int nr = add_child_entries(browser, entry, adt, pos, evsel,
+ indent + 1);
if (nr < 0)
return nr;
-
- nr_entries += nr;
}
/* add an entry for the closing bracket ("}") */
if (!list_empty(&member->children)) {
- entry = zalloc(sizeof(*entry));
- if (entry == NULL)
+ struct browser_entry *bracket;
+
+ bracket = zalloc(sizeof(*bracket));
+ if (bracket == NULL)
return -1;
- entry->indent = indent;
- list_add_tail(&entry->node, &browser->entries);
- nr_entries++;
+ bracket->indent = indent;
+ bracket->parent = entry;
+ bracket->folded = true;
+ bracket->nr_entries = 1;
+
+ INIT_LIST_HEAD(&bracket->children);
+ list_add_tail(&bracket->node, &entry->children);
}
- return nr_entries;
+ /* fold child entries by default */
+ entry->folded = true;
+ entry->nr_entries = 1;
+ return 0;
+}
+
+static u32 count_visible_entries(struct annotated_data_browser *browser)
+{
+ int nr = 0;
+ struct browser_entry *entry;
+
+ list_for_each_entry(entry, &browser->entries, node)
+ nr += entry->nr_entries;
+
+ return nr;
}
static int annotated_data_browser__collect_entries(struct annotated_data_browser *browser)
@@ -119,9 +157,12 @@ static int annotated_data_browser__collect_entries(struct annotated_data_browser
struct evsel *evsel = hists_to_evsel(he->hists);
INIT_LIST_HEAD(&browser->entries);
+
+ add_child_entries(browser, /*parent=*/NULL, adt, &adt->self, evsel,
+ /*indent=*/0);
+
browser->b.entries = &browser->entries;
- browser->b.nr_entries = add_child_entries(browser, adt, &adt->self,
- evsel, /*indent=*/0);
+ browser->b.nr_entries = count_visible_entries(browser);
return 0;
}
@@ -136,9 +177,158 @@ static void annotated_data_browser__delete_entries(struct annotated_data_browser
}
}
+static struct browser_entry *get_first_child(struct browser_entry *entry)
+{
+ if (list_empty(&entry->children))
+ return NULL;
+
+ return list_first_entry(&entry->children, struct browser_entry, node);
+}
+
+static struct browser_entry *get_last_child(struct browser_entry *entry)
+{
+ if (list_empty(&entry->children))
+ return NULL;
+
+ return list_last_entry(&entry->children, struct browser_entry, node);
+}
+
+static bool is_first_child(struct browser_entry *entry)
+{
+ /* This will be checked in a different way */
+ if (entry->parent == NULL)
+ return false;
+
+ return get_first_child(entry->parent) == entry;
+}
+
+static bool is_last_child(struct browser_entry *entry)
+{
+ /* This will be checked in a different way */
+ if (entry->parent == NULL)
+ return false;
+
+ return get_last_child(entry->parent) == entry;
+}
+
+static struct browser_entry *browser__prev_entry(struct ui_browser *uib,
+ struct browser_entry *entry)
+{
+ struct annotated_data_browser *browser = get_browser(uib);
+ struct browser_entry *first;
+
+ first = list_first_entry(&browser->entries, struct browser_entry, node);
+
+ while (entry != first) {
+ if (is_first_child(entry))
+ entry = entry->parent;
+ else {
+ entry = list_prev_entry(entry, node);
+ while (!entry->folded)
+ entry = get_last_child(entry);
+ }
+
+ if (!uib->filter || !uib->filter(uib, &entry->node))
+ return entry;
+ }
+ return first;
+}
+
+static struct browser_entry *browser__next_entry(struct ui_browser *uib,
+ struct browser_entry *entry)
+{
+ struct annotated_data_browser *browser = get_browser(uib);
+ struct browser_entry *last;
+
+ last = list_last_entry(&browser->entries, struct browser_entry, node);
+ while (!last->folded)
+ last = get_last_child(last);
+
+ while (entry != last) {
+ if (!entry->folded)
+ entry = get_first_child(entry);
+ else {
+ while (is_last_child(entry))
+ entry = entry->parent;
+
+ entry = list_next_entry(entry, node);
+ }
+
+ if (!uib->filter || !uib->filter(uib, &entry->node))
+ return entry;
+ }
+ return last;
+}
+
+static void browser__seek(struct ui_browser *uib, off_t offset, int whence)
+{
+ struct annotated_data_browser *browser = get_browser(uib);
+ struct browser_entry *entry;
+
+ if (uib->nr_entries == 0)
+ return;
+
+ switch (whence) {
+ case SEEK_SET:
+ entry = list_first_entry(&browser->entries, typeof(*entry), node);
+ if (uib->filter && uib->filter(uib, &entry->node))
+ entry = browser__next_entry(uib, entry);
+ break;
+ case SEEK_CUR:
+ entry = list_entry(uib->top, typeof(*entry), node);
+ break;
+ case SEEK_END:
+ entry = list_last_entry(&browser->entries, typeof(*entry), node);
+ while (!entry->folded)
+ entry = get_last_child(entry);
+ if (uib->filter && uib->filter(uib, &entry->node))
+ entry = browser__prev_entry(uib, entry);
+ break;
+ default:
+ return;
+ }
+
+ assert(entry != NULL);
+
+ if (offset > 0) {
+ while (offset-- != 0)
+ entry = browser__next_entry(uib, entry);
+ } else {
+ while (offset++ != 0)
+ entry = browser__prev_entry(uib, entry);
+ }
+
+ uib->top = &entry->node;
+}
+
static unsigned int browser__refresh(struct ui_browser *uib)
{
- return ui_browser__list_head_refresh(uib);
+ struct annotated_data_browser *browser = get_browser(uib);
+ struct browser_entry *entry, *next;
+ int row = 0;
+
+ if (uib->top == NULL || uib->top == uib->entries)
+ browser__seek(uib, SEEK_SET, 0);
+
+ entry = list_entry(uib->top, typeof(*entry), node);
+
+ while (true) {
+ if (!uib->filter || !uib->filter(uib, &entry->node)) {
+ ui_browser__gotorc(uib, row, 0);
+ uib->write(uib, entry, row);
+ if (uib->top_idx + row == uib->index)
+ browser->curr = entry;
+ if (++row == uib->rows)
+ break;
+ }
+ next = browser__next_entry(uib, entry);
+ if (next == entry)
+ break;
+
+ entry = next;
+ }
+
+ return row;
}
static int browser__show(struct ui_browser *uib)
@@ -167,7 +357,7 @@ static int browser__show(struct ui_browser *uib)
strcpy(title, "Percent");
ui_browser__printf(uib, "%*s %10s %10s %10s %s",
- 11 * (browser->nr_events - 1), "",
+ 2 + 11 * (browser->nr_events - 1), "",
title, "Offset", "Size", "Field");
ui_browser__write_nstring(uib, "", uib->width);
return 0;
@@ -203,12 +393,13 @@ static void browser__write(struct ui_browser *uib, void *entry, int row)
struct annotated_data_type *adt = he->mem_type;
struct evsel *leader = hists_to_evsel(he->hists);
struct evsel *evsel;
+ int idx = 0;
+ bool current = ui_browser__is_current_entry(uib, row);
if (member == NULL) {
- bool current = ui_browser__is_current_entry(uib, row);
-
/* print the closing bracket */
ui_browser__set_percent_color(uib, 0, current);
+ ui_browser__printf(uib, "%c ", NOCHLD_SIGN);
ui_browser__write_nstring(uib, "", 11 * browser->nr_events);
ui_browser__printf(uib, " %10s %10s %*s};",
"", "", be->indent * 4, "");
@@ -216,31 +407,113 @@ static void browser__write(struct ui_browser *uib, void *entry, int row)
return;
}
+ ui_browser__set_percent_color(uib, 0, current);
+
+ if (!list_empty(&be->children))
+ ui_browser__printf(uib, "%c ", be->folded ? FOLDED_SIGN : UNFOLD_SIGN);
+ else
+ ui_browser__printf(uib, "%c ", NOCHLD_SIGN);
+
/* print the number */
for_each_group_evsel(evsel, leader) {
struct type_hist *h = adt->histograms[evsel->core.idx];
- int idx = evsel__group_idx(evsel);
- browser__write_overhead(uib, h, &be->hists[idx], row);
+ if (symbol_conf.skip_empty &&
+ evsel__hists(evsel)->stats.nr_samples == 0)
+ continue;
+
+ browser__write_overhead(uib, h, &be->hists[idx++], row);
}
/* print type info */
if (be->indent == 0 && !member->var_name) {
- ui_browser__printf(uib, " %10d %10d %s%s",
+ ui_browser__printf(uib, " %#10x %#10x %s%s",
member->offset, member->size,
member->type_name,
- list_empty(&member->children) ? ";" : " {");
+ list_empty(&member->children) || be->folded? ";" : " {");
} else {
- ui_browser__printf(uib, " %10d %10d %*s%s\t%s%s",
+ ui_browser__printf(uib, " %#10x %#10x %*s%s\t%s%s",
member->offset, member->size,
be->indent * 4, "", member->type_name,
member->var_name ?: "",
- list_empty(&member->children) ? ";" : " {");
+ list_empty(&member->children) || be->folded ? ";" : " {");
}
/* fill the rest */
ui_browser__write_nstring(uib, "", uib->width);
}
+static void annotated_data_browser__fold(struct annotated_data_browser *browser,
+ struct browser_entry *entry,
+ bool recursive)
+{
+ struct browser_entry *child;
+
+ if (list_empty(&entry->children))
+ return;
+ if (entry->folded && !recursive)
+ return;
+
+ if (recursive) {
+ list_for_each_entry(child, &entry->children, node)
+ annotated_data_browser__fold(browser, child, true);
+ }
+
+ entry->nr_entries = 1;
+ entry->folded = true;
+}
+
+static void annotated_data_browser__unfold(struct annotated_data_browser *browser,
+ struct browser_entry *entry,
+ bool recursive)
+{
+ struct browser_entry *child;
+ int nr_entries;
+
+ if (list_empty(&entry->children))
+ return;
+ if (!entry->folded && !recursive)
+ return;
+
+ nr_entries = 1; /* for self */
+ list_for_each_entry(child, &entry->children, node) {
+ if (recursive)
+ annotated_data_browser__unfold(browser, child, true);
+
+ nr_entries += child->nr_entries;
+ }
+
+ entry->nr_entries = nr_entries;
+ entry->folded = false;
+}
+
+static void annotated_data_browser__toggle_fold(struct annotated_data_browser *browser,
+ bool recursive)
+{
+ struct browser_entry *curr = browser->curr;
+ struct browser_entry *parent;
+
+ parent = curr->parent;
+ while (parent) {
+ parent->nr_entries -= curr->nr_entries;
+ parent = parent->parent;
+ }
+ browser->b.nr_entries -= curr->nr_entries;
+
+ if (curr->folded)
+ annotated_data_browser__unfold(browser, curr, recursive);
+ else
+ annotated_data_browser__fold(browser, curr, recursive);
+
+ parent = curr->parent;
+ while (parent) {
+ parent->nr_entries += curr->nr_entries;
+ parent = parent->parent;
+ }
+ browser->b.nr_entries += curr->nr_entries;
+
+ assert(browser->b.nr_entries == count_visible_entries(browser));
+}
+
static int annotated_data_browser__run(struct annotated_data_browser *browser,
struct evsel *evsel __maybe_unused,
struct hist_browser_timer *hbt)
@@ -265,8 +538,18 @@ static int annotated_data_browser__run(struct annotated_data_browser *browser,
"UP/DOWN/PGUP\n"
"PGDN/SPACE Navigate\n"
"</> Move to prev/next symbol\n"
+ "e Expand/Collapse current entry\n"
+ "E Expand/Collapse all children of the current\n"
"q/ESC/CTRL+C Exit\n\n");
continue;
+ case 'e':
+ annotated_data_browser__toggle_fold(browser,
+ /*recursive=*/false);
+ break;
+ case 'E':
+ annotated_data_browser__toggle_fold(browser,
+ /*recursive=*/true);
+ break;
case K_LEFT:
case '<':
case '>':
@@ -289,7 +572,7 @@ int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel,
struct annotated_data_browser browser = {
.b = {
.refresh = browser__refresh,
- .seek = ui_browser__list_head_seek,
+ .seek = browser__seek,
.write = browser__write,
.priv = he,
.extra_title_lines = 1,
@@ -300,13 +583,30 @@ int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel,
ui_helpline__push("Press ESC to exit");
- if (evsel__is_group_event(evsel))
- browser.nr_events = evsel->core.nr_members;
+ if (evsel__is_group_event(evsel)) {
+ struct evsel *pos;
+ int nr = 0;
+
+ for_each_group_evsel(pos, evsel) {
+ if (!symbol_conf.skip_empty ||
+ evsel__hists(pos)->stats.nr_samples)
+ nr++;
+ }
+ browser.nr_events = nr;
+ }
ret = annotated_data_browser__collect_entries(&browser);
- if (ret == 0)
- ret = annotated_data_browser__run(&browser, evsel, hbt);
+ if (ret < 0)
+ goto out;
+ /* To get the top and current entry */
+ browser__refresh(&browser.b);
+ /* Show the first-level child entries by default */
+ annotated_data_browser__toggle_fold(&browser, /*recursive=*/false);
+
+ ret = annotated_data_browser__run(&browser, evsel, hbt);
+
+out:
annotated_data_browser__delete_entries(&browser);
return ret;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index ea986430241e..d7e727345dab 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -156,6 +156,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
u8 pcnt_width = annotation__pcnt_width(notes);
+ u8 cntr_width = annotation__br_cntr_width();
int width;
int diff = 0;
@@ -205,13 +206,13 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
__ui_browser__line_arrow(browser,
- pcnt_width + 2 + notes->src->widths.addr + width,
+ pcnt_width + 2 + notes->src->widths.addr + width + cntr_width,
from, to);
diff = is_fused(ab, cursor);
if (diff > 0) {
ui_browser__mark_fused(browser,
- pcnt_width + 3 + notes->src->widths.addr + width,
+ pcnt_width + 3 + notes->src->widths.addr + width + cntr_width,
from - diff, diff, to > from);
}
}
@@ -714,6 +715,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
struct annotation *notes = symbol__annotation(ms->sym);
const char *help = "Press 'h' for help on key bindings";
int delay_secs = hbt ? hbt->refresh : 0;
+ char *br_cntr_text = NULL;
char title[256];
int key;
@@ -730,6 +732,8 @@ static int annotate_browser__run(struct annotate_browser *browser,
nd = browser->curr_hot;
+ annotation_br_cntr_abbr_list(&br_cntr_text, evsel, false);
+
while (1) {
key = ui_browser__run(&browser->b, delay_secs);
@@ -796,6 +800,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
"r Run available scripts\n"
"p Toggle percent type [local/global]\n"
"b Toggle percent base [period/hits]\n"
+ "B Branch counter abbr list (Optional)\n"
"? Search string backwards\n"
"f Toggle showing offsets to full address\n");
continue;
@@ -904,6 +909,14 @@ show_sup_ins:
hists__scnprintf_title(hists, title, sizeof(title));
annotate_browser__show(&browser->b, title, help);
continue;
+ case 'B':
+ if (br_cntr_text)
+ ui_browser__help_window(&browser->b, br_cntr_text);
+ else {
+ ui_browser__help_window(&browser->b,
+ "\n The branch counter is not available.\n");
+ }
+ continue;
case 'f':
annotation__toggle_full_addr(notes, ms);
continue;
@@ -923,6 +936,7 @@ show_sup_ins:
}
out:
ui_browser__hide(&browser->b);
+ free(br_cntr_text);
return key;
}
@@ -985,7 +999,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
browser.b.width = notes->src->widths.max_line_len;
browser.b.nr_entries = notes->src->nr_entries;
- browser.b.entries = &notes->src->source,
+ browser.b.entries = &notes->src->source;
browser.b.width += 18; /* Percentage */
if (annotate_opts.hide_src_code)
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index b7219df51236..49ba82bf3391 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3684,8 +3684,10 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
struct hist_browser *browser;
int key = -1;
struct popup_action action;
+ char *br_cntr_text = NULL;
static const char help[] =
- " q Quit \n";
+ " q Quit \n"
+ " B Branch counter abbr list (Optional)\n";
browser = hist_browser__new(hists);
if (!browser)
@@ -3703,6 +3705,9 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
memset(&action, 0, sizeof(action));
+ if (!annotation_br_cntr_abbr_list(&br_cntr_text, evsel, false))
+ annotate_opts.show_br_cntr = true;
+
while (1) {
key = hist_browser__run(browser, "? - help", true, 0);
@@ -3723,6 +3728,16 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
action.ms.sym = browser->selection->sym;
do_annotate(browser, &action);
continue;
+ case 'B':
+ if (br_cntr_text) {
+ ui__question_window("Branch counter abbr list",
+ br_cntr_text, "Press any key...", 0);
+ } else {
+ ui__question_window("Branch counter abbr list",
+ "\n The branch counter is not available.\n",
+ "Press any key...", 0);
+ }
+ continue;
default:
break;
}
@@ -3730,5 +3745,6 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
out:
hist_browser__delete(browser);
+ free(br_cntr_text);
return 0;
}
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 5d1f04f66a5a..e5491995adf0 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -62,7 +62,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
struct evsel *pos;
char *buf = hpp->buf;
size_t size = hpp->size;
- int i, nr_members = 1;
+ int i = 0, nr_members = 1;
struct hpp_fmt_value *values;
if (evsel__is_group_event(evsel))
@@ -72,16 +72,16 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
if (values == NULL)
return 0;
- i = 0;
- for_each_group_evsel(pos, evsel)
- values[i++].hists = evsel__hists(pos);
-
+ values[0].hists = evsel__hists(evsel);
values[0].val = get_field(he);
values[0].samples = he->stat.nr_events;
if (evsel__is_group_event(evsel)) {
struct hist_entry *pair;
+ for_each_group_member(pos, evsel)
+ values[++i].hists = evsel__hists(pos);
+
list_for_each_entry(pair, &he->pairs.head, pairs.node) {
for (i = 0; i < nr_members; i++) {
if (values[i].hists != pair->hists)
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 9372e8904d22..74b2c619c56c 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -913,11 +913,11 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp)
continue;
if (i && total) {
- ret += fprintf(fp, "%16s events: %10d (%4.1f%%)\n",
+ ret += fprintf(fp, "%20s events: %10d (%4.1f%%)\n",
name, stats->nr_events[i],
100.0 * stats->nr_events[i] / total);
} else {
- ret += fprintf(fp, "%16s events: %10d\n",
+ ret += fprintf(fp, "%20s events: %10d\n",
name, stats->nr_events[i]);
}
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 0f18fe81ef0b..dc616292b2dd 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -13,6 +13,7 @@ perf-util-y += copyfile.o
perf-util-y += ctype.o
perf-util-y += db-export.o
perf-util-y += disasm.o
+perf-util-y += disasm_bpf.o
perf-util-y += env.o
perf-util-y += event.o
perf-util-y += evlist.o
@@ -65,6 +66,7 @@ perf-util-y += map.o
perf-util-y += maps.o
perf-util-y += pstack.o
perf-util-y += session.o
+perf-util-y += tool.o
perf-util-y += sample-raw.o
perf-util-y += s390-sample-raw.o
perf-util-y += amd-sample-raw.o
@@ -154,6 +156,7 @@ perf-util-y += clockid.o
perf-util-y += list_sort.o
perf-util-y += mutex.o
perf-util-y += sharded_mutex.o
+perf-util-$(CONFIG_X86_64) += intel-tpebs.o
perf-util-$(CONFIG_LIBBPF) += bpf_map.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
@@ -220,12 +223,13 @@ perf-util-$(CONFIG_ZLIB) += zlib.o
perf-util-$(CONFIG_LZMA) += lzma.o
perf-util-$(CONFIG_ZSTD) += zstd.o
-perf-util-$(CONFIG_LIBCAP) += cap.o
+perf-util-y += cap.o
perf-util-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o
perf-util-y += demangle-ocaml.o
perf-util-y += demangle-java.o
perf-util-y += demangle-rust.o
+perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o
ifdef CONFIG_JITDUMP
perf-util-$(CONFIG_LIBELF) += jitdump.o
@@ -275,12 +279,12 @@ $(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
-o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
-$(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-flex.h: util/bpf-filter.l $(OUTPUT)util/bpf-filter-bison.c
+$(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-flex.h: util/bpf-filter.l $(OUTPUT)util/bpf-filter-bison.c util/bpf-filter.h util/bpf_skel/sample-filter.h
$(call rule_mkdir)
$(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/bpf-filter-flex.c \
--header-file=$(OUTPUT)util/bpf-filter-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filter.y
+$(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filter.y util/bpf-filter.h util/bpf_skel/sample-filter.h
$(call rule_mkdir)
$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
-o $(OUTPUT)util/bpf-filter-bison.c -p perf_bpf_filter_
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 965da6c0b542..976abedca09e 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -31,15 +31,6 @@
static void delete_var_types(struct die_var_type *var_types);
-enum type_state_kind {
- TSR_KIND_INVALID = 0,
- TSR_KIND_TYPE,
- TSR_KIND_PERCPU_BASE,
- TSR_KIND_CONST,
- TSR_KIND_POINTER,
- TSR_KIND_CANARY,
-};
-
#define pr_debug_dtp(fmt, ...) \
do { \
if (debug_type_profile) \
@@ -48,7 +39,7 @@ do { \
pr_debug3(fmt, ##__VA_ARGS__); \
} while (0)
-static void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind)
+void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind)
{
struct strbuf sb;
char *str;
@@ -104,7 +95,7 @@ static void pr_debug_location(Dwarf_Die *die, u64 pc, int reg)
return;
while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) {
- if (reg != DWARF_REG_PC && end < pc)
+ if (reg != DWARF_REG_PC && end <= pc)
continue;
if (reg != DWARF_REG_PC && start > pc)
break;
@@ -140,49 +131,27 @@ static void pr_debug_location(Dwarf_Die *die, u64 pc, int reg)
}
}
-/*
- * Type information in a register, valid when @ok is true.
- * The @caller_saved registers are invalidated after a function call.
- */
-struct type_state_reg {
- Dwarf_Die type;
- u32 imm_value;
- bool ok;
- bool caller_saved;
- u8 kind;
-};
+static void pr_debug_scope(Dwarf_Die *scope_die)
+{
+ int tag;
-/* Type information in a stack location, dynamically allocated */
-struct type_state_stack {
- struct list_head list;
- Dwarf_Die type;
- int offset;
- int size;
- bool compound;
- u8 kind;
-};
+ if (!debug_type_profile && verbose < 3)
+ return;
-/* FIXME: This should be arch-dependent */
-#define TYPE_STATE_MAX_REGS 16
+ pr_info("(die:%lx) ", (long)dwarf_dieoffset(scope_die));
-/*
- * State table to maintain type info in each register and stack location.
- * It'll be updated when new variable is allocated or type info is moved
- * to a new location (register or stack). As it'd be used with the
- * shortest path of basic blocks, it only maintains a single table.
- */
-struct type_state {
- /* state of general purpose registers */
- struct type_state_reg regs[TYPE_STATE_MAX_REGS];
- /* state of stack location */
- struct list_head stack_vars;
- /* return value register */
- int ret_reg;
- /* stack pointer register */
- int stack_reg;
-};
+ tag = dwarf_tag(scope_die);
+ if (tag == DW_TAG_subprogram)
+ pr_info("[function] %s\n", dwarf_diename(scope_die));
+ else if (tag == DW_TAG_inlined_subroutine)
+ pr_info("[inlined] %s\n", dwarf_diename(scope_die));
+ else if (tag == DW_TAG_lexical_block)
+ pr_info("[block]\n");
+ else
+ pr_info("[unknown] tag=%x\n", tag);
+}
-static bool has_reg_type(struct type_state *state, int reg)
+bool has_reg_type(struct type_state *state, int reg)
{
return (unsigned)reg < ARRAY_SIZE(state->regs);
}
@@ -253,7 +222,7 @@ static int __add_member_cb(Dwarf_Die *die, void *arg)
struct annotated_member *parent = arg;
struct annotated_member *member;
Dwarf_Die member_type, die_mem;
- Dwarf_Word size, loc;
+ Dwarf_Word size, loc, bit_size = 0;
Dwarf_Attribute attr;
struct strbuf sb;
int tag;
@@ -268,29 +237,56 @@ static int __add_member_cb(Dwarf_Die *die, void *arg)
strbuf_init(&sb, 32);
die_get_typename(die, &sb);
- die_get_real_type(die, &member_type);
- if (dwarf_aggregate_size(&member_type, &size) < 0)
+ __die_get_real_type(die, &member_type);
+ if (dwarf_tag(&member_type) == DW_TAG_typedef)
+ die_get_real_type(&member_type, &die_mem);
+ else
+ die_mem = member_type;
+
+ if (dwarf_aggregate_size(&die_mem, &size) < 0)
size = 0;
- if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr))
- loc = 0;
- else
+ if (dwarf_attr_integrate(die, DW_AT_data_member_location, &attr))
dwarf_formudata(&attr, &loc);
+ else {
+ /* bitfield member */
+ if (dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr) &&
+ dwarf_formudata(&attr, &loc) == 0)
+ loc /= 8;
+ else
+ loc = 0;
+
+ if (dwarf_attr_integrate(die, DW_AT_bit_size, &attr) &&
+ dwarf_formudata(&attr, &bit_size) == 0)
+ size = (bit_size + 7) / 8;
+ }
member->type_name = strbuf_detach(&sb, NULL);
/* member->var_name can be NULL */
- if (dwarf_diename(die))
- member->var_name = strdup(dwarf_diename(die));
+ if (dwarf_diename(die)) {
+ if (bit_size) {
+ if (asprintf(&member->var_name, "%s:%ld",
+ dwarf_diename(die), (long)bit_size) < 0)
+ member->var_name = NULL;
+ } else {
+ member->var_name = strdup(dwarf_diename(die));
+ }
+
+ if (member->var_name == NULL) {
+ free(member);
+ return DIE_FIND_CB_END;
+ }
+ }
member->size = size;
member->offset = loc + parent->offset;
INIT_LIST_HEAD(&member->children);
list_add_tail(&member->node, &parent->children);
- tag = dwarf_tag(&member_type);
+ tag = dwarf_tag(&die_mem);
switch (tag) {
case DW_TAG_structure_type:
case DW_TAG_union_type:
- die_find_child(&member_type, __add_member_cb, member, &die_mem);
+ die_find_child(&die_mem, __add_member_cb, member, &die_mem);
break;
default:
break;
@@ -332,6 +328,10 @@ static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
if (die_get_typename_from_type(type_die, &sb) < 0)
strbuf_add(&sb, "(unknown type)", 14);
type_name = strbuf_detach(&sb, NULL);
+
+ if (dwarf_tag(type_die) == DW_TAG_typedef)
+ die_get_real_type(type_die, type_die);
+
dwarf_aggregate_size(type_die, &size);
/* Check existing nodes in dso->data_types tree */
@@ -387,61 +387,142 @@ static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die)
return false;
}
+enum type_match_result {
+ PERF_TMR_UNKNOWN = 0,
+ PERF_TMR_OK,
+ PERF_TMR_NO_TYPE,
+ PERF_TMR_NO_POINTER,
+ PERF_TMR_NO_SIZE,
+ PERF_TMR_BAD_OFFSET,
+ PERF_TMR_BAIL_OUT,
+};
+
+static const char *match_result_str(enum type_match_result tmr)
+{
+ switch (tmr) {
+ case PERF_TMR_OK:
+ return "Good!";
+ case PERF_TMR_NO_TYPE:
+ return "no type information";
+ case PERF_TMR_NO_POINTER:
+ return "no/void pointer";
+ case PERF_TMR_NO_SIZE:
+ return "type size is unknown";
+ case PERF_TMR_BAD_OFFSET:
+ return "offset bigger than size";
+ case PERF_TMR_UNKNOWN:
+ case PERF_TMR_BAIL_OUT:
+ default:
+ return "invalid state";
+ }
+}
+
+static bool is_pointer_type(Dwarf_Die *type_die)
+{
+ int tag = dwarf_tag(type_die);
+
+ return tag == DW_TAG_pointer_type || tag == DW_TAG_array_type;
+}
+
+static bool is_compound_type(Dwarf_Die *type_die)
+{
+ int tag = dwarf_tag(type_die);
+
+ return tag == DW_TAG_structure_type || tag == DW_TAG_union_type;
+}
+
+/* returns if Type B has better information than Type A */
+static bool is_better_type(Dwarf_Die *type_a, Dwarf_Die *type_b)
+{
+ Dwarf_Word size_a, size_b;
+ Dwarf_Die die_a, die_b;
+
+ /* pointer type is preferred */
+ if (is_pointer_type(type_a) != is_pointer_type(type_b))
+ return is_pointer_type(type_b);
+
+ if (is_pointer_type(type_b)) {
+ /*
+ * We want to compare the target type, but 'void *' can fail to
+ * get the target type.
+ */
+ if (die_get_real_type(type_a, &die_a) == NULL)
+ return true;
+ if (die_get_real_type(type_b, &die_b) == NULL)
+ return false;
+
+ type_a = &die_a;
+ type_b = &die_b;
+ }
+
+ /* bigger type is preferred */
+ if (dwarf_aggregate_size(type_a, &size_a) < 0 ||
+ dwarf_aggregate_size(type_b, &size_b) < 0)
+ return false;
+
+ if (size_a != size_b)
+ return size_a < size_b;
+
+ /* struct or union is preferred */
+ if (is_compound_type(type_a) != is_compound_type(type_b))
+ return is_compound_type(type_b);
+
+ /* typedef is preferred */
+ if (dwarf_tag(type_b) == DW_TAG_typedef)
+ return true;
+
+ return false;
+}
+
/* The type info will be saved in @type_die */
-static int check_variable(struct data_loc_info *dloc, Dwarf_Die *var_die,
- Dwarf_Die *type_die, int reg, int offset, bool is_fbreg)
+static enum type_match_result check_variable(struct data_loc_info *dloc,
+ Dwarf_Die *var_die,
+ Dwarf_Die *type_die, int reg,
+ int offset, bool is_fbreg)
{
Dwarf_Word size;
- bool is_pointer = true;
+ bool needs_pointer = true;
+ Dwarf_Die sized_type;
if (reg == DWARF_REG_PC)
- is_pointer = false;
+ needs_pointer = false;
else if (reg == dloc->fbreg || is_fbreg)
- is_pointer = false;
+ needs_pointer = false;
else if (arch__is(dloc->arch, "x86") && reg == X86_REG_SP)
- is_pointer = false;
+ needs_pointer = false;
/* Get the type of the variable */
- if (die_get_real_type(var_die, type_die) == NULL) {
- pr_debug_dtp("variable has no type\n");
- ann_data_stat.no_typeinfo++;
- return -1;
- }
+ if (__die_get_real_type(var_die, type_die) == NULL)
+ return PERF_TMR_NO_TYPE;
/*
* Usually it expects a pointer type for a memory access.
* Convert to a real type it points to. But global variables
* and local variables are accessed directly without a pointer.
*/
- if (is_pointer) {
- if ((dwarf_tag(type_die) != DW_TAG_pointer_type &&
- dwarf_tag(type_die) != DW_TAG_array_type) ||
- die_get_real_type(type_die, type_die) == NULL) {
- pr_debug_dtp("no pointer or no type\n");
- ann_data_stat.no_typeinfo++;
- return -1;
- }
+ if (needs_pointer) {
+ if (!is_pointer_type(type_die) ||
+ __die_get_real_type(type_die, type_die) == NULL)
+ return PERF_TMR_NO_POINTER;
}
+ if (dwarf_tag(type_die) == DW_TAG_typedef)
+ die_get_real_type(type_die, &sized_type);
+ else
+ sized_type = *type_die;
+
/* Get the size of the actual type */
- if (dwarf_aggregate_size(type_die, &size) < 0) {
- pr_debug_dtp("type size is unknown\n");
- ann_data_stat.invalid_size++;
- return -1;
- }
+ if (dwarf_aggregate_size(&sized_type, &size) < 0)
+ return PERF_TMR_NO_SIZE;
/* Minimal sanity check */
- if ((unsigned)offset >= size) {
- pr_debug_dtp("offset: %d is bigger than size: %"PRIu64"\n",
- offset, size);
- ann_data_stat.bad_offset++;
- return -1;
- }
+ if ((unsigned)offset >= size)
+ return PERF_TMR_BAD_OFFSET;
- return 0;
+ return PERF_TMR_OK;
}
-static struct type_state_stack *find_stack_state(struct type_state *state,
+struct type_state_stack *find_stack_state(struct type_state *state,
int offset)
{
struct type_state_stack *stack;
@@ -457,7 +538,7 @@ static struct type_state_stack *find_stack_state(struct type_state *state,
return NULL;
}
-static void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
+void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
Dwarf_Die *type_die)
{
int tag;
@@ -484,7 +565,7 @@ static void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
}
}
-static struct type_state_stack *findnew_stack_state(struct type_state *state,
+struct type_state_stack *findnew_stack_state(struct type_state *state,
int offset, u8 kind,
Dwarf_Die *type_die)
{
@@ -588,7 +669,7 @@ void global_var_type__tree_delete(struct rb_root *root)
}
}
-static bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
+bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
const char **var_name, int *var_offset)
{
struct addr_location al;
@@ -662,7 +743,7 @@ static void global_var__collect(struct data_loc_info *dloc)
}
}
-static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
+bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
u64 ip, u64 var_addr, int *var_offset,
Dwarf_Die *type_die)
{
@@ -688,7 +769,7 @@ static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
/* Try to get the variable by address first */
if (die_find_variable_by_addr(cu_die, var_addr, &var_die, &offset) &&
check_variable(dloc, &var_die, type_die, DWARF_REG_PC, offset,
- /*is_fbreg=*/false) == 0) {
+ /*is_fbreg=*/false) == PERF_TMR_OK) {
var_name = dwarf_diename(&var_die);
*var_offset = offset;
goto ok;
@@ -702,7 +783,7 @@ static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
/* Try to get the name of global variable */
if (die_find_variable_at(cu_die, var_name, pc, &var_die) &&
check_variable(dloc, &var_die, type_die, DWARF_REG_PC, *var_offset,
- /*is_fbreg=*/false) == 0)
+ /*is_fbreg=*/false) == PERF_TMR_OK)
goto ok;
return false;
@@ -713,6 +794,11 @@ ok:
return true;
}
+static bool die_is_same(Dwarf_Die *die_a, Dwarf_Die *die_b)
+{
+ return (die_a->cu == die_b->cu) && (die_a->addr == die_b->addr);
+}
+
/**
* update_var_state - Update type state using given variables
* @state: type state table
@@ -744,24 +830,36 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
if (!dwarf_offdie(dloc->di->dbg, var->die_off, &mem_die))
continue;
- if (var->reg == DWARF_REG_FB) {
- findnew_stack_state(state, var->offset, TSR_KIND_TYPE,
- &mem_die);
+ if (var->reg == DWARF_REG_FB || var->reg == fbreg) {
+ int offset = var->offset;
+ struct type_state_stack *stack;
- pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
- insn_offset, -var->offset);
- pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
- } else if (var->reg == fbreg) {
- findnew_stack_state(state, var->offset - fb_offset,
- TSR_KIND_TYPE, &mem_die);
+ if (var->reg != DWARF_REG_FB)
+ offset -= fb_offset;
+
+ stack = find_stack_state(state, offset);
+ if (stack && stack->kind == TSR_KIND_TYPE &&
+ !is_better_type(&stack->type, &mem_die))
+ continue;
+
+ findnew_stack_state(state, offset, TSR_KIND_TYPE,
+ &mem_die);
pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
- insn_offset, -var->offset + fb_offset);
+ insn_offset, -offset);
pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
} else if (has_reg_type(state, var->reg) && var->offset == 0) {
struct type_state_reg *reg;
+ Dwarf_Die orig_type;
reg = &state->regs[var->reg];
+
+ if (reg->ok && reg->kind == TSR_KIND_TYPE &&
+ !is_better_type(&reg->type, &mem_die))
+ continue;
+
+ orig_type = reg->type;
+
reg->type = mem_die;
reg->kind = TSR_KIND_TYPE;
reg->ok = true;
@@ -769,383 +867,31 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
pr_debug_dtp("var [%"PRIx64"] reg%d",
insn_offset, var->reg);
pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
- }
- }
-}
-
-static void update_insn_state_x86(struct type_state *state,
- struct data_loc_info *dloc, Dwarf_Die *cu_die,
- struct disasm_line *dl)
-{
- struct annotated_insn_loc loc;
- struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
- struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
- struct type_state_reg *tsr;
- Dwarf_Die type_die;
- u32 insn_offset = dl->al.offset;
- int fbreg = dloc->fbreg;
- int fboff = 0;
-
- if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
- return;
-
- if (ins__is_call(&dl->ins)) {
- struct symbol *func = dl->ops.target.sym;
-
- if (func == NULL)
- return;
-
- /* __fentry__ will preserve all registers */
- if (!strcmp(func->name, "__fentry__"))
- return;
-
- pr_debug_dtp("call [%x] %s\n", insn_offset, func->name);
-
- /* Otherwise invalidate caller-saved registers after call */
- for (unsigned i = 0; i < ARRAY_SIZE(state->regs); i++) {
- if (state->regs[i].caller_saved)
- state->regs[i].ok = false;
- }
-
- /* Update register with the return type (if any) */
- if (die_find_func_rettype(cu_die, func->name, &type_die)) {
- tsr = &state->regs[state->ret_reg];
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- pr_debug_dtp("call [%x] return -> reg%d",
- insn_offset, state->ret_reg);
- pr_debug_type_name(&type_die, tsr->kind);
- }
- return;
- }
-
- if (!strncmp(dl->ins.name, "add", 3)) {
- u64 imm_value = -1ULL;
- int offset;
- const char *var_name = NULL;
- struct map_symbol *ms = dloc->ms;
- u64 ip = ms->sym->start + dl->al.offset;
-
- if (!has_reg_type(state, dst->reg1))
- return;
-
- tsr = &state->regs[dst->reg1];
-
- if (src->imm)
- imm_value = src->offset;
- else if (has_reg_type(state, src->reg1) &&
- state->regs[src->reg1].kind == TSR_KIND_CONST)
- imm_value = state->regs[src->reg1].imm_value;
- else if (src->reg1 == DWARF_REG_PC) {
- u64 var_addr = annotate_calc_pcrel(dloc->ms, ip,
- src->offset, dl);
-
- if (get_global_var_info(dloc, var_addr,
- &var_name, &offset) &&
- !strcmp(var_name, "this_cpu_off") &&
- tsr->kind == TSR_KIND_CONST) {
- tsr->kind = TSR_KIND_PERCPU_BASE;
- imm_value = tsr->imm_value;
- }
- }
- else
- return;
-
- if (tsr->kind != TSR_KIND_PERCPU_BASE)
- return;
- if (get_global_var_type(cu_die, dloc, ip, imm_value, &offset,
- &type_die) && offset == 0) {
/*
- * This is not a pointer type, but it should be treated
- * as a pointer.
+ * If this register is directly copied from another and it gets a
+ * better type, also update the type of the source register. This
+ * is usually the case of container_of() macro with offset of 0.
*/
- tsr->type = type_die;
- tsr->kind = TSR_KIND_POINTER;
- tsr->ok = true;
-
- pr_debug_dtp("add [%x] percpu %#"PRIx64" -> reg%d",
- insn_offset, imm_value, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- return;
- }
-
- if (strncmp(dl->ins.name, "mov", 3))
- return;
+ if (has_reg_type(state, reg->copied_from)) {
+ struct type_state_reg *copy_reg;
- if (dloc->fb_cfa) {
- u64 ip = dloc->ms->sym->start + dl->al.offset;
- u64 pc = map__rip_2objdump(dloc->ms->map, ip);
+ copy_reg = &state->regs[reg->copied_from];
- if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
- fbreg = -1;
- }
-
- /* Case 1. register to register or segment:offset to register transfers */
- if (!src->mem_ref && !dst->mem_ref) {
- if (!has_reg_type(state, dst->reg1))
- return;
-
- tsr = &state->regs[dst->reg1];
- if (dso__kernel(map__dso(dloc->ms->map)) &&
- src->segment == INSN_SEG_X86_GS && src->imm) {
- u64 ip = dloc->ms->sym->start + dl->al.offset;
- u64 var_addr;
- int offset;
-
- /*
- * In kernel, %gs points to a per-cpu region for the
- * current CPU. Access with a constant offset should
- * be treated as a global variable access.
- */
- var_addr = src->offset;
-
- if (var_addr == 40) {
- tsr->kind = TSR_KIND_CANARY;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] stack canary -> reg%d\n",
- insn_offset, dst->reg1);
- return;
- }
-
- if (!get_global_var_type(cu_die, dloc, ip, var_addr,
- &offset, &type_die) ||
- !die_get_member_type(&type_die, offset, &type_die)) {
- tsr->ok = false;
- return;
- }
+ /* TODO: check if type is compatible or embedded */
+ if (!copy_reg->ok || (copy_reg->kind != TSR_KIND_TYPE) ||
+ !die_is_same(&copy_reg->type, &orig_type) ||
+ !is_better_type(&copy_reg->type, &mem_die))
+ continue;
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
+ copy_reg->type = mem_die;
- pr_debug_dtp("mov [%x] this-cpu addr=%#"PRIx64" -> reg%d",
- insn_offset, var_addr, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- return;
- }
-
- if (src->imm) {
- tsr->kind = TSR_KIND_CONST;
- tsr->imm_value = src->offset;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] imm=%#x -> reg%d\n",
- insn_offset, tsr->imm_value, dst->reg1);
- return;
- }
-
- if (!has_reg_type(state, src->reg1) ||
- !state->regs[src->reg1].ok) {
- tsr->ok = false;
- return;
- }
-
- tsr->type = state->regs[src->reg1].type;
- tsr->kind = state->regs[src->reg1].kind;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] reg%d -> reg%d",
- insn_offset, src->reg1, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- /* Case 2. memory to register transers */
- if (src->mem_ref && !dst->mem_ref) {
- int sreg = src->reg1;
-
- if (!has_reg_type(state, dst->reg1))
- return;
-
- tsr = &state->regs[dst->reg1];
-
-retry:
- /* Check stack variables with offset */
- if (sreg == fbreg) {
- struct type_state_stack *stack;
- int offset = src->offset - fboff;
-
- stack = find_stack_state(state, offset);
- if (stack == NULL) {
- tsr->ok = false;
- return;
- } else if (!stack->compound) {
- tsr->type = stack->type;
- tsr->kind = stack->kind;
- tsr->ok = true;
- } else if (die_get_member_type(&stack->type,
- offset - stack->offset,
- &type_die)) {
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
- } else {
- tsr->ok = false;
- return;
- }
-
- pr_debug_dtp("mov [%x] -%#x(stack) -> reg%d",
- insn_offset, -offset, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- /* And then dereference the pointer if it has one */
- else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
- state->regs[sreg].kind == TSR_KIND_TYPE &&
- die_deref_ptr_type(&state->regs[sreg].type,
- src->offset, &type_die)) {
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] %#x(reg%d) -> reg%d",
- insn_offset, src->offset, sreg, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- /* Or check if it's a global variable */
- else if (sreg == DWARF_REG_PC) {
- struct map_symbol *ms = dloc->ms;
- u64 ip = ms->sym->start + dl->al.offset;
- u64 addr;
- int offset;
-
- addr = annotate_calc_pcrel(ms, ip, src->offset, dl);
-
- if (!get_global_var_type(cu_die, dloc, ip, addr, &offset,
- &type_die) ||
- !die_get_member_type(&type_die, offset, &type_die)) {
- tsr->ok = false;
- return;
- }
-
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] global addr=%"PRIx64" -> reg%d",
- insn_offset, addr, dst->reg1);
- pr_debug_type_name(&type_die, tsr->kind);
- }
- /* And check percpu access with base register */
- else if (has_reg_type(state, sreg) &&
- state->regs[sreg].kind == TSR_KIND_PERCPU_BASE) {
- u64 ip = dloc->ms->sym->start + dl->al.offset;
- u64 var_addr = src->offset;
- int offset;
-
- if (src->multi_regs) {
- int reg2 = (sreg == src->reg1) ? src->reg2 : src->reg1;
-
- if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
- state->regs[reg2].kind == TSR_KIND_CONST)
- var_addr += state->regs[reg2].imm_value;
- }
-
- /*
- * In kernel, %gs points to a per-cpu region for the
- * current CPU. Access with a constant offset should
- * be treated as a global variable access.
- */
- if (get_global_var_type(cu_die, dloc, ip, var_addr,
- &offset, &type_die) &&
- die_get_member_type(&type_die, offset, &type_die)) {
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- if (src->multi_regs) {
- pr_debug_dtp("mov [%x] percpu %#x(reg%d,reg%d) -> reg%d",
- insn_offset, src->offset, src->reg1,
- src->reg2, dst->reg1);
- } else {
- pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
- insn_offset, src->offset, sreg, dst->reg1);
- }
- pr_debug_type_name(&tsr->type, tsr->kind);
- } else {
- tsr->ok = false;
- }
- }
- /* And then dereference the calculated pointer if it has one */
- else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
- state->regs[sreg].kind == TSR_KIND_POINTER &&
- die_get_member_type(&state->regs[sreg].type,
- src->offset, &type_die)) {
- tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
- tsr->ok = true;
-
- pr_debug_dtp("mov [%x] pointer %#x(reg%d) -> reg%d",
- insn_offset, src->offset, sreg, dst->reg1);
- pr_debug_type_name(&tsr->type, tsr->kind);
- }
- /* Or try another register if any */
- else if (src->multi_regs && sreg == src->reg1 &&
- src->reg1 != src->reg2) {
- sreg = src->reg2;
- goto retry;
- }
- else {
- int offset;
- const char *var_name = NULL;
-
- /* it might be per-cpu variable (in kernel) access */
- if (src->offset < 0) {
- if (get_global_var_info(dloc, (s64)src->offset,
- &var_name, &offset) &&
- !strcmp(var_name, "__per_cpu_offset")) {
- tsr->kind = TSR_KIND_PERCPU_BASE;
-
- pr_debug_dtp("mov [%x] percpu base reg%d\n",
- insn_offset, dst->reg1);
- }
- }
-
- tsr->ok = false;
- }
- }
- /* Case 3. register to memory transfers */
- if (!src->mem_ref && dst->mem_ref) {
- if (!has_reg_type(state, src->reg1) ||
- !state->regs[src->reg1].ok)
- return;
-
- /* Check stack variables with offset */
- if (dst->reg1 == fbreg) {
- struct type_state_stack *stack;
- int offset = dst->offset - fboff;
-
- tsr = &state->regs[src->reg1];
-
- stack = find_stack_state(state, offset);
- if (stack) {
- /*
- * The source register is likely to hold a type
- * of member if it's a compound type. Do not
- * update the stack variable type since we can
- * get the member type later by using the
- * die_get_member_type().
- */
- if (!stack->compound)
- set_stack_state(stack, offset, tsr->kind,
- &tsr->type);
- } else {
- findnew_stack_state(state, offset, tsr->kind,
- &tsr->type);
+ pr_debug_dtp("var [%"PRIx64"] copyback reg%d",
+ insn_offset, reg->copied_from);
+ pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
}
-
- pr_debug_dtp("mov [%x] reg%d -> -%#x(stack)",
- insn_offset, src->reg1, -offset);
- pr_debug_type_name(&tsr->type, tsr->kind);
}
- /*
- * Ignore other transfers since it'd set a value in a struct
- * and won't change the type.
- */
}
- /* Case 4. memory to memory transfers (not handled for now) */
}
/**
@@ -1166,8 +912,8 @@ retry:
static void update_insn_state(struct type_state *state, struct data_loc_info *dloc,
Dwarf_Die *cu_die, struct disasm_line *dl)
{
- if (arch__is(dloc->arch, "x86"))
- update_insn_state_x86(state, dloc, cu_die, dl);
+ if (dloc->arch->update_insn_state)
+ dloc->arch->update_insn_state(state, dloc, cu_die, dl);
}
/*
@@ -1254,75 +1000,164 @@ static void setup_stack_canary(struct data_loc_info *dloc)
/*
* It's at the target address, check if it has a matching type.
- * It returns 1 if found, 0 if not or -1 if not found but no need to
- * repeat the search. The last case is for per-cpu variables which
+ * It returns PERF_TMR_BAIL_OUT when it looks up per-cpu variables which
* are similar to global variables and no additional info is needed.
*/
-static int check_matching_type(struct type_state *state,
- struct data_loc_info *dloc,
- Dwarf_Die *cu_die, Dwarf_Die *type_die)
+static enum type_match_result check_matching_type(struct type_state *state,
+ struct data_loc_info *dloc,
+ Dwarf_Die *cu_die,
+ struct disasm_line *dl,
+ Dwarf_Die *type_die)
{
Dwarf_Word size;
- u32 insn_offset = dloc->ip - dloc->ms->sym->start;
+ u32 insn_offset = dl->al.offset;
int reg = dloc->op->reg1;
+ int offset = dloc->op->offset;
+ const char *offset_sign = "";
+ bool retry = true;
+
+ if (offset < 0) {
+ offset = -offset;
+ offset_sign = "-";
+ }
- pr_debug_dtp("chk [%x] reg%d offset=%#x ok=%d kind=%d",
- insn_offset, reg, dloc->op->offset,
+again:
+ pr_debug_dtp("chk [%x] reg%d offset=%s%#x ok=%d kind=%d ",
+ insn_offset, reg, offset_sign, offset,
state->regs[reg].ok, state->regs[reg].kind);
- if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_TYPE) {
- int tag = dwarf_tag(&state->regs[reg].type);
+ if (!state->regs[reg].ok)
+ goto check_non_register;
+
+ if (state->regs[reg].kind == TSR_KIND_TYPE) {
+ Dwarf_Die sized_type;
+ struct strbuf sb;
+
+ strbuf_init(&sb, 32);
+ die_get_typename_from_type(&state->regs[reg].type, &sb);
+ pr_debug_dtp("(%s)", sb.buf);
+ strbuf_release(&sb);
/*
* Normal registers should hold a pointer (or array) to
* dereference a memory location.
*/
- if (tag != DW_TAG_pointer_type && tag != DW_TAG_array_type) {
+ if (!is_pointer_type(&state->regs[reg].type)) {
if (dloc->op->offset < 0 && reg != state->stack_reg)
goto check_kernel;
- pr_debug_dtp("\n");
- return -1;
+ return PERF_TMR_NO_POINTER;
}
- pr_debug_dtp("\n");
-
/* Remove the pointer and get the target type */
- if (die_get_real_type(&state->regs[reg].type, type_die) == NULL)
- return -1;
+ if (__die_get_real_type(&state->regs[reg].type, type_die) == NULL)
+ return PERF_TMR_NO_POINTER;
+
+ dloc->type_offset = dloc->op->offset;
+
+ if (dwarf_tag(type_die) == DW_TAG_typedef)
+ die_get_real_type(type_die, &sized_type);
+ else
+ sized_type = *type_die;
+
+ /* Get the size of the actual type */
+ if (dwarf_aggregate_size(&sized_type, &size) < 0 ||
+ (unsigned)dloc->type_offset >= size)
+ return PERF_TMR_BAD_OFFSET;
+
+ return PERF_TMR_OK;
+ }
+
+ if (state->regs[reg].kind == TSR_KIND_POINTER) {
+ pr_debug_dtp("percpu ptr");
+
+ /*
+ * It's actaully pointer but the address was calculated using
+ * some arithmetic. So it points to the actual type already.
+ */
+ *type_die = state->regs[reg].type;
dloc->type_offset = dloc->op->offset;
/* Get the size of the actual type */
if (dwarf_aggregate_size(type_die, &size) < 0 ||
(unsigned)dloc->type_offset >= size)
- return -1;
+ return PERF_TMR_BAIL_OUT;
- return 1;
+ return PERF_TMR_OK;
}
+ if (state->regs[reg].kind == TSR_KIND_CANARY) {
+ pr_debug_dtp("stack canary");
+
+ /*
+ * This is a saved value of the stack canary which will be handled
+ * in the outer logic when it returns failure here. Pretend it's
+ * from the stack canary directly.
+ */
+ setup_stack_canary(dloc);
+
+ return PERF_TMR_BAIL_OUT;
+ }
+
+ if (state->regs[reg].kind == TSR_KIND_PERCPU_BASE) {
+ u64 var_addr = dloc->op->offset;
+ int var_offset;
+
+ pr_debug_dtp("percpu var");
+
+ if (dloc->op->multi_regs) {
+ int reg2 = dloc->op->reg2;
+
+ if (dloc->op->reg2 == reg)
+ reg2 = dloc->op->reg1;
+
+ if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
+ state->regs[reg2].kind == TSR_KIND_CONST)
+ var_addr += state->regs[reg2].imm_value;
+ }
+
+ if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr,
+ &var_offset, type_die)) {
+ dloc->type_offset = var_offset;
+ return PERF_TMR_OK;
+ }
+ /* No need to retry per-cpu (global) variables */
+ return PERF_TMR_BAIL_OUT;
+ }
+
+check_non_register:
if (reg == dloc->fbreg) {
struct type_state_stack *stack;
- pr_debug_dtp(" fbreg\n");
+ pr_debug_dtp("fbreg");
stack = find_stack_state(state, dloc->type_offset);
- if (stack == NULL)
- return 0;
+ if (stack == NULL) {
+ if (retry) {
+ pr_debug_dtp(" : retry\n");
+ retry = false;
+
+ /* update type info it's the first store to the stack */
+ update_insn_state(state, dloc, cu_die, dl);
+ goto again;
+ }
+ return PERF_TMR_NO_TYPE;
+ }
if (stack->kind == TSR_KIND_CANARY) {
setup_stack_canary(dloc);
- return -1;
+ return PERF_TMR_BAIL_OUT;
}
if (stack->kind != TSR_KIND_TYPE)
- return 0;
+ return PERF_TMR_NO_TYPE;
*type_die = stack->type;
/* Update the type offset from the start of slot */
dloc->type_offset -= stack->offset;
- return 1;
+ return PERF_TMR_OK;
}
if (dloc->fb_cfa) {
@@ -1330,109 +1165,59 @@ static int check_matching_type(struct type_state *state,
u64 pc = map__rip_2objdump(dloc->ms->map, dloc->ip);
int fbreg, fboff;
- pr_debug_dtp(" cfa\n");
+ pr_debug_dtp("cfa");
if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
fbreg = -1;
if (reg != fbreg)
- return 0;
+ return PERF_TMR_NO_TYPE;
stack = find_stack_state(state, dloc->type_offset - fboff);
- if (stack == NULL)
- return 0;
+ if (stack == NULL) {
+ if (retry) {
+ pr_debug_dtp(" : retry\n");
+ retry = false;
+
+ /* update type info it's the first store to the stack */
+ update_insn_state(state, dloc, cu_die, dl);
+ goto again;
+ }
+ return PERF_TMR_NO_TYPE;
+ }
if (stack->kind == TSR_KIND_CANARY) {
setup_stack_canary(dloc);
- return -1;
+ return PERF_TMR_BAIL_OUT;
}
if (stack->kind != TSR_KIND_TYPE)
- return 0;
+ return PERF_TMR_NO_TYPE;
*type_die = stack->type;
/* Update the type offset from the start of slot */
dloc->type_offset -= fboff + stack->offset;
- return 1;
- }
-
- if (state->regs[reg].kind == TSR_KIND_PERCPU_BASE) {
- u64 var_addr = dloc->op->offset;
- int var_offset;
-
- pr_debug_dtp(" percpu var\n");
-
- if (dloc->op->multi_regs) {
- int reg2 = dloc->op->reg2;
-
- if (dloc->op->reg2 == reg)
- reg2 = dloc->op->reg1;
-
- if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
- state->regs[reg2].kind == TSR_KIND_CONST)
- var_addr += state->regs[reg2].imm_value;
- }
-
- if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr,
- &var_offset, type_die)) {
- dloc->type_offset = var_offset;
- return 1;
- }
- /* No need to retry per-cpu (global) variables */
- return -1;
- }
-
- if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_POINTER) {
- pr_debug_dtp(" percpu ptr\n");
-
- /*
- * It's actaully pointer but the address was calculated using
- * some arithmetic. So it points to the actual type already.
- */
- *type_die = state->regs[reg].type;
-
- dloc->type_offset = dloc->op->offset;
-
- /* Get the size of the actual type */
- if (dwarf_aggregate_size(type_die, &size) < 0 ||
- (unsigned)dloc->type_offset >= size)
- return -1;
-
- return 1;
- }
-
- if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_CANARY) {
- pr_debug_dtp(" stack canary\n");
-
- /*
- * This is a saved value of the stack canary which will be handled
- * in the outer logic when it returns failure here. Pretend it's
- * from the stack canary directly.
- */
- setup_stack_canary(dloc);
-
- return -1;
+ return PERF_TMR_OK;
}
check_kernel:
if (dso__kernel(map__dso(dloc->ms->map))) {
u64 addr;
- int offset;
/* Direct this-cpu access like "%gs:0x34740" */
if (dloc->op->segment == INSN_SEG_X86_GS && dloc->op->imm &&
arch__is(dloc->arch, "x86")) {
- pr_debug_dtp(" this-cpu var\n");
+ pr_debug_dtp("this-cpu var");
addr = dloc->op->offset;
if (get_global_var_type(cu_die, dloc, dloc->ip, addr,
&offset, type_die)) {
dloc->type_offset = offset;
- return 1;
+ return PERF_TMR_OK;
}
- return -1;
+ return PERF_TMR_BAIL_OUT;
}
/* Access to global variable like "-0x7dcf0500(,%rdx,8)" */
@@ -1441,31 +1226,30 @@ check_kernel:
if (get_global_var_type(cu_die, dloc, dloc->ip, addr,
&offset, type_die)) {
- pr_debug_dtp(" global var\n");
+ pr_debug_dtp("global var");
dloc->type_offset = offset;
- return 1;
+ return PERF_TMR_OK;
}
- pr_debug_dtp(" negative offset\n");
- return -1;
+ return PERF_TMR_BAIL_OUT;
}
}
- pr_debug_dtp("\n");
- return 0;
+ return PERF_TMR_UNKNOWN;
}
/* Iterate instructions in basic blocks and update type table */
-static int find_data_type_insn(struct data_loc_info *dloc,
- struct list_head *basic_blocks,
- struct die_var_type *var_types,
- Dwarf_Die *cu_die, Dwarf_Die *type_die)
+static enum type_match_result find_data_type_insn(struct data_loc_info *dloc,
+ struct list_head *basic_blocks,
+ struct die_var_type *var_types,
+ Dwarf_Die *cu_die,
+ Dwarf_Die *type_die)
{
struct type_state state;
struct symbol *sym = dloc->ms->sym;
struct annotation *notes = symbol__annotation(sym);
struct annotated_basic_block *bb;
- int ret = 0;
+ enum type_match_result ret = PERF_TMR_UNKNOWN;
init_type_state(&state, dloc->arch);
@@ -1490,7 +1274,8 @@ static int find_data_type_insn(struct data_loc_info *dloc,
if (this_ip == dloc->ip) {
ret = check_matching_type(&state, dloc,
- cu_die, type_die);
+ cu_die, dl, type_die);
+ pr_debug_dtp(" : %s\n", match_result_str(ret));
goto out;
}
@@ -1506,34 +1291,43 @@ out:
return ret;
}
+static int arch_supports_insn_tracking(struct data_loc_info *dloc)
+{
+ if ((arch__is(dloc->arch, "x86")) || (arch__is(dloc->arch, "powerpc")))
+ return 1;
+ return 0;
+}
+
/*
* Construct a list of basic blocks for each scope with variables and try to find
* the data type by updating a type state table through instructions.
*/
-static int find_data_type_block(struct data_loc_info *dloc,
- Dwarf_Die *cu_die, Dwarf_Die *scopes,
- int nr_scopes, Dwarf_Die *type_die)
+static enum type_match_result find_data_type_block(struct data_loc_info *dloc,
+ Dwarf_Die *cu_die,
+ Dwarf_Die *scopes,
+ int nr_scopes,
+ Dwarf_Die *type_die)
{
LIST_HEAD(basic_blocks);
struct die_var_type *var_types = NULL;
u64 src_ip, dst_ip, prev_dst_ip;
- int ret = -1;
+ enum type_match_result ret = PERF_TMR_UNKNOWN;
/* TODO: other architecture support */
- if (!arch__is(dloc->arch, "x86"))
- return -1;
+ if (!arch_supports_insn_tracking(dloc))
+ return PERF_TMR_BAIL_OUT;
prev_dst_ip = dst_ip = dloc->ip;
for (int i = nr_scopes - 1; i >= 0; i--) {
Dwarf_Addr base, start, end;
LIST_HEAD(this_blocks);
- int found;
if (dwarf_ranges(&scopes[i], 0, &base, &start, &end) < 0)
break;
- pr_debug_dtp("scope: [%d/%d] (die:%lx)\n",
- i + 1, nr_scopes, (long)dwarf_dieoffset(&scopes[i]));
+ pr_debug_dtp("scope: [%d/%d] ", i + 1, nr_scopes);
+ pr_debug_scope(&scopes[i]);
+
src_ip = map__objdump_2rip(dloc->ms->map, start);
again:
@@ -1558,10 +1352,17 @@ again:
fixup_var_address(var_types, start);
/* Find from start of this scope to the target instruction */
- found = find_data_type_insn(dloc, &basic_blocks, var_types,
+ ret = find_data_type_insn(dloc, &basic_blocks, var_types,
cu_die, type_die);
- if (found > 0) {
+ if (ret == PERF_TMR_OK) {
char buf[64];
+ int offset = dloc->op->offset;
+ const char *offset_sign = "";
+
+ if (offset < 0) {
+ offset = -offset;
+ offset_sign = "-";
+ }
if (dloc->op->multi_regs)
snprintf(buf, sizeof(buf), "reg%d, reg%d",
@@ -1569,14 +1370,12 @@ again:
else
snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1);
- pr_debug_dtp("found by insn track: %#x(%s) type-offset=%#x\n",
- dloc->op->offset, buf, dloc->type_offset);
- pr_debug_type_name(type_die, TSR_KIND_TYPE);
- ret = 0;
+ pr_debug_dtp("found by insn track: %s%#x(%s) type-offset=%#x\n",
+ offset_sign, offset, buf, dloc->type_offset);
break;
}
- if (found < 0)
+ if (ret == PERF_TMR_BAIL_OUT)
break;
/* Go up to the next scope and find blocks to the start */
@@ -1595,14 +1394,17 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die)
struct annotated_op_loc *loc = dloc->op;
Dwarf_Die cu_die, var_die;
Dwarf_Die *scopes = NULL;
- int reg, offset;
+ int reg, offset = loc->offset;
int ret = -1;
int i, nr_scopes;
int fbreg = -1;
int fb_offset = 0;
bool is_fbreg = false;
+ bool found = false;
u64 pc;
char buf[64];
+ enum type_match_result result = PERF_TMR_UNKNOWN;
+ const char *offset_sign = "";
if (dloc->op->multi_regs)
snprintf(buf, sizeof(buf), "reg%d, reg%d", dloc->op->reg1, dloc->op->reg2);
@@ -1611,10 +1413,15 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die)
else
snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1);
+ if (offset < 0) {
+ offset = -offset;
+ offset_sign = "-";
+ }
+
pr_debug_dtp("-----------------------------------------------------------\n");
- pr_debug_dtp("find data type for %#x(%s) at %s+%#"PRIx64"\n",
- dloc->op->offset, buf, dloc->ms->sym->name,
- dloc->ip - dloc->ms->sym->start);
+ pr_debug_dtp("find data type for %s%#x(%s) at %s+%#"PRIx64"\n",
+ offset_sign, offset, buf,
+ dloc->ms->sym->name, dloc->ip - dloc->ms->sym->start);
/*
* IP is a relative instruction address from the start of the map, as
@@ -1644,7 +1451,7 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die)
pr_debug_dtp("found by addr=%#"PRIx64" type_offset=%#x\n",
dloc->var_addr, offset);
pr_debug_type_name(type_die, TSR_KIND_TYPE);
- ret = 0;
+ found = true;
goto out;
}
}
@@ -1685,65 +1492,95 @@ retry:
/* Search from the inner-most scope to the outer */
for (i = nr_scopes - 1; i >= 0; i--) {
+ Dwarf_Die mem_die;
+ int type_offset = offset;
+
if (reg == DWARF_REG_PC) {
if (!die_find_variable_by_addr(&scopes[i], dloc->var_addr,
- &var_die, &offset))
+ &var_die, &type_offset))
continue;
} else {
/* Look up variables/parameters in this scope */
if (!die_find_variable_by_reg(&scopes[i], pc, reg,
- &offset, is_fbreg, &var_die))
+ &type_offset, is_fbreg, &var_die))
continue;
}
+ pr_debug_dtp("found \"%s\" (die: %#lx) in scope=%d/%d (die: %#lx) ",
+ dwarf_diename(&var_die), (long)dwarf_dieoffset(&var_die),
+ i+1, nr_scopes, (long)dwarf_dieoffset(&scopes[i]));
+
/* Found a variable, see if it's correct */
- ret = check_variable(dloc, &var_die, type_die, reg, offset, is_fbreg);
- if (ret == 0) {
- pr_debug_dtp("found \"%s\" in scope=%d/%d (die: %#lx) ",
- dwarf_diename(&var_die), i+1, nr_scopes,
- (long)dwarf_dieoffset(&scopes[i]));
+ result = check_variable(dloc, &var_die, &mem_die, reg, type_offset, is_fbreg);
+ if (result == PERF_TMR_OK) {
if (reg == DWARF_REG_PC) {
pr_debug_dtp("addr=%#"PRIx64" type_offset=%#x\n",
- dloc->var_addr, offset);
+ dloc->var_addr, type_offset);
} else if (reg == DWARF_REG_FB || is_fbreg) {
pr_debug_dtp("stack_offset=%#x type_offset=%#x\n",
- fb_offset, offset);
+ fb_offset, type_offset);
} else {
- pr_debug_dtp("type_offset=%#x\n", offset);
+ pr_debug_dtp("type_offset=%#x\n", type_offset);
+ }
+
+ if (!found || is_better_type(type_die, &mem_die)) {
+ *type_die = mem_die;
+ dloc->type_offset = type_offset;
+ found = true;
}
- pr_debug_location(&var_die, pc, reg);
- pr_debug_type_name(type_die, TSR_KIND_TYPE);
} else {
- pr_debug_dtp("check variable \"%s\" failed (die: %#lx)\n",
- dwarf_diename(&var_die),
- (long)dwarf_dieoffset(&var_die));
- pr_debug_location(&var_die, pc, reg);
- pr_debug_type_name(type_die, TSR_KIND_TYPE);
+ pr_debug_dtp("failed: %s\n", match_result_str(result));
}
- dloc->type_offset = offset;
- goto out;
+
+ pr_debug_location(&var_die, pc, reg);
+ pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
}
- if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
+ if (!found && loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
reg = loc->reg2;
goto retry;
}
- if (reg != DWARF_REG_PC) {
- ret = find_data_type_block(dloc, &cu_die, scopes,
- nr_scopes, type_die);
- if (ret == 0) {
+ if (!found && reg != DWARF_REG_PC) {
+ result = find_data_type_block(dloc, &cu_die, scopes,
+ nr_scopes, type_die);
+ if (result == PERF_TMR_OK) {
ann_data_stat.insn_track++;
- goto out;
+ found = true;
}
}
- if (ret < 0) {
- pr_debug_dtp("no variable found\n");
- ann_data_stat.no_var++;
+out:
+ pr_debug_dtp("final result: ");
+ if (found) {
+ pr_debug_type_name(type_die, TSR_KIND_TYPE);
+ ret = 0;
+ } else {
+ switch (result) {
+ case PERF_TMR_NO_TYPE:
+ case PERF_TMR_NO_POINTER:
+ pr_debug_dtp("%s\n", match_result_str(result));
+ ann_data_stat.no_typeinfo++;
+ break;
+ case PERF_TMR_NO_SIZE:
+ pr_debug_dtp("%s\n", match_result_str(result));
+ ann_data_stat.invalid_size++;
+ break;
+ case PERF_TMR_BAD_OFFSET:
+ pr_debug_dtp("%s\n", match_result_str(result));
+ ann_data_stat.bad_offset++;
+ break;
+ case PERF_TMR_UNKNOWN:
+ case PERF_TMR_BAIL_OUT:
+ case PERF_TMR_OK: /* should not reach here */
+ default:
+ pr_debug_dtp("no variable found\n");
+ ann_data_stat.no_var++;
+ break;
+ }
+ ret = -1;
}
-out:
free(scopes);
return ret;
}
@@ -1764,16 +1601,9 @@ out:
*/
struct annotated_data_type *find_data_type(struct data_loc_info *dloc)
{
- struct annotated_data_type *result = NULL;
struct dso *dso = map__dso(dloc->ms->map);
Dwarf_Die type_die;
- dloc->di = debuginfo__new(dso__long_name(dso));
- if (dloc->di == NULL) {
- pr_debug_dtp("cannot get the debug info\n");
- return NULL;
- }
-
/*
* The type offset is the same as instruction offset by default.
* But when finding a global variable, the offset won't be valid.
@@ -1783,13 +1613,9 @@ struct annotated_data_type *find_data_type(struct data_loc_info *dloc)
dloc->fbreg = -1;
if (find_data_type_die(dloc, &type_die) < 0)
- goto out;
-
- result = dso__findnew_data_type(dso, &type_die);
+ return NULL;
-out:
- debuginfo__delete(dloc->di);
- return result;
+ return dso__findnew_data_type(dso, &type_die);
}
static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries)
@@ -1911,10 +1737,15 @@ static void print_annotated_data_header(struct hist_entry *he, struct evsel *evs
struct evsel *pos;
int i = 0;
- for_each_group_evsel(pos, evsel)
- printf(" event[%d] = %s\n", i++, pos->name);
+ nr_members = 0;
+ for_each_group_evsel(pos, evsel) {
+ if (symbol_conf.skip_empty &&
+ evsel__hists(pos)->stats.nr_samples == 0)
+ continue;
- nr_members = evsel->core.nr_members;
+ printf(" event[%d] = %s\n", i++, pos->name);
+ nr_members++;
+ }
}
if (symbol_conf.show_total_period) {
@@ -1949,34 +1780,29 @@ static void print_annotated_data_type(struct annotated_data_type *mem_type,
{
struct annotated_member *child;
struct type_hist *h = mem_type->histograms[evsel->core.idx];
- int i, nr_events = 1, samples = 0;
+ int i, nr_events = 0, samples = 0;
u64 period = 0;
int width = symbol_conf.show_total_period ? 11 : 7;
+ struct evsel *pos;
- for (i = 0; i < member->size; i++) {
- samples += h->addr[member->offset + i].nr_samples;
- period += h->addr[member->offset + i].period;
- }
- print_annotated_data_value(h, period, samples);
+ for_each_group_evsel(pos, evsel) {
+ h = mem_type->histograms[pos->core.idx];
- if (evsel__is_group_event(evsel)) {
- struct evsel *pos;
-
- for_each_group_member(pos, evsel) {
- h = mem_type->histograms[pos->core.idx];
+ if (symbol_conf.skip_empty &&
+ evsel__hists(pos)->stats.nr_samples == 0)
+ continue;
- samples = 0;
- period = 0;
- for (i = 0; i < member->size; i++) {
- samples += h->addr[member->offset + i].nr_samples;
- period += h->addr[member->offset + i].period;
- }
- print_annotated_data_value(h, period, samples);
+ samples = 0;
+ period = 0;
+ for (i = 0; i < member->size; i++) {
+ samples += h->addr[member->offset + i].nr_samples;
+ period += h->addr[member->offset + i].period;
}
- nr_events = evsel->core.nr_members;
+ print_annotated_data_value(h, period, samples);
+ nr_events++;
}
- printf(" %10d %10d %*s%s\t%s",
+ printf(" %#10x %#10x %*s%s\t%s",
member->offset, member->size, indent, "", member->type_name,
member->var_name ?: "");
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
index 0a57d9f5ee78..8ac0fd94a0ba 100644
--- a/tools/perf/util/annotate-data.h
+++ b/tools/perf/util/annotate-data.h
@@ -6,6 +6,12 @@
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/types.h>
+#include "dwarf-regs.h"
+#include "annotate.h"
+
+#ifdef HAVE_DWARF_SUPPORT
+#include "debuginfo.h"
+#endif
struct annotated_op_loc;
struct debuginfo;
@@ -15,6 +21,23 @@ struct hist_entry;
struct map_symbol;
struct thread;
+#define pr_debug_dtp(fmt, ...) \
+do { \
+ if (debug_type_profile) \
+ pr_info(fmt, ##__VA_ARGS__); \
+ else \
+ pr_debug3(fmt, ##__VA_ARGS__); \
+} while (0)
+
+enum type_state_kind {
+ TSR_KIND_INVALID = 0,
+ TSR_KIND_TYPE,
+ TSR_KIND_PERCPU_BASE,
+ TSR_KIND_CONST,
+ TSR_KIND_POINTER,
+ TSR_KIND_CANARY,
+};
+
/**
* struct annotated_member - Type of member field
* @node: List entry in the parent list
@@ -100,9 +123,9 @@ struct data_loc_info {
u64 var_addr;
u8 cpumode;
struct annotated_op_loc *op;
+ struct debuginfo *di;
/* These are used internally */
- struct debuginfo *di;
int fbreg;
bool fb_cfa;
@@ -143,6 +166,52 @@ struct annotated_data_stat {
extern struct annotated_data_stat ann_data_stat;
#ifdef HAVE_DWARF_SUPPORT
+/*
+ * Type information in a register, valid when @ok is true.
+ * The @caller_saved registers are invalidated after a function call.
+ */
+struct type_state_reg {
+ Dwarf_Die type;
+ u32 imm_value;
+ bool ok;
+ bool caller_saved;
+ u8 kind;
+ u8 copied_from;
+};
+
+/* Type information in a stack location, dynamically allocated */
+struct type_state_stack {
+ struct list_head list;
+ Dwarf_Die type;
+ int offset;
+ int size;
+ bool compound;
+ u8 kind;
+};
+
+/* FIXME: This should be arch-dependent */
+#ifdef __powerpc__
+#define TYPE_STATE_MAX_REGS 32
+#else
+#define TYPE_STATE_MAX_REGS 16
+#endif
+
+/*
+ * State table to maintain type info in each register and stack location.
+ * It'll be updated when new variable is allocated or type info is moved
+ * to a new location (register or stack). As it'd be used with the
+ * shortest path of basic blocks, it only maintains a single table.
+ */
+struct type_state {
+ /* state of general purpose registers */
+ struct type_state_reg regs[TYPE_STATE_MAX_REGS];
+ /* state of stack location */
+ struct list_head stack_vars;
+ /* return value register */
+ int ret_reg;
+ /* stack pointer register */
+ int stack_reg;
+};
/* Returns data type at the location (ip, reg, offset) */
struct annotated_data_type *find_data_type(struct data_loc_info *dloc);
@@ -160,6 +229,21 @@ void global_var_type__tree_delete(struct rb_root *root);
int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel);
+bool has_reg_type(struct type_state *state, int reg);
+struct type_state_stack *findnew_stack_state(struct type_state *state,
+ int offset, u8 kind,
+ Dwarf_Die *type_die);
+void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
+ Dwarf_Die *type_die);
+struct type_state_stack *find_stack_state(struct type_state *state,
+ int offset);
+bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
+ u64 ip, u64 var_addr, int *var_offset,
+ Dwarf_Die *type_die);
+bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
+ const char **var_name, int *var_offset);
+void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind);
+
#else /* HAVE_DWARF_SUPPORT */
static inline struct annotated_data_type *
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 1451caf25e77..37ce43c4eb8f 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -25,6 +25,7 @@
#include "srcline.h"
#include "units.h"
#include "debug.h"
+#include "debuginfo.h"
#include "annotate.h"
#include "annotate-data.h"
#include "evsel.h"
@@ -40,6 +41,7 @@
#include "namespaces.h"
#include "thread.h"
#include "hashmap.h"
+#include "strbuf.h"
#include <regex.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
@@ -47,6 +49,7 @@
#include <linux/zalloc.h>
#include <subcmd/parse-options.h>
#include <subcmd/run-command.h>
+#include <math.h>
/* FIXME: For the HE_COLORSET */
#include "ui/browser.h"
@@ -265,22 +268,30 @@ struct annotated_branch *annotation__get_branch(struct annotation *notes)
return notes->branch;
}
-static struct cyc_hist *symbol__cycles_hist(struct symbol *sym)
+static struct annotated_branch *symbol__find_branch_hist(struct symbol *sym,
+ unsigned int br_cntr_nr)
{
struct annotation *notes = symbol__annotation(sym);
struct annotated_branch *branch;
+ const size_t size = symbol__size(sym);
branch = annotation__get_branch(notes);
if (branch == NULL)
return NULL;
if (branch->cycles_hist == NULL) {
- const size_t size = symbol__size(sym);
-
branch->cycles_hist = calloc(size, sizeof(struct cyc_hist));
+ if (!branch->cycles_hist)
+ return NULL;
+ }
+
+ if (br_cntr_nr && branch->br_cntr == NULL) {
+ branch->br_cntr = calloc(br_cntr_nr * size, sizeof(u64));
+ if (!branch->br_cntr)
+ return NULL;
}
- return branch->cycles_hist;
+ return branch;
}
struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists)
@@ -315,16 +326,45 @@ static int symbol__inc_addr_samples(struct map_symbol *ms,
return src ? __symbol__inc_addr_samples(ms, src, evsel->core.idx, addr, sample) : 0;
}
-static int symbol__account_cycles(u64 addr, u64 start,
- struct symbol *sym, unsigned cycles)
+static int symbol__account_br_cntr(struct annotated_branch *branch,
+ struct evsel *evsel,
+ unsigned offset,
+ u64 br_cntr)
+{
+ unsigned int br_cntr_nr = evsel__leader(evsel)->br_cntr_nr;
+ unsigned int base = evsel__leader(evsel)->br_cntr_idx;
+ unsigned int off = offset * evsel->evlist->nr_br_cntr;
+ u64 *branch_br_cntr = branch->br_cntr;
+ unsigned int i, mask, width;
+
+ if (!br_cntr || !branch_br_cntr)
+ return 0;
+
+ perf_env__find_br_cntr_info(evsel__env(evsel), NULL, &width);
+ mask = (1L << width) - 1;
+ for (i = 0; i < br_cntr_nr; i++) {
+ u64 cntr = (br_cntr >> i * width) & mask;
+
+ branch_br_cntr[off + i + base] += cntr;
+ if (cntr == mask)
+ branch_br_cntr[off + i + base] |= ANNOTATION__BR_CNTR_SATURATED_FLAG;
+ }
+
+ return 0;
+}
+
+static int symbol__account_cycles(u64 addr, u64 start, struct symbol *sym,
+ unsigned cycles, struct evsel *evsel,
+ u64 br_cntr)
{
- struct cyc_hist *cycles_hist;
+ struct annotated_branch *branch;
unsigned offset;
+ int ret;
if (sym == NULL)
return 0;
- cycles_hist = symbol__cycles_hist(sym);
- if (cycles_hist == NULL)
+ branch = symbol__find_branch_hist(sym, evsel->evlist->nr_br_cntr);
+ if (!branch)
return -ENOMEM;
if (addr < sym->start || addr >= sym->end)
return -ERANGE;
@@ -336,15 +376,22 @@ static int symbol__account_cycles(u64 addr, u64 start,
start = 0;
}
offset = addr - sym->start;
- return __symbol__account_cycles(cycles_hist,
+ ret = __symbol__account_cycles(branch->cycles_hist,
start ? start - sym->start : 0,
offset, cycles,
!!start);
+
+ if (ret)
+ return ret;
+
+ return symbol__account_br_cntr(branch, evsel, offset, br_cntr);
}
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
struct addr_map_symbol *start,
- unsigned cycles)
+ unsigned cycles,
+ struct evsel *evsel,
+ u64 br_cntr)
{
u64 saddr = 0;
int err;
@@ -370,7 +417,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
start ? start->addr : 0,
ams->ms.sym ? ams->ms.sym->start + map__start(ams->ms.map) : 0,
saddr);
- err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles);
+ err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles, evsel, br_cntr);
if (err)
pr_debug2("account_cycles failed %d\n", err);
return err;
@@ -411,6 +458,7 @@ static void annotated_branch__delete(struct annotated_branch *branch)
{
if (branch) {
zfree(&branch->cycles_hist);
+ free(branch->br_cntr);
free(branch);
}
}
@@ -454,8 +502,10 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64
}
}
-static int annotation__compute_ipc(struct annotation *notes, size_t size)
+static int annotation__compute_ipc(struct annotation *notes, size_t size,
+ struct evsel *evsel)
{
+ unsigned int br_cntr_nr = evsel->evlist->nr_br_cntr;
int err = 0;
s64 offset;
@@ -490,6 +540,20 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size)
al->cycles->max = ch->cycles_max;
al->cycles->min = ch->cycles_min;
}
+ if (al && notes->branch->br_cntr) {
+ if (!al->br_cntr) {
+ al->br_cntr = calloc(br_cntr_nr, sizeof(u64));
+ if (!al->br_cntr) {
+ err = ENOMEM;
+ break;
+ }
+ }
+ al->num_aggr = ch->num_aggr;
+ al->br_cntr_nr = br_cntr_nr;
+ al->evsel = evsel;
+ memcpy(al->br_cntr, &notes->branch->br_cntr[offset * br_cntr_nr],
+ br_cntr_nr * sizeof(u64));
+ }
}
}
@@ -501,8 +565,10 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size)
struct annotation_line *al;
al = annotated_source__get_line(notes->src, offset);
- if (al)
+ if (al) {
zfree(&al->cycles);
+ zfree(&al->br_cntr);
+ }
}
}
}
@@ -699,13 +765,13 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
int percent_type)
{
struct disasm_line *dl = container_of(al, struct disasm_line, al);
+ struct annotation *notes = symbol__annotation(sym);
static const char *prev_line;
if (al->offset != -1) {
double max_percent = 0.0;
int i, nr_percent = 1;
const char *color;
- struct annotation *notes = symbol__annotation(sym);
for (i = 0; i < al->data_nr; i++) {
double percent;
@@ -775,14 +841,11 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
} else if (max_lines && printed >= max_lines)
return 1;
else {
- int width = symbol_conf.show_total_period ? 12 : 8;
+ int width = annotation__pcnt_width(notes);
if (queue)
return -1;
- if (evsel__is_group_event(evsel))
- width *= evsel->core.nr_members;
-
if (!*al->line)
printf(" %*s:\n", width, " ");
else
@@ -851,6 +914,10 @@ static void annotation__calc_percent(struct annotation *notes,
BUG_ON(i >= al->data_nr);
+ if (symbol_conf.skip_empty &&
+ evsel__hists(evsel)->stats.nr_samples == 0)
+ continue;
+
data = &al->data[i++];
calc_percent(notes, evsel, data, al->offset, end);
@@ -904,7 +971,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
.options = &annotate_opts,
};
struct arch *arch = NULL;
- int err;
+ int err, nr;
err = evsel__get_arch(evsel, &arch);
if (err < 0)
@@ -925,6 +992,19 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
return -1;
}
+ nr = 0;
+ if (evsel__is_group_event(evsel)) {
+ struct evsel *pos;
+
+ for_each_group_evsel(pos, evsel) {
+ if (symbol_conf.skip_empty &&
+ evsel__hists(pos)->stats.nr_samples == 0)
+ continue;
+ nr++;
+ }
+ }
+ notes->src->nr_events = nr ? nr : 1;
+
if (annotate_opts.full_addr)
notes->src->start = map__objdump_2mem(ms->map, ms->sym->start);
else
@@ -1106,7 +1186,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
int more = 0;
bool context = opts->context;
u64 len;
- int width = symbol_conf.show_total_period ? 12 : 8;
+ int width = annotation__pcnt_width(notes);
int graph_dotted_len;
char buf[512];
@@ -1122,7 +1202,6 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
len = symbol__size(sym);
if (evsel__is_group_event(evsel)) {
- width *= evsel->core.nr_members;
evsel__group_desc(evsel, buf, sizeof(buf));
evsel_name = buf;
}
@@ -1594,13 +1673,12 @@ bool ui__has_annotation(void)
static double annotation_line__max_percent(struct annotation_line *al,
- struct annotation *notes,
unsigned int percent_type)
{
double percent_max = 0.0;
int i;
- for (i = 0; i < notes->src->nr_events; i++) {
+ for (i = 0; i < al->data_nr; i++) {
double percent;
percent = annotation_data__percent(&al->data[i],
@@ -1662,6 +1740,149 @@ static void ipc_coverage_string(char *bf, int size, struct annotation *notes)
ipc, coverage);
}
+int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header)
+{
+ struct evsel *pos;
+ struct strbuf sb;
+
+ if (evsel->evlist->nr_br_cntr <= 0)
+ return -ENOTSUP;
+
+ strbuf_init(&sb, /*hint=*/ 0);
+
+ if (header && strbuf_addf(&sb, "# Branch counter abbr list:\n"))
+ goto err;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (!(pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
+ continue;
+ if (header && strbuf_addf(&sb, "#"))
+ goto err;
+
+ if (strbuf_addf(&sb, " %s = %s\n", pos->name, pos->abbr_name))
+ goto err;
+ }
+
+ if (header && strbuf_addf(&sb, "#"))
+ goto err;
+ if (strbuf_addf(&sb, " '-' No event occurs\n"))
+ goto err;
+
+ if (header && strbuf_addf(&sb, "#"))
+ goto err;
+ if (strbuf_addf(&sb, " '+' Event occurrences may be lost due to branch counter saturated\n"))
+ goto err;
+
+ *str = strbuf_detach(&sb, NULL);
+
+ return 0;
+err:
+ strbuf_release(&sb);
+ return -ENOMEM;
+}
+
+/* Assume the branch counter saturated at 3 */
+#define ANNOTATION_BR_CNTR_SATURATION 3
+
+int annotation_br_cntr_entry(char **str, int br_cntr_nr,
+ u64 *br_cntr, int num_aggr,
+ struct evsel *evsel)
+{
+ struct evsel *pos = evsel ? evlist__first(evsel->evlist) : NULL;
+ bool saturated = false;
+ int i, j, avg, used;
+ struct strbuf sb;
+
+ strbuf_init(&sb, /*hint=*/ 0);
+ for (i = 0; i < br_cntr_nr; i++) {
+ used = 0;
+ avg = ceil((double)(br_cntr[i] & ~ANNOTATION__BR_CNTR_SATURATED_FLAG) /
+ (double)num_aggr);
+
+ /*
+ * A histogram with the abbr name is displayed by default.
+ * With -v, the exact number of branch counter is displayed.
+ */
+ if (verbose) {
+ evlist__for_each_entry_from(evsel->evlist, pos) {
+ if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) &&
+ (pos->br_cntr_idx == i))
+ break;
+ }
+ if (strbuf_addstr(&sb, pos->abbr_name))
+ goto err;
+
+ if (!br_cntr[i]) {
+ if (strbuf_addstr(&sb, "=-"))
+ goto err;
+ } else {
+ if (strbuf_addf(&sb, "=%d", avg))
+ goto err;
+ }
+ if (br_cntr[i] & ANNOTATION__BR_CNTR_SATURATED_FLAG) {
+ if (strbuf_addch(&sb, '+'))
+ goto err;
+ } else {
+ if (strbuf_addch(&sb, ' '))
+ goto err;
+ }
+
+ if ((i < br_cntr_nr - 1) && strbuf_addch(&sb, ','))
+ goto err;
+ continue;
+ }
+
+ if (strbuf_addch(&sb, '|'))
+ goto err;
+
+ if (!br_cntr[i]) {
+ if (strbuf_addch(&sb, '-'))
+ goto err;
+ used++;
+ } else {
+ evlist__for_each_entry_from(evsel->evlist, pos) {
+ if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) &&
+ (pos->br_cntr_idx == i))
+ break;
+ }
+ if (br_cntr[i] & ANNOTATION__BR_CNTR_SATURATED_FLAG)
+ saturated = true;
+
+ for (j = 0; j < avg; j++, used++) {
+ /* Print + if the number of logged events > 3 */
+ if (j >= ANNOTATION_BR_CNTR_SATURATION) {
+ saturated = true;
+ break;
+ }
+ if (strbuf_addstr(&sb, pos->abbr_name))
+ goto err;
+ }
+
+ if (saturated) {
+ if (strbuf_addch(&sb, '+'))
+ goto err;
+ used++;
+ }
+ pos = list_next_entry(pos, core.node);
+ }
+
+ for (j = used; j < ANNOTATION_BR_CNTR_SATURATION + 1; j++) {
+ if (strbuf_addch(&sb, ' '))
+ goto err;
+ }
+ }
+
+ if (!verbose && strbuf_addch(&sb, br_cntr_nr ? '|' : ' '))
+ goto err;
+
+ *str = strbuf_detach(&sb, NULL);
+
+ return 0;
+err:
+ strbuf_release(&sb);
+ return -ENOMEM;
+}
+
static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
bool first_line, bool current_entry, bool change_color, int width,
void *obj, unsigned int percent_type,
@@ -1672,7 +1893,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
void (*obj__write_graph)(void *obj, int graph))
{
- double percent_max = annotation_line__max_percent(al, notes, percent_type);
+ double percent_max = annotation_line__max_percent(al, percent_type);
int pcnt_width = annotation__pcnt_width(notes),
cycles_width = annotation__cycles_width(notes);
bool show_title = false;
@@ -1690,7 +1911,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
if (al->offset != -1 && percent_max != 0.0) {
int i;
- for (i = 0; i < notes->src->nr_events; i++) {
+ for (i = 0; i < al->data_nr; i++) {
double percent;
percent = annotation_data__percent(&al->data[i], percent_type);
@@ -1699,10 +1920,10 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
if (symbol_conf.show_total_period) {
obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
} else if (symbol_conf.show_nr_samples) {
- obj__printf(obj, "%6" PRIu64 " ",
+ obj__printf(obj, "%7" PRIu64 " ",
al->data[i].he.nr_samples);
} else {
- obj__printf(obj, "%6.2f ", percent);
+ obj__printf(obj, "%7.2f ", percent);
}
}
} else {
@@ -1758,6 +1979,22 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
"Cycle(min/max)");
}
+ if (annotate_opts.show_br_cntr) {
+ if (show_title) {
+ obj__printf(obj, "%*s ",
+ ANNOTATION__BR_CNTR_WIDTH,
+ "Branch Counter");
+ } else {
+ char *buf;
+
+ if (!annotation_br_cntr_entry(&buf, al->br_cntr_nr, al->br_cntr,
+ al->num_aggr, al->evsel)) {
+ obj__printf(obj, "%*s ", ANNOTATION__BR_CNTR_WIDTH, buf);
+ free(buf);
+ }
+ }
+ }
+
if (show_title && !*al->line) {
ipc_coverage_string(bf, sizeof(bf), notes);
obj__printf(obj, "%*s", ANNOTATION__AVG_IPC_WIDTH, bf);
@@ -1843,10 +2080,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
size_t size = symbol__size(sym);
- int nr_pcnt = 1, err;
-
- if (evsel__is_group_event(evsel))
- nr_pcnt = evsel->core.nr_members;
+ int err;
err = symbol__annotate(ms, evsel, parch);
if (err)
@@ -1857,13 +2091,11 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
annotation__set_index(notes);
annotation__mark_jump_targets(notes, sym);
- err = annotation__compute_ipc(notes, size);
+ err = annotation__compute_ipc(notes, size, evsel);
if (err)
return err;
annotation__init_column_widths(notes, sym);
- notes->src->nr_events = nr_pcnt;
-
annotation__update_column_widths(notes);
sym->annotate2 = 1;
@@ -2123,20 +2355,33 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
for_each_insn_op_loc(loc, i, op_loc) {
const char *insn_str = ops->source.raw;
bool multi_regs = ops->source.multi_regs;
+ bool mem_ref = ops->source.mem_ref;
if (i == INSN_OP_TARGET) {
insn_str = ops->target.raw;
multi_regs = ops->target.multi_regs;
+ mem_ref = ops->target.mem_ref;
}
/* Invalidate the register by default */
op_loc->reg1 = -1;
op_loc->reg2 = -1;
- if (insn_str == NULL)
- continue;
+ if (insn_str == NULL) {
+ if (!arch__is(arch, "powerpc"))
+ continue;
+ }
- if (strchr(insn_str, arch->objdump.memory_ref_char)) {
+ /*
+ * For powerpc, call get_powerpc_regs function which extracts the
+ * required fields for op_loc, ie reg1, reg2, offset from the
+ * raw instruction.
+ */
+ if (arch__is(arch, "powerpc")) {
+ op_loc->mem_ref = mem_ref;
+ op_loc->multi_regs = multi_regs;
+ get_powerpc_regs(dl->raw.raw_insn, !i, op_loc);
+ } else if (strchr(insn_str, arch->objdump.memory_ref_char)) {
op_loc->mem_ref = true;
op_loc->multi_regs = multi_regs;
extract_reg_offset(arch, insn_str, op_loc);
@@ -2216,7 +2461,7 @@ static struct annotated_item_stat *annotate_data_stat(struct list_head *head,
return NULL;
istat->name = strdup(name);
- if (istat->name == NULL) {
+ if ((istat->name == NULL) || (!strlen(istat->name))) {
free(istat);
return NULL;
}
@@ -2230,6 +2475,7 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl)
if (arch__is(arch, "x86")) {
if (!strncmp(dl->ins.name, "push", 4) ||
!strncmp(dl->ins.name, "pop", 3) ||
+ !strncmp(dl->ins.name, "call", 4) ||
!strncmp(dl->ins.name, "ret", 3))
return true;
}
@@ -2313,6 +2559,20 @@ u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset,
return map__rip_2objdump(ms->map, addr);
}
+static struct debuginfo_cache {
+ struct dso *dso;
+ struct debuginfo *dbg;
+} di_cache;
+
+void debuginfo_cache__delete(void)
+{
+ dso__put(di_cache.dso);
+ di_cache.dso = NULL;
+
+ debuginfo__delete(di_cache.dbg);
+ di_cache.dbg = NULL;
+}
+
/**
* hist_entry__get_data_type - find data type for given hist entry
* @he: hist entry
@@ -2347,6 +2607,27 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
return NULL;
}
+ /*
+ * di_cache holds a pair of values, but code below assumes
+ * di_cache.dso can be compared/updated and di_cache.dbg can be
+ * read/updated independently from each other. That assumption only
+ * holds in single threaded code.
+ */
+ assert(perf_singlethreaded);
+
+ if (map__dso(ms->map) != di_cache.dso) {
+ dso__put(di_cache.dso);
+ di_cache.dso = dso__get(map__dso(ms->map));
+
+ debuginfo__delete(di_cache.dbg);
+ di_cache.dbg = debuginfo__new(dso__long_name(di_cache.dso));
+ }
+
+ if (di_cache.dbg == NULL) {
+ ann_data_stat.no_dbginfo++;
+ return NULL;
+ }
+
/* Make sure it has the disasm of the function */
if (symbol__annotate(ms, evsel, &arch) < 0) {
ann_data_stat.no_insn++;
@@ -2391,6 +2672,7 @@ retry:
.ip = ms->sym->start + dl->al.offset,
.cpumode = he->cpumode,
.op = op_loc,
+ .di = di_cache.dbg,
};
if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index d5c821c22f79..8b9e05a1932f 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -14,6 +14,7 @@
#include "spark.h"
#include "hashmap.h"
#include "disasm.h"
+#include "branch.h"
struct hist_browser_timer;
struct hist_entry;
@@ -30,6 +31,7 @@ struct annotated_data_type;
#define ANNOTATION__CYCLES_WIDTH 6
#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
#define ANNOTATION__AVG_IPC_WIDTH 36
+#define ANNOTATION__BR_CNTR_WIDTH 30
#define ANNOTATION_DUMMY_LEN 256
struct annotation_options {
@@ -43,6 +45,7 @@ struct annotation_options {
show_nr_jumps,
show_minmax_cycle,
show_asm_raw,
+ show_br_cntr,
annotate_src,
full_addr;
u8 offset_level;
@@ -103,6 +106,10 @@ struct annotation_line {
char *fileloc;
char *path;
struct cycles_info *cycles;
+ int num_aggr;
+ int br_cntr_nr;
+ u64 *br_cntr;
+ struct evsel *evsel;
int jump_sources;
u32 idx;
int idx_asm;
@@ -113,7 +120,10 @@ struct annotation_line {
struct disasm_line {
struct ins ins;
struct ins_operands ops;
-
+ union {
+ u8 bytes[4];
+ u32 raw_insn;
+ } raw;
/* This needs to be at the end. */
struct annotation_line al;
};
@@ -285,6 +295,9 @@ struct annotated_source {
struct annotation_line *annotated_source__get_line(struct annotated_source *src,
s64 offset);
+/* A branch counter once saturated */
+#define ANNOTATION__BR_CNTR_SATURATED_FLAG (1ULL << 63)
+
/**
* struct annotated_branch - basic block and IPC information for a symbol.
*
@@ -294,6 +307,7 @@ struct annotation_line *annotated_source__get_line(struct annotated_source *src,
* @cover_insn: Number of distinct, actually executed instructions.
* @cycles_hist: Array of cyc_hist for each instruction.
* @max_coverage: Maximum number of covered basic block (used for block-range).
+ * @br_cntr: Array of the occurrences of events (branch counters) during a block.
*
* This struct is used by two different codes when the sample has branch stack
* and cycles information. annotation__compute_ipc() calculates average IPC
@@ -310,6 +324,7 @@ struct annotated_branch {
unsigned int cover_insn;
struct cyc_hist *cycles_hist;
u64 max_coverage;
+ u64 *br_cntr;
};
struct LOCKABLE annotation {
@@ -336,7 +351,7 @@ static inline int annotation__cycles_width(struct annotation *notes)
static inline int annotation__pcnt_width(struct annotation *notes)
{
- return (symbol_conf.show_total_period ? 12 : 7) * notes->src->nr_events;
+ return (symbol_conf.show_total_period ? 12 : 8) * notes->src->nr_events;
}
static inline bool annotation_line__filter(struct annotation_line *al)
@@ -344,6 +359,11 @@ static inline bool annotation_line__filter(struct annotation_line *al)
return annotate_opts.hide_src_code && al->offset == -1;
}
+static inline u8 annotation__br_cntr_width(void)
+{
+ return annotate_opts.show_br_cntr ? ANNOTATION__BR_CNTR_WIDTH : 0;
+}
+
void annotation__update_column_widths(struct annotation *notes);
void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms);
@@ -380,7 +400,9 @@ struct annotated_branch *annotation__get_branch(struct annotation *notes);
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
struct addr_map_symbol *start,
- unsigned cycles);
+ unsigned cycles,
+ struct evsel *evsel,
+ u64 br_cntr);
int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
struct evsel *evsel, u64 addr);
@@ -540,4 +562,9 @@ struct annotated_basic_block {
int annotate_get_basic_blocks(struct symbol *sym, s64 src, s64 dst,
struct list_head *head);
+void debuginfo_cache__delete(void);
+
+int annotation_br_cntr_entry(char **str, int br_cntr_nr, u64 *br_cntr,
+ int num_aggr, struct evsel *evsel);
+int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header);
#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 7bf607d0f6d8..4cef10a83962 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -11,7 +11,7 @@
#include <linux/bitops.h>
#include <stdarg.h>
#include <linux/kernel.h>
-#include <asm-generic/unaligned.h>
+#include <linux/unaligned.h>
#include "arm-spe-pkt-decoder.h"
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index afbd5869f6bf..138ffc71b32d 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -899,7 +899,7 @@ static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
static int arm_spe_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
int err = 0;
u64 timestamp;
@@ -947,7 +947,7 @@ static int arm_spe_process_event(struct perf_session *session,
static int arm_spe_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
auxtrace);
@@ -985,7 +985,7 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session,
}
static int arm_spe_flush(struct perf_session *session __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
auxtrace);
@@ -1073,35 +1073,6 @@ static void arm_spe_print_info(__u64 *arr)
fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}
-struct arm_spe_synth {
- struct perf_tool dummy_tool;
- struct perf_session *session;
-};
-
-static int arm_spe_event_synth(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- struct arm_spe_synth *arm_spe_synth =
- container_of(tool, struct arm_spe_synth, dummy_tool);
-
- return perf_session__deliver_synth_event(arm_spe_synth->session,
- event, NULL);
-}
-
-static int arm_spe_synth_event(struct perf_session *session,
- struct perf_event_attr *attr, u64 id)
-{
- struct arm_spe_synth arm_spe_synth;
-
- memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
- arm_spe_synth.session = session;
-
- return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
- &id, arm_spe_event_synth);
-}
-
static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
const char *name)
{
@@ -1172,7 +1143,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
spe->sample_flc = true;
/* Level 1 data cache miss */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->l1d_miss_id = id;
@@ -1180,7 +1151,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
/* Level 1 data cache access */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->l1d_access_id = id;
@@ -1192,7 +1163,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
spe->sample_llc = true;
/* Last level cache miss */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->llc_miss_id = id;
@@ -1200,7 +1171,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
/* Last level cache access */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->llc_access_id = id;
@@ -1212,7 +1183,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
spe->sample_tlb = true;
/* TLB miss */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->tlb_miss_id = id;
@@ -1220,7 +1191,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
/* TLB access */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->tlb_access_id = id;
@@ -1232,7 +1203,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
spe->sample_branch = true;
/* Branch miss */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->branch_miss_id = id;
@@ -1244,7 +1215,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
spe->sample_remote_access = true;
/* Remote access */
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->remote_access_id = id;
@@ -1255,7 +1226,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
if (spe->synth_opts.mem) {
spe->sample_memory = true;
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->memory_id = id;
@@ -1276,7 +1247,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
attr.sample_period = spe->synth_opts.period;
spe->instructions_sample_period = attr.sample_period;
- err = arm_spe_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->instructions_id = id;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index e2f317063eec..ca8682966fae 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -671,11 +671,11 @@ int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
{
struct evsel *evsel;
- if (!itr->evlist || !itr->pmu)
+ if (!itr->evlist)
return -EINVAL;
evlist__for_each_entry(itr->evlist, evsel) {
- if (evsel->core.attr.type == itr->pmu->type) {
+ if (evsel__is_aux_event(evsel)) {
if (evsel->disabled)
return 0;
return evlist__enable_event_idx(itr->evlist, evsel, idx);
@@ -1240,7 +1240,7 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int
}
int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
struct perf_session *session,
perf_event__handler_t process)
{
@@ -1831,7 +1831,7 @@ int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
static int __auxtrace_mmap__read(struct mmap *map,
struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn,
+ const struct perf_tool *tool, process_auxtrace_t fn,
bool snapshot, size_t snapshot_size)
{
struct auxtrace_mmap *mm = &map->auxtrace_mmap;
@@ -1942,14 +1942,14 @@ static int __auxtrace_mmap__read(struct mmap *map,
}
int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn)
+ const struct perf_tool *tool, process_auxtrace_t fn)
{
return __auxtrace_mmap__read(map, itr, tool, fn, false, 0);
}
int auxtrace_mmap__read_snapshot(struct mmap *map,
struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn,
+ const struct perf_tool *tool, process_auxtrace_t fn,
size_t snapshot_size)
{
return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size);
@@ -2829,7 +2829,7 @@ int auxtrace_parse_filters(struct evlist *evlist)
}
int auxtrace__process_event(struct perf_session *session, union perf_event *event,
- struct perf_sample *sample, struct perf_tool *tool)
+ struct perf_sample *sample, const struct perf_tool *tool)
{
if (!session->auxtrace)
return 0;
@@ -2847,7 +2847,7 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session,
session->auxtrace->dump_auxtrace_sample(session, sample);
}
-int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool)
+int auxtrace__flush_events(struct perf_session *session, const struct perf_tool *tool)
{
if (!session->auxtrace)
return 0;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 8a6ec9565835..a1895a4f530b 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -208,17 +208,17 @@ struct auxtrace {
int (*process_event)(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool);
+ const struct perf_tool *tool);
int (*process_auxtrace_event)(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool);
+ const struct perf_tool *tool);
int (*queue_data)(struct perf_session *session,
struct perf_sample *sample, union perf_event *event,
u64 data_offset);
void (*dump_auxtrace_sample)(struct perf_session *session,
struct perf_sample *sample);
int (*flush_events)(struct perf_session *session,
- struct perf_tool *tool);
+ const struct perf_tool *tool);
void (*free_events)(struct perf_session *session);
void (*free)(struct perf_session *session);
bool (*evsel_is_auxtrace)(struct perf_session *session,
@@ -411,7 +411,6 @@ struct auxtrace_record {
int (*read_finish)(struct auxtrace_record *itr, int idx);
unsigned int alignment;
unsigned int default_aux_sample_size;
- struct perf_pmu *pmu;
struct evlist *evlist;
};
@@ -508,17 +507,17 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
struct evlist *evlist,
struct evsel *evsel, int idx);
-typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+typedef int (*process_auxtrace_t)(const struct perf_tool *tool,
struct mmap *map,
union perf_event *event, void *data1,
size_t len1, void *data2, size_t len2);
int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn);
+ const struct perf_tool *tool, process_auxtrace_t fn);
int auxtrace_mmap__read_snapshot(struct mmap *map,
struct auxtrace_record *itr,
- struct perf_tool *tool, process_auxtrace_t fn,
+ const struct perf_tool *tool, process_auxtrace_t fn,
size_t snapshot_size);
int auxtrace_queues__init_nr(struct auxtrace_queues *queues, int nr_queues);
@@ -639,10 +638,10 @@ int addr_filters__parse_bare_filter(struct addr_filters *filts,
int auxtrace_parse_filters(struct evlist *evlist);
int auxtrace__process_event(struct perf_session *session, union perf_event *event,
- struct perf_sample *sample, struct perf_tool *tool);
+ struct perf_sample *sample, const struct perf_tool *tool);
void auxtrace__dump_auxtrace_sample(struct perf_session *session,
struct perf_sample *sample);
-int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
+int auxtrace__flush_events(struct perf_session *session, const struct perf_tool *tool);
void auxtrace__free_events(struct perf_session *session);
void auxtrace__free(struct perf_session *session);
bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
@@ -809,7 +808,7 @@ static inline
int auxtrace__process_event(struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
@@ -822,7 +821,7 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused,
static inline
int auxtrace__flush_events(struct perf_session *session __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index 04068d48683f..649392bee7ed 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -40,16 +40,32 @@ static struct block_header_column {
[PERF_HPP_REPORT__BLOCK_DSO] = {
.name = "Shared Object",
.width = 20,
+ },
+ [PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER] = {
+ .name = "Branch Counter",
+ .width = 30,
}
};
-struct block_info *block_info__new(void)
+static struct block_info *block_info__new(unsigned int br_cntr_nr)
{
- return zalloc(sizeof(struct block_info));
+ struct block_info *bi = zalloc(sizeof(struct block_info));
+
+ if (bi && br_cntr_nr) {
+ bi->br_cntr = calloc(br_cntr_nr, sizeof(u64));
+ if (!bi->br_cntr) {
+ free(bi);
+ return NULL;
+ }
+ }
+
+ return bi;
}
void block_info__delete(struct block_info *bi)
{
+ if (bi)
+ free(bi->br_cntr);
free(bi);
}
@@ -86,7 +102,8 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
static void init_block_info(struct block_info *bi, struct symbol *sym,
struct cyc_hist *ch, int offset,
- u64 total_cycles)
+ u64 total_cycles, unsigned int br_cntr_nr,
+ u64 *br_cntr, struct evsel *evsel)
{
bi->sym = sym;
bi->start = ch->start;
@@ -99,10 +116,18 @@ static void init_block_info(struct block_info *bi, struct symbol *sym,
memcpy(bi->cycles_spark, ch->cycles_spark,
NUM_SPARKS * sizeof(u64));
+
+ if (br_cntr && br_cntr_nr) {
+ bi->br_cntr_nr = br_cntr_nr;
+ memcpy(bi->br_cntr, &br_cntr[offset * br_cntr_nr],
+ br_cntr_nr * sizeof(u64));
+ }
+ bi->evsel = evsel;
}
int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
- u64 *block_cycles_aggr, u64 total_cycles)
+ u64 *block_cycles_aggr, u64 total_cycles,
+ unsigned int br_cntr_nr)
{
struct annotation *notes;
struct cyc_hist *ch;
@@ -125,12 +150,14 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
struct block_info *bi;
struct hist_entry *he_block;
- bi = block_info__new();
+ bi = block_info__new(br_cntr_nr);
if (!bi)
return -1;
init_block_info(bi, he->ms.sym, &ch[i], i,
- total_cycles);
+ total_cycles, br_cntr_nr,
+ notes->branch->br_cntr,
+ hists_to_evsel(he->hists));
cycles += bi->cycles_aggr / bi->num_aggr;
he_block = hists__add_entry_block(&bh->block_hists,
@@ -327,6 +354,24 @@ static void init_block_header(struct block_fmt *block_fmt)
fmt->width = block_column_width;
}
+static int block_branch_counter_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char *buf;
+ int ret;
+
+ if (annotation_br_cntr_entry(&buf, bi->br_cntr_nr, bi->br_cntr,
+ bi->num_aggr, bi->evsel))
+ return 0;
+
+ ret = scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ free(buf);
+ return ret;
+}
+
static void hpp_register(struct block_fmt *block_fmt, int idx,
struct perf_hpp_list *hpp_list)
{
@@ -357,6 +402,9 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
case PERF_HPP_REPORT__BLOCK_DSO:
fmt->entry = block_dso_entry;
break;
+ case PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER:
+ fmt->entry = block_branch_counter_entry;
+ break;
default:
return;
}
@@ -390,7 +438,7 @@ static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts,
static int process_block_report(struct hists *hists,
struct block_report *block_report,
u64 total_cycles, int *block_hpps,
- int nr_hpps)
+ int nr_hpps, unsigned int br_cntr_nr)
{
struct rb_node *next = rb_first_cached(&hists->entries);
struct block_hist *bh = &block_report->hist;
@@ -405,7 +453,7 @@ static int process_block_report(struct hists *hists,
while (next) {
he = rb_entry(next, struct hist_entry, rb_node);
block_info__process_sym(he, bh, &block_report->cycles,
- total_cycles);
+ total_cycles, br_cntr_nr);
next = rb_next(&he->rb_node);
}
@@ -435,7 +483,7 @@ struct block_report *block_info__create_report(struct evlist *evlist,
struct hists *hists = evsel__hists(pos);
process_block_report(hists, &block_reports[i], total_cycles,
- block_hpps, nr_hpps);
+ block_hpps, nr_hpps, evlist->nr_br_cntr);
i++;
}
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
index 0b9e1aad4c55..b9329dc3ab59 100644
--- a/tools/perf/util/block-info.h
+++ b/tools/perf/util/block-info.h
@@ -18,6 +18,9 @@ struct block_info {
u64 total_cycles;
int num;
int num_aggr;
+ int br_cntr_nr;
+ u64 *br_cntr;
+ struct evsel *evsel;
};
struct block_fmt {
@@ -36,6 +39,7 @@ enum {
PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
PERF_HPP_REPORT__BLOCK_RANGE,
PERF_HPP_REPORT__BLOCK_DSO,
+ PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER,
PERF_HPP_REPORT__BLOCK_MAX_INDEX
};
@@ -46,7 +50,6 @@ struct block_report {
int nr_fmts;
};
-struct block_info *block_info__new(void);
void block_info__delete(struct block_info *bi);
int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right);
@@ -55,7 +58,8 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right);
int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
- u64 *block_cycles_aggr, u64 total_cycles);
+ u64 *block_cycles_aggr, u64 total_cycles,
+ unsigned int br_cntr_nr);
struct block_report *block_info__create_report(struct evlist *evlist,
u64 total_cycles,
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 827695cd0408..13608237c50e 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -170,7 +170,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
{
struct perf_record_ksymbol *ksymbol_event = &event->ksymbol;
struct perf_record_bpf_event *bpf_event = &event->bpf;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct bpf_prog_info_node *info_node;
struct perf_bpil *info_linear;
struct bpf_prog_info *info;
@@ -310,7 +310,7 @@ struct kallsyms_parse {
union perf_event *event;
perf_event__handler_t process;
struct machine *machine;
- struct perf_tool *tool;
+ const struct perf_tool *tool;
};
static int
diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
index 04f98b6bb291..e87b6789eb9e 100644
--- a/tools/perf/util/bpf-filter.c
+++ b/tools/perf/util/bpf-filter.c
@@ -1,12 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/**
+ * Generic event filter for sampling events in BPF.
+ *
+ * The BPF program is fixed and just to read filter expressions in the 'filters'
+ * map and compare the sample data in order to reject samples that don't match.
+ * Each filter expression contains a sample flag (term) to compare, an operation
+ * (==, >=, and so on) and a value.
+ *
+ * Note that each entry has an array of filter expressions and it only succeeds
+ * when all of the expressions are satisfied. But it supports the logical OR
+ * using a GROUP operation which is satisfied when any of its member expression
+ * is evaluated to true. But it doesn't allow nested GROUP operations for now.
+ *
+ * To support non-root users, the filters map can be loaded and pinned in the BPF
+ * filesystem by root (perf record --setup-filter pin). Then each user will get
+ * a new entry in the shared filters map to fill the filter expressions. And the
+ * BPF program will find the filter using (task-id, event-id) as a key.
+ *
+ * The pinned BPF object (shared for regular users) has:
+ *
+ * event_hash |
+ * | | |
+ * event->id ---> | id | ---+ idx_hash | filters
+ * | | | | | | | |
+ * | .... | +-> | idx | --+--> | exprs | ---> perf_bpf_filter_entry[]
+ * | | | | | | .op
+ * task id (tgid) --------------+ | .... | | | ... | .term (+ part)
+ * | .value
+ * |
+ * ======= (root would skip this part) ======== (compares it in a loop)
+ *
+ * This is used for per-task use cases while system-wide profiling (normally from
+ * root user) uses a separate copy of the program and the maps for its own so that
+ * it can proceed even if a lot of non-root users are using the filters at the
+ * same time. In this case the filters map has a single entry and no need to use
+ * the hash maps to get the index (key) of the filters map (IOW it's always 0).
+ *
+ * The BPF program returns 1 to accept the sample or 0 to drop it.
+ * The 'dropped' map is to keep how many samples it dropped by the filter and
+ * it will be reported as lost samples.
+ */
#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
#include <bpf/bpf.h>
#include <linux/err.h>
+#include <linux/list.h>
+#include <api/fs/fs.h>
#include <internal/xyarray.h>
+#include <perf/threadmap.h>
#include "util/debug.h"
#include "util/evsel.h"
+#include "util/target.h"
#include "util/bpf-filter.h"
#include <util/bpf-filter-flex.h>
@@ -20,6 +68,16 @@
#define __PERF_SAMPLE_TYPE(tt, st, opt) { tt, #st, opt }
#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PBF_TERM_##_st, PERF_SAMPLE_##_st, opt)
+/* Index in the pinned 'filters' map. Should be released after use. */
+struct pinned_filter_idx {
+ struct list_head list;
+ struct evsel *evsel;
+ u64 event_id;
+ int hash_idx;
+};
+
+static LIST_HEAD(pinned_filters);
+
static const struct perf_sample_info {
enum perf_bpf_filter_term type;
const char *name;
@@ -42,8 +100,11 @@ static const struct perf_sample_info {
PERF_SAMPLE_TYPE(TRANSACTION, "--transaction"),
PERF_SAMPLE_TYPE(CODE_PAGE_SIZE, "--code-page-size"),
PERF_SAMPLE_TYPE(DATA_PAGE_SIZE, "--data-page-size"),
+ PERF_SAMPLE_TYPE(CGROUP, "--all-cgroups"),
};
+static int get_pinned_fd(const char *name);
+
static const struct perf_sample_info *get_sample_info(enum perf_bpf_filter_term type)
{
size_t i;
@@ -91,92 +152,452 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *
return -1;
}
-int perf_bpf_filter__prepare(struct evsel *evsel)
+static int get_filter_entries(struct evsel *evsel, struct perf_bpf_filter_entry *entry)
{
- int i, x, y, fd;
- struct sample_filter_bpf *skel;
- struct bpf_program *prog;
- struct bpf_link *link;
+ int i = 0;
struct perf_bpf_filter_expr *expr;
- skel = sample_filter_bpf__open_and_load();
- if (!skel) {
- pr_err("Failed to load perf sample-filter BPF skeleton\n");
- return -1;
- }
-
- i = 0;
- fd = bpf_map__fd(skel->maps.filters);
list_for_each_entry(expr, &evsel->bpf_filters, list) {
- struct perf_bpf_filter_entry entry = {
- .op = expr->op,
- .part = expr->part,
- .term = expr->term,
- .value = expr->val,
- };
-
if (check_sample_flags(evsel, expr) < 0)
- return -1;
+ return -EINVAL;
+
+ if (i == MAX_FILTERS)
+ return -E2BIG;
- bpf_map_update_elem(fd, &i, &entry, BPF_ANY);
+ entry[i].op = expr->op;
+ entry[i].part = expr->part;
+ entry[i].term = expr->term;
+ entry[i].value = expr->val;
i++;
if (expr->op == PBF_OP_GROUP_BEGIN) {
struct perf_bpf_filter_expr *group;
list_for_each_entry(group, &expr->groups, list) {
- struct perf_bpf_filter_entry group_entry = {
- .op = group->op,
- .part = group->part,
- .term = group->term,
- .value = group->val,
- };
- bpf_map_update_elem(fd, &i, &group_entry, BPF_ANY);
+ if (i == MAX_FILTERS)
+ return -E2BIG;
+
+ entry[i].op = group->op;
+ entry[i].part = group->part;
+ entry[i].term = group->term;
+ entry[i].value = group->val;
i++;
}
- memset(&entry, 0, sizeof(entry));
- entry.op = PBF_OP_GROUP_END;
- bpf_map_update_elem(fd, &i, &entry, BPF_ANY);
+ if (i == MAX_FILTERS)
+ return -E2BIG;
+
+ entry[i].op = PBF_OP_GROUP_END;
i++;
}
}
- if (i > MAX_FILTERS) {
- pr_err("Too many filters: %d (max = %d)\n", i, MAX_FILTERS);
+ if (i < MAX_FILTERS) {
+ /* to terminate the loop early */
+ entry[i].op = PBF_OP_DONE;
+ i++;
+ }
+ return 0;
+}
+
+static int convert_to_tgid(int tid)
+{
+ char path[128];
+ char *buf, *p, *q;
+ int tgid;
+ size_t len;
+
+ scnprintf(path, sizeof(path), "%d/status", tid);
+ if (procfs__read_str(path, &buf, &len) < 0)
+ return -1;
+
+ p = strstr(buf, "Tgid:");
+ if (p == NULL) {
+ free(buf);
+ return -1;
+ }
+
+ tgid = strtol(p + 6, &q, 0);
+ free(buf);
+ if (*q != '\n')
+ return -1;
+
+ return tgid;
+}
+
+/*
+ * The event might be closed already so we cannot get the list of ids using FD
+ * like in create_event_hash() below, let's iterate the event_hash map and
+ * delete all entries that have the event id as a key.
+ */
+static void destroy_event_hash(u64 event_id)
+{
+ int fd;
+ u64 key, *prev_key = NULL;
+ int num = 0, alloced = 32;
+ u64 *ids = calloc(alloced, sizeof(*ids));
+
+ if (ids == NULL)
+ return;
+
+ fd = get_pinned_fd("event_hash");
+ if (fd < 0) {
+ pr_debug("cannot get fd for 'event_hash' map\n");
+ free(ids);
+ return;
+ }
+
+ /* Iterate the whole map to collect keys for the event id. */
+ while (!bpf_map_get_next_key(fd, prev_key, &key)) {
+ u64 id;
+
+ if (bpf_map_lookup_elem(fd, &key, &id) == 0 && id == event_id) {
+ if (num == alloced) {
+ void *tmp;
+
+ alloced *= 2;
+ tmp = realloc(ids, alloced * sizeof(*ids));
+ if (tmp == NULL)
+ break;
+
+ ids = tmp;
+ }
+ ids[num++] = key;
+ }
+
+ prev_key = &key;
+ }
+
+ for (int i = 0; i < num; i++)
+ bpf_map_delete_elem(fd, &ids[i]);
+
+ free(ids);
+ close(fd);
+}
+
+/*
+ * Return a representative id if ok, or 0 for failures.
+ *
+ * The perf_event->id is good for this, but an evsel would have multiple
+ * instances for CPUs and tasks. So pick up the first id and setup a hash
+ * from id of each instance to the representative id (the first one).
+ */
+static u64 create_event_hash(struct evsel *evsel)
+{
+ int x, y, fd;
+ u64 the_id = 0, id;
+
+ fd = get_pinned_fd("event_hash");
+ if (fd < 0) {
+ pr_err("cannot get fd for 'event_hash' map\n");
+ return 0;
+ }
+
+ for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
+ for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
+ int ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_ID, &id);
+
+ if (ret < 0) {
+ pr_err("Failed to get the event id\n");
+ if (the_id)
+ destroy_event_hash(the_id);
+ return 0;
+ }
+
+ if (the_id == 0)
+ the_id = id;
+
+ bpf_map_update_elem(fd, &id, &the_id, BPF_ANY);
+ }
+ }
+
+ close(fd);
+ return the_id;
+}
+
+static void destroy_idx_hash(struct pinned_filter_idx *pfi)
+{
+ int fd, nr;
+ struct perf_thread_map *threads;
+
+ fd = get_pinned_fd("filters");
+ bpf_map_delete_elem(fd, &pfi->hash_idx);
+ close(fd);
+
+ if (pfi->event_id)
+ destroy_event_hash(pfi->event_id);
+
+ threads = perf_evsel__threads(&pfi->evsel->core);
+ if (threads == NULL)
+ return;
+
+ fd = get_pinned_fd("idx_hash");
+ nr = perf_thread_map__nr(threads);
+ for (int i = 0; i < nr; i++) {
+ /* The target task might be dead already, just try the pid */
+ struct idx_hash_key key = {
+ .evt_id = pfi->event_id,
+ .tgid = perf_thread_map__pid(threads, i),
+ };
+
+ bpf_map_delete_elem(fd, &key);
+ }
+ close(fd);
+}
+
+/* Maintain a hashmap from (tgid, event-id) to filter index */
+static int create_idx_hash(struct evsel *evsel, struct perf_bpf_filter_entry *entry)
+{
+ int filter_idx;
+ int fd, nr, last;
+ u64 event_id = 0;
+ struct pinned_filter_idx *pfi = NULL;
+ struct perf_thread_map *threads;
+
+ fd = get_pinned_fd("filters");
+ if (fd < 0) {
+ pr_err("cannot get fd for 'filters' map\n");
+ return fd;
+ }
+
+ /* Find the first available entry in the filters map */
+ for (filter_idx = 0; filter_idx < MAX_FILTERS; filter_idx++) {
+ if (bpf_map_update_elem(fd, &filter_idx, entry, BPF_NOEXIST) == 0)
+ break;
+ }
+ close(fd);
+
+ if (filter_idx == MAX_FILTERS) {
+ pr_err("Too many users for the filter map\n");
+ return -EBUSY;
+ }
+
+ pfi = zalloc(sizeof(*pfi));
+ if (pfi == NULL) {
+ pr_err("Cannot save pinned filter index\n");
+ goto err;
+ }
+
+ pfi->evsel = evsel;
+ pfi->hash_idx = filter_idx;
+
+ event_id = create_event_hash(evsel);
+ if (event_id == 0) {
+ pr_err("Cannot update the event hash\n");
+ goto err;
+ }
+
+ pfi->event_id = event_id;
+
+ threads = perf_evsel__threads(&evsel->core);
+ if (threads == NULL) {
+ pr_err("Cannot get the thread list of the event\n");
+ goto err;
+ }
+
+ /* save the index to a hash map */
+ fd = get_pinned_fd("idx_hash");
+ if (fd < 0) {
+ pr_err("cannot get fd for 'idx_hash' map\n");
+ goto err;
+ }
+
+ last = -1;
+ nr = perf_thread_map__nr(threads);
+ for (int i = 0; i < nr; i++) {
+ int pid = perf_thread_map__pid(threads, i);
+ int tgid;
+ struct idx_hash_key key = {
+ .evt_id = event_id,
+ };
+
+ /* it actually needs tgid, let's get tgid from /proc. */
+ tgid = convert_to_tgid(pid);
+ if (tgid < 0) {
+ /* the thread may be dead, ignore. */
+ continue;
+ }
+
+ if (tgid == last)
+ continue;
+ last = tgid;
+ key.tgid = tgid;
+
+ if (bpf_map_update_elem(fd, &key, &filter_idx, BPF_ANY) < 0) {
+ pr_err("Failed to update the idx_hash\n");
+ close(fd);
+ goto err;
+ }
+ pr_debug("bpf-filter: idx_hash (task=%d,%s) -> %d\n",
+ tgid, evsel__name(evsel), filter_idx);
+ }
+
+ list_add(&pfi->list, &pinned_filters);
+ close(fd);
+ return filter_idx;
+
+err:
+ destroy_idx_hash(pfi);
+ free(pfi);
+ return -1;
+}
+
+int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target)
+{
+ int i, x, y, fd, ret;
+ struct sample_filter_bpf *skel = NULL;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ struct perf_bpf_filter_entry *entry;
+ bool needs_idx_hash = !target__has_cpu(target) && !target->uid_str;
+
+ entry = calloc(MAX_FILTERS, sizeof(*entry));
+ if (entry == NULL)
return -1;
+
+ ret = get_filter_entries(evsel, entry);
+ if (ret < 0) {
+ pr_err("Failed to process filter entries\n");
+ goto err;
}
+
+ if (needs_idx_hash && geteuid() != 0) {
+ int zero = 0;
+
+ /* The filters map is shared among other processes */
+ ret = create_idx_hash(evsel, entry);
+ if (ret < 0)
+ goto err;
+
+ fd = get_pinned_fd("dropped");
+ if (fd < 0) {
+ ret = fd;
+ goto err;
+ }
+
+ /* Reset the lost count */
+ bpf_map_update_elem(fd, &ret, &zero, BPF_ANY);
+ close(fd);
+
+ fd = get_pinned_fd("perf_sample_filter");
+ if (fd < 0) {
+ ret = fd;
+ goto err;
+ }
+
+ for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
+ for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
+ ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_SET_BPF, fd);
+ if (ret < 0) {
+ pr_err("Failed to attach perf sample-filter\n");
+ close(fd);
+ goto err;
+ }
+ }
+ }
+
+ close(fd);
+ free(entry);
+ return 0;
+ }
+
+ skel = sample_filter_bpf__open_and_load();
+ if (!skel) {
+ ret = -errno;
+ pr_err("Failed to load perf sample-filter BPF skeleton\n");
+ goto err;
+ }
+
+ i = 0;
+ fd = bpf_map__fd(skel->maps.filters);
+
+ /* The filters map has only one entry in this case */
+ if (bpf_map_update_elem(fd, &i, entry, BPF_ANY) < 0) {
+ ret = -errno;
+ pr_err("Failed to update the filter map\n");
+ goto err;
+ }
+
prog = skel->progs.perf_sample_filter;
for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
link = bpf_program__attach_perf_event(prog, FD(evsel, x, y));
if (IS_ERR(link)) {
pr_err("Failed to attach perf sample-filter program\n");
- return PTR_ERR(link);
+ ret = PTR_ERR(link);
+ goto err;
}
}
}
+ free(entry);
evsel->bpf_skel = skel;
return 0;
+
+err:
+ free(entry);
+ if (!list_empty(&pinned_filters)) {
+ struct pinned_filter_idx *pfi, *tmp;
+
+ list_for_each_entry_safe(pfi, tmp, &pinned_filters, list) {
+ destroy_idx_hash(pfi);
+ list_del(&pfi->list);
+ free(pfi);
+ }
+ }
+ sample_filter_bpf__destroy(skel);
+ return ret;
}
int perf_bpf_filter__destroy(struct evsel *evsel)
{
struct perf_bpf_filter_expr *expr, *tmp;
+ struct pinned_filter_idx *pfi, *pos;
list_for_each_entry_safe(expr, tmp, &evsel->bpf_filters, list) {
list_del(&expr->list);
free(expr);
}
sample_filter_bpf__destroy(evsel->bpf_skel);
+
+ list_for_each_entry_safe(pfi, pos, &pinned_filters, list) {
+ destroy_idx_hash(pfi);
+ list_del(&pfi->list);
+ free(pfi);
+ }
return 0;
}
u64 perf_bpf_filter__lost_count(struct evsel *evsel)
{
- struct sample_filter_bpf *skel = evsel->bpf_skel;
+ int count = 0;
+
+ if (list_empty(&evsel->bpf_filters))
+ return 0;
+
+ if (!list_empty(&pinned_filters)) {
+ int fd = get_pinned_fd("dropped");
+ struct pinned_filter_idx *pfi;
+
+ if (fd < 0)
+ return 0;
- return skel ? skel->bss->dropped : 0;
+ list_for_each_entry(pfi, &pinned_filters, list) {
+ if (pfi->evsel != evsel)
+ continue;
+
+ bpf_map_lookup_elem(fd, &pfi->hash_idx, &count);
+ break;
+ }
+ close(fd);
+ } else if (evsel->bpf_skel) {
+ struct sample_filter_bpf *skel = evsel->bpf_skel;
+ int fd = bpf_map__fd(skel->maps.dropped);
+ int idx = 0;
+
+ bpf_map_lookup_elem(fd, &idx, &count);
+ }
+
+ return count;
}
struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term,
@@ -212,3 +633,139 @@ int perf_bpf_filter__parse(struct list_head *expr_head, const char *str)
return ret;
}
+
+int perf_bpf_filter__pin(void)
+{
+ struct sample_filter_bpf *skel;
+ char *path = NULL;
+ int dir_fd, ret = -1;
+
+ skel = sample_filter_bpf__open();
+ if (!skel) {
+ ret = -errno;
+ pr_err("Failed to open perf sample-filter BPF skeleton\n");
+ goto err;
+ }
+
+ /* pinned program will use pid-hash */
+ bpf_map__set_max_entries(skel->maps.filters, MAX_FILTERS);
+ bpf_map__set_max_entries(skel->maps.event_hash, MAX_EVT_HASH);
+ bpf_map__set_max_entries(skel->maps.idx_hash, MAX_IDX_HASH);
+ bpf_map__set_max_entries(skel->maps.dropped, MAX_FILTERS);
+ skel->rodata->use_idx_hash = 1;
+
+ if (sample_filter_bpf__load(skel) < 0) {
+ ret = -errno;
+ pr_err("Failed to load perf sample-filter BPF skeleton\n");
+ goto err;
+ }
+
+ if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(),
+ PERF_BPF_FILTER_PIN_PATH) < 0) {
+ ret = -errno;
+ pr_err("Failed to allocate pathname in the BPF-fs\n");
+ goto err;
+ }
+
+ ret = bpf_object__pin(skel->obj, path);
+ if (ret < 0) {
+ pr_err("Failed to pin BPF filter objects\n");
+ goto err;
+ }
+
+ /* setup access permissions for the pinned objects */
+ dir_fd = open(path, O_PATH);
+ if (dir_fd < 0) {
+ bpf_object__unpin(skel->obj, path);
+ ret = dir_fd;
+ goto err;
+ }
+
+ /* BPF-fs root has the sticky bit */
+ if (fchmodat(dir_fd, "..", 01755, 0) < 0) {
+ pr_debug("chmod for BPF-fs failed\n");
+ ret = -errno;
+ goto err_close;
+ }
+
+ /* perf_filter directory */
+ if (fchmodat(dir_fd, ".", 0755, 0) < 0) {
+ pr_debug("chmod for perf_filter directory failed?\n");
+ ret = -errno;
+ goto err_close;
+ }
+
+ /* programs need write permission for some reason */
+ if (fchmodat(dir_fd, "perf_sample_filter", 0777, 0) < 0) {
+ pr_debug("chmod for perf_sample_filter failed\n");
+ ret = -errno;
+ }
+ /* maps */
+ if (fchmodat(dir_fd, "filters", 0666, 0) < 0) {
+ pr_debug("chmod for filters failed\n");
+ ret = -errno;
+ }
+ if (fchmodat(dir_fd, "event_hash", 0666, 0) < 0) {
+ pr_debug("chmod for event_hash failed\n");
+ ret = -errno;
+ }
+ if (fchmodat(dir_fd, "idx_hash", 0666, 0) < 0) {
+ pr_debug("chmod for idx_hash failed\n");
+ ret = -errno;
+ }
+ if (fchmodat(dir_fd, "dropped", 0666, 0) < 0) {
+ pr_debug("chmod for dropped failed\n");
+ ret = -errno;
+ }
+
+err_close:
+ close(dir_fd);
+
+err:
+ free(path);
+ sample_filter_bpf__destroy(skel);
+ return ret;
+}
+
+int perf_bpf_filter__unpin(void)
+{
+ struct sample_filter_bpf *skel;
+ char *path = NULL;
+ int ret = -1;
+
+ skel = sample_filter_bpf__open_and_load();
+ if (!skel) {
+ ret = -errno;
+ pr_err("Failed to open perf sample-filter BPF skeleton\n");
+ goto err;
+ }
+
+ if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(),
+ PERF_BPF_FILTER_PIN_PATH) < 0) {
+ ret = -errno;
+ pr_err("Failed to allocate pathname in the BPF-fs\n");
+ goto err;
+ }
+
+ ret = bpf_object__unpin(skel->obj, path);
+
+err:
+ free(path);
+ sample_filter_bpf__destroy(skel);
+ return ret;
+}
+
+static int get_pinned_fd(const char *name)
+{
+ char *path = NULL;
+ int fd;
+
+ if (asprintf(&path, "%s/fs/bpf/%s/%s", sysfs__mountpoint(),
+ PERF_BPF_FILTER_PIN_PATH, name) < 0)
+ return -1;
+
+ fd = bpf_obj_get(path);
+
+ free(path);
+ return fd;
+}
diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h
index cd6764442c16..916ed7770b73 100644
--- a/tools/perf/util/bpf-filter.h
+++ b/tools/perf/util/bpf-filter.h
@@ -16,6 +16,10 @@ struct perf_bpf_filter_expr {
};
struct evsel;
+struct target;
+
+/* path in BPF-fs for the pinned program and maps */
+#define PERF_BPF_FILTER_PIN_PATH "perf_filter"
#ifdef HAVE_BPF_SKEL
struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term,
@@ -23,9 +27,11 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term
enum perf_bpf_filter_op op,
unsigned long val);
int perf_bpf_filter__parse(struct list_head *expr_head, const char *str);
-int perf_bpf_filter__prepare(struct evsel *evsel);
+int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target);
int perf_bpf_filter__destroy(struct evsel *evsel);
u64 perf_bpf_filter__lost_count(struct evsel *evsel);
+int perf_bpf_filter__pin(void);
+int perf_bpf_filter__unpin(void);
#else /* !HAVE_BPF_SKEL */
@@ -34,7 +40,8 @@ static inline int perf_bpf_filter__parse(struct list_head *expr_head __maybe_unu
{
return -EOPNOTSUPP;
}
-static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused)
+static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused,
+ struct target *target __maybe_unused)
{
return -EOPNOTSUPP;
}
@@ -46,5 +53,13 @@ static inline u64 perf_bpf_filter__lost_count(struct evsel *evsel __maybe_unused
{
return 0;
}
+static inline int perf_bpf_filter__pin(void)
+{
+ return -EOPNOTSUPP;
+}
+static inline int perf_bpf_filter__unpin(void)
+{
+ return -EOPNOTSUPP;
+}
#endif /* HAVE_BPF_SKEL*/
#endif /* PERF_UTIL_BPF_FILTER_H */
diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l
index 2a7c839f3fae..f313404f95a9 100644
--- a/tools/perf/util/bpf-filter.l
+++ b/tools/perf/util/bpf-filter.l
@@ -9,8 +9,11 @@
#include "bpf-filter.h"
#include "bpf-filter-bison.h"
+extern int perf_bpf_filter_needs_path;
+
static int sample(enum perf_bpf_filter_term term)
{
+ perf_bpf_filter_needs_path = 0;
perf_bpf_filter_lval.sample.term = term;
perf_bpf_filter_lval.sample.part = 0;
return BFT_SAMPLE;
@@ -18,11 +21,20 @@ static int sample(enum perf_bpf_filter_term term)
static int sample_part(enum perf_bpf_filter_term term, int part)
{
+ perf_bpf_filter_needs_path = 0;
perf_bpf_filter_lval.sample.term = term;
perf_bpf_filter_lval.sample.part = part;
return BFT_SAMPLE;
}
+static int sample_path(enum perf_bpf_filter_term term)
+{
+ perf_bpf_filter_needs_path = 1;
+ perf_bpf_filter_lval.sample.term = term;
+ perf_bpf_filter_lval.sample.part = 0;
+ return BFT_SAMPLE_PATH;
+}
+
static int operator(enum perf_bpf_filter_op op)
{
perf_bpf_filter_lval.op = op;
@@ -48,10 +60,15 @@ static int constant(int val)
return BFT_NUM;
}
-static int error(const char *str)
+static int path_or_error(void)
{
- printf("perf_bpf_filter: Unexpected filter %s: %s\n", str, perf_bpf_filter_text);
- return BFT_ERROR;
+ if (!perf_bpf_filter_needs_path) {
+ printf("perf_bpf_filter: Error: Unexpected item: %s\n",
+ perf_bpf_filter_text);
+ return BFT_ERROR;
+ }
+ perf_bpf_filter_lval.path = perf_bpf_filter_text;
+ return BFT_PATH;
}
%}
@@ -59,6 +76,7 @@ static int error(const char *str)
num_dec [0-9]+
num_hex 0[Xx][0-9a-fA-F]+
space [ \t]+
+path [^ \t\n]+
ident [_a-zA-Z][_a-zA-Z0-9]+
%%
@@ -97,6 +115,7 @@ mem_blk { return sample_part(PBF_TERM_DATA_SRC, 7); }
mem_hops { return sample_part(PBF_TERM_DATA_SRC, 8); }
uid { return sample(PBF_TERM_UID); }
gid { return sample(PBF_TERM_GID); }
+cgroup { return sample_path(PBF_TERM_CGROUP); }
"==" { return operator(PBF_OP_EQ); }
"!=" { return operator(PBF_OP_NEQ); }
@@ -155,7 +174,6 @@ hops3 { return constant(PERF_MEM_HOPS_3); }
"," { return ','; }
"||" { return BFT_LOGICAL_OR; }
-{ident} { return error("ident"); }
-. { return error("input"); }
+{path} { return path_or_error(); }
%%
diff --git a/tools/perf/util/bpf-filter.y b/tools/perf/util/bpf-filter.y
index 0c56fccb8874..5a79a8e7a45b 100644
--- a/tools/perf/util/bpf-filter.y
+++ b/tools/perf/util/bpf-filter.y
@@ -12,9 +12,13 @@
#include <linux/compiler.h>
#include <linux/list.h>
#include "bpf-filter.h"
+#include "cgroup.h"
int perf_bpf_filter_lex(void);
+/* To indicate if the current term needs a pathname or not */
+int perf_bpf_filter_needs_path;
+
static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
char const *msg)
{
@@ -26,6 +30,7 @@ static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
%union
{
unsigned long num;
+ char *path;
struct {
enum perf_bpf_filter_term term;
int part;
@@ -34,12 +39,13 @@ static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
struct perf_bpf_filter_expr *expr;
}
-%token BFT_SAMPLE BFT_OP BFT_ERROR BFT_NUM BFT_LOGICAL_OR
+%token BFT_SAMPLE BFT_SAMPLE_PATH BFT_OP BFT_ERROR BFT_NUM BFT_LOGICAL_OR BFT_PATH
%type <expr> filter_term filter_expr
%destructor { free ($$); } <expr>
-%type <sample> BFT_SAMPLE
+%type <sample> BFT_SAMPLE BFT_SAMPLE_PATH
%type <op> BFT_OP
%type <num> BFT_NUM
+%type <path> BFT_PATH
%%
@@ -81,5 +87,23 @@ BFT_SAMPLE BFT_OP BFT_NUM
{
$$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, $3);
}
+|
+BFT_SAMPLE_PATH BFT_OP BFT_PATH
+{
+ struct cgroup *cgrp;
+ unsigned long cgroup_id = 0;
+
+ if ($2 != PBF_OP_EQ && $2 != PBF_OP_NEQ) {
+ printf("perf_bpf_filter: cgroup accepts '==' or '!=' only\n");
+ YYERROR;
+ }
+
+ cgrp = cgroup__new($3, /*do_open=*/false);
+ if (cgrp && read_cgroup_id(cgrp) == 0)
+ cgroup_id = cgrp->id;
+
+ $$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, cgroup_id);
+ cgroup__put(cgrp);
+}
%%
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index ea29c372f339..6ff42619de12 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -61,6 +61,9 @@ static int bperf_load_program(struct evlist *evlist)
skel->rodata->num_cpus = total_cpus;
skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups;
+ if (cgroup_is_v2("perf_event") > 0)
+ skel->rodata->use_cgroup_v2 = 1;
+
BUG_ON(evlist->core.nr_entries % nr_cgroups != 0);
/* we need one copy of events per cpu for reading */
@@ -82,9 +85,6 @@ static int bperf_load_program(struct evlist *evlist)
goto out;
}
- if (cgroup_is_v2("perf_event") > 0)
- skel->bss->use_cgroup_v2 = 1;
-
err = -1;
cgrp_switch = evsel__new(&cgrp_switch_attr);
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 7a4297d8fd2c..06d1c4018407 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -40,13 +40,17 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
if (ftrace->target.cpu_list) {
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ skel->rodata->has_cpu = 1;
}
if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ skel->rodata->has_task = 1;
}
+ skel->rodata->use_nsec = ftrace->use_nsec;
+
set_max_rlimit();
err = func_latency_bpf__load(skel);
@@ -59,7 +63,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
u32 cpu;
u8 val = 1;
- skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) {
@@ -72,7 +75,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
u32 pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) {
@@ -81,8 +83,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
}
}
- skel->bss->use_nsec = ftrace->use_nsec;
-
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(skel->links.func_begin)) {
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index 44f0f708a15d..6c7126b7670d 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -176,8 +176,6 @@ static int setup_filters(struct perf_kwork *kwork)
bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY);
}
perf_cpu_map__put(map);
-
- skel->bss->has_cpu_filter = 1;
}
if (kwork->profile_name != NULL) {
@@ -197,8 +195,6 @@ static int setup_filters(struct perf_kwork *kwork)
key = 0;
bpf_map_update_elem(fd, &key, kwork->profile_name, BPF_ANY);
-
- skel->bss->has_name_filter = 1;
}
return 0;
@@ -239,6 +235,11 @@ int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork)
class_bpf->load_prepare(kwork);
}
+ if (kwork->cpu_list != NULL)
+ skel->rodata->has_cpu_filter = 1;
+ if (kwork->profile_name != NULL)
+ skel->rodata->has_name_filter = 1;
+
if (kwork_trace_bpf__load(skel)) {
pr_debug("Failed to load kwork trace skeleton\n");
goto out;
diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c
index 22a3b00a1e23..7261cad43468 100644
--- a/tools/perf/util/bpf_kwork_top.c
+++ b/tools/perf/util/bpf_kwork_top.c
@@ -151,14 +151,12 @@ static int setup_filters(struct perf_kwork *kwork)
bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY);
}
perf_cpu_map__put(map);
-
- skel->bss->has_cpu_filter = 1;
}
return 0;
}
-int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
+int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork)
{
struct bpf_program *prog;
struct kwork_class *class;
@@ -193,6 +191,9 @@ int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
class_bpf->load_prepare();
}
+ if (kwork->cpu_list)
+ skel->rodata->has_cpu_filter = 1;
+
if (kwork_top_bpf__load(skel)) {
pr_debug("Failed to load kwork top skeleton\n");
goto out;
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index b4cb3fe5cc25..41a1ad087895 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -46,14 +46,22 @@ int lock_contention_prepare(struct lock_contention *con)
else
bpf_map__set_max_entries(skel->maps.stacks, 1);
- if (target__has_cpu(target))
+ if (target__has_cpu(target)) {
+ skel->rodata->has_cpu = 1;
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
- if (target__has_task(target))
+ }
+ if (target__has_task(target)) {
+ skel->rodata->has_task = 1;
ntasks = perf_thread_map__nr(evlist->core.threads);
- if (con->filters->nr_types)
+ }
+ if (con->filters->nr_types) {
+ skel->rodata->has_type = 1;
ntypes = con->filters->nr_types;
- if (con->filters->nr_cgrps)
+ }
+ if (con->filters->nr_cgrps) {
+ skel->rodata->has_cgroup = 1;
ncgrps = con->filters->nr_cgrps;
+ }
/* resolve lock name filters to addr */
if (con->filters->nr_syms) {
@@ -82,6 +90,7 @@ int lock_contention_prepare(struct lock_contention *con)
con->filters->addrs = addrs;
}
naddrs = con->filters->nr_addrs;
+ skel->rodata->has_addr = 1;
}
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
@@ -90,6 +99,16 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+ skel->rodata->stack_skip = con->stack_skip;
+ skel->rodata->aggr_mode = con->aggr_mode;
+ skel->rodata->needs_callstack = con->save_callstack;
+ skel->rodata->lock_owner = con->owner;
+
+ if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
+ if (cgroup_is_v2("perf_event"))
+ skel->rodata->use_cgroup_v2 = 1;
+ }
+
if (lock_contention_bpf__load(skel) < 0) {
pr_err("Failed to load lock-contention BPF skeleton\n");
return -1;
@@ -99,7 +118,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 cpu;
u8 val = 1;
- skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) {
@@ -112,7 +130,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) {
@@ -125,7 +142,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid = evlist->workload.pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
@@ -133,7 +149,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_types) {
u8 val = 1;
- skel->bss->has_type = 1;
fd = bpf_map__fd(skel->maps.type_filter);
for (i = 0; i < con->filters->nr_types; i++)
@@ -143,7 +158,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_addrs) {
u8 val = 1;
- skel->bss->has_addr = 1;
fd = bpf_map__fd(skel->maps.addr_filter);
for (i = 0; i < con->filters->nr_addrs; i++)
@@ -153,25 +167,14 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_cgrps) {
u8 val = 1;
- skel->bss->has_cgroup = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter);
for (i = 0; i < con->filters->nr_cgrps; i++)
bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
}
- /* these don't work well if in the rodata section */
- skel->bss->stack_skip = con->stack_skip;
- skel->bss->aggr_mode = con->aggr_mode;
- skel->bss->needs_callstack = con->save_callstack;
- skel->bss->lock_owner = con->owner;
-
- if (con->aggr_mode == LOCK_AGGR_CGROUP) {
- if (cgroup_is_v2("perf_event"))
- skel->bss->use_cgroup_v2 = 1;
-
+ if (con->aggr_mode == LOCK_AGGR_CGROUP)
read_all_cgroups(&con->cgroups);
- }
bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
@@ -286,6 +289,9 @@ static void account_end_timestamp(struct lock_contention *con)
goto next;
for (int i = 0; i < total_cpus; i++) {
+ if (cpu_data[i].lock == 0)
+ continue;
+
update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
&cpu_data[i]);
}
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c
index c863ae0c5cb5..578f27d2d6b4 100644
--- a/tools/perf/util/bpf_map.c
+++ b/tools/perf/util/bpf_map.c
@@ -35,9 +35,6 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
if (fd < 0)
return fd;
- if (!map)
- return PTR_ERR(map);
-
err = -ENOMEM;
key = malloc(bpf_map__key_size(map));
if (key == NULL)
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index 6af36142dc5a..a590a8ac1f9d 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -73,14 +73,12 @@ static void off_cpu_start(void *arg)
struct evlist *evlist = arg;
/* update task filter for the given workload */
- if (!skel->bss->has_cpu && !skel->bss->has_task &&
+ if (skel->rodata->has_task && skel->rodata->uses_tgid &&
perf_thread_map__pid(evlist->core.threads, 0) != -1) {
int fd;
u32 pid;
u8 val = 1;
- skel->bss->has_task = 1;
- skel->bss->uses_tgid = 1;
fd = bpf_map__fd(skel->maps.task_filter);
pid = perf_thread_map__pid(evlist->core.threads, 0);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
@@ -148,6 +146,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
if (target->cpu_list) {
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ skel->rodata->has_cpu = 1;
}
if (target->pid) {
@@ -173,11 +172,16 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
ntasks = MAX_PROC;
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ skel->rodata->has_task = 1;
+ skel->rodata->uses_tgid = 1;
} else if (target__has_task(target)) {
ntasks = perf_thread_map__nr(evlist->core.threads);
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ skel->rodata->has_task = 1;
} else if (target__none(target)) {
bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
+ skel->rodata->has_task = 1;
+ skel->rodata->uses_tgid = 1;
}
if (evlist__first(evlist)->cgrp) {
@@ -186,6 +190,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
if (!cgroup_is_v2("perf_event"))
skel->rodata->uses_cgroup_v1 = true;
+ skel->rodata->has_cgroup = 1;
}
if (opts->record_cgroup) {
@@ -208,7 +213,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
u32 cpu;
u8 val = 1;
- skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) {
@@ -220,8 +224,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
if (target->pid) {
u8 val = 1;
- skel->bss->has_task = 1;
- skel->bss->uses_tgid = 1;
fd = bpf_map__fd(skel->maps.task_filter);
strlist__for_each_entry(pos, pid_slist) {
@@ -240,7 +242,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
u32 pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) {
@@ -253,7 +254,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
struct evsel *evsel;
u8 val = 1;
- skel->bss->has_cgroup = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter);
evlist__for_each_entry(evlist, evsel) {
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 0acbd74e8c76..b2f17cca014b 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -7,9 +7,14 @@
*/
#include "vmlinux.h"
+#include "../trace_augment.h"
+
#include <bpf/bpf_helpers.h>
#include <linux/limits.h>
+#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
+
/**
* is_power_of_2() - check if a value is a power of two
* @n: the value to check
@@ -66,19 +71,6 @@ struct syscall_exit_args {
long ret;
};
-struct augmented_arg {
- unsigned int size;
- int err;
- char value[PATH_MAX];
-};
-
-struct pids_filtered {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, pid_t);
- __type(value, bool);
- __uint(max_entries, 64);
-} pids_filtered SEC(".maps");
-
/*
* Desired design of maximum size and alignment (see RFC2553)
*/
@@ -105,17 +97,27 @@ struct sockaddr_storage {
};
};
-struct augmented_args_payload {
- struct syscall_enter_args args;
- union {
- struct {
- struct augmented_arg arg, arg2;
- };
+struct augmented_arg {
+ unsigned int size;
+ int err;
+ union {
+ char value[PATH_MAX];
struct sockaddr_storage saddr;
- char __data[sizeof(struct augmented_arg)];
};
};
+struct pids_filtered {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, pid_t);
+ __type(value, bool);
+ __uint(max_entries, 64);
+} pids_filtered SEC(".maps");
+
+struct augmented_args_payload {
+ struct syscall_enter_args args;
+ struct augmented_arg arg, arg2; // We have to reserve space for two arguments (rename, etc)
+};
+
// We need more tmp space than the BPF stack can give us
struct augmented_args_tmp {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -124,6 +126,25 @@ struct augmented_args_tmp {
__uint(max_entries, 1);
} augmented_args_tmp SEC(".maps");
+struct beauty_map_enter {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, __u32[6]);
+ __uint(max_entries, 512);
+} beauty_map_enter SEC(".maps");
+
+struct beauty_payload_enter {
+ struct syscall_enter_args args;
+ struct augmented_arg aug_args[6];
+};
+
+struct beauty_payload_enter_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, int);
+ __type(value, struct beauty_payload_enter);
+ __uint(max_entries, 1);
+} beauty_payload_enter_map SEC(".maps");
+
static inline struct augmented_args_payload *augmented_args_payload(void)
{
int key = 0;
@@ -136,6 +157,11 @@ static inline int augmented__output(void *ctx, struct augmented_args_payload *ar
return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
}
+static inline int augmented__beauty_output(void *ctx, void *data, int len)
+{
+ return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len);
+}
+
static inline
unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
{
@@ -182,15 +208,17 @@ int sys_enter_connect(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *sockaddr_arg = (const void *)args->args[1];
unsigned int socklen = args->args[2];
- unsigned int len = sizeof(augmented_args->args);
+ unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
if (augmented_args == NULL)
return 1; /* Failure: don't filter */
- _Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two");
- socklen &= sizeof(augmented_args->saddr) - 1;
+ _Static_assert(is_power_of_2(sizeof(augmented_args->arg.saddr)), "sizeof(augmented_args->arg.saddr) needs to be a power of two");
+ socklen &= sizeof(augmented_args->arg.saddr) - 1;
- bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
+ augmented_args->arg.size = socklen;
+ augmented_args->arg.err = 0;
return augmented__output(args, augmented_args, len + socklen);
}
@@ -201,14 +229,14 @@ int sys_enter_sendto(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *sockaddr_arg = (const void *)args->args[4];
unsigned int socklen = args->args[5];
- unsigned int len = sizeof(augmented_args->args);
+ unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
if (augmented_args == NULL)
return 1; /* Failure: don't filter */
- socklen &= sizeof(augmented_args->saddr) - 1;
+ socklen &= sizeof(augmented_args->arg.saddr) - 1;
- bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
return augmented__output(args, augmented_args, len + socklen);
}
@@ -249,30 +277,50 @@ int sys_enter_rename(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *oldpath_arg = (const void *)args->args[0],
*newpath_arg = (const void *)args->args[1];
- unsigned int len = sizeof(augmented_args->args), oldpath_len;
+ unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
if (augmented_args == NULL)
return 1; /* Failure: don't filter */
+ len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
+
oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
- len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
+ augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+ len += augmented_args->arg.size;
+
+ struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+
+ newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
+ arg2->size = newpath_len;
+
+ len += newpath_len;
return augmented__output(args, augmented_args, len);
}
-SEC("tp/syscalls/sys_enter_renameat")
-int sys_enter_renameat(struct syscall_enter_args *args)
+SEC("tp/syscalls/sys_enter_renameat2")
+int sys_enter_renameat2(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *oldpath_arg = (const void *)args->args[1],
*newpath_arg = (const void *)args->args[3];
- unsigned int len = sizeof(augmented_args->args), oldpath_len;
+ unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
if (augmented_args == NULL)
return 1; /* Failure: don't filter */
+ len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
+
oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
- len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
+ augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+ len += augmented_args->arg.size;
+
+ struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+
+ newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
+ arg2->size = newpath_len;
+
+ len += newpath_len;
return augmented__output(args, augmented_args, len);
}
@@ -293,26 +341,26 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
- unsigned int len = sizeof(augmented_args->args);
+ unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
if (augmented_args == NULL)
goto failure;
- if (bpf_probe_read_user(&augmented_args->__data, sizeof(*attr), attr) < 0)
+ if (bpf_probe_read_user(&augmented_args->arg.value, sizeof(*attr), attr) < 0)
goto failure;
- attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
+ attr_read = (const struct perf_event_attr_size *)augmented_args->arg.value;
__u32 size = attr_read->size;
if (!size)
size = PERF_ATTR_SIZE_VER0;
- if (size > sizeof(augmented_args->__data))
+ if (size > sizeof(augmented_args->arg.value))
goto failure;
// Now that we read attr->size and tested it against the size limits, read it completely
- if (bpf_probe_read_user(&augmented_args->__data, size, attr) < 0)
+ if (bpf_probe_read_user(&augmented_args->arg.value, size, attr) < 0)
goto failure;
return augmented__output(args, augmented_args, len + size);
@@ -325,16 +373,16 @@ int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *rqtp_arg = (const void *)args->args[2];
- unsigned int len = sizeof(augmented_args->args);
+ unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
__u32 size = sizeof(struct timespec64);
if (augmented_args == NULL)
goto failure;
- if (size > sizeof(augmented_args->__data))
+ if (size > sizeof(augmented_args->arg.value))
goto failure;
- bpf_probe_read_user(&augmented_args->__data, size, rqtp_arg);
+ bpf_probe_read_user(&augmented_args->arg.value, size, rqtp_arg);
return augmented__output(args, augmented_args, len + size);
failure:
@@ -352,10 +400,10 @@ int sys_enter_nanosleep(struct syscall_enter_args *args)
if (augmented_args == NULL)
goto failure;
- if (size > sizeof(augmented_args->__data))
+ if (size > sizeof(augmented_args->arg.value))
goto failure;
- bpf_probe_read_user(&augmented_args->__data, size, req_arg);
+ bpf_probe_read_user(&augmented_args->arg.value, size, req_arg);
return augmented__output(args, augmented_args, len + size);
failure:
@@ -372,6 +420,91 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
return bpf_map_lookup_elem(pids, &pid) != NULL;
}
+static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
+{
+ bool augmented, do_output = false;
+ int zero = 0, size, aug_size, index, output = 0,
+ value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+ unsigned int nr, *beauty_map;
+ struct beauty_payload_enter *payload;
+ void *arg, *payload_offset;
+
+ /* fall back to do predefined tail call */
+ if (args == NULL)
+ return 1;
+
+ /* use syscall number to get beauty_map entry */
+ nr = (__u32)args->syscall_nr;
+ beauty_map = bpf_map_lookup_elem(&beauty_map_enter, &nr);
+
+ /* set up payload for output */
+ payload = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero);
+ payload_offset = (void *)&payload->aug_args;
+
+ if (beauty_map == NULL || payload == NULL)
+ return 1;
+
+ /* copy the sys_enter header, which has the syscall_nr */
+ __builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args));
+
+ /*
+ * Determine what type of argument and how many bytes to read from user space, using the
+ * value in the beauty_map. This is the relation of parameter type and its corresponding
+ * value in the beauty map, and how many bytes we read eventually:
+ *
+ * string: 1 -> size of string
+ * struct: size of struct -> size of struct
+ * buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF)
+ */
+ for (int i = 0; i < 6; i++) {
+ arg = (void *)args->args[i];
+ augmented = false;
+ size = beauty_map[i];
+ aug_size = size; /* size of the augmented data read from user space */
+
+ if (size == 0 || arg == NULL)
+ continue;
+
+ if (size == 1) { /* string */
+ aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg);
+ /* minimum of 0 to pass the verifier */
+ if (aug_size < 0)
+ aug_size = 0;
+
+ augmented = true;
+ } else if (size > 0 && size <= value_size) { /* struct */
+ if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
+ augmented = true;
+ } else if (size < 0 && size >= -6) { /* buffer */
+ index = -(size + 1);
+ aug_size = args->args[index];
+
+ if (aug_size > TRACE_AUG_MAX_BUF)
+ aug_size = TRACE_AUG_MAX_BUF;
+
+ if (aug_size > 0) {
+ if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
+ augmented = true;
+ }
+ }
+
+ /* write data to payload */
+ if (augmented) {
+ int written = offsetof(struct augmented_arg, value) + aug_size;
+
+ ((struct augmented_arg *)payload_offset)->size = aug_size;
+ output += written;
+ payload_offset += written;
+ do_output = true;
+ }
+ }
+
+ if (!do_output)
+ return 1;
+
+ return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output);
+}
+
SEC("tp/raw_syscalls/sys_enter")
int sys_enter(struct syscall_enter_args *args)
{
@@ -400,7 +533,8 @@ int sys_enter(struct syscall_enter_args *args)
* "!raw_syscalls:unaugmented" that will just return 1 to return the
* unaugmented tracepoint payload.
*/
- bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
+ if (augment_sys_enter(args, &augmented_args->args))
+ bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
// If not found on the PROG_ARRAY syscalls map, then we're filtering it:
return 0;
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index 6a438e0102c5..57cab7647a9a 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -57,9 +57,9 @@ struct cgroup___old {
const volatile __u32 num_events = 1;
const volatile __u32 num_cpus = 1;
+const volatile int use_cgroup_v2 = 0;
int enabled = 0;
-int use_cgroup_v2 = 0;
int perf_subsys_id = -1;
static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index 9d01e3af7479..f613dc9cb123 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -37,9 +37,10 @@ struct {
int enabled = 0;
-int has_cpu = 0;
-int has_task = 0;
-int use_nsec = 0;
+
+const volatile int has_cpu = 0;
+const volatile int has_task = 0;
+const volatile int use_nsec = 0;
SEC("kprobe/func")
int BPF_PROG(func_begin)
diff --git a/tools/perf/util/bpf_skel/kwork_top.bpf.c b/tools/perf/util/bpf_skel/kwork_top.bpf.c
index 84c15ccbab44..594da91965a2 100644
--- a/tools/perf/util/bpf_skel/kwork_top.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_top.bpf.c
@@ -84,7 +84,7 @@ struct {
int enabled = 0;
-int has_cpu_filter = 0;
+const volatile int has_cpu_filter = 0;
__u64 from_timestamp = 0;
__u64 to_timestamp = 0;
diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
index 063c124e0999..cbd79bc4b330 100644
--- a/tools/perf/util/bpf_skel/kwork_trace.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
@@ -68,8 +68,9 @@ struct {
} perf_kwork_name_filter SEC(".maps");
int enabled = 0;
-int has_cpu_filter = 0;
-int has_name_filter = 0;
+
+const volatile int has_cpu_filter = 0;
+const volatile int has_name_filter = 0;
static __always_inline int local_strncmp(const char *s1,
unsigned int sz, const char *s2)
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index d931a898c434..1069bda5d733 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -117,21 +117,22 @@ struct mm_struct___new {
} __attribute__((preserve_access_index));
/* control flags */
-int enabled;
-int has_cpu;
-int has_task;
-int has_type;
-int has_addr;
-int has_cgroup;
-int needs_callstack;
-int stack_skip;
-int lock_owner;
-
-int use_cgroup_v2;
-int perf_subsys_id = -1;
+const volatile int has_cpu;
+const volatile int has_task;
+const volatile int has_type;
+const volatile int has_addr;
+const volatile int has_cgroup;
+const volatile int needs_callstack;
+const volatile int stack_skip;
+const volatile int lock_owner;
+const volatile int use_cgroup_v2;
/* determine the key of lock stat */
-int aggr_mode;
+const volatile int aggr_mode;
+
+int enabled;
+
+int perf_subsys_id = -1;
__u64 end_ts;
@@ -323,8 +324,7 @@ static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
struct tstamp_data *pelem;
/* Use per-cpu array map for spinlock and rwlock */
- if (flags == (LCB_F_SPIN | LCB_F_READ) || flags == LCB_F_SPIN ||
- flags == (LCB_F_SPIN | LCB_F_WRITE)) {
+ if ((flags & (LCB_F_SPIN | LCB_F_MUTEX)) == LCB_F_SPIN) {
__u32 idx = 0;
pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx);
@@ -439,11 +439,8 @@ int contention_end(u64 *ctx)
duration = bpf_ktime_get_ns() - pelem->timestamp;
if ((__s64)duration < 0) {
- pelem->lock = 0;
- if (need_delete)
- bpf_map_delete_elem(&tstamp, &pid);
__sync_fetch_and_add(&time_fail, 1);
- return 0;
+ goto out;
}
switch (aggr_mode) {
@@ -477,11 +474,8 @@ int contention_end(u64 *ctx)
data = bpf_map_lookup_elem(&lock_stat, &key);
if (!data) {
if (data_map_full) {
- pelem->lock = 0;
- if (need_delete)
- bpf_map_delete_elem(&tstamp, &pid);
__sync_fetch_and_add(&data_fail, 1);
- return 0;
+ goto out;
}
struct contention_data first = {
@@ -498,16 +492,20 @@ int contention_end(u64 *ctx)
err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
if (err < 0) {
+ if (err == -EEXIST) {
+ /* it lost the race, try to get it again */
+ data = bpf_map_lookup_elem(&lock_stat, &key);
+ if (data != NULL)
+ goto found;
+ }
if (err == -E2BIG)
data_map_full = 1;
__sync_fetch_and_add(&data_fail, 1);
}
- pelem->lock = 0;
- if (need_delete)
- bpf_map_delete_elem(&tstamp, &pid);
- return 0;
+ goto out;
}
+found:
__sync_fetch_and_add(&data->total_time, duration);
__sync_fetch_and_add(&data->count, 1);
@@ -517,6 +515,7 @@ int contention_end(u64 *ctx)
if (data->min_time > duration)
data->min_time = duration;
+out:
pelem->lock = 0;
if (need_delete)
bpf_map_delete_elem(&tstamp, &pid);
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index 36af11faad03..de12892f992f 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -7,11 +7,11 @@ struct tstamp_data {
u64 timestamp;
u64 lock;
u32 flags;
- u32 stack_id;
+ s32 stack_id;
};
struct contention_key {
- u32 stack_id;
+ s32 stack_id;
u32 pid;
u64 lock_addr_or_cgroup;
};
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index d877a0a9731f..c152116df72f 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -85,10 +85,11 @@ struct task_struct___old {
} __attribute__((preserve_access_index));
int enabled = 0;
-int has_cpu = 0;
-int has_task = 0;
-int has_cgroup = 0;
-int uses_tgid = 0;
+
+const volatile int has_cpu = 0;
+const volatile int has_task = 0;
+const volatile int has_cgroup = 0;
+const volatile int uses_tgid = 0;
const volatile bool has_prev_state = false;
const volatile bool needs_cgroup = false;
diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h
index 350efa121026..683fec85e71e 100644
--- a/tools/perf/util/bpf_skel/sample-filter.h
+++ b/tools/perf/util/bpf_skel/sample-filter.h
@@ -1,7 +1,9 @@
#ifndef PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H
#define PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H
-#define MAX_FILTERS 64
+#define MAX_FILTERS 64
+#define MAX_IDX_HASH (16 * 1024)
+#define MAX_EVT_HASH (1024 * 1024)
/* supported filter operations */
enum perf_bpf_filter_op {
@@ -14,6 +16,7 @@ enum perf_bpf_filter_op {
PBF_OP_AND,
PBF_OP_GROUP_BEGIN,
PBF_OP_GROUP_END,
+ PBF_OP_DONE,
};
enum perf_bpf_filter_term {
@@ -42,7 +45,7 @@ enum perf_bpf_filter_term {
__PBF_UNUSED_TERM18 = PBF_TERM_SAMPLE_START + 18, /* SAMPLE_REGS_INTR = 1U << 18 */
PBF_TERM_PHYS_ADDR = PBF_TERM_SAMPLE_START + 19, /* SAMPLE_PHYS_ADDR = 1U << 19 */
__PBF_UNUSED_TERM20 = PBF_TERM_SAMPLE_START + 20, /* SAMPLE_AUX = 1U << 20 */
- __PBF_UNUSED_TERM21 = PBF_TERM_SAMPLE_START + 21, /* SAMPLE_CGROUP = 1U << 21 */
+ PBF_TERM_CGROUP = PBF_TERM_SAMPLE_START + 21, /* SAMPLE_CGROUP = 1U << 21 */
PBF_TERM_DATA_PAGE_SIZE = PBF_TERM_SAMPLE_START + 22, /* SAMPLE_DATA_PAGE_SIZE = 1U << 22 */
PBF_TERM_CODE_PAGE_SIZE = PBF_TERM_SAMPLE_START + 23, /* SAMPLE_CODE_PAGE_SIZE = 1U << 23 */
PBF_TERM_WEIGHT_STRUCT = PBF_TERM_SAMPLE_START + 24, /* SAMPLE_WEIGHT_STRUCT = 1U << 24 */
@@ -60,4 +63,10 @@ struct perf_bpf_filter_entry {
__u64 value;
};
+struct idx_hash_key {
+ __u64 evt_id;
+ __u32 tgid;
+ __u32 reserved;
+};
+
#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */
diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c
index f59985101973..b195e6efeb8b 100644
--- a/tools/perf/util/bpf_skel/sample_filter.bpf.c
+++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c
@@ -9,13 +9,41 @@
/* BPF map that will be filled by user space */
struct filters {
- __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
- __type(value, struct perf_bpf_filter_entry);
- __uint(max_entries, MAX_FILTERS);
+ __type(value, struct perf_bpf_filter_entry[MAX_FILTERS]);
+ __uint(max_entries, 1);
} filters SEC(".maps");
-int dropped;
+/*
+ * An evsel has multiple instances for each CPU or task but we need a single
+ * id to be used as a key for the idx_hash. This hashmap would translate the
+ * instance's ID to a representative ID.
+ */
+struct event_hash {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, __u64);
+ __uint(max_entries, 1);
+} event_hash SEC(".maps");
+
+/* tgid/evtid to filter index */
+struct idx_hash {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct idx_hash_key);
+ __type(value, int);
+ __uint(max_entries, 1);
+} idx_hash SEC(".maps");
+
+/* tgid to filter index */
+struct lost_count {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} dropped SEC(".maps");
+
+volatile const int use_idx_hash;
void *bpf_cast_to_kern_ctx(void *) __ksym;
@@ -65,6 +93,7 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
BUILD_CHECK_SAMPLE(DATA_SRC);
BUILD_CHECK_SAMPLE(TRANSACTION);
BUILD_CHECK_SAMPLE(PHYS_ADDR);
+ BUILD_CHECK_SAMPLE(CGROUP);
BUILD_CHECK_SAMPLE(DATA_PAGE_SIZE);
BUILD_CHECK_SAMPLE(CODE_PAGE_SIZE);
BUILD_CHECK_SAMPLE(WEIGHT_STRUCT);
@@ -107,6 +136,8 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
return kctx->data->weight.full;
case PBF_TERM_PHYS_ADDR:
return kctx->data->phys_addr;
+ case PBF_TERM_CGROUP:
+ return kctx->data->cgroup;
case PBF_TERM_CODE_PAGE_SIZE:
return kctx->data->code_page_size;
case PBF_TERM_DATA_PAGE_SIZE:
@@ -155,7 +186,6 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
case __PBF_UNUSED_TERM16:
case __PBF_UNUSED_TERM18:
case __PBF_UNUSED_TERM20:
- case __PBF_UNUSED_TERM21:
default:
break;
}
@@ -179,39 +209,66 @@ int perf_sample_filter(void *ctx)
__u64 sample_data;
int in_group = 0;
int group_result = 0;
- int i;
+ int i, k;
+ int *losts;
kctx = bpf_cast_to_kern_ctx(ctx);
- for (i = 0; i < MAX_FILTERS; i++) {
- int key = i; /* needed for verifier :( */
+ k = 0;
- entry = bpf_map_lookup_elem(&filters, &key);
- if (entry == NULL)
- break;
- sample_data = perf_get_sample(kctx, entry);
+ if (use_idx_hash) {
+ struct idx_hash_key key = {
+ .tgid = bpf_get_current_pid_tgid() >> 32,
+ };
+ __u64 eid = kctx->event->id;
+ __u64 *key_id;
+ int *idx;
+
+ /* get primary_event_id */
+ if (kctx->event->parent)
+ eid = kctx->event->parent->id;
- switch (entry->op) {
+ key_id = bpf_map_lookup_elem(&event_hash, &eid);
+ if (key_id == NULL)
+ goto drop;
+
+ key.evt_id = *key_id;
+
+ idx = bpf_map_lookup_elem(&idx_hash, &key);
+ if (idx)
+ k = *idx;
+ else
+ goto drop;
+ }
+
+ entry = bpf_map_lookup_elem(&filters, &k);
+ if (entry == NULL)
+ goto drop;
+
+ for (i = 0; i < MAX_FILTERS; i++) {
+ sample_data = perf_get_sample(kctx, &entry[i]);
+
+ switch (entry[i].op) {
case PBF_OP_EQ:
- CHECK_RESULT(sample_data, ==, entry->value)
+ CHECK_RESULT(sample_data, ==, entry[i].value)
break;
case PBF_OP_NEQ:
- CHECK_RESULT(sample_data, !=, entry->value)
+ CHECK_RESULT(sample_data, !=, entry[i].value)
break;
case PBF_OP_GT:
- CHECK_RESULT(sample_data, >, entry->value)
+ CHECK_RESULT(sample_data, >, entry[i].value)
break;
case PBF_OP_GE:
- CHECK_RESULT(sample_data, >=, entry->value)
+ CHECK_RESULT(sample_data, >=, entry[i].value)
break;
case PBF_OP_LT:
- CHECK_RESULT(sample_data, <, entry->value)
+ CHECK_RESULT(sample_data, <, entry[i].value)
break;
case PBF_OP_LE:
- CHECK_RESULT(sample_data, <=, entry->value)
+ CHECK_RESULT(sample_data, <=, entry[i].value)
break;
case PBF_OP_AND:
- CHECK_RESULT(sample_data, &, entry->value)
+ CHECK_RESULT(sample_data, &, entry[i].value)
break;
case PBF_OP_GROUP_BEGIN:
in_group = 1;
@@ -222,13 +279,19 @@ int perf_sample_filter(void *ctx)
goto drop;
in_group = 0;
break;
+ case PBF_OP_DONE:
+ /* no failures so far, accept it */
+ return 1;
}
}
/* generate sample data */
return 1;
drop:
- __sync_fetch_and_add(&dropped, 1);
+ losts = bpf_map_lookup_elem(&dropped, &k);
+ if (losts != NULL)
+ __sync_fetch_and_add(losts, 1);
+
return 0;
}
diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
index e9028235d771..4dcad7b682bd 100644
--- a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
+++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
@@ -15,6 +15,7 @@
typedef __u8 u8;
typedef __u32 u32;
+typedef __s32 s32;
typedef __u64 u64;
typedef __s64 s64;
@@ -170,10 +171,16 @@ struct perf_sample_data {
u32 cpu;
} cpu_entry;
u64 phys_addr;
+ u64 cgroup;
u64 data_page_size;
u64 code_page_size;
} __attribute__((__aligned__(64))) __attribute__((preserve_access_index));
+struct perf_event {
+ struct perf_event *parent;
+ u64 id;
+} __attribute__((preserve_access_index));
+
struct bpf_perf_event_data_kern {
struct perf_sample_data *data;
struct perf_event *event;
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 87704d713ff6..b80c12c74bbb 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -34,6 +34,7 @@ struct branch_info {
struct addr_map_symbol from;
struct addr_map_symbol to;
struct branch_flags flags;
+ u64 branch_stack_cntr;
char *srcline_from;
char *srcline_to;
};
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 83a1581e8cf1..8982f68e7230 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -42,7 +42,7 @@
static bool no_buildid_cache;
-int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
+int build_id__mark_dso_hit(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel __maybe_unused,
@@ -67,38 +67,6 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
return 0;
}
-static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
- union perf_event *event,
- struct perf_sample *sample
- __maybe_unused,
- struct machine *machine)
-{
- struct thread *thread = machine__findnew_thread(machine,
- event->fork.pid,
- event->fork.tid);
-
- dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
- event->fork.ppid, event->fork.ptid);
-
- if (thread) {
- machine__remove_thread(machine, thread);
- thread__put(thread);
- }
-
- return 0;
-}
-
-struct perf_tool build_id__mark_dso_hit_ops = {
- .sample = build_id__mark_dso_hit,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .fork = perf_event__process_fork,
- .exit = perf_event__exit_del_thread,
- .attr = perf_event__process_attr,
- .build_id = perf_event__process_build_id,
- .ordered_events = true,
-};
-
int build_id__sprintf(const struct build_id *build_id, char *bf)
{
char *bid = bf;
@@ -309,8 +277,8 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid
struct perf_record_header_build_id b;
size_t len;
- len = name_len + 1;
- len = PERF_ALIGN(len, NAME_ALIGN);
+ len = sizeof(b) + name_len + 1;
+ len = PERF_ALIGN(len, sizeof(u64));
memset(&b, 0, sizeof(b));
memcpy(&b.data, bid->data, bid->size);
@@ -318,7 +286,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid
misc |= PERF_RECORD_MISC_BUILD_ID_SIZE;
b.pid = pid;
b.header.misc = misc;
- b.header.size = sizeof(b) + len;
+ b.header.size = len;
err = do_write(fd, &b, sizeof(b));
if (err < 0)
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 3fa8bffb07ca..a212497bfdb0 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -16,11 +16,9 @@ struct build_id {
size_t size;
};
-struct nsinfo;
-
-extern struct perf_tool build_id__mark_dso_hit_ops;
struct dso;
struct feat_fd;
+struct nsinfo;
void build_id__init(struct build_id *bid, const u8 *data, size_t size);
int build_id__sprintf(const struct build_id *build_id, char *bf);
@@ -35,11 +33,11 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
bool is_debug, bool is_kallsyms);
-int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
+int build_id__mark_dso_hit(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct evsel *evsel,
struct machine *machine);
-int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
+int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct evsel *evsel,
struct machine *machine);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 1730b852a947..0c7564747a14 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1141,7 +1141,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
bool hide_unresolved)
{
- struct machine *machine = maps__machine(node->ms.maps);
+ struct machine *machine = node->ms.maps ? maps__machine(node->ms.maps) : NULL;
maps__put(al->maps);
al->maps = maps__get(node->ms.maps);
@@ -1797,3 +1797,38 @@ s64 callchain_avg_cycles(struct callchain_node *cnode)
return cycles;
}
+
+int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
+ struct perf_sample *sample, int max_stack,
+ bool symbols, callchain_iter_fn cb, void *data)
+{
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+ int ret;
+
+ if (!cursor)
+ return -ENOMEM;
+
+ /* Fill in the callchain. */
+ ret = __thread__resolve_callchain(thread, cursor, evsel, sample,
+ /*parent=*/NULL, /*root_al=*/NULL,
+ max_stack, symbols);
+ if (ret)
+ return ret;
+
+ /* Switch from writing the callchain to reading it. */
+ callchain_cursor_commit(cursor);
+
+ while (1) {
+ struct callchain_cursor_node *node = callchain_cursor_current(cursor);
+
+ if (!node)
+ break;
+
+ ret = cb(node, data);
+ if (ret)
+ return ret;
+
+ callchain_cursor_advance(cursor);
+ }
+ return 0;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index d5c66345ae31..86ed9e4d04f9 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -311,4 +311,10 @@ u64 callchain_total_hits(struct hists *hists);
s64 callchain_avg_cycles(struct callchain_node *cnode);
+typedef int (*callchain_iter_fn)(struct callchain_cursor_node *node, void *data);
+
+int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
+ struct perf_sample *sample, int max_stack,
+ bool symbols, callchain_iter_fn cb, void *data);
+
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c
index c3ba841bbf37..7574a67651bc 100644
--- a/tools/perf/util/cap.c
+++ b/tools/perf/util/cap.c
@@ -3,27 +3,52 @@
* Capability utilities
*/
-#ifdef HAVE_LIBCAP_SUPPORT
-
#include "cap.h"
-#include <stdbool.h>
-#include <sys/capability.h>
-
-bool perf_cap__capable(cap_value_t cap)
-{
- cap_flag_value_t val;
- cap_t caps = cap_get_proc();
+#include "debug.h"
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/capability.h>
+#include <sys/syscall.h>
- if (!caps)
- return false;
+#ifndef SYS_capget
+#define SYS_capget 90
+#endif
- if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val) != 0)
- val = CAP_CLEAR;
+#define MAX_LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3
- if (cap_free(caps) != 0)
- return false;
-
- return val == CAP_SET;
+bool perf_cap__capable(int cap, bool *used_root)
+{
+ struct __user_cap_header_struct header = {
+ .version = _LINUX_CAPABILITY_VERSION_3,
+ .pid = getpid(),
+ };
+ struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S];
+ __u32 cap_val;
+
+ *used_root = false;
+ while (syscall(SYS_capget, &header, &data[0]) == -1) {
+ /* Retry, first attempt has set the header.version correctly. */
+ if (errno == EINVAL && header.version != _LINUX_CAPABILITY_VERSION_3 &&
+ header.version == _LINUX_CAPABILITY_VERSION_1)
+ continue;
+
+ pr_debug2("capget syscall failed (%s - %d) fall back on root check\n",
+ strerror(errno), errno);
+ *used_root = true;
+ return geteuid() == 0;
+ }
+
+ /* Extract the relevant capability bit. */
+ if (cap >= 32) {
+ if (header.version == _LINUX_CAPABILITY_VERSION_3) {
+ cap_val = data[1].effective;
+ } else {
+ /* Capability beyond 32 is requested but only 32 are supported. */
+ return false;
+ }
+ } else {
+ cap_val = data[0].effective;
+ }
+ return (cap_val & (1 << (cap & 0x1f))) != 0;
}
-
-#endif /* HAVE_LIBCAP_SUPPORT */
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
index ae52878c0b2e..0c6a1ff55f07 100644
--- a/tools/perf/util/cap.h
+++ b/tools/perf/util/cap.h
@@ -3,26 +3,6 @@
#define __PERF_CAP_H
#include <stdbool.h>
-#include <linux/capability.h>
-#include <linux/compiler.h>
-
-#ifdef HAVE_LIBCAP_SUPPORT
-
-#include <sys/capability.h>
-
-bool perf_cap__capable(cap_value_t cap);
-
-#else
-
-#include <unistd.h>
-#include <sys/types.h>
-
-static inline bool perf_cap__capable(int cap __maybe_unused)
-{
- return geteuid() == 0;
-}
-
-#endif /* HAVE_LIBCAP_SUPPORT */
/* For older systems */
#ifndef CAP_SYSLOG
@@ -33,4 +13,7 @@ static inline bool perf_cap__capable(int cap __maybe_unused)
#define CAP_PERFMON 38
#endif
+/* Query if a capability is supported, used_root is set if the fallback root check was used. */
+bool perf_cap__capable(int cap, bool *used_root);
+
#endif /* __PERF_CAP_H */
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index e917985bbbe6..b78ef0262135 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -41,7 +41,7 @@ const u32 INSTR_PER_NS = 10;
struct cs_etm_decoder {
void *data;
- void (*packet_printer)(const char *msg);
+ void (*packet_printer)(const char *msg, void *data);
bool suppress_printing;
dcd_tree_handle_t dcd_tree;
cs_etm_mem_cb_type mem_access;
@@ -202,7 +202,7 @@ static void cs_etm_decoder__print_str_cb(const void *p_context,
const struct cs_etm_decoder *decoder = p_context;
if (p_context && str_len && !decoder->suppress_printing)
- decoder->packet_printer(msg);
+ decoder->packet_printer(msg, decoder->data);
}
static int
@@ -388,7 +388,8 @@ cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
+cs_etm_decoder__buffer_packet(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
const u8 trace_chan_id,
enum cs_etm_sample_type sample_type)
{
@@ -398,7 +399,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
return OCSD_RESP_FATAL_SYS_ERR;
- if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+ if (cs_etm__get_cpu(etmq, trace_chan_id, &cpu) < 0)
return OCSD_RESP_FATAL_SYS_ERR;
et = packet_queue->tail;
@@ -436,7 +437,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(etmq, packet_queue, trace_chan_id,
CS_ETM_RANGE);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
@@ -496,7 +497,8 @@ out:
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_discontinuity(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *queue,
const uint8_t trace_chan_id)
{
/*
@@ -504,18 +506,19 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
* reset time statistics.
*/
cs_etm_decoder__reset_timestamp(queue);
- return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
CS_ETM_DISCONTINUITY);
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_exception(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *queue,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
{ int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
CS_ETM_EXCEPTION);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
@@ -527,10 +530,11 @@ cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_exception_ret(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *queue,
const uint8_t trace_chan_id)
{
- return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
CS_ETM_EXCEPTION_RET);
}
@@ -599,7 +603,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
case OCSD_GEN_TRC_ELEM_EO_TRACE:
case OCSD_GEN_TRC_ELEM_NO_SYNC:
case OCSD_GEN_TRC_ELEM_TRACE_ON:
- resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
+ resp = cs_etm_decoder__buffer_discontinuity(etmq, packet_queue,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
@@ -607,11 +611,11 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION:
- resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
+ resp = cs_etm_decoder__buffer_exception(etmq, packet_queue, elem,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
- resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
+ resp = cs_etm_decoder__buffer_exception_ret(etmq, packet_queue,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_TIMESTAMP:
@@ -680,10 +684,6 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
return -1;
}
- /* if the CPU has no trace ID associated, no decoder needed */
- if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL)
- return 0;
-
if (d_params->operation == CS_ETM_OPERATION_DECODE) {
if (ocsd_dt_create_decoder(decoder->dcd_tree,
decoder->decoder_name,
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 272c2efe78ee..12c782fa6db2 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -60,7 +60,7 @@ struct cs_etm_trace_params {
struct cs_etm_decoder_params {
int operation;
- void (*packet_printer)(const char *msg);
+ void (*packet_printer)(const char *msg, void *data);
cs_etm_mem_cb_type mem_acc_cb;
bool formatted;
bool fsyncs;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 5e9fbcfad7d4..40f047baef81 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -97,28 +97,43 @@ struct cs_etm_traceid_queue {
struct cs_etm_packet_queue packet_queue;
};
+enum cs_etm_format {
+ UNSET,
+ FORMATTED,
+ UNFORMATTED
+};
+
struct cs_etm_queue {
struct cs_etm_auxtrace *etm;
struct cs_etm_decoder *decoder;
struct auxtrace_buffer *buffer;
unsigned int queue_nr;
u8 pending_timestamp_chan_id;
+ enum cs_etm_format format;
u64 offset;
const unsigned char *buf;
size_t buf_len, buf_used;
/* Conversion between traceID and index in traceid_queues array */
struct intlist *traceid_queues_list;
struct cs_etm_traceid_queue **traceid_queues;
+ /* Conversion between traceID and metadata pointers */
+ struct intlist *traceid_list;
+ /*
+ * Same as traceid_list, but traceid_list may be a reference to another
+ * queue's which has a matching sink ID.
+ */
+ struct intlist *own_traceid_list;
+ u32 sink_id;
};
-/* RB tree for quick conversion between traceID and metadata pointers */
-static struct intlist *traceid_list;
-
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
+static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
+static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
+static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300
@@ -133,6 +148,7 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
(queue_nr << 16 | trace_chan_id)
#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
+#define SINK_UNSET ((u32) -1)
static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
@@ -144,12 +160,12 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
return CS_ETM_PROTO_ETMV3;
}
-static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
+static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
{
struct int_node *inode;
u64 *metadata;
- inode = intlist__find(traceid_list, trace_chan_id);
+ inode = intlist__find(etmq->traceid_list, trace_chan_id);
if (!inode)
return -EINVAL;
@@ -158,12 +174,12 @@ static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
return 0;
}
-int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
+int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
{
struct int_node *inode;
u64 *metadata;
- inode = intlist__find(traceid_list, trace_chan_id);
+ inode = intlist__find(etmq->traceid_list, trace_chan_id);
if (!inode)
return -EINVAL;
@@ -215,26 +231,171 @@ enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
return etmq->etm->pid_fmt;
}
-static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
+static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
+ u8 trace_chan_id, u64 *cpu_metadata)
{
- struct int_node *inode;
-
/* Get an RB node for this CPU */
- inode = intlist__findnew(traceid_list, trace_chan_id);
+ struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
/* Something went wrong, no need to continue */
if (!inode)
return -ENOMEM;
+ /* Disallow re-mapping a different traceID to metadata pair. */
+ if (inode->priv) {
+ u64 *curr_cpu_data = inode->priv;
+ u8 curr_chan_id;
+ int err;
+
+ if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
+ /*
+ * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
+ * are expected (but not supported) in per-thread mode,
+ * rather than signifying an error.
+ */
+ if (etmq->etm->per_thread_decoding)
+ pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
+ else
+ pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
+
+ return -EINVAL;
+ }
+
+ /* check that the mapped ID matches */
+ err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
+ if (err)
+ return err;
+
+ if (curr_chan_id != trace_chan_id) {
+ pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
+ return -EINVAL;
+ }
+
+ /* Skip re-adding the same mappings if everything matched */
+ return 0;
+ }
+
+ /* Not one we've seen before, associate the traceID with the metadata pointer */
+ inode->priv = cpu_metadata;
+
+ return 0;
+}
+
+static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
+{
+ if (etm->per_thread_decoding)
+ return etm->queues.queue_array[0].priv;
+ else
+ return etm->queues.queue_array[cpu].priv;
+}
+
+static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
+ u64 *cpu_metadata)
+{
+ struct cs_etm_queue *etmq;
+
+ /*
+ * If the queue is unformatted then only save one mapping in the
+ * queue associated with that CPU so only one decoder is made.
+ */
+ etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
+ if (etmq->format == UNFORMATTED)
+ return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
+ cpu_metadata);
+
/*
- * The node for that CPU should not be taken.
- * Back out if that's the case.
+ * Otherwise, version 0 trace IDs are global so save them into every
+ * queue.
*/
- if (inode->priv)
+ for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
+ int ret;
+
+ etmq = etm->queues.queue_array[i].priv;
+ ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
+ cpu_metadata);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
+ u64 hw_id)
+{
+ int err;
+ u64 *cpu_data;
+ u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+
+ cpu_data = get_cpu_data(etm, cpu);
+ if (cpu_data == NULL)
return -EINVAL;
- /* All good, associate the traceID with the metadata pointer */
- inode->priv = cpu_metadata;
+ err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
+ if (err)
+ return err;
+
+ /*
+ * if we are picking up the association from the packet, need to plug
+ * the correct trace ID into the metadata for setting up decoders later.
+ */
+ return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
+}
+
+static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
+ u64 hw_id)
+{
+ struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
+ int ret;
+ u64 *cpu_data;
+ u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
+ u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+
+ /*
+ * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
+ * let it pass for now until an actual overlapping trace ID is hit. In
+ * most cases IDs won't overlap even if the sink changes.
+ */
+ if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
+ etmq->sink_id != sink_id) {
+ pr_err("CS_ETM: mismatch between sink IDs\n");
+ return -EINVAL;
+ }
+
+ etmq->sink_id = sink_id;
+
+ /* Find which other queues use this sink and link their ID maps */
+ for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
+ struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
+
+ /* Different sinks, skip */
+ if (other_etmq->sink_id != etmq->sink_id)
+ continue;
+
+ /* Already linked, skip */
+ if (other_etmq->traceid_list == etmq->traceid_list)
+ continue;
+
+ /* At the point of first linking, this one should be empty */
+ if (!intlist__empty(etmq->traceid_list)) {
+ pr_err("CS_ETM: Can't link populated trace ID lists\n");
+ return -EINVAL;
+ }
+
+ etmq->own_traceid_list = NULL;
+ intlist__delete(etmq->traceid_list);
+ etmq->traceid_list = other_etmq->traceid_list;
+ break;
+ }
+
+ cpu_data = get_cpu_data(etm, cpu);
+ ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
+ if (ret)
+ return ret;
+
+ ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
+ if (ret)
+ return ret;
return 0;
}
@@ -261,7 +422,6 @@ static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
/*
* update metadata trace ID from the value found in the AUX_HW_INFO packet.
- * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
*/
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
@@ -322,20 +482,16 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
{
struct cs_etm_auxtrace *etm;
struct perf_sample sample;
- struct int_node *inode;
struct evsel *evsel;
- u64 *cpu_data;
u64 hw_id;
int cpu, version, err;
- u8 trace_chan_id, curr_chan_id;
/* extract and parse the HW ID */
hw_id = event->aux_output_hw_id.hw_id;
- version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
- trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+ version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
/* check that we can handle this version */
- if (version > CS_AUX_HW_ID_CURR_VERSION) {
+ if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
version);
return -EINVAL;
@@ -360,43 +516,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
return -EINVAL;
}
- /* See if the ID is mapped to a CPU, and it matches the current CPU */
- inode = intlist__find(traceid_list, trace_chan_id);
- if (inode) {
- cpu_data = inode->priv;
- if ((int)cpu_data[CS_ETM_CPU] != cpu) {
- pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
- return -EINVAL;
- }
-
- /* check that the mapped ID matches */
- err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
- if (err)
- return err;
- if (curr_chan_id != trace_chan_id) {
- pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
- return -EINVAL;
- }
-
- /* mapped and matched - return OK */
- return 0;
- }
-
- cpu_data = get_cpu_data(etm, cpu);
- if (cpu_data == NULL)
- return err;
-
- /* not one we've seen before - lets map it */
- err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
- if (err)
- return err;
+ if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
+ return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
- /*
- * if we are picking up the association from the packet, need to plug
- * the correct trace ID into the metadata for setting up decoders later.
- */
- err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
- return err;
+ return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
}
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
@@ -639,94 +762,79 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
}
}
-static void cs_etm__packet_dump(const char *pkt_string)
+static void cs_etm__packet_dump(const char *pkt_string, void *data)
{
const char *color = PERF_COLOR_BLUE;
int len = strlen(pkt_string);
+ struct cs_etm_queue *etmq = data;
+ char queue_nr[64];
+
+ if (verbose)
+ snprintf(queue_nr, sizeof(queue_nr), "Qnr:%d; ", etmq->queue_nr);
+ else
+ queue_nr[0] = '\0';
if (len && (pkt_string[len-1] == '\n'))
- color_fprintf(stdout, color, " %s", pkt_string);
+ color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string);
else
- color_fprintf(stdout, color, " %s\n", pkt_string);
+ color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string);
fflush(stdout);
}
static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm, int t_idx,
- int m_idx, u32 etmidr)
+ u64 *metadata, u32 etmidr)
{
- u64 **metadata = etm->metadata;
-
- t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
- t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
- t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
+ t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
+ t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
+ t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
}
static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm, int t_idx,
- int m_idx)
+ u64 *metadata)
{
- u64 **metadata = etm->metadata;
-
- t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
- t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
- t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
- t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
- t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
- t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
- t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
+ t_params->protocol = CS_ETM_PROTO_ETMV4i;
+ t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
+ t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
+ t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
+ t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
+ t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
+ t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
}
static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm, int t_idx,
- int m_idx)
+ u64 *metadata)
{
- u64 **metadata = etm->metadata;
-
- t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
- t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
- t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
- t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
- t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
- t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
- t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
- t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
+ t_params->protocol = CS_ETM_PROTO_ETE;
+ t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
+ t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
+ t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
+ t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
+ t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
+ t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
+ t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
}
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm,
- bool formatted,
- int sample_cpu,
- int decoders)
-{
- int t_idx, m_idx;
- u32 etmidr;
- u64 architecture;
-
- for (t_idx = 0; t_idx < decoders; t_idx++) {
- if (formatted)
- m_idx = t_idx;
- else {
- m_idx = get_cpu_data_idx(etm, sample_cpu);
- if (m_idx == -1) {
- pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
- m_idx = 0;
- }
- }
+ struct cs_etm_queue *etmq)
+{
+ struct int_node *inode;
- architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
+ intlist__for_each_entry(inode, etmq->traceid_list) {
+ u64 *metadata = inode->priv;
+ u64 architecture = metadata[CS_ETM_MAGIC];
+ u32 etmidr;
switch (architecture) {
case __perf_cs_etmv3_magic:
- etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
- cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
+ etmidr = metadata[CS_ETM_ETMIDR];
+ cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
break;
case __perf_cs_etmv4_magic:
- cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
+ cs_etm__set_trace_param_etmv4(t_params++, metadata);
break;
case __perf_cs_ete_magic:
- cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
+ cs_etm__set_trace_param_ete(t_params++, metadata);
break;
default:
return -EINVAL;
@@ -738,8 +846,7 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
struct cs_etm_queue *etmq,
- enum cs_etm_decoder_operation mode,
- bool formatted)
+ enum cs_etm_decoder_operation mode)
{
int ret = -EINVAL;
@@ -749,7 +856,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
d_params->packet_printer = cs_etm__packet_dump;
d_params->operation = mode;
d_params->data = etmq;
- d_params->formatted = formatted;
+ d_params->formatted = etmq->format == FORMATTED;
d_params->fsyncs = false;
d_params->hsyncs = false;
d_params->frame_aligned = true;
@@ -788,7 +895,7 @@ static void cs_etm__dump_event(struct cs_etm_queue *etmq,
}
static int cs_etm__flush_events(struct perf_session *session,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -850,6 +957,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
static void cs_etm__free_queue(void *priv)
{
+ struct int_node *inode, *tmp;
struct cs_etm_queue *etmq = priv;
if (!etmq)
@@ -857,6 +965,16 @@ static void cs_etm__free_queue(void *priv)
cs_etm_decoder__free(etmq->decoder);
cs_etm__free_traceid_queues(etmq);
+
+ if (etmq->own_traceid_list) {
+ /* First remove all traceID/metadata nodes for the RB tree */
+ intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
+ intlist__remove(etmq->own_traceid_list, inode);
+
+ /* Then the RB tree itself */
+ intlist__delete(etmq->own_traceid_list);
+ }
+
free(etmq);
}
@@ -879,19 +997,12 @@ static void cs_etm__free_events(struct perf_session *session)
static void cs_etm__free(struct perf_session *session)
{
int i;
- struct int_node *inode, *tmp;
struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
struct cs_etm_auxtrace,
auxtrace);
cs_etm__free_events(session);
session->auxtrace = NULL;
- /* First remove all traceID/metadata nodes for the RB tree */
- intlist__for_each_entry_safe(inode, tmp, traceid_list)
- intlist__remove(traceid_list, inode);
- /* Then the RB tree itself */
- intlist__delete(traceid_list);
-
for (i = 0; i < aux->num_cpu; i++)
zfree(&aux->metadata[i]);
@@ -1041,19 +1152,9 @@ out:
return ret;
}
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
- bool formatted, int sample_cpu)
+static struct cs_etm_queue *cs_etm__alloc_queue(void)
{
- struct cs_etm_decoder_params d_params;
- struct cs_etm_trace_params *t_params = NULL;
- struct cs_etm_queue *etmq;
- /*
- * Each queue can only contain data from one CPU when unformatted, so only one decoder is
- * needed.
- */
- int decoders = formatted ? etm->num_cpu : 1;
-
- etmq = zalloc(sizeof(*etmq));
+ struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
if (!etmq)
return NULL;
@@ -1061,42 +1162,17 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
if (!etmq->traceid_queues_list)
goto out_free;
- /* Use metadata to fill in trace parameters for trace decoder */
- t_params = zalloc(sizeof(*t_params) * decoders);
-
- if (!t_params)
- goto out_free;
-
- if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
- goto out_free;
-
- /* Set decoder parameters to decode trace packets */
- if (cs_etm__init_decoder_params(&d_params, etmq,
- dump_trace ? CS_ETM_OPERATION_PRINT :
- CS_ETM_OPERATION_DECODE,
- formatted))
- goto out_free;
-
- etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
- t_params);
-
- if (!etmq->decoder)
- goto out_free;
-
/*
- * Register a function to handle all memory accesses required by
- * the trace decoder library.
+ * Create an RB tree for traceID-metadata tuple. Since the conversion
+ * has to be made for each packet that gets decoded, optimizing access
+ * in anything other than a sequential array is worth doing.
*/
- if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
- 0x0L, ((u64) -1L),
- cs_etm__mem_access))
- goto out_free_decoder;
+ etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
+ if (!etmq->traceid_list)
+ goto out_free;
- zfree(&t_params);
return etmq;
-out_free_decoder:
- cs_etm_decoder__free(etmq->decoder);
out_free:
intlist__delete(etmq->traceid_queues_list);
free(etmq);
@@ -1106,16 +1182,14 @@ out_free:
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
struct auxtrace_queue *queue,
- unsigned int queue_nr,
- bool formatted,
- int sample_cpu)
+ unsigned int queue_nr)
{
struct cs_etm_queue *etmq = queue->priv;
- if (list_empty(&queue->head) || etmq)
+ if (etmq)
return 0;
- etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
+ etmq = cs_etm__alloc_queue();
if (!etmq)
return -ENOMEM;
@@ -1123,7 +1197,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
queue->priv = etmq;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
+ queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
etmq->offset = 0;
+ etmq->sink_id = SINK_UNSET;
return 0;
}
@@ -1267,8 +1343,12 @@ static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
- /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
- if (packet->sample_type == CS_ETM_DISCONTINUITY)
+ /*
+ * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
+ * appear in samples.
+ */
+ if (packet->sample_type == CS_ETM_DISCONTINUITY ||
+ packet->sample_type == CS_ETM_EXCEPTION)
return 0;
return packet->start_addr;
@@ -1595,35 +1675,6 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
return ret;
}
-struct cs_etm_synth {
- struct perf_tool dummy_tool;
- struct perf_session *session;
-};
-
-static int cs_etm__event_synth(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- struct cs_etm_synth *cs_etm_synth =
- container_of(tool, struct cs_etm_synth, dummy_tool);
-
- return perf_session__deliver_synth_event(cs_etm_synth->session,
- event, NULL);
-}
-
-static int cs_etm__synth_event(struct perf_session *session,
- struct perf_event_attr *attr, u64 id)
-{
- struct cs_etm_synth cs_etm_synth;
-
- memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
- cs_etm_synth.session = session;
-
- return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
- &id, cs_etm__event_synth);
-}
-
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
struct perf_session *session)
{
@@ -1675,7 +1726,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
attr.sample_period = 1;
attr.sample_type |= PERF_SAMPLE_ADDR;
- err = cs_etm__synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
etm->branches_sample_type = attr.sample_type;
@@ -1698,7 +1749,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
attr.sample_period = etm->synth_opts.period;
etm->instructions_sample_period = attr.sample_period;
- err = cs_etm__synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
etm->instructions_sample_type = attr.sample_type;
@@ -2252,7 +2303,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
PERF_IP_FLAG_TRACE_END;
break;
case CS_ETM_EXCEPTION:
- ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
+ ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
if (ret)
return ret;
@@ -2740,7 +2791,7 @@ static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
static int cs_etm__process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -2810,7 +2861,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm,
static int cs_etm__process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -2836,17 +2887,6 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
if (err)
return err;
- /*
- * Knowing if the trace is formatted or not requires a lookup of
- * the aux record so only works in non-piped mode where data is
- * queued in cs_etm__queue_aux_records(). Always assume
- * formatted in piped mode (true).
- */
- err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
- idx, true, -1);
- if (err)
- return err;
-
if (dump_trace)
if (auxtrace_buffer__get_data(buffer, fd)) {
cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
@@ -2963,8 +3003,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
struct perf_record_auxtrace *auxtrace_event;
union perf_event auxtrace_fragment;
__u64 aux_offset, aux_size;
- __u32 idx;
- bool formatted;
+ enum cs_etm_format format;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -3030,6 +3069,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
if (aux_offset >= auxtrace_event->offset &&
aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
+ struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
+
/*
* If this AUX event was inside this buffer somewhere, create a new auxtrace event
* based on the sizes of the aux event, and queue that fragment.
@@ -3046,10 +3087,14 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
if (err)
return err;
- idx = auxtrace_event->idx;
- formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
- return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
- idx, formatted, sample->cpu);
+ format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
+ UNFORMATTED : FORMATTED;
+ if (etmq->format != UNSET && format != etmq->format) {
+ pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
+ return -EINVAL;
+ }
+ etmq->format = format;
+ return 0;
}
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
@@ -3175,7 +3220,8 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
}
/* map trace ids to correct metadata block, from information in metadata */
-static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
+static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
+ u64 **metadata)
{
u64 cs_etm_magic;
u8 trace_chan_id;
@@ -3197,7 +3243,7 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
/* unknown magic number */
return -EINVAL;
}
- err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
+ err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
if (err)
return err;
}
@@ -3205,30 +3251,85 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
}
/*
- * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
- * unused value to reduce the number of unneeded decoders created.
+ * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
+ * (formatted or not) packets to create the decoders.
*/
-static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
+static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
{
- u64 cs_etm_magic;
- int i;
+ struct cs_etm_decoder_params d_params;
+ struct cs_etm_trace_params *t_params;
+ int decoders = intlist__nr_entries(etmq->traceid_list);
- for (i = 0; i < num_cpu; i++) {
- cs_etm_magic = metadata[i][CS_ETM_MAGIC];
- switch (cs_etm_magic) {
- case __perf_cs_etmv3_magic:
- if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
- metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
- break;
- case __perf_cs_etmv4_magic:
- case __perf_cs_ete_magic:
- if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
- metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
- break;
- default:
- /* unknown magic number */
- return -EINVAL;
- }
+ if (decoders == 0)
+ return 0;
+
+ /*
+ * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+ * needed.
+ */
+ if (etmq->format == UNFORMATTED)
+ assert(decoders == 1);
+
+ /* Use metadata to fill in trace parameters for trace decoder */
+ t_params = zalloc(sizeof(*t_params) * decoders);
+
+ if (!t_params)
+ goto out_free;
+
+ if (cs_etm__init_trace_params(t_params, etmq))
+ goto out_free;
+
+ /* Set decoder parameters to decode trace packets */
+ if (cs_etm__init_decoder_params(&d_params, etmq,
+ dump_trace ? CS_ETM_OPERATION_PRINT :
+ CS_ETM_OPERATION_DECODE))
+ goto out_free;
+
+ etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+ t_params);
+
+ if (!etmq->decoder)
+ goto out_free;
+
+ /*
+ * Register a function to handle all memory accesses required by
+ * the trace decoder library.
+ */
+ if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
+ 0x0L, ((u64) -1L),
+ cs_etm__mem_access))
+ goto out_free_decoder;
+
+ zfree(&t_params);
+ return 0;
+
+out_free_decoder:
+ cs_etm_decoder__free(etmq->decoder);
+out_free:
+ zfree(&t_params);
+ return -EINVAL;
+}
+
+static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
+{
+ struct auxtrace_queues *queues = &etm->queues;
+
+ for (unsigned int i = 0; i < queues->nr_queues; i++) {
+ bool empty = list_empty(&queues->queue_array[i].head);
+ struct cs_etm_queue *etmq = queues->queue_array[i].priv;
+ int ret;
+
+ /*
+ * Don't create decoders for empty queues, mainly because
+ * etmq->format is unknown for empty queues.
+ */
+ assert(empty || etmq->format != UNSET);
+ if (empty)
+ continue;
+
+ ret = cs_etm__create_queue_decoders(etmq);
+ if (ret)
+ return ret;
}
return 0;
}
@@ -3242,30 +3343,19 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
int event_header_size = sizeof(struct perf_event_header);
int total_size = auxtrace_info->header.size;
int priv_size = 0;
- int num_cpu;
+ int num_cpu, max_cpu = 0;
int err = 0;
int aux_hw_id_found;
- int i, j;
+ int i;
u64 *ptr = NULL;
u64 **metadata = NULL;
- /*
- * Create an RB tree for traceID-metadata tuple. Since the conversion
- * has to be made for each packet that gets decoded, optimizing access
- * in anything other than a sequential array is worth doing.
- */
- traceid_list = intlist__new(NULL);
- if (!traceid_list)
- return -ENOMEM;
-
/* First the global part */
ptr = (u64 *) auxtrace_info->priv;
num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
metadata = zalloc(sizeof(*metadata) * num_cpu);
- if (!metadata) {
- err = -ENOMEM;
- goto err_free_traceid_list;
- }
+ if (!metadata)
+ return -ENOMEM;
/* Start parsing after the common part of the header */
i = CS_HEADER_VERSION_MAX;
@@ -3276,7 +3366,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
* required by the trace decoder to properly decode the trace due
* to its highly compressed nature.
*/
- for (j = 0; j < num_cpu; j++) {
+ for (int j = 0; j < num_cpu; j++) {
if (ptr[i] == __perf_cs_etmv3_magic) {
metadata[j] =
cs_etm__create_meta_blk(ptr, &i,
@@ -3300,6 +3390,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
err = -ENOMEM;
goto err_free_metadata;
}
+
+ if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
+ max_cpu = metadata[j][CS_ETM_CPU];
}
/*
@@ -3329,10 +3422,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
*/
etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
- err = auxtrace_queues__init(&etm->queues);
+ err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
if (err)
goto err_free_etm;
+ for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
+ err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
+ if (err)
+ goto err_free_queues;
+ }
+
if (session->itrace_synth_opts->set) {
etm->synth_opts = *session->itrace_synth_opts;
} else {
@@ -3396,12 +3495,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
if (err)
goto err_free_queues;
+ err = cs_etm__queue_aux_records(session);
+ if (err)
+ goto err_free_queues;
+
/*
* Map Trace ID values to CPU metadata.
*
- * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
- * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
- * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
+ * Trace metadata will always contain Trace ID values from the legacy algorithm
+ * in case it's read by a version of Perf that doesn't know about HW_ID packets
+ * or the kernel doesn't emit them.
*
* The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
* the same IDs as the old algorithm as far as is possible, unless there are clashes
@@ -3410,15 +3513,14 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
*
* For a perf able to interpret AUX_HW_ID packets we first check for the presence of
* those packets. If they are there then the values will be mapped and plugged into
- * the metadata. We then set any remaining metadata values with the used flag to a
- * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
+ * the metadata and decoders are only created for each mapping received.
*
* If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
- * then we map Trace ID values to CPU directly from the metadata - clearing any unused
- * flags if present.
+ * then we map Trace ID values to CPU directly from the metadata and create decoders
+ * for all mappings.
*/
- /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
+ /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
aux_hw_id_found = 0;
err = perf_session__peek_events(session, session->header.data_offset,
session->header.data_size,
@@ -3426,17 +3528,14 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
if (err)
goto err_free_queues;
- /* if HW ID found then clear any unused metadata ID values */
- if (aux_hw_id_found)
- err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
- /* otherwise, this is a file with metadata values only, map from metadata */
- else
- err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
-
- if (err)
- goto err_free_queues;
+ /* if no HW ID found this is a file with metadata values only, map from metadata */
+ if (!aux_hw_id_found) {
+ err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
+ if (err)
+ goto err_free_queues;
+ }
- err = cs_etm__queue_aux_records(session);
+ err = cs_etm__create_decoders(etm);
if (err)
goto err_free_queues;
@@ -3450,10 +3549,8 @@ err_free_etm:
zfree(&etm);
err_free_metadata:
/* No need to check @metadata[j], free(NULL) is supported */
- for (j = 0; j < num_cpu; j++)
+ for (int j = 0; j < num_cpu; j++)
zfree(&metadata[j]);
zfree(&metadata);
-err_free_traceid_list:
- intlist__delete(traceid_list);
return err;
}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 4696267a32f0..a8caeea720aa 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -230,16 +230,6 @@ struct cs_etm_packet_queue {
/* CoreSight trace ID is currently the bottom 7 bits of the value */
#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0)
-/*
- * perf record will set the legacy meta data values as unused initially.
- * This allows perf report to manage the decoders created when dynamic
- * allocation in operation.
- */
-#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
-
-/* Value to set for unused trace ID values */
-#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F
-
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);
@@ -252,7 +242,7 @@ enum cs_etm_pid_fmt {
#ifdef HAVE_CSTRACE_SUPPORT
#include <opencsd/ocsd_if_types.h>
-int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu);
enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq);
int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
u8 trace_chan_id, ocsd_ex_level el);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 2b732bccabad..021e9b1d5cc5 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -792,7 +792,7 @@ static bool is_flush_needed(struct ctf_stream *cs)
return cs->count >= STREAM_FLUSH_COUNT;
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *_event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -871,7 +871,7 @@ do { \
} while(0)
#define __FUNC_PROCESS_NON_SAMPLE(_name, body) \
-static int process_##_name##_event(struct perf_tool *tool, \
+static int process_##_name##_event(const struct perf_tool *tool, \
union perf_event *_event, \
struct perf_sample *sample, \
struct machine *machine) \
@@ -1607,25 +1607,23 @@ int bt_convert__perf2ctf(const char *input, const char *path,
.mode = PERF_DATA_MODE_READ,
.force = opts->force,
};
- struct convert c = {
- .tool = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .exit = perf_event__process_exit,
- .fork = perf_event__process_fork,
- .lost = perf_event__process_lost,
- .tracing_data = perf_event__process_tracing_data,
- .build_id = perf_event__process_build_id,
- .namespaces = perf_event__process_namespaces,
- .ordered_events = true,
- .ordering_requires_timestamps = true,
- },
- };
+ struct convert c = {};
struct ctf_writer *cw = &c.writer;
int err;
+ perf_tool__init(&c.tool, /*ordered_events=*/true);
+ c.tool.sample = process_sample_event;
+ c.tool.mmap = perf_event__process_mmap;
+ c.tool.mmap2 = perf_event__process_mmap2;
+ c.tool.comm = perf_event__process_comm;
+ c.tool.exit = perf_event__process_exit;
+ c.tool.fork = perf_event__process_fork;
+ c.tool.lost = perf_event__process_lost;
+ c.tool.tracing_data = perf_event__process_tracing_data;
+ c.tool.build_id = perf_event__process_build_id;
+ c.tool.namespaces = perf_event__process_namespaces;
+ c.tool.ordering_requires_timestamps = true;
+
if (opts->all) {
c.tool.comm = process_comm_event;
c.tool.exit = process_exit_event;
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 3cf64f5b23ee..20bfb0884e9e 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -118,7 +118,7 @@ static void output_json_key_format(FILE *out, bool comma, int depth,
va_end(args);
}
-static void output_sample_callchain_entry(struct perf_tool *tool,
+static void output_sample_callchain_entry(const struct perf_tool *tool,
u64 ip, struct addr_location *al)
{
struct convert_json *c = container_of(tool, struct convert_json, tool);
@@ -146,7 +146,7 @@ static void output_sample_callchain_entry(struct perf_tool *tool,
output_json_format(out, false, 4, "}");
}
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct evsel *evsel __maybe_unused,
@@ -316,39 +316,36 @@ int bt_convert__perf2json(const char *input_name, const char *output_name,
struct perf_session *session;
int fd;
int ret = -1;
-
struct convert_json c = {
- .tool = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
- .comm = perf_event__process_comm,
- .namespaces = perf_event__process_namespaces,
- .cgroup = perf_event__process_cgroup,
- .exit = perf_event__process_exit,
- .fork = perf_event__process_fork,
- .lost = perf_event__process_lost,
-#ifdef HAVE_LIBTRACEEVENT
- .tracing_data = perf_event__process_tracing_data,
-#endif
- .build_id = perf_event__process_build_id,
- .id_index = perf_event__process_id_index,
- .auxtrace_info = perf_event__process_auxtrace_info,
- .auxtrace = perf_event__process_auxtrace,
- .event_update = perf_event__process_event_update,
- .ordered_events = true,
- .ordering_requires_timestamps = true,
- },
.first = true,
.events_count = 0,
};
-
struct perf_data data = {
.mode = PERF_DATA_MODE_READ,
.path = input_name,
.force = opts->force,
};
+ perf_tool__init(&c.tool, /*ordered_events=*/true);
+ c.tool.sample = process_sample_event;
+ c.tool.mmap = perf_event__process_mmap;
+ c.tool.mmap2 = perf_event__process_mmap2;
+ c.tool.comm = perf_event__process_comm;
+ c.tool.namespaces = perf_event__process_namespaces;
+ c.tool.cgroup = perf_event__process_cgroup;
+ c.tool.exit = perf_event__process_exit;
+ c.tool.fork = perf_event__process_fork;
+ c.tool.lost = perf_event__process_lost;
+#ifdef HAVE_LIBTRACEEVENT
+ c.tool.tracing_data = perf_event__process_tracing_data;
+#endif
+ c.tool.build_id = perf_event__process_build_id;
+ c.tool.id_index = perf_event__process_id_index;
+ c.tool.auxtrace_info = perf_event__process_auxtrace_info;
+ c.tool.auxtrace = perf_event__process_auxtrace;
+ c.tool.event_update = perf_event__process_event_update;
+ c.tool.ordering_requires_timestamps = true;
+
if (opts->all) {
pr_err("--all is currently unsupported for JSON output.\n");
goto err;
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 08c4bfbd817f..98661ede2a73 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -204,7 +204,12 @@ static bool check_pipe(struct perf_data *data)
data->file.fd = fd;
data->use_stdio = false;
}
- } else {
+
+ /*
+ * When is_pipe and data->file.fd is given, use given fd
+ * instead of STDIN_FILENO or STDOUT_FILENO
+ */
+ } else if (data->file.fd <= 0) {
data->file.fd = fd;
}
}
diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h
index 4d65b8c605fc..ad6422c3f8ca 100644
--- a/tools/perf/util/debuginfo.h
+++ b/tools/perf/util/debuginfo.h
@@ -40,6 +40,8 @@ static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused)
{
}
+typedef void Dwarf_Addr;
+
static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused,
Dwarf_Addr *offs __maybe_unused,
bool adjust_offset __maybe_unused)
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index e10558b79504..f05ba7739c1e 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -12,9 +12,11 @@
#include <subcmd/run-command.h>
#include "annotate.h"
+#include "annotate-data.h"
#include "build-id.h"
#include "debug.h"
#include "disasm.h"
+#include "disasm_bpf.h"
#include "dso.h"
#include "env.h"
#include "evsel.h"
@@ -35,6 +37,8 @@ static struct ins_ops mov_ops;
static struct ins_ops nop_ops;
static struct ins_ops lock_ops;
static struct ins_ops ret_ops;
+static struct ins_ops load_store_ops;
+static struct ins_ops arithmetic_ops;
static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops, int max_ins_name);
@@ -43,6 +47,8 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
static void ins__sort(struct arch *arch);
static int disasm_line__parse(char *line, const char **namep, char **rawp);
+static int disasm_line__parse_powerpc(struct disasm_line *dl);
+static char *expand_tabs(char *line, char **storage, size_t *storage_len);
static __attribute__((constructor)) void symbol__init_regexpr(void)
{
@@ -145,10 +151,16 @@ static struct arch architectures[] = {
.memory_ref_char = '(',
.imm_char = '$',
},
+#ifdef HAVE_DWARF_SUPPORT
+ .update_insn_state = update_insn_state_x86,
+#endif
},
{
.name = "powerpc",
.init = powerpc__annotate_init,
+#ifdef HAVE_DWARF_SUPPORT
+ .update_insn_state = update_insn_state_powerpc,
+#endif
},
{
.name = "riscv64",
@@ -250,7 +262,8 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
return arch->ins_is_fused(arch, ins1, ins2);
}
-static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl __maybe_unused)
{
char *endptr, *tok, *name;
struct map *map = ms->map;
@@ -345,7 +358,8 @@ static inline const char *validate_comma(const char *c, struct ins_operands *ops
return c;
}
-static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl __maybe_unused)
{
struct map *map = ms->map;
struct symbol *sym = ms->sym;
@@ -504,7 +518,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
return 0;
}
-static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl __maybe_unused)
{
ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
if (ops->locked.ops == NULL)
@@ -513,13 +528,13 @@ static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_s
if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
goto out_free_ops;
- ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
+ ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0);
if (ops->locked.ins.ops == NULL)
goto out_free_ops;
if (ops->locked.ins.ops->parse &&
- ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0)
+ ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0)
goto out_free_ops;
return 0;
@@ -552,6 +567,7 @@ static void lock__delete(struct ins_operands *ops)
ins_ops__delete(ops->locked.ops);
zfree(&ops->locked.ops);
+ zfree(&ops->locked.ins.name);
zfree(&ops->target.raw);
zfree(&ops->target.name);
}
@@ -590,7 +606,8 @@ static bool check_multi_regs(struct arch *arch, const char *op)
return count > 1;
}
-static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused,
+ struct disasm_line *dl __maybe_unused)
{
char *s = strchr(ops->raw, ','), *target, *comment, prev;
@@ -668,7 +685,92 @@ static struct ins_ops mov_ops = {
.scnprintf = mov__scnprintf,
};
-static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+#define PPC_22_30(R) (((R) >> 1) & 0x1ff)
+#define MINUS_EXT_XO_FORM 234
+#define SUB_EXT_XO_FORM 232
+#define ADD_ZERO_EXT_XO_FORM 202
+#define SUB_ZERO_EXT_XO_FORM 200
+
+static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
+ ops->raw);
+}
+
+/*
+ * Sets the fields: multi_regs and "mem_ref".
+ * "mem_ref" is set for ops->source which is later used to
+ * fill the objdump->memory_ref-char field. This ops is currently
+ * used by powerpc and since binary instruction code is used to
+ * extract opcode, regs and offset, no other parsing is needed here.
+ *
+ * Dont set multi regs for 4 cases since it has only one operand
+ * for source:
+ * - Add to Minus One Extended XO-form ( Ex: addme, addmeo )
+ * - Subtract From Minus One Extended XO-form ( Ex: subfme )
+ * - Add to Zero Extended XO-form ( Ex: addze, addzeo )
+ * - Subtract From Zero Extended XO-form ( Ex: subfze )
+ */
+static int arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
+ struct map_symbol *ms __maybe_unused, struct disasm_line *dl)
+{
+ int opcode = PPC_OP(dl->raw.raw_insn);
+
+ ops->source.mem_ref = false;
+ if (opcode == 31) {
+ if ((opcode != MINUS_EXT_XO_FORM) && (opcode != SUB_EXT_XO_FORM) \
+ && (opcode != ADD_ZERO_EXT_XO_FORM) && (opcode != SUB_ZERO_EXT_XO_FORM))
+ ops->source.multi_regs = true;
+ }
+
+ ops->target.mem_ref = false;
+ ops->target.multi_regs = false;
+
+ return 0;
+}
+
+static struct ins_ops arithmetic_ops = {
+ .parse = arithmetic__parse,
+ .scnprintf = arithmetic__scnprintf,
+};
+
+static int load_store__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
+ ops->raw);
+}
+
+/*
+ * Sets the fields: multi_regs and "mem_ref".
+ * "mem_ref" is set for ops->source which is later used to
+ * fill the objdump->memory_ref-char field. This ops is currently
+ * used by powerpc and since binary instruction code is used to
+ * extract opcode, regs and offset, no other parsing is needed here
+ */
+static int load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
+ struct map_symbol *ms __maybe_unused, struct disasm_line *dl __maybe_unused)
+{
+ ops->source.mem_ref = true;
+ ops->source.multi_regs = false;
+ /* opcode 31 is of X form */
+ if (PPC_OP(dl->raw.raw_insn) == 31)
+ ops->source.multi_regs = true;
+
+ ops->target.mem_ref = false;
+ ops->target.multi_regs = false;
+
+ return 0;
+}
+
+static struct ins_ops load_store_ops = {
+ .parse = load_store__parse,
+ .scnprintf = load_store__scnprintf,
+};
+
+static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused,
+ struct disasm_line *dl __maybe_unused)
{
char *target, *comment, *s, prev;
@@ -758,11 +860,23 @@ static void ins__sort(struct arch *arch)
qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
}
-static struct ins_ops *__ins__find(struct arch *arch, const char *name)
+static struct ins_ops *__ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
{
struct ins *ins;
const int nmemb = arch->nr_instructions;
+ if (arch__is(arch, "powerpc")) {
+ /*
+ * For powerpc, identify the instruction ops
+ * from the opcode using raw_insn.
+ */
+ struct ins_ops *ops;
+
+ ops = check_ppc_insn(dl);
+ if (ops)
+ return ops;
+ }
+
if (!arch->sorted_instructions) {
ins__sort(arch);
arch->sorted_instructions = true;
@@ -792,9 +906,9 @@ static struct ins_ops *__ins__find(struct arch *arch, const char *name)
return ins ? ins->ops : NULL;
}
-struct ins_ops *ins__find(struct arch *arch, const char *name)
+struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
{
- struct ins_ops *ops = __ins__find(arch, name);
+ struct ins_ops *ops = __ins__find(arch, name, dl);
if (!ops && arch->associate_instruction_ops)
ops = arch->associate_instruction_ops(arch, name);
@@ -804,12 +918,12 @@ struct ins_ops *ins__find(struct arch *arch, const char *name)
static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
{
- dl->ins.ops = ins__find(arch, dl->ins.name);
+ dl->ins.ops = ins__find(arch, dl->ins.name, dl);
if (!dl->ins.ops)
return;
- if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0)
+ if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0)
dl->ins.ops = NULL;
}
@@ -841,6 +955,51 @@ out:
return -1;
}
+/*
+ * Parses the result captured from symbol__disassemble_*
+ * Example, line read from DSO file in powerpc:
+ * line: 38 01 81 e8
+ * opcode: fetched from arch specific get_opcode_insn
+ * rawp_insn: e8810138
+ *
+ * rawp_insn is used later to extract the reg/offset fields
+ */
+#define PPC_OP(op) (((op) >> 26) & 0x3F)
+#define RAW_BYTES 11
+
+static int disasm_line__parse_powerpc(struct disasm_line *dl)
+{
+ char *line = dl->al.line;
+ const char **namep = &dl->ins.name;
+ char **rawp = &dl->ops.raw;
+ char *tmp_raw_insn, *name_raw_insn = skip_spaces(line);
+ char *name = skip_spaces(name_raw_insn + RAW_BYTES);
+ int objdump = 0;
+
+ if (strlen(line) > RAW_BYTES)
+ objdump = 1;
+
+ if (name_raw_insn[0] == '\0')
+ return -1;
+
+ if (objdump) {
+ disasm_line__parse(name, namep, rawp);
+ } else
+ *namep = "";
+
+ tmp_raw_insn = strndup(name_raw_insn, 11);
+ if (tmp_raw_insn == NULL)
+ return -1;
+
+ remove_spaces(tmp_raw_insn);
+
+ sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn);
+ if (objdump)
+ dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn);
+
+ return 0;
+}
+
static void annotation_line__init(struct annotation_line *al,
struct annotate_args *args,
int nr)
@@ -857,6 +1016,7 @@ static void annotation_line__exit(struct annotation_line *al)
zfree_srcline(&al->path);
zfree(&al->line);
zfree(&al->cycles);
+ zfree(&al->br_cntr);
}
static size_t disasm_line_size(int nr)
@@ -880,10 +1040,8 @@ static size_t disasm_line_size(int nr)
struct disasm_line *disasm_line__new(struct annotate_args *args)
{
struct disasm_line *dl = NULL;
- int nr = 1;
-
- if (evsel__is_group_event(args->evsel))
- nr = args->evsel->core.nr_members;
+ struct annotation *notes = symbol__annotation(args->ms.sym);
+ int nr = notes->src->nr_events;
dl = zalloc(disasm_line_size(nr));
if (!dl)
@@ -894,7 +1052,10 @@ struct disasm_line *disasm_line__new(struct annotate_args *args)
goto out_delete;
if (args->offset != -1) {
- if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+ if (arch__is(args->arch, "powerpc")) {
+ if (disasm_line__parse_powerpc(dl) < 0)
+ goto out_free_line;
+ } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
goto out_free_line;
disasm_line__init_ins(dl, args->arch, &args->ms);
@@ -1164,195 +1325,11 @@ fallback:
return 0;
}
-#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-#define PACKAGE "perf"
-#include <bfd.h>
-#include <dis-asm.h>
-#include <bpf/bpf.h>
-#include <bpf/btf.h>
-#include <bpf/libbpf.h>
-#include <linux/btf.h>
-#include <tools/dis-asm-compat.h>
-
-#include "bpf-event.h"
-#include "bpf-utils.h"
-
-static int symbol__disassemble_bpf(struct symbol *sym,
- struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct bpf_prog_linfo *prog_linfo = NULL;
- struct bpf_prog_info_node *info_node;
- int len = sym->end - sym->start;
- disassembler_ftype disassemble;
- struct map *map = args->ms.map;
- struct perf_bpil *info_linear;
- struct disassemble_info info;
- struct dso *dso = map__dso(map);
- int pc = 0, count, sub_id;
- struct btf *btf = NULL;
- char tpath[PATH_MAX];
- size_t buf_size;
- int nr_skip = 0;
- char *buf;
- bfd *bfdf;
- int ret;
- FILE *s;
-
- if (dso__binary_type(dso) != DSO_BINARY_TYPE__BPF_PROG_INFO)
- return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
-
- pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
- sym->name, sym->start, sym->end - sym->start);
-
- memset(tpath, 0, sizeof(tpath));
- perf_exe(tpath, sizeof(tpath));
-
- bfdf = bfd_openr(tpath, NULL);
- if (bfdf == NULL)
- abort();
-
- if (!bfd_check_format(bfdf, bfd_object))
- abort();
-
- s = open_memstream(&buf, &buf_size);
- if (!s) {
- ret = errno;
- goto out;
- }
- init_disassemble_info_compat(&info, s,
- (fprintf_ftype) fprintf,
- fprintf_styled);
- info.arch = bfd_get_arch(bfdf);
- info.mach = bfd_get_mach(bfdf);
-
- info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env,
- dso__bpf_prog(dso)->id);
- if (!info_node) {
- ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
- goto out;
- }
- info_linear = info_node->info_linear;
- sub_id = dso__bpf_prog(dso)->sub_id;
-
- info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
- info.buffer_length = info_linear->info.jited_prog_len;
-
- if (info_linear->info.nr_line_info)
- prog_linfo = bpf_prog_linfo__new(&info_linear->info);
-
- if (info_linear->info.btf_id) {
- struct btf_node *node;
-
- node = perf_env__find_btf(dso__bpf_prog(dso)->env,
- info_linear->info.btf_id);
- if (node)
- btf = btf__new((__u8 *)(node->data),
- node->data_size);
- }
-
- disassemble_init_for_target(&info);
-
-#ifdef DISASM_FOUR_ARGS_SIGNATURE
- disassemble = disassembler(info.arch,
- bfd_big_endian(bfdf),
- info.mach,
- bfdf);
-#else
- disassemble = disassembler(bfdf);
-#endif
- if (disassemble == NULL)
- abort();
-
- fflush(s);
- do {
- const struct bpf_line_info *linfo = NULL;
- struct disasm_line *dl;
- size_t prev_buf_size;
- const char *srcline;
- u64 addr;
-
- addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
- count = disassemble(pc, &info);
-
- if (prog_linfo)
- linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
- addr, sub_id,
- nr_skip);
-
- if (linfo && btf) {
- srcline = btf__name_by_offset(btf, linfo->line_off);
- nr_skip++;
- } else
- srcline = NULL;
-
- fprintf(s, "\n");
- prev_buf_size = buf_size;
- fflush(s);
-
- if (!annotate_opts.hide_src_code && srcline) {
- args->offset = -1;
- args->line = strdup(srcline);
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
- dl = disasm_line__new(args);
- if (dl) {
- annotation_line__add(&dl->al,
- &notes->src->source);
- }
- }
-
- args->offset = pc;
- args->line = buf + prev_buf_size;
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
- dl = disasm_line__new(args);
- if (dl)
- annotation_line__add(&dl->al, &notes->src->source);
-
- pc += count;
- } while (count > 0 && pc < len);
-
- ret = 0;
-out:
- free(prog_linfo);
- btf__free(btf);
- fclose(s);
- bfd_close(bfdf);
- return ret;
-}
-#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
- struct annotate_args *args __maybe_unused)
-{
- return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
-}
-#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-
-static int
-symbol__disassemble_bpf_image(struct symbol *sym,
- struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct disasm_line *dl;
-
- args->offset = -1;
- args->line = strdup("to be implemented");
- args->line_nr = 0;
- args->fileloc = NULL;
- dl = disasm_line__new(args);
- if (dl)
- annotation_line__add(&dl->al, &notes->src->source);
-
- zfree(&args->line);
- return 0;
-}
-
#ifdef HAVE_LIBCAPSTONE_SUPPORT
#include <capstone/capstone.h>
+int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style);
+
static int open_capstone_handle(struct annotate_args *args, bool is_64bit,
csh *handle)
{
@@ -1378,7 +1355,9 @@ static int open_capstone_handle(struct annotate_args *args, bool is_64bit,
return 0;
}
+#endif
+#if defined(HAVE_LIBCAPSTONE_SUPPORT) || defined(HAVE_LIBLLVM_SUPPORT)
struct find_file_offset_data {
u64 ip;
u64 offset;
@@ -1396,6 +1375,55 @@ static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
return 0;
}
+static u8 *
+read_symbol(const char *filename, struct map *map, struct symbol *sym,
+ u64 *len, bool *is_64bit)
+{
+ struct dso *dso = map__dso(map);
+ struct nscookie nsc;
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 end = map__rip_2objdump(map, sym->end);
+ int fd, count;
+ u8 *buf = NULL;
+ struct find_file_offset_data data = {
+ .ip = start,
+ };
+
+ *is_64bit = false;
+
+ nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
+ fd = open(filename, O_RDONLY);
+ nsinfo__mountns_exit(&nsc);
+ if (fd < 0)
+ return NULL;
+
+ if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
+ is_64bit) == 0)
+ goto err;
+
+ *len = end - start;
+ buf = malloc(*len);
+ if (buf == NULL)
+ goto err;
+
+ count = pread(fd, buf, *len, data.offset);
+ close(fd);
+ fd = -1;
+
+ if ((u64)count != *len)
+ goto err;
+
+ return buf;
+
+err:
+ if (fd >= 0)
+ close(fd);
+ free(buf);
+ return NULL;
+}
+#endif
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
struct annotate_args *args, u64 addr)
{
@@ -1453,7 +1481,7 @@ static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
}
}
-static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
+static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym,
struct annotate_args *args)
{
struct annotation *notes = symbol__annotation(sym);
@@ -1472,9 +1500,10 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
.ip = start,
};
csh handle;
- cs_insn *insn;
char disasm_buf[512];
struct disasm_line *dl;
+ u32 *line;
+ bool disassembler_style = false;
if (args->options->objdump_path)
return -1;
@@ -1489,7 +1518,11 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
&is_64bit) == 0)
goto err;
- if (open_capstone_handle(args, is_64bit, &handle) < 0)
+ if (!args->options->disassembler_style ||
+ !strcmp(args->options->disassembler_style, "att"))
+ disassembler_style = true;
+
+ if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
goto err;
needs_cs_close = true;
@@ -1505,6 +1538,8 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
if ((u64)count != len)
goto err;
+ line = (u32 *)buf;
+
/* add the function address and name */
scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
start, sym->name);
@@ -1521,6 +1556,114 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
annotation_line__add(&dl->al, &notes->src->source);
+ /*
+ * TODO: enable disassm for powerpc
+ * count = cs_disasm(handle, buf, len, start, len, &insn);
+ *
+ * For now, only binary code is saved in disassembled line
+ * to be used in "type" and "typeoff" sort keys. Each raw code
+ * is 32 bit instruction. So use "len/4" to get the number of
+ * entries.
+ */
+ count = len/4;
+
+ for (i = 0, offset = 0; i < count; i++) {
+ args->offset = offset;
+ sprintf(args->line, "%x", line[i]);
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ offset += 4;
+ }
+
+ /* It failed in the middle */
+ if (offset != len) {
+ struct list_head *list = &notes->src->source;
+
+ /* Discard all lines and fallback to objdump */
+ while (!list_empty(list)) {
+ dl = list_first_entry(list, struct disasm_line, al.node);
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+ count = -1;
+ }
+
+out:
+ if (needs_cs_close)
+ cs_close(&handle);
+ free(buf);
+ return count < 0 ? count : 0;
+
+err:
+ if (fd >= 0)
+ close(fd);
+ if (needs_cs_close) {
+ struct disasm_line *tmp;
+
+ /*
+ * It probably failed in the middle of the above loop.
+ * Release any resources it might add.
+ */
+ list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
+ list_del(&dl->al.node);
+ free(dl);
+ }
+ }
+ count = -1;
+ goto out;
+}
+
+static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct map *map = args->ms.map;
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 len;
+ u64 offset;
+ int i, count;
+ bool is_64bit = false;
+ bool needs_cs_close = false;
+ u8 *buf = NULL;
+ csh handle;
+ cs_insn *insn;
+ char disasm_buf[512];
+ struct disasm_line *dl;
+
+ if (args->options->objdump_path)
+ return -1;
+
+ buf = read_symbol(filename, map, sym, &len, &is_64bit);
+ if (buf == NULL)
+ return -1;
+
+ /* add the function address and name */
+ scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+ start, sym->name);
+
+ args->offset = -1;
+ args->line = disasm_buf;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ if (open_capstone_handle(args, is_64bit, &handle) < 0)
+ goto err;
+
+ needs_cs_close = true;
+
count = cs_disasm(handle, buf, len, start, len, &insn);
for (i = 0, offset = 0; i < count; i++) {
int printed;
@@ -1565,8 +1708,6 @@ out:
return count < 0 ? count : 0;
err:
- if (fd >= 0)
- close(fd);
if (needs_cs_close) {
struct disasm_line *tmp;
@@ -1584,6 +1725,274 @@ err:
}
#endif
+static int symbol__disassemble_raw(char *filename, struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 end = map__rip_2objdump(map, sym->end);
+ u64 len = end - start;
+ u64 offset;
+ int i, count;
+ u8 *buf = NULL;
+ char disasm_buf[512];
+ struct disasm_line *dl;
+ u32 *line;
+
+ /* Return if objdump is specified explicitly */
+ if (args->options->objdump_path)
+ return -1;
+
+ pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename);
+
+ buf = malloc(len);
+ if (buf == NULL)
+ goto err;
+
+ count = dso__data_read_offset(dso, NULL, sym->start, buf, len);
+
+ line = (u32 *)buf;
+
+ if ((u64)count != len)
+ goto err;
+
+ /* add the function address and name */
+ scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+ start, sym->name);
+
+ args->offset = -1;
+ args->line = disasm_buf;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ /* Each raw instruction is 4 byte */
+ count = len/4;
+
+ for (i = 0, offset = 0; i < count; i++) {
+ args->offset = offset;
+ sprintf(args->line, "%x", line[i]);
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+ offset += 4;
+ }
+
+ /* It failed in the middle */
+ if (offset != len) {
+ struct list_head *list = &notes->src->source;
+
+ /* Discard all lines and fallback to objdump */
+ while (!list_empty(list)) {
+ dl = list_first_entry(list, struct disasm_line, al.node);
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+ count = -1;
+ }
+
+out:
+ free(buf);
+ return count < 0 ? count : 0;
+
+err:
+ count = -1;
+ goto out;
+}
+
+#ifdef HAVE_LIBLLVM_SUPPORT
+#include <llvm-c/Disassembler.h>
+#include <llvm-c/Target.h>
+#include "util/llvm-c-helpers.h"
+
+struct symbol_lookup_storage {
+ u64 branch_addr;
+ u64 pcrel_load_addr;
+};
+
+/*
+ * Whenever LLVM wants to resolve an address into a symbol, it calls this
+ * callback. We don't ever actually _return_ anything (in particular, because
+ * it puts quotation marks around what we return), but we use this as a hint
+ * that there is a branch or PC-relative address in the expression that we
+ * should add some textual annotation for after the instruction. The caller
+ * will use this information to add the actual annotation.
+ */
+static const char *
+symbol_lookup_callback(void *disinfo, uint64_t value,
+ uint64_t *ref_type,
+ uint64_t address __maybe_unused,
+ const char **ref __maybe_unused)
+{
+ struct symbol_lookup_storage *storage = disinfo;
+
+ if (*ref_type == LLVMDisassembler_ReferenceType_In_Branch)
+ storage->branch_addr = value;
+ else if (*ref_type == LLVMDisassembler_ReferenceType_In_PCrel_Load)
+ storage->pcrel_load_addr = value;
+ *ref_type = LLVMDisassembler_ReferenceType_InOut_None;
+ return NULL;
+}
+
+static int symbol__disassemble_llvm(char *filename, struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ u64 start = map__rip_2objdump(map, sym->start);
+ u8 *buf;
+ u64 len;
+ u64 pc;
+ bool is_64bit;
+ char triplet[64];
+ char disasm_buf[2048];
+ size_t disasm_len;
+ struct disasm_line *dl;
+ LLVMDisasmContextRef disasm = NULL;
+ struct symbol_lookup_storage storage;
+ char *line_storage = NULL;
+ size_t line_storage_len = 0;
+ int ret = -1;
+
+ if (args->options->objdump_path)
+ return -1;
+
+ LLVMInitializeAllTargetInfos();
+ LLVMInitializeAllTargetMCs();
+ LLVMInitializeAllDisassemblers();
+
+ buf = read_symbol(filename, map, sym, &len, &is_64bit);
+ if (buf == NULL)
+ return -1;
+
+ if (arch__is(args->arch, "x86")) {
+ if (is_64bit)
+ scnprintf(triplet, sizeof(triplet), "x86_64-pc-linux");
+ else
+ scnprintf(triplet, sizeof(triplet), "i686-pc-linux");
+ } else {
+ scnprintf(triplet, sizeof(triplet), "%s-linux-gnu",
+ args->arch->name);
+ }
+
+ disasm = LLVMCreateDisasm(triplet, &storage, 0, NULL,
+ symbol_lookup_callback);
+ if (disasm == NULL)
+ goto err;
+
+ if (args->options->disassembler_style &&
+ !strcmp(args->options->disassembler_style, "intel"))
+ LLVMSetDisasmOptions(disasm,
+ LLVMDisassembler_Option_AsmPrinterVariant);
+
+ /*
+ * This needs to be set after AsmPrinterVariant, due to a bug in LLVM;
+ * setting AsmPrinterVariant makes a new instruction printer, making it
+ * forget about the PrintImmHex flag (which is applied before if both
+ * are given to the same call).
+ */
+ LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex);
+
+ /* add the function address and name */
+ scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+ start, sym->name);
+
+ args->offset = -1;
+ args->line = disasm_buf;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ pc = start;
+ for (u64 offset = 0; offset < len; ) {
+ unsigned int ins_len;
+
+ storage.branch_addr = 0;
+ storage.pcrel_load_addr = 0;
+
+ ins_len = LLVMDisasmInstruction(disasm, buf + offset,
+ len - offset, pc,
+ disasm_buf, sizeof(disasm_buf));
+ if (ins_len == 0)
+ goto err;
+ disasm_len = strlen(disasm_buf);
+
+ if (storage.branch_addr != 0) {
+ char *name = llvm_name_for_code(dso, filename,
+ storage.branch_addr);
+ if (name != NULL) {
+ disasm_len += scnprintf(disasm_buf + disasm_len,
+ sizeof(disasm_buf) -
+ disasm_len,
+ " <%s>", name);
+ free(name);
+ }
+ }
+ if (storage.pcrel_load_addr != 0) {
+ char *name = llvm_name_for_data(dso, filename,
+ storage.pcrel_load_addr);
+ disasm_len += scnprintf(disasm_buf + disasm_len,
+ sizeof(disasm_buf) - disasm_len,
+ " # %#"PRIx64,
+ storage.pcrel_load_addr);
+ if (name) {
+ disasm_len += scnprintf(disasm_buf + disasm_len,
+ sizeof(disasm_buf) -
+ disasm_len,
+ " <%s>", name);
+ free(name);
+ }
+ }
+
+ args->offset = offset;
+ args->line = expand_tabs(disasm_buf, &line_storage,
+ &line_storage_len);
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ llvm_addr2line(filename, pc, &args->fileloc,
+ (unsigned int *)&args->line_nr, false, NULL);
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ free(args->fileloc);
+ pc += ins_len;
+ offset += ins_len;
+ }
+
+ ret = 0;
+
+err:
+ LLVMDisasmDispose(disasm);
+ free(buf);
+ free(line_storage);
+ return ret;
+}
+#endif
+
/*
* Possibly create a new version of line with tabs expanded. Returns the
* existing or new line, storage is updated if a new line is allocated. If
@@ -1708,6 +2117,33 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
strcpy(symfs_filename, tmp);
}
+ /*
+ * For powerpc data type profiling, use the dso__data_read_offset
+ * to read raw instruction directly and interpret the binary code
+ * to understand instructions and register fields. For sort keys as
+ * type and typeoff, disassemble to mnemonic notation is
+ * not required in case of powerpc.
+ */
+ if (arch__is(args->arch, "powerpc")) {
+ extern const char *sort_order;
+
+ if (sort_order && !strstr(sort_order, "sym")) {
+ err = symbol__disassemble_raw(symfs_filename, sym, args);
+ if (err == 0)
+ goto out_remove_tmp;
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+ err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
+ if (err == 0)
+ goto out_remove_tmp;
+#endif
+ }
+ }
+
+#ifdef HAVE_LIBLLVM_SUPPORT
+ err = symbol__disassemble_llvm(symfs_filename, sym, args);
+ if (err == 0)
+ goto out_remove_tmp;
+#endif
#ifdef HAVE_LIBCAPSTONE_SUPPORT
err = symbol__disassemble_capstone(symfs_filename, sym, args);
if (err == 0)
diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h
index 3d381a043520..f56beedeb9da 100644
--- a/tools/perf/util/disasm.h
+++ b/tools/perf/util/disasm.h
@@ -4,11 +4,18 @@
#include "map_symbol.h"
+#ifdef HAVE_DWARF_SUPPORT
+#include "dwarf-aux.h"
+#endif
+
struct annotation_options;
struct disasm_line;
struct ins;
struct evsel;
struct symbol;
+struct data_loc_info;
+struct type_state;
+struct disasm_line;
struct arch {
const char *name;
@@ -32,6 +39,11 @@ struct arch {
char memory_ref_char;
char imm_char;
} objdump;
+#ifdef HAVE_DWARF_SUPPORT
+ void (*update_insn_state)(struct type_state *state,
+ struct data_loc_info *dloc, Dwarf_Die *cu_die,
+ struct disasm_line *dl);
+#endif
};
struct ins {
@@ -50,6 +62,7 @@ struct ins_operands {
bool offset_avail;
bool outside;
bool multi_regs;
+ bool mem_ref;
} target;
union {
struct {
@@ -57,6 +70,7 @@ struct ins_operands {
char *name;
u64 addr;
bool multi_regs;
+ bool mem_ref;
} source;
struct {
struct ins ins;
@@ -71,7 +85,8 @@ struct ins_operands {
struct ins_ops {
void (*free)(struct ins_operands *ops);
- int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
+ int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+ struct disasm_line *dl);
int (*scnprintf)(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops, int max_ins_name);
};
@@ -90,7 +105,7 @@ struct annotate_args {
struct arch *arch__find(const char *name);
bool arch__is(struct arch *arch, const char *name);
-struct ins_ops *ins__find(struct arch *arch, const char *name);
+struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl);
int ins__scnprintf(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops, int max_ins_name);
diff --git a/tools/perf/util/disasm_bpf.c b/tools/perf/util/disasm_bpf.c
new file mode 100644
index 000000000000..1fee71c79b62
--- /dev/null
+++ b/tools/perf/util/disasm_bpf.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "util/annotate.h"
+#include "util/disasm_bpf.h"
+#include "util/symbol.h"
+#include <linux/zalloc.h>
+#include <string.h>
+
+#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+#define PACKAGE "perf"
+#include <bfd.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <dis-asm.h>
+#include <errno.h>
+#include <linux/btf.h>
+#include <tools/dis-asm-compat.h>
+
+#include "util/bpf-event.h"
+#include "util/bpf-utils.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/map.h"
+#include "util/env.h"
+#include "util/util.h"
+
+int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct bpf_prog_linfo *prog_linfo = NULL;
+ struct bpf_prog_info_node *info_node;
+ int len = sym->end - sym->start;
+ disassembler_ftype disassemble;
+ struct map *map = args->ms.map;
+ struct perf_bpil *info_linear;
+ struct disassemble_info info;
+ struct dso *dso = map__dso(map);
+ int pc = 0, count, sub_id;
+ struct btf *btf = NULL;
+ char tpath[PATH_MAX];
+ size_t buf_size;
+ int nr_skip = 0;
+ char *buf;
+ bfd *bfdf;
+ int ret;
+ FILE *s;
+
+ if (dso__binary_type(dso) != DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
+
+ pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
+ sym->name, sym->start, sym->end - sym->start);
+
+ memset(tpath, 0, sizeof(tpath));
+ perf_exe(tpath, sizeof(tpath));
+
+ bfdf = bfd_openr(tpath, NULL);
+ if (bfdf == NULL)
+ abort();
+
+ if (!bfd_check_format(bfdf, bfd_object))
+ abort();
+
+ s = open_memstream(&buf, &buf_size);
+ if (!s) {
+ ret = errno;
+ goto out;
+ }
+ init_disassemble_info_compat(&info, s,
+ (fprintf_ftype) fprintf,
+ fprintf_styled);
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+
+ info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env,
+ dso__bpf_prog(dso)->id);
+ if (!info_node) {
+ ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
+ goto out;
+ }
+ info_linear = info_node->info_linear;
+ sub_id = dso__bpf_prog(dso)->sub_id;
+
+ info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
+ info.buffer_length = info_linear->info.jited_prog_len;
+
+ if (info_linear->info.nr_line_info)
+ prog_linfo = bpf_prog_linfo__new(&info_linear->info);
+
+ if (info_linear->info.btf_id) {
+ struct btf_node *node;
+
+ node = perf_env__find_btf(dso__bpf_prog(dso)->env,
+ info_linear->info.btf_id);
+ if (node)
+ btf = btf__new((__u8 *)(node->data),
+ node->data_size);
+ }
+
+ disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+ disassemble = disassembler(info.arch,
+ bfd_big_endian(bfdf),
+ info.mach,
+ bfdf);
+#else
+ disassemble = disassembler(bfdf);
+#endif
+ if (disassemble == NULL)
+ abort();
+
+ fflush(s);
+ do {
+ const struct bpf_line_info *linfo = NULL;
+ struct disasm_line *dl;
+ size_t prev_buf_size;
+ const char *srcline;
+ u64 addr;
+
+ addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
+ count = disassemble(pc, &info);
+
+ if (prog_linfo)
+ linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
+ addr, sub_id,
+ nr_skip);
+
+ if (linfo && btf) {
+ srcline = btf__name_by_offset(btf, linfo->line_off);
+ nr_skip++;
+ } else
+ srcline = NULL;
+
+ fprintf(s, "\n");
+ prev_buf_size = buf_size;
+ fflush(s);
+
+ if (!annotate_opts.hide_src_code && srcline) {
+ args->offset = -1;
+ args->line = strdup(srcline);
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ if (dl) {
+ annotation_line__add(&dl->al,
+ &notes->src->source);
+ }
+ }
+
+ args->offset = pc;
+ args->line = buf + prev_buf_size;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ pc += count;
+ } while (count > 0 && pc < len);
+
+ ret = 0;
+out:
+ free(prog_linfo);
+ btf__free(btf);
+ fclose(s);
+ bfd_close(bfdf);
+ return ret;
+}
+#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, struct annotate_args *args __maybe_unused)
+{
+ return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
+}
+#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+
+int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+
+ args->offset = -1;
+ args->line = strdup("to be implemented");
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ zfree(&args->line);
+ return 0;
+}
diff --git a/tools/perf/util/disasm_bpf.h b/tools/perf/util/disasm_bpf.h
new file mode 100644
index 000000000000..2ecb19545388
--- /dev/null
+++ b/tools/perf/util/disasm_bpf.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#ifndef __PERF_DISASM_BPF_H
+#define __PERF_DISASM_BPF_H
+
+struct symbol;
+struct annotate_args;
+
+int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args);
+int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args);
+
+#endif /* __PERF_DISASM_BPF_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 2340c4f6d0c2..5c6e85fdae0d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1327,7 +1327,7 @@ bool dso_id__empty(const struct dso_id *id)
return !id->maj && !id->min && !id->ino && !id->ino_generation;
}
-void __dso__inject_id(struct dso *dso, struct dso_id *id)
+void __dso__inject_id(struct dso *dso, const struct dso_id *id)
{
struct dsos *dsos = dso__dsos(dso);
struct dso_id *dso_id = dso__id(dso);
@@ -1417,7 +1417,7 @@ void dso__set_sorted_by_name(struct dso *dso)
RC_CHK_ACCESS(dso)->sorted_by_name = true;
}
-struct dso *dso__new_id(const char *name, struct dso_id *id)
+struct dso *dso__new_id(const char *name, const struct dso_id *id)
{
RC_STRUCT(dso) *dso = zalloc(sizeof(*dso) + strlen(name) + 1);
struct dso *res;
@@ -1501,7 +1501,7 @@ void dso__delete(struct dso *dso)
auxtrace_cache__free(RC_CHK_ACCESS(dso)->auxtrace_cache);
dso_cache__free(dso);
dso__free_a2l(dso);
- zfree(&RC_CHK_ACCESS(dso)->symsrc_filename);
+ dso__free_symsrc_filename(dso);
nsinfo__zput(RC_CHK_ACCESS(dso)->nsinfo);
mutex_destroy(dso__lock(dso));
RC_CHK_FREE(dso);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 878c1f441868..bb8e8f444054 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -602,6 +602,11 @@ static inline void dso__set_symsrc_filename(struct dso *dso, char *val)
RC_CHK_ACCESS(dso)->symsrc_filename = val;
}
+static inline void dso__free_symsrc_filename(struct dso *dso)
+{
+ zfree(&RC_CHK_ACCESS(dso)->symsrc_filename);
+}
+
static inline enum dso_binary_type dso__symtab_type(const struct dso *dso)
{
return RC_CHK_ACCESS(dso)->symtab_type;
@@ -635,14 +640,14 @@ static inline void dso__set_text_offset(struct dso *dso, u64 val)
int dso_id__cmp(const struct dso_id *a, const struct dso_id *b);
bool dso_id__empty(const struct dso_id *id);
-struct dso *dso__new_id(const char *name, struct dso_id *id);
+struct dso *dso__new_id(const char *name, const struct dso_id *id);
struct dso *dso__new(const char *name);
void dso__delete(struct dso *dso);
int dso__cmp_id(struct dso *a, struct dso *b);
void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated);
void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
-void __dso__inject_id(struct dso *dso, struct dso_id *id);
+void __dso__inject_id(struct dso *dso, const struct dso_id *id);
int dso__name_len(const struct dso *dso);
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index d4acdb37f046..e0998e2a7c4e 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -155,7 +155,7 @@ static int dsos__cmp_key_long_name_id(const void *vkey, const void *vdso)
*/
static struct dso *__dsos__find_by_longname_id(struct dsos *dsos,
const char *name,
- struct dso_id *id,
+ const struct dso_id *id,
bool write_locked)
{
struct dsos__key key = {
@@ -244,7 +244,7 @@ int dsos__add(struct dsos *dsos, struct dso *dso)
struct dsos__find_id_cb_args {
const char *name;
- struct dso_id *id;
+ const struct dso_id *id;
struct dso *res;
};
@@ -260,7 +260,7 @@ static int dsos__find_id_cb(struct dso *dso, void *data)
}
-static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id,
+static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, const struct dso_id *id,
bool cmp_short, bool write_locked)
{
struct dso *res;
@@ -321,7 +321,7 @@ static void dso__set_basename(struct dso *dso)
dso__set_short_name(dso, base, true);
}
-static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, const struct dso_id *id)
{
struct dso *dso = dso__new_id(name, id);
@@ -337,7 +337,7 @@ static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct
return dso;
}
-static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id)
{
struct dso *dso = __dsos__find_id(dsos, name, id, false, /*write_locked=*/true);
@@ -347,7 +347,7 @@ static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struc
return dso ? dso : __dsos__addnew_id(dsos, name, id);
}
-struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id)
{
struct dso *dso;
down_write(&dsos->lock);
diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h
index 6c13b65648bc..a26774950866 100644
--- a/tools/perf/util/dsos.h
+++ b/tools/perf/util/dsos.h
@@ -32,7 +32,7 @@ int __dsos__add(struct dsos *dsos, struct dso *dso);
int dsos__add(struct dsos *dsos, struct dso *dso);
struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
-struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id);
+struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id);
bool dsos__read_build_ids(struct dsos *dsos, bool with_hits);
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
index 2bd8585db93c..c1cc0ade48d0 100644
--- a/tools/perf/util/dump-insn.c
+++ b/tools/perf/util/dump-insn.c
@@ -15,7 +15,7 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
}
__weak
-int arch_is_branch(const unsigned char *buf __maybe_unused,
+int arch_is_uncond_branch(const unsigned char *buf __maybe_unused,
size_t len __maybe_unused,
int x86_64 __maybe_unused)
{
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 4a7797dd6d09..20d4d7bb5275 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -21,6 +21,6 @@ struct perf_insn {
const char *dump_insn(struct perf_insn *x, u64 ip,
u8 *inbuf, int inlen, int *lenp);
-int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+int arch_is_uncond_branch(const unsigned char *buf, size_t len, int x86_64);
#endif
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 44ef968a7ad3..92eb9c8dc3e5 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -267,7 +267,7 @@ Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
}
/* Get a type die, but skip qualifiers */
-static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
{
int tag;
@@ -1444,7 +1444,7 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) {
/* Assuming the location list is sorted by address */
- if (end < data->pc)
+ if (end <= data->pc)
continue;
if (start > data->pc)
break;
@@ -1598,6 +1598,9 @@ static int __die_collect_vars_cb(Dwarf_Die *die_mem, void *arg)
if (dwarf_getlocations(&attr, 0, &base, &start, &end, &ops, &nops) <= 0)
return DIE_FIND_CB_SIBLING;
+ if (!check_allowed_ops(ops, nops))
+ return DIE_FIND_CB_SIBLING;
+
if (die_get_real_type(die_mem, &type_die) == NULL)
return DIE_FIND_CB_SIBLING;
@@ -1974,8 +1977,15 @@ static int __die_find_member_offset_cb(Dwarf_Die *die_mem, void *arg)
return DIE_FIND_CB_SIBLING;
/* Unions might not have location */
- if (die_get_data_member_location(die_mem, &loc) < 0)
- loc = 0;
+ if (die_get_data_member_location(die_mem, &loc) < 0) {
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(die_mem, DW_AT_data_bit_offset, &attr) &&
+ dwarf_formudata(&attr, &loc) == 0)
+ loc /= 8;
+ else
+ loc = 0;
+ }
if (offset == loc)
return DIE_FIND_CB_END;
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 24446412b869..bd7505812569 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -9,6 +9,7 @@
#include <elfutils/libdw.h>
#include <elfutils/libdwfl.h>
#include <elfutils/version.h>
+#include <errno.h>
struct strbuf;
@@ -56,6 +57,8 @@ const char *die_get_decl_file(Dwarf_Die *dw_die);
/* Get type die */
Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+/* Get a type die, but skip qualifiers */
+Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
/* Get a type die, but skip qualifiers and typedef */
Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index a459374d0a1a..1edbccfc3281 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -624,3 +624,18 @@ out:
free(cap_eq);
return NULL;
}
+
+void perf_env__find_br_cntr_info(struct perf_env *env,
+ unsigned int *nr,
+ unsigned int *width)
+{
+ if (nr) {
+ *nr = env->cpu_pmu_caps ? env->br_cntr_nr :
+ env->pmu_caps->br_cntr_nr;
+ }
+
+ if (width) {
+ *width = env->cpu_pmu_caps ? env->br_cntr_width :
+ env->pmu_caps->br_cntr_width;
+ }
+}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 2a2c37cc40b7..51b36c36019b 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -192,4 +192,7 @@ char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
const char *cap);
bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name);
+void perf_env__find_br_cntr_info(struct perf_env *env,
+ unsigned int *nr,
+ unsigned int *width);
#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f32f9abf6344..aac96d5d1917 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -216,7 +216,7 @@ size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp)
event->cgroup.id, event->cgroup.path);
}
-int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
+int perf_event__process_comm(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -224,7 +224,7 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
return machine__process_comm_event(machine, event, sample);
}
-int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
+int perf_event__process_namespaces(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -232,7 +232,7 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
return machine__process_namespaces_event(machine, event, sample);
}
-int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
+int perf_event__process_cgroup(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -240,7 +240,7 @@ int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
return machine__process_cgroup_event(machine, event, sample);
}
-int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
+int perf_event__process_lost(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -248,7 +248,7 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
return machine__process_lost_event(machine, event, sample);
}
-int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+int perf_event__process_aux(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -256,7 +256,7 @@ int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
return machine__process_aux_event(machine, event);
}
-int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+int perf_event__process_itrace_start(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -264,7 +264,7 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
return machine__process_itrace_start_event(machine, event);
}
-int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused,
+int perf_event__process_aux_output_hw_id(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -272,7 +272,7 @@ int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused,
return machine__process_aux_output_hw_id_event(machine, event);
}
-int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+int perf_event__process_lost_samples(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -280,7 +280,7 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
return machine__process_lost_samples_event(machine, event, sample);
}
-int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
+int perf_event__process_switch(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -288,7 +288,7 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
return machine__process_switch_event(machine, event);
}
-int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
+int perf_event__process_ksymbol(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
@@ -296,7 +296,7 @@ int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
return machine__process_ksymbol(machine, event, sample);
}
-int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
+int perf_event__process_bpf(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -304,7 +304,7 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
return machine__process_bpf(machine, event, sample);
}
-int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused,
+int perf_event__process_text_poke(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -387,7 +387,7 @@ size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
return ret;
}
-int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
+int perf_event__process_mmap(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -395,7 +395,7 @@ int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
return machine__process_mmap_event(machine, event, sample);
}
-int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
+int perf_event__process_mmap2(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -410,7 +410,7 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
event->fork.ppid, event->fork.ptid);
}
-int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
+int perf_event__process_fork(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -418,7 +418,7 @@ int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
return machine__process_fork_event(machine, event, sample);
}
-int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
+int perf_event__process_exit(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -426,6 +426,26 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
return machine__process_exit_event(machine, event, sample);
}
+int perf_event__exit_del_thread(const struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ struct thread *thread = machine__findnew_thread(machine,
+ event->fork.pid,
+ event->fork.tid);
+
+ dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
+
+ if (thread) {
+ machine__remove_thread(machine, thread);
+ thread__put(thread);
+ }
+
+ return 0;
+}
+
size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
{
return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n",
@@ -587,7 +607,7 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL
return ret;
}
-int perf_event__process(struct perf_tool *tool __maybe_unused,
+int perf_event__process(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index d8bcee2e9b93..f8742e6230a5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -267,71 +267,75 @@ struct perf_tool;
void perf_event__read_stat_config(struct perf_stat_config *config,
struct perf_record_stat_config *event);
-int perf_event__process_comm(struct perf_tool *tool,
+int perf_event__process_comm(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_lost(struct perf_tool *tool,
+int perf_event__process_lost(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_lost_samples(struct perf_tool *tool,
+int perf_event__process_lost_samples(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_aux(struct perf_tool *tool,
+int perf_event__process_aux(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_itrace_start(struct perf_tool *tool,
+int perf_event__process_itrace_start(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_aux_output_hw_id(struct perf_tool *tool,
+int perf_event__process_aux_output_hw_id(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_switch(struct perf_tool *tool,
+int perf_event__process_switch(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_namespaces(struct perf_tool *tool,
+int perf_event__process_namespaces(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_cgroup(struct perf_tool *tool,
+int perf_event__process_cgroup(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_mmap(struct perf_tool *tool,
+int perf_event__process_mmap(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_mmap2(struct perf_tool *tool,
+int perf_event__process_mmap2(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_fork(struct perf_tool *tool,
+int perf_event__process_fork(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_exit(struct perf_tool *tool,
+int perf_event__process_exit(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_ksymbol(struct perf_tool *tool,
+int perf_event__exit_del_thread(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_bpf(struct perf_tool *tool,
+int perf_event__process_ksymbol(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_bpf(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process_text_poke(struct perf_tool *tool,
+int perf_event__process_text_poke(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
-int perf_event__process(struct perf_tool *tool,
+int perf_event__process(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index f43e5b1a366a..eabd7913c309 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -18,7 +18,18 @@
* PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
* in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
* exactly how many samples the kernel in fact dropped, i.e. it is the sum of
- * all struct perf_record_lost_samples.lost fields reported.
+ * all struct perf_record_lost_samples.lost fields reported without setting the
+ * misc field in the header.
+ *
+ * The BPF program can discard samples according to the filter expressions given
+ * by the user. This number is kept in a BPF map and dumped at the end of perf
+ * record in a PERF_RECORD_LOST_SAMPLES event. To differentiate it from other
+ * lost samples, perf tools sets PERF_RECORD_MISC_LOST_SAMPLES_BPF flag in the
+ * header.misc field. The number of dropped-samples events is stored in
+ * .nr_events[PERF_RECORD_LOST_SAMPLES] while total_dropped_samples tells
+ * exactly how many samples the BPF program in fact dropped, i.e. it is the sum
+ * of all struct perf_record_lost_samples.lost fields reported with the misc
+ * field set in the header.
*
* The total_period is needed because by default auto-freq is used, so
* multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
@@ -28,6 +39,7 @@
struct events_stats {
u64 total_lost;
u64 total_lost_samples;
+ u64 total_dropped_samples;
u64 total_aux_lost;
u64 total_aux_partial;
u64 total_aux_collision;
@@ -48,6 +60,7 @@ struct hists_stats {
u32 nr_samples;
u32 nr_non_filtered_samples;
u32 nr_lost_samples;
+ u32 nr_dropped_samples;
};
void events_stats__inc(struct events_stats *stats, u32 type);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 3a719edafc7a..f14b7e6ff1dc 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -33,6 +33,8 @@
#include "util/bpf-filter.h"
#include "util/stat.h"
#include "util/util.h"
+#include "util/env.h"
+#include "util/intel-tpebs.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
@@ -78,6 +80,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
evlist->ctl_fd.fd = -1;
evlist->ctl_fd.ack = -1;
evlist->ctl_fd.pos = -1;
+ evlist->nr_br_cntr = -1;
}
struct evlist *evlist__new(void)
@@ -179,6 +182,7 @@ void evlist__delete(struct evlist *evlist)
if (evlist == NULL)
return;
+ tpebs_delete();
evlist__free_stats(evlist);
evlist__munmap(evlist);
evlist__close(evlist);
@@ -1063,7 +1067,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
if (!threads)
return -1;
- if (target__uses_dummy_map(target))
+ if (target__uses_dummy_map(target) && !evlist__has_bpf_output(evlist))
cpus = perf_cpu_map__new_any_cpu();
else
cpus = perf_cpu_map__new(target->cpu_list);
@@ -1086,7 +1090,8 @@ out_delete_threads:
return -1;
}
-int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel,
+ struct target *target)
{
struct evsel *evsel;
int err = 0;
@@ -1108,7 +1113,7 @@ int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
* non-tracepoint events can have BPF filters.
*/
if (!list_empty(&evsel->bpf_filters)) {
- err = perf_bpf_filter__prepare(evsel);
+ err = perf_bpf_filter__prepare(evsel, target);
if (err) {
*err_evsel = evsel;
break;
@@ -1261,6 +1266,72 @@ u64 evlist__combined_branch_type(struct evlist *evlist)
return branch_type;
}
+static struct evsel *
+evlist__find_dup_event_from_prev(struct evlist *evlist, struct evsel *event)
+{
+ struct evsel *pos;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (event == pos)
+ break;
+ if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) &&
+ !strcmp(pos->name, event->name))
+ return pos;
+ }
+ return NULL;
+}
+
+#define MAX_NR_ABBR_NAME (26 * 11)
+
+/*
+ * The abbr name is from A to Z9. If the number of event
+ * which requires the branch counter > MAX_NR_ABBR_NAME,
+ * return NA.
+ */
+static void evlist__new_abbr_name(char *name)
+{
+ static int idx;
+ int i = idx / 26;
+
+ if (idx >= MAX_NR_ABBR_NAME) {
+ name[0] = 'N';
+ name[1] = 'A';
+ name[2] = '\0';
+ return;
+ }
+
+ name[0] = 'A' + (idx % 26);
+
+ if (!i)
+ name[1] = '\0';
+ else {
+ name[1] = '0' + i - 1;
+ name[2] = '\0';
+ }
+
+ idx++;
+}
+
+void evlist__update_br_cntr(struct evlist *evlist)
+{
+ struct evsel *evsel, *dup;
+ int i = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) {
+ evsel->br_cntr_idx = i++;
+ evsel__leader(evsel)->br_cntr_nr++;
+
+ dup = evlist__find_dup_event_from_prev(evlist, evsel);
+ if (dup)
+ memcpy(evsel->abbr_name, dup->abbr_name, 3 * sizeof(char));
+ else
+ evlist__new_abbr_name(evsel->abbr_name);
+ }
+ }
+ evlist->nr_br_cntr = i;
+}
+
bool evlist__valid_read_format(struct evlist *evlist)
{
struct evsel *first = evlist__first(evlist), *pos = first;
@@ -2556,3 +2627,15 @@ void evlist__uniquify_name(struct evlist *evlist)
}
}
}
+
+bool evlist__has_bpf_output(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_bpf_output(evsel))
+ return true;
+ }
+
+ return false;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index cb91dc9117a2..bcc1c6984bb5 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -20,6 +20,7 @@ struct pollfd;
struct thread_map;
struct perf_cpu_map;
struct record_opts;
+struct target;
/*
* State machine of bkw_mmap_state:
@@ -56,6 +57,7 @@ struct evlist {
bool enabled;
int id_pos;
int is_pos;
+ int nr_br_cntr;
u64 combined_sample_type;
enum bkw_mmap_state bkw_mmap_state;
struct {
@@ -212,11 +214,13 @@ void evlist__enable_non_dummy(struct evlist *evlist);
void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
int evlist__create_maps(struct evlist *evlist, struct target *target);
-int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel,
+ struct target *target);
u64 __evlist__combined_sample_type(struct evlist *evlist);
u64 evlist__combined_sample_type(struct evlist *evlist);
u64 evlist__combined_branch_type(struct evlist *evlist);
+void evlist__update_br_cntr(struct evlist *evlist);
bool evlist__sample_id_all(struct evlist *evlist);
u16 evlist__id_hdr_size(struct evlist *evlist);
@@ -443,5 +447,6 @@ int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
void evlist__check_mem_load_aux(struct evlist *evlist);
void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
void evlist__uniquify_name(struct evlist *evlist);
+bool evlist__has_bpf_output(struct evlist *evlist);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bc603193c477..dbf9c8cee3c5 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -59,6 +59,7 @@
#include <internal/xyarray.h>
#include <internal/lib.h>
#include <internal/threadmap.h>
+#include "util/intel-tpebs.h"
#include <linux/ctype.h>
@@ -772,7 +773,7 @@ const char *evsel__name(struct evsel *evsel)
case PERF_TYPE_SOFTWARE:
if (evsel__is_tool(evsel))
- evsel__tool_name(evsel->tool_event, bf, sizeof(bf));
+ evsel__tool_name(evsel__tool_event(evsel), bf, sizeof(bf));
else
evsel__sw_name(evsel, bf, sizeof(bf));
break;
@@ -810,7 +811,7 @@ const char *evsel__metric_id(const struct evsel *evsel)
return evsel->metric_id;
if (evsel__is_tool(evsel))
- return perf_tool_event__to_str(evsel->tool_event);
+ return perf_tool_event__to_str(evsel__tool_event(evsel));
return "unknown";
}
@@ -1502,8 +1503,8 @@ void evsel__exit(struct evsel *evsel)
evsel->per_pkg_mask = NULL;
zfree(&evsel->metric_events);
perf_evsel__object.fini(evsel);
- if (evsel->tool_event == PERF_TOOL_SYSTEM_TIME ||
- evsel->tool_event == PERF_TOOL_USER_TIME)
+ if (evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME ||
+ evsel__tool_event(evsel) == PERF_TOOL_USER_TIME)
xyarray__delete(evsel->start_times);
}
@@ -1539,6 +1540,11 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
}
+static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ return tpebs_set_evsel(evsel, cpu_map_idx, thread);
+}
+
static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
u64 val, u64 ena, u64 run, u64 lost)
{
@@ -1546,6 +1552,12 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
count = perf_counts(counter->counts, cpu_map_idx, thread);
+ if (counter->retire_lat) {
+ evsel__read_retire_lat(counter, cpu_map_idx, thread);
+ perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
+ return;
+ }
+
count->val = val;
count->ena = ena;
count->run = run;
@@ -1554,6 +1566,60 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
}
+static bool evsel__group_has_tpebs(struct evsel *leader)
+{
+ struct evsel *evsel;
+
+ for_each_group_evsel(evsel, leader) {
+ if (evsel__is_retire_lat(evsel))
+ return true;
+ }
+ return false;
+}
+
+static u64 evsel__group_read_nr_members(struct evsel *leader)
+{
+ u64 nr = leader->core.nr_members;
+ struct evsel *evsel;
+
+ for_each_group_evsel(evsel, leader) {
+ if (evsel__is_retire_lat(evsel))
+ nr--;
+ }
+ return nr;
+}
+
+static u64 evsel__group_read_size(struct evsel *leader)
+{
+ u64 read_format = leader->core.attr.read_format;
+ int entry = sizeof(u64); /* value */
+ int size = 0;
+ int nr = 1;
+
+ if (!evsel__group_has_tpebs(leader))
+ return perf_evsel__read_size(&leader->core);
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ size += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ size += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_LOST)
+ entry += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ nr = evsel__group_read_nr_members(leader);
+ size += sizeof(u64);
+ }
+
+ size += entry * nr;
+ return size;
+}
+
static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
{
u64 read_format = leader->core.attr.read_format;
@@ -1562,7 +1628,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int
nr = *data++;
- if (nr != (u64) leader->core.nr_members)
+ if (nr != evsel__group_read_nr_members(leader))
return -EINVAL;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -1592,7 +1658,7 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
{
struct perf_stat_evsel *ps = leader->stats;
u64 read_format = leader->core.attr.read_format;
- int size = perf_evsel__read_size(&leader->core);
+ int size = evsel__group_read_size(leader);
u64 *data = ps->group_data;
if (!(read_format & PERF_FORMAT_ID))
@@ -1719,7 +1785,7 @@ static int evsel__read_tool(struct evsel *evsel, int cpu_map_idx, int thread)
count = perf_counts(evsel->counts, cpu_map_idx, thread);
- switch (evsel->tool_event) {
+ switch (evsel__tool_event(evsel)) {
case PERF_TOOL_DURATION_TIME:
/*
* Pretend duration_time is only on the first CPU and thread, or
@@ -1734,7 +1800,7 @@ static int evsel__read_tool(struct evsel *evsel, int cpu_map_idx, int thread)
break;
case PERF_TOOL_USER_TIME:
case PERF_TOOL_SYSTEM_TIME: {
- bool system = evsel->tool_event == PERF_TOOL_SYSTEM_TIME;
+ bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME;
start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
fd = FD(evsel, cpu_map_idx, thread);
@@ -1784,6 +1850,9 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
if (evsel__is_tool(evsel))
return evsel__read_tool(evsel, cpu_map_idx, thread);
+ if (evsel__is_retire_lat(evsel))
+ return evsel__read_retire_lat(evsel, cpu_map_idx, thread);
+
if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
return evsel__read_group(evsel, cpu_map_idx, thread);
@@ -2003,8 +2072,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
return -ENOMEM;
- if ((evsel->tool_event == PERF_TOOL_SYSTEM_TIME ||
- evsel->tool_event == PERF_TOOL_USER_TIME) &&
+ if ((evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME ||
+ evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) &&
!evsel->start_times) {
evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), nthreads, sizeof(__u64));
if (!evsel->start_times)
@@ -2193,13 +2262,16 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
int pid = -1, err, old_errno;
enum rlimit_action set_rlimit = NO_CHANGE;
- if (evsel->tool_event == PERF_TOOL_DURATION_TIME) {
+ if (evsel__tool_event(evsel) == PERF_TOOL_DURATION_TIME) {
if (evsel->core.attr.sample_period) /* no sampling */
return -EINVAL;
evsel->start_time = rdclock();
return 0;
}
+ if (evsel__is_retire_lat(evsel))
+ return tpebs_start(evsel->evlist);
+
err = __evsel__prepare_open(evsel, cpus, threads);
if (err)
return err;
@@ -2232,9 +2304,9 @@ retry_open:
if (!evsel->cgrp && !evsel->core.system_wide)
pid = perf_thread_map__pid(threads, thread);
- if (evsel->tool_event == PERF_TOOL_USER_TIME ||
- evsel->tool_event == PERF_TOOL_SYSTEM_TIME) {
- bool system = evsel->tool_event == PERF_TOOL_SYSTEM_TIME;
+ if (evsel__tool_event(evsel) == PERF_TOOL_USER_TIME ||
+ evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME) {
+ bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME;
__u64 *start_time = NULL;
if (evsel->core.attr.sample_period) {
@@ -2392,6 +2464,8 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
void evsel__close(struct evsel *evsel)
{
+ if (evsel__is_retire_lat(evsel))
+ tpebs_delete();
perf_evsel__close(&evsel->core);
perf_evsel__free_id(&evsel->core);
}
@@ -2562,17 +2636,18 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
static inline bool evsel__has_branch_counters(const struct evsel *evsel)
{
- struct evsel *cur, *leader = evsel__leader(evsel);
+ struct evsel *leader = evsel__leader(evsel);
/* The branch counters feature only supports group */
if (!leader || !evsel->evlist)
return false;
- evlist__for_each_entry(evsel->evlist, cur) {
- if ((leader == evsel__leader(cur)) &&
- (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
- return true;
- }
+ if (evsel->evlist->nr_br_cntr < 0)
+ evlist__update_br_cntr(evsel->evlist);
+
+ if (leader->br_cntr_nr > 0)
+ return true;
+
return false;
}
@@ -2810,8 +2885,6 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
array = (void *)array + sz;
if (evsel__has_branch_counters(evsel)) {
- OVERFLOW_CHECK_u64(array);
-
data->branch_stack_cntr = (u64 *)array;
sz = data->branch_stack->nr * sizeof(u64);
@@ -2975,7 +3048,7 @@ int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
return 0;
}
-u16 evsel__id_hdr_size(struct evsel *evsel)
+u16 evsel__id_hdr_size(const struct evsel *evsel)
{
u64 sample_type = evsel->core.attr.sample_type;
u16 size = 0;
@@ -3357,6 +3430,9 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
{
int cpu_map_idx, thread;
+ if (evsel__is_retire_lat(evsel))
+ return 0;
+
for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
thread++) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 80b5f6dd868e..15e745a9a798 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -98,6 +98,7 @@ struct evsel {
bool bpf_counter;
bool use_config_name;
bool skippable;
+ bool retire_lat;
int bpf_fd;
struct bpf_object *bpf_obj;
struct list_head config_terms;
@@ -148,6 +149,20 @@ struct evsel {
__u64 synth_sample_type;
/*
+ * Store the branch counter related information.
+ * br_cntr_idx: The idx of the branch counter event in the evlist
+ * br_cntr_nr: The number of the branch counter event in the group
+ * (Only available for the leader event)
+ * abbr_name: The abbreviation name assigned to an event which is
+ * logged by the branch counter.
+ * The abbr name is from A to Z9. NA is applied if out
+ * of the range.
+ */
+ int br_cntr_idx;
+ int br_cntr_nr;
+ char abbr_name[3];
+
+ /*
* bpf_counter_ops serves two use cases:
* 1. perf-stat -b counting events used byBPF programs
* 2. perf-stat --use-bpf use BPF programs to aggregate counts
@@ -310,6 +325,16 @@ static inline bool evsel__is_tool(const struct evsel *evsel)
return evsel->tool_event != PERF_TOOL_NONE;
}
+static inline bool evsel__is_retire_lat(const struct evsel *evsel)
+{
+ return evsel->retire_lat;
+}
+
+static inline enum perf_tool_event evsel__tool_event(const struct evsel *evsel)
+{
+ return evsel->tool_event;
+}
+
const char *evsel__group_name(struct evsel *evsel);
int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
@@ -422,7 +447,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
u64 *timestamp);
-u16 evsel__id_hdr_size(struct evsel *evsel);
+u16 evsel__id_hdr_size(const struct evsel *evsel);
static inline struct evsel *evsel__next(struct evsel *evsel)
{
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 8719b3cb5646..c2c0500d5da9 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -107,7 +107,6 @@ out:
return ++printed;
}
-#ifndef PYTHON_PERF
int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
unsigned int print_opts, struct callchain_cursor *cursor,
struct strlist *bt_stop_list, FILE *fp)
@@ -248,4 +247,3 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
return printed;
}
-#endif /* PYTHON_PERF */
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 558efcb98d25..bae649ef50e8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -6,6 +6,7 @@
#include "target.h"
struct evlist;
+struct hashamp;
struct perf_ftrace {
struct evlist *evlist;
@@ -15,6 +16,7 @@ struct perf_ftrace {
struct list_head notrace;
struct list_head graph_funcs;
struct list_head nograph_funcs;
+ struct hashmap *profile_hash;
unsigned long percpu_buffer_size;
bool inherit;
bool use_nsec;
@@ -25,6 +27,7 @@ struct perf_ftrace {
int graph_noirqs;
int graph_verbose;
int graph_thresh;
+ int graph_tail;
};
struct filter_entry {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 55e9553861d0..a6386d12afd7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3676,32 +3676,50 @@ int perf_header__write_pipe(int fd)
static int perf_session__do_write_header(struct perf_session *session,
struct evlist *evlist,
int fd, bool at_exit,
- struct feat_copier *fc)
+ struct feat_copier *fc,
+ bool write_attrs_after_data)
{
struct perf_file_header f_header;
- struct perf_file_attr f_attr;
struct perf_header *header = &session->header;
struct evsel *evsel;
struct feat_fd ff = {
.fd = fd,
};
- u64 attr_offset;
+ u64 attr_offset = sizeof(f_header), attr_size = 0;
int err;
- lseek(fd, sizeof(f_header), SEEK_SET);
+ if (write_attrs_after_data && at_exit) {
+ /*
+ * Write features at the end of the file first so that
+ * attributes may come after them.
+ */
+ if (!header->data_offset && header->data_size) {
+ pr_err("File contains data but offset unknown\n");
+ err = -1;
+ goto err_out;
+ }
+ header->feat_offset = header->data_offset + header->data_size;
+ err = perf_header__adds_write(header, evlist, fd, fc);
+ if (err < 0)
+ goto err_out;
+ attr_offset = lseek(fd, 0, SEEK_CUR);
+ } else {
+ lseek(fd, attr_offset, SEEK_SET);
+ }
evlist__for_each_entry(session->evlist, evsel) {
- evsel->id_offset = lseek(fd, 0, SEEK_CUR);
- err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
- if (err < 0) {
- pr_debug("failed to write perf header\n");
- free(ff.buf);
- return err;
+ evsel->id_offset = attr_offset;
+ /* Avoid writing at the end of the file until the session is exiting. */
+ if (!write_attrs_after_data || at_exit) {
+ err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
+ if (err < 0) {
+ pr_debug("failed to write perf header\n");
+ goto err_out;
+ }
}
+ attr_offset += evsel->core.ids * sizeof(u64);
}
- attr_offset = lseek(ff.fd, 0, SEEK_CUR);
-
evlist__for_each_entry(evlist, evsel) {
if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
/*
@@ -3711,40 +3729,46 @@ static int perf_session__do_write_header(struct perf_session *session,
*/
evsel->core.attr.size = sizeof(evsel->core.attr);
}
- f_attr = (struct perf_file_attr){
- .attr = evsel->core.attr,
- .ids = {
- .offset = evsel->id_offset,
- .size = evsel->core.ids * sizeof(u64),
+ /* Avoid writing at the end of the file until the session is exiting. */
+ if (!write_attrs_after_data || at_exit) {
+ struct perf_file_attr f_attr = {
+ .attr = evsel->core.attr,
+ .ids = {
+ .offset = evsel->id_offset,
+ .size = evsel->core.ids * sizeof(u64),
+ }
+ };
+ err = do_write(&ff, &f_attr, sizeof(f_attr));
+ if (err < 0) {
+ pr_debug("failed to write perf header attribute\n");
+ goto err_out;
}
- };
- err = do_write(&ff, &f_attr, sizeof(f_attr));
- if (err < 0) {
- pr_debug("failed to write perf header attribute\n");
- free(ff.buf);
- return err;
}
+ attr_size += sizeof(struct perf_file_attr);
}
- if (!header->data_offset)
- header->data_offset = lseek(fd, 0, SEEK_CUR);
+ if (!header->data_offset) {
+ if (write_attrs_after_data)
+ header->data_offset = sizeof(f_header);
+ else
+ header->data_offset = attr_offset + attr_size;
+ }
header->feat_offset = header->data_offset + header->data_size;
- if (at_exit) {
+ if (!write_attrs_after_data && at_exit) {
+ /* Write features now feat_offset is known. */
err = perf_header__adds_write(header, evlist, fd, fc);
- if (err < 0) {
- free(ff.buf);
- return err;
- }
+ if (err < 0)
+ goto err_out;
}
f_header = (struct perf_file_header){
.magic = PERF_MAGIC,
.size = sizeof(f_header),
- .attr_size = sizeof(f_attr),
+ .attr_size = sizeof(struct perf_file_attr),
.attrs = {
.offset = attr_offset,
- .size = evlist->core.nr_entries * sizeof(f_attr),
+ .size = attr_size,
},
.data = {
.offset = header->data_offset,
@@ -3757,21 +3781,24 @@ static int perf_session__do_write_header(struct perf_session *session,
lseek(fd, 0, SEEK_SET);
err = do_write(&ff, &f_header, sizeof(f_header));
- free(ff.buf);
if (err < 0) {
pr_debug("failed to write perf header\n");
- return err;
+ goto err_out;
+ } else {
+ lseek(fd, 0, SEEK_END);
+ err = 0;
}
- lseek(fd, header->data_offset + header->data_size, SEEK_SET);
-
- return 0;
+err_out:
+ free(ff.buf);
+ return err;
}
int perf_session__write_header(struct perf_session *session,
struct evlist *evlist,
int fd, bool at_exit)
{
- return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
+ return perf_session__do_write_header(session, evlist, fd, at_exit, /*fc=*/NULL,
+ /*write_attrs_after_data=*/false);
}
size_t perf_session__data_offset(const struct evlist *evlist)
@@ -3791,9 +3818,11 @@ size_t perf_session__data_offset(const struct evlist *evlist)
int perf_session__inject_header(struct perf_session *session,
struct evlist *evlist,
int fd,
- struct feat_copier *fc)
+ struct feat_copier *fc,
+ bool write_attrs_after_data)
{
- return perf_session__do_write_header(session, evlist, fd, true, fc);
+ return perf_session__do_write_header(session, evlist, fd, true, fc,
+ write_attrs_after_data);
}
static int perf_header__getbuffer64(struct perf_header *header,
@@ -3986,6 +4015,24 @@ int perf_file_header__read(struct perf_file_header *header,
adds_features));
}
+ if (header->size > header->attrs.offset) {
+ pr_err("Perf file header corrupt: header overlaps attrs\n");
+ return -1;
+ }
+
+ if (header->size > header->data.offset) {
+ pr_err("Perf file header corrupt: header overlaps data\n");
+ return -1;
+ }
+
+ if ((header->attrs.offset <= header->data.offset &&
+ header->attrs.offset + header->attrs.size > header->data.offset) ||
+ (header->attrs.offset > header->data.offset &&
+ header->data.offset + header->data.size > header->attrs.offset)) {
+ pr_err("Perf file header corrupt: Attributes and data overlap\n");
+ return -1;
+ }
+
if (header->size != sizeof(*header)) {
/* Support the previous format */
if (header->size == offsetof(typeof(*header), adds_features))
@@ -4066,13 +4113,8 @@ static int perf_file_section__process(struct perf_file_section *section,
static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
struct perf_header *ph,
- struct perf_data* data,
- bool repipe, int repipe_fd)
+ struct perf_data *data)
{
- struct feat_fd ff = {
- .fd = repipe_fd,
- .ph = ph,
- };
ssize_t ret;
ret = perf_data__read(data, header, sizeof(*header));
@@ -4087,19 +4129,15 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
if (ph->needs_swap)
header->size = bswap_64(header->size);
- if (repipe && do_write(&ff, header, sizeof(*header)) < 0)
- return -1;
-
return 0;
}
-static int perf_header__read_pipe(struct perf_session *session, int repipe_fd)
+static int perf_header__read_pipe(struct perf_session *session)
{
struct perf_header *header = &session->header;
struct perf_pipe_file_header f_header;
- if (perf_file_header__read_pipe(&f_header, header, session->data,
- session->repipe, repipe_fd) < 0) {
+ if (perf_file_header__read_pipe(&f_header, header, session->data) < 0) {
pr_debug("incompatible file format\n");
return -EINVAL;
}
@@ -4199,7 +4237,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h
}
#endif
-int perf_session__read_header(struct perf_session *session, int repipe_fd)
+int perf_session__read_header(struct perf_session *session)
{
struct perf_data *data = session->data;
struct perf_header *header = &session->header;
@@ -4220,7 +4258,7 @@ int perf_session__read_header(struct perf_session *session, int repipe_fd)
* We can read 'pipe' data event from regular file,
* check for the pipe header regardless of source.
*/
- err = perf_header__read_pipe(session, repipe_fd);
+ err = perf_header__read_pipe(session);
if (!err || perf_data__is_pipe(data)) {
data->is_pipe = true;
return err;
@@ -4326,7 +4364,7 @@ out_delete_evlist:
int perf_event__process_feature(struct perf_session *session,
union perf_event *event)
{
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct feat_fd ff = { .fd = 0 };
struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event;
int type = fe->header.type;
@@ -4405,7 +4443,7 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
return ret;
}
-int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+int perf_event__process_attr(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct evlist **pevlist)
{
@@ -4444,7 +4482,7 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
return 0;
}
-int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+int perf_event__process_event_update(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct evlist **pevlist)
{
@@ -4514,15 +4552,14 @@ int perf_event__process_tracing_data(struct perf_session *session,
SEEK_SET);
}
- size_read = trace_report(fd, &session->tevent,
- session->repipe);
+ size_read = trace_report(fd, &session->tevent, session->trace_event_repipe);
padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
if (readn(fd, buf, padding) < 0) {
pr_err("%s: reading input file", __func__);
return -1;
}
- if (session->repipe) {
+ if (session->trace_event_repipe) {
int retw = write(STDOUT_FILENO, buf, padding);
if (retw <= 0 || retw != padding) {
pr_err("%s: repiping tracing data padding", __func__);
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 7c16a250e738..a63a361f20f4 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -61,14 +61,28 @@ struct perf_file_section {
u64 size;
};
+/**
+ * struct perf_file_header: Header representation on disk.
+ */
struct perf_file_header {
+ /** @magic: Holds "PERFILE2". */
u64 magic;
+ /** @size: Size of this header - sizeof(struct perf_file_header). */
u64 size;
+ /**
+ * @attr_size: Size of attrs entries - sizeof(struct perf_event_attr) +
+ * sizeof(struct perf_file_section).
+ */
u64 attr_size;
+ /** @attrs: Offset and size of file section holding attributes. */
struct perf_file_section attrs;
+ /** @data: Offset and size of file section holding regular event data. */
struct perf_file_section data;
- /* event_types is ignored */
+ /** @event_types: Ignored. */
struct perf_file_section event_types;
+ /**
+ * @adds_features: Bitmap of features. The features are immediately after the data section.
+ */
DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
};
@@ -117,7 +131,7 @@ union perf_event;
extern const char perf_version_string[];
-int perf_session__read_header(struct perf_session *session, int repipe_fd);
+int perf_session__read_header(struct perf_session *session);
int perf_session__write_header(struct perf_session *session,
struct evlist *evlist,
int fd, bool at_exit);
@@ -136,7 +150,8 @@ struct feat_copier {
int perf_session__inject_header(struct perf_session *session,
struct evlist *evlist,
int fd,
- struct feat_copier *fc);
+ struct feat_copier *fc,
+ bool write_attrs_after_data);
size_t perf_session__data_offset(const struct evlist *evlist);
@@ -156,9 +171,9 @@ int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
int perf_event__process_feature(struct perf_session *session,
union perf_event *event);
-int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
+int perf_event__process_attr(const struct perf_tool *tool, union perf_event *event,
struct evlist **pevlist);
-int perf_event__process_event_update(struct perf_tool *tool,
+int perf_event__process_event_update(const struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist);
size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c
index 37ea987017f6..e4cc4785f744 100644
--- a/tools/perf/util/hisi-ptt.c
+++ b/tools/perf/util/hisi-ptt.c
@@ -79,14 +79,14 @@ static void hisi_ptt_dump_event(struct hisi_ptt *ptt, unsigned char *buf,
static int hisi_ptt_process_event(struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
auxtrace);
@@ -123,7 +123,7 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
}
static int hisi_ptt_flush(struct perf_session *session __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f028f113c4fd..f387e85a0087 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -472,10 +472,18 @@ static int hist_entry__init(struct hist_entry *he,
memcpy(he->branch_info, template->branch_info,
sizeof(*he->branch_info));
+ he->branch_info->from.ms.maps = maps__get(he->branch_info->from.ms.maps);
he->branch_info->from.ms.map = map__get(he->branch_info->from.ms.map);
+ he->branch_info->to.ms.maps = maps__get(he->branch_info->to.ms.maps);
he->branch_info->to.ms.map = map__get(he->branch_info->to.ms.map);
}
+ if (he->mem_info) {
+ he->mem_info = mem_info__clone(template->mem_info);
+ if (he->mem_info == NULL)
+ goto err_infos;
+ }
+
if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
callchain_init(he->callchain);
@@ -620,12 +628,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
if (symbol_conf.cumulate_callchain)
he_stat__add_period(he->stat_acc, period);
- /*
- * This mem info was allocated from sample__resolve_mem
- * and will not be used anymore.
- */
- mem_info__zput(entry->mem_info);
-
block_info__delete(entry->block_info);
kvm_info__zput(entry->kvm_info);
@@ -636,7 +638,12 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
* mis-adjust symbol addresses when computing
* the history counter to increment.
*/
- if (he->ms.map != entry->ms.map) {
+ if (hists__has(hists, sym) && he->ms.map != entry->ms.map) {
+ if (he->ms.sym) {
+ u64 addr = he->ms.sym->start;
+ he->ms.sym = map__find_symbol(entry->ms.map, addr);
+ }
+
map__put(he->ms.map);
he->ms.map = map__get(entry->ms.map);
}
@@ -739,7 +746,7 @@ __hists__add_entry(struct hists *hists,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
.hists = hists,
.branch_info = bi,
- .mem_info = mem_info__get(mi),
+ .mem_info = mi,
.kvm_info = ki,
.block_info = block_info,
.transaction = sample->transaction,
@@ -970,10 +977,21 @@ out:
return err;
}
+static void branch_info__exit(struct branch_info *bi)
+{
+ map_symbol__exit(&bi->from.ms);
+ map_symbol__exit(&bi->to.ms);
+ zfree_srcline(&bi->srcline_from);
+ zfree_srcline(&bi->srcline_to);
+}
+
static int
iter_finish_branch_entry(struct hist_entry_iter *iter,
struct addr_location *al __maybe_unused)
{
+ for (int i = 0; i < iter->total; i++)
+ branch_info__exit(&iter->bi[i]);
+
zfree(&iter->bi);
iter->he = NULL;
@@ -1319,10 +1337,7 @@ void hist_entry__delete(struct hist_entry *he)
map_symbol__exit(&he->ms);
if (he->branch_info) {
- map_symbol__exit(&he->branch_info->from.ms);
- map_symbol__exit(&he->branch_info->to.ms);
- zfree_srcline(&he->branch_info->srcline_from);
- zfree_srcline(&he->branch_info->srcline_to);
+ branch_info__exit(he->branch_info);
zfree(&he->branch_info);
}
@@ -2370,6 +2385,11 @@ void hists__inc_nr_lost_samples(struct hists *hists, u32 lost)
hists->stats.nr_lost_samples += lost;
}
+void hists__inc_nr_dropped_samples(struct hists *hists, u32 lost)
+{
+ hists->stats.nr_dropped_samples += lost;
+}
+
static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
struct hist_entry *pair)
{
@@ -2667,7 +2687,7 @@ int hists__unlink(struct hists *hists)
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
struct perf_sample *sample, bool nonany_branch_mode,
- u64 *total_cycles)
+ u64 *total_cycles, struct evsel *evsel)
{
struct branch_info *bi;
struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -2691,7 +2711,8 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
for (int i = bs->nr - 1; i >= 0; i--) {
addr_map_symbol__account_cycles(&bi[i].from,
nonany_branch_mode ? NULL : prev,
- bi[i].flags.cycles);
+ bi[i].flags.cycles, evsel,
+ bi[i].branch_stack_cntr);
prev = &bi[i].to;
if (total_cycles)
@@ -2713,18 +2734,24 @@ size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
+ u64 total_samples = hists->stats.nr_samples;
+
+ total_samples += hists->stats.nr_lost_samples;
+ total_samples += hists->stats.nr_dropped_samples;
- if (symbol_conf.skip_empty && !hists->stats.nr_samples &&
- !hists->stats.nr_lost_samples)
+ if (symbol_conf.skip_empty && total_samples == 0)
continue;
ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
if (hists->stats.nr_samples)
- ret += fprintf(fp, "%16s events: %10d\n",
+ ret += fprintf(fp, "%20s events: %10d\n",
"SAMPLE", hists->stats.nr_samples);
if (hists->stats.nr_lost_samples)
- ret += fprintf(fp, "%16s events: %10d\n",
+ ret += fprintf(fp, "%20s events: %10d\n",
"LOST_SAMPLES", hists->stats.nr_lost_samples);
+ if (hists->stats.nr_dropped_samples)
+ ret += fprintf(fp, "%20s events: %10d\n",
+ "LOST_SAMPLES (BPF)", hists->stats.nr_dropped_samples);
}
return ret;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5273f5c37050..7d7ae94b4b31 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -86,6 +86,7 @@ enum hist_column {
HISTC_TYPE,
HISTC_TYPE_OFFSET,
HISTC_SYMBOL_OFFSET,
+ HISTC_TYPE_CACHELINE,
HISTC_NR_COLS, /* Last entry */
};
@@ -371,6 +372,7 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists);
void hists__inc_nr_samples(struct hists *hists, bool filtered);
void hists__inc_nr_lost_samples(struct hists *hists, u32 lost);
+void hists__inc_nr_dropped_samples(struct hists *hists, u32 lost);
size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
int max_cols, float min_pcnt, FILE *fp,
@@ -742,7 +744,7 @@ unsigned int hists__overhead_width(struct hists *hists);
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
struct perf_sample *sample, bool nonany_branch_mode,
- u64 *total_cycles);
+ u64 *total_cycles, struct evsel *evsel);
struct option;
int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
index 01fb25a1150a..75b28dcc8317 100644
--- a/tools/perf/util/include/dwarf-regs.h
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PERF_DWARF_REGS_H_
#define _PERF_DWARF_REGS_H_
+#include "annotate.h"
#define DWARF_REG_PC 0xd3af9c /* random number */
#define DWARF_REG_FB 0xd3affb /* random number */
@@ -31,6 +32,16 @@ static inline int get_dwarf_regnum(const char *name __maybe_unused,
}
#endif
+#if !defined(__powerpc__) || !defined(HAVE_DWARF_SUPPORT)
+static inline void get_powerpc_regs(u32 raw_insn __maybe_unused, int is_source __maybe_unused,
+ struct annotated_op_loc *op_loc __maybe_unused)
+{
+ return;
+}
+#else
+void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc);
+#endif
+
#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
/*
* Arch should support fetching the offset of a register in pt_regs
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index ec1b3bd9f530..27d9b5c9fec8 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -591,7 +591,7 @@ static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
static int intel_bts_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
auxtrace);
@@ -634,7 +634,7 @@ static int intel_bts_process_event(struct perf_session *session,
static int intel_bts_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
auxtrace);
@@ -675,7 +675,7 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session,
}
static int intel_bts_flush(struct perf_session *session,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
auxtrace);
@@ -737,35 +737,6 @@ static bool intel_bts_evsel_is_auxtrace(struct perf_session *session,
return evsel->core.attr.type == bts->pmu_type;
}
-struct intel_bts_synth {
- struct perf_tool dummy_tool;
- struct perf_session *session;
-};
-
-static int intel_bts_event_synth(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- struct intel_bts_synth *intel_bts_synth =
- container_of(tool, struct intel_bts_synth, dummy_tool);
-
- return perf_session__deliver_synth_event(intel_bts_synth->session,
- event, NULL);
-}
-
-static int intel_bts_synth_event(struct perf_session *session,
- struct perf_event_attr *attr, u64 id)
-{
- struct intel_bts_synth intel_bts_synth;
-
- memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
- intel_bts_synth.session = session;
-
- return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
- &id, intel_bts_event_synth);
-}
-
static int intel_bts_synth_events(struct intel_bts *bts,
struct perf_session *session)
{
@@ -814,7 +785,7 @@ static int intel_bts_synth_events(struct intel_bts *bts,
attr.sample_type |= PERF_SAMPLE_ADDR;
pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
- err = intel_bts_synth_event(session, &attr, id);
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err) {
pr_err("%s: failed to synthesize 'branches' event type\n",
__func__);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 4407130d91f8..47cf35799a4d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -209,12 +209,13 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
return 0;
}
-int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+int arch_is_uncond_branch(const unsigned char *buf, size_t len, int x86_64)
{
struct intel_pt_insn in;
if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
return -1;
- return in.branch != INTEL_PT_BR_NO_BRANCH;
+ return in.branch == INTEL_PT_BR_UNCONDITIONAL ||
+ in.branch == INTEL_PT_BR_INDIRECT;
}
const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index bccb988a7a44..94fb16cf9e0c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -10,7 +10,7 @@
#include <byteswap.h>
#include <linux/kernel.h>
#include <linux/compiler.h>
-#include <asm-generic/unaligned.h>
+#include <linux/unaligned.h>
#include "intel-pt-pkt-decoder.h"
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index d6d7b7512505..fd2597613f3d 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -3449,7 +3449,7 @@ out:
static int intel_pt_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
@@ -3533,7 +3533,7 @@ static int intel_pt_process_event(struct perf_session *session,
return err;
}
-static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
+static int intel_pt_flush(struct perf_session *session, const struct perf_tool *tool)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
@@ -3600,7 +3600,7 @@ static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
static int intel_pt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
@@ -3659,37 +3659,15 @@ static int intel_pt_queue_data(struct perf_session *session,
data_offset, timestamp);
}
-struct intel_pt_synth {
- struct perf_tool dummy_tool;
- struct perf_session *session;
-};
-
-static int intel_pt_event_synth(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- struct intel_pt_synth *intel_pt_synth =
- container_of(tool, struct intel_pt_synth, dummy_tool);
-
- return perf_session__deliver_synth_event(intel_pt_synth->session, event,
- NULL);
-}
-
static int intel_pt_synth_event(struct perf_session *session, const char *name,
struct perf_event_attr *attr, u64 id)
{
- struct intel_pt_synth intel_pt_synth;
int err;
pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
name, id, (u64)attr->sample_type);
- memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
- intel_pt_synth.session = session;
-
- err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
- &id, intel_pt_event_synth);
+ err = perf_session__deliver_synth_attr_event(session, attr, id);
if (err)
pr_err("%s: failed to synthesize '%s' event type\n",
__func__, name);
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
new file mode 100644
index 000000000000..50a3c3e07160
--- /dev/null
+++ b/tools/perf/util/intel-tpebs.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_tpebs.c: Intel TPEBS support
+ */
+
+
+#include <sys/param.h>
+#include <subcmd/run-command.h>
+#include <thread.h>
+#include "intel-tpebs.h"
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#include <linux/err.h>
+#include "sample.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "session.h"
+#include "tool.h"
+#include "cpumap.h"
+#include "metricgroup.h"
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <poll.h>
+#include <math.h>
+
+#define PERF_DATA "-"
+
+bool tpebs_recording;
+static pid_t tpebs_pid = -1;
+static size_t tpebs_event_size;
+static LIST_HEAD(tpebs_results);
+static pthread_t tpebs_reader_thread;
+static struct child_process *tpebs_cmd;
+
+struct tpebs_retire_lat {
+ struct list_head nd;
+ /* Event name */
+ const char *name;
+ /* Event name with the TPEBS modifier R */
+ const char *tpebs_name;
+ /* Count of retire_latency values found in sample data */
+ size_t count;
+ /* Sum of all the retire_latency values in sample data */
+ int sum;
+ /* Average of retire_latency, val = sum / count */
+ double val;
+};
+
+static int get_perf_record_args(const char **record_argv, char buf[],
+ const char *cpumap_buf)
+{
+ struct tpebs_retire_lat *e;
+ int i = 0;
+
+ pr_debug("tpebs: Prepare perf record for retire_latency\n");
+
+ record_argv[i++] = "perf";
+ record_argv[i++] = "record";
+ record_argv[i++] = "-W";
+ record_argv[i++] = "--synth=no";
+ record_argv[i++] = buf;
+
+ if (!cpumap_buf) {
+ pr_err("tpebs: Require cpumap list to run sampling\n");
+ return -ECANCELED;
+ }
+ /* Use -C when cpumap_buf is not "-1" */
+ if (strcmp(cpumap_buf, "-1")) {
+ record_argv[i++] = "-C";
+ record_argv[i++] = cpumap_buf;
+ }
+
+ list_for_each_entry(e, &tpebs_results, nd) {
+ record_argv[i++] = "-e";
+ record_argv[i++] = e->name;
+ }
+
+ record_argv[i++] = "-o";
+ record_argv[i++] = PERF_DATA;
+
+ return 0;
+}
+
+static int prepare_run_command(const char **argv)
+{
+ tpebs_cmd = zalloc(sizeof(struct child_process));
+ if (!tpebs_cmd)
+ return -ENOMEM;
+ tpebs_cmd->argv = argv;
+ tpebs_cmd->out = -1;
+ return 0;
+}
+
+static int start_perf_record(int control_fd[], int ack_fd[],
+ const char *cpumap_buf)
+{
+ const char **record_argv;
+ int ret;
+ char buf[32];
+
+ scnprintf(buf, sizeof(buf), "--control=fd:%d,%d", control_fd[0], ack_fd[1]);
+
+ record_argv = calloc(12 + 2 * tpebs_event_size, sizeof(char *));
+ if (!record_argv)
+ return -ENOMEM;
+
+ ret = get_perf_record_args(record_argv, buf, cpumap_buf);
+ if (ret)
+ goto out;
+
+ ret = prepare_run_command(record_argv);
+ if (ret)
+ goto out;
+ ret = start_command(tpebs_cmd);
+out:
+ free(record_argv);
+ return ret;
+}
+
+static int process_sample_event(const struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine __maybe_unused)
+{
+ int ret = 0;
+ const char *evname;
+ struct tpebs_retire_lat *t;
+
+ evname = evsel__name(evsel);
+
+ /*
+ * Need to handle per core results? We are assuming average retire
+ * latency value will be used. Save the number of samples and the sum of
+ * retire latency value for each event.
+ */
+ list_for_each_entry(t, &tpebs_results, nd) {
+ if (!strcmp(evname, t->name)) {
+ t->count += 1;
+ t->sum += sample->retire_lat;
+ t->val = (double) t->sum / t->count;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int process_feature_event(struct perf_session *session,
+ union perf_event *event)
+{
+ if (event->feat.feat_id < HEADER_LAST_FEATURE)
+ return perf_event__process_feature(session, event);
+ return 0;
+}
+
+static void *__sample_reader(void *arg)
+{
+ struct child_process *child = arg;
+ struct perf_session *session;
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ .path = PERF_DATA,
+ .file.fd = child->out,
+ };
+ struct perf_tool tool;
+
+ perf_tool__init(&tool, /*ordered_events=*/false);
+ tool.sample = process_sample_event;
+ tool.feature = process_feature_event;
+ tool.attr = perf_event__process_attr;
+
+ session = perf_session__new(&data, &tool);
+ if (IS_ERR(session))
+ return NULL;
+ perf_session__process_events(session);
+ perf_session__delete(session);
+
+ return NULL;
+}
+
+/*
+ * tpebs_stop - stop the sample data read thread and the perf record process.
+ */
+static int tpebs_stop(void)
+{
+ int ret = 0;
+
+ /* Like tpebs_start, we should only run tpebs_end once. */
+ if (tpebs_pid != -1) {
+ kill(tpebs_cmd->pid, SIGTERM);
+ tpebs_pid = -1;
+ pthread_join(tpebs_reader_thread, NULL);
+ close(tpebs_cmd->out);
+ ret = finish_command(tpebs_cmd);
+ if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL)
+ ret = 0;
+ }
+ return ret;
+}
+
+/*
+ * tpebs_start - start tpebs execution.
+ * @evsel_list: retire_latency evsels in this list will be selected and sampled
+ * to get the average retire_latency value.
+ *
+ * This function will be called from evlist level later when evlist__open() is
+ * called consistently.
+ */
+int tpebs_start(struct evlist *evsel_list)
+{
+ int ret = 0;
+ struct evsel *evsel;
+ char cpumap_buf[50];
+
+ /*
+ * We should only run tpebs_start when tpebs_recording is enabled.
+ * And we should only run it once with all the required events.
+ */
+ if (tpebs_pid != -1 || !tpebs_recording)
+ return 0;
+
+ cpu_map__snprint(evsel_list->core.user_requested_cpus, cpumap_buf, sizeof(cpumap_buf));
+ /*
+ * Prepare perf record for sampling event retire_latency before fork and
+ * prepare workload
+ */
+ evlist__for_each_entry(evsel_list, evsel) {
+ int i;
+ char *name;
+ struct tpebs_retire_lat *new;
+
+ if (!evsel->retire_lat)
+ continue;
+
+ pr_debug("tpebs: Retire_latency of event %s is required\n", evsel->name);
+ for (i = strlen(evsel->name) - 1; i > 0; i--) {
+ if (evsel->name[i] == 'R')
+ break;
+ }
+ if (i <= 0 || evsel->name[i] != 'R') {
+ ret = -1;
+ goto err;
+ }
+
+ name = strdup(evsel->name);
+ if (!name) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ name[i] = 'p';
+
+ new = zalloc(sizeof(*new));
+ if (!new) {
+ ret = -1;
+ zfree(name);
+ goto err;
+ }
+ new->name = name;
+ new->tpebs_name = evsel->name;
+ list_add_tail(&new->nd, &tpebs_results);
+ tpebs_event_size += 1;
+ }
+
+ if (tpebs_event_size > 0) {
+ struct pollfd pollfd = { .events = POLLIN, };
+ int control_fd[2], ack_fd[2], len;
+ char ack_buf[8];
+
+ /*Create control and ack fd for --control*/
+ if (pipe(control_fd) < 0) {
+ pr_err("tpebs: Failed to create control fifo");
+ ret = -1;
+ goto out;
+ }
+ if (pipe(ack_fd) < 0) {
+ pr_err("tpebs: Failed to create control fifo");
+ ret = -1;
+ goto out;
+ }
+
+ ret = start_perf_record(control_fd, ack_fd, cpumap_buf);
+ if (ret)
+ goto out;
+ tpebs_pid = tpebs_cmd->pid;
+ if (pthread_create(&tpebs_reader_thread, NULL, __sample_reader, tpebs_cmd)) {
+ kill(tpebs_cmd->pid, SIGTERM);
+ close(tpebs_cmd->out);
+ pr_err("Could not create thread to process sample data.\n");
+ ret = -1;
+ goto out;
+ }
+ /* Wait for perf record initialization.*/
+ len = strlen(EVLIST_CTL_CMD_ENABLE_TAG);
+ ret = write(control_fd[1], EVLIST_CTL_CMD_ENABLE_TAG, len);
+ if (ret != len) {
+ pr_err("perf record control write control message failed\n");
+ goto out;
+ }
+
+ /* wait for an ack */
+ pollfd.fd = ack_fd[0];
+
+ /*
+ * We need this poll to ensure the ack_fd PIPE will not hang
+ * when perf record failed for any reason. The timeout value
+ * 3000ms is an empirical selection.
+ */
+ if (!poll(&pollfd, 1, 3000)) {
+ pr_err("tpebs failed: perf record ack timeout\n");
+ ret = -1;
+ goto out;
+ }
+
+ if (!(pollfd.revents & POLLIN)) {
+ pr_err("tpebs failed: did not received an ack\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = read(ack_fd[0], ack_buf, sizeof(ack_buf));
+ if (ret > 0)
+ ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG);
+ else {
+ pr_err("tpebs: perf record control ack failed\n");
+ goto out;
+ }
+out:
+ close(control_fd[0]);
+ close(control_fd[1]);
+ close(ack_fd[0]);
+ close(ack_fd[1]);
+ }
+err:
+ if (ret)
+ tpebs_delete();
+ return ret;
+}
+
+
+int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ __u64 val;
+ bool found = false;
+ struct tpebs_retire_lat *t;
+ struct perf_counts_values *count;
+
+ /* Non reitre_latency evsel should never enter this function. */
+ if (!evsel__is_retire_lat(evsel))
+ return -1;
+
+ /*
+ * Need to stop the forked record to ensure get sampled data from the
+ * PIPE to process and get non-zero retire_lat value for hybrid.
+ */
+ tpebs_stop();
+ count = perf_counts(evsel->counts, cpu_map_idx, thread);
+
+ list_for_each_entry(t, &tpebs_results, nd) {
+ if (t->tpebs_name == evsel->name ||
+ (evsel->metric_id && !strcmp(t->tpebs_name, evsel->metric_id))) {
+ found = true;
+ break;
+ }
+ }
+
+ /* Set ena and run to non-zero */
+ count->ena = count->run = 1;
+ count->lost = 0;
+
+ if (!found) {
+ /*
+ * Set default value or 0 when retire_latency for this event is
+ * not found from sampling data (record_tpebs not set or 0
+ * sample recorded).
+ */
+ count->val = 0;
+ return 0;
+ }
+
+ /*
+ * Only set retire_latency value to the first CPU and thread.
+ */
+ if (cpu_map_idx == 0 && thread == 0)
+ val = rint(t->val);
+ else
+ val = 0;
+
+ count->val = val;
+ return 0;
+}
+
+static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r)
+{
+ zfree(&r->name);
+ free(r);
+}
+
+
+/*
+ * tpebs_delete - delete tpebs related data and stop the created thread and
+ * process by calling tpebs_stop().
+ *
+ * This function is called from evlist_delete() and also from builtin-stat
+ * stat_handle_error(). If tpebs_start() is called from places other then perf
+ * stat, need to ensure tpebs_delete() is also called to safely free mem and
+ * close the data read thread and the forked perf record process.
+ *
+ * This function is also called in evsel__close() to be symmetric with
+ * tpebs_start() being called in evsel__open(). We will update this call site
+ * when move tpebs_start() to evlist level.
+ */
+void tpebs_delete(void)
+{
+ struct tpebs_retire_lat *r, *rtmp;
+
+ if (tpebs_pid == -1)
+ return;
+
+ tpebs_stop();
+
+ list_for_each_entry_safe(r, rtmp, &tpebs_results, nd) {
+ list_del_init(&r->nd);
+ tpebs_retire_lat__delete(r);
+ }
+
+ if (tpebs_cmd) {
+ free(tpebs_cmd);
+ tpebs_cmd = NULL;
+ }
+}
diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h
new file mode 100644
index 000000000000..766b3fbd79f1
--- /dev/null
+++ b/tools/perf/util/intel-tpebs.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_tpebs.h: Intel TEPBS support
+ */
+#ifndef INCLUDE__PERF_INTEL_TPEBS_H__
+#define INCLUDE__PERF_INTEL_TPEBS_H__
+
+#include "stat.h"
+#include "evsel.h"
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+
+extern bool tpebs_recording;
+int tpebs_start(struct evlist *evsel_list);
+void tpebs_delete(void);
+int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread);
+
+#else
+
+static inline int tpebs_start(struct evlist *evsel_list __maybe_unused)
+{
+ return 0;
+}
+
+static inline void tpebs_delete(void) {};
+
+static inline int tpebs_set_evsel(struct evsel *evsel __maybe_unused,
+ int cpu_map_idx __maybe_unused,
+ int thread __maybe_unused)
+{
+ return 0;
+}
+
+#endif
+#endif
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
index fb810e1b2de7..f4037203e9ec 100644
--- a/tools/perf/util/jit.h
+++ b/tools/perf/util/jit.h
@@ -5,7 +5,8 @@
#include <data.h>
int jit_process(struct perf_session *session, struct perf_data *output,
- struct machine *machine, char *filename, pid_t pid, pid_t tid, u64 *nbytes);
+ struct machine *machine, const char *filename,
+ pid_t pid, pid_t tid, u64 *nbytes);
int jit_inject_record(const char *filename);
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 1f657ef8975f..346513e5e9b7 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -424,7 +424,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
{
struct perf_sample sample;
union perf_event *event;
- struct perf_tool *tool = jd->session->tool;
+ const struct perf_tool *tool = jd->session->tool;
uint64_t code, addr;
uintptr_t uaddr;
char *filename;
@@ -543,7 +543,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
{
struct perf_sample sample;
union perf_event *event;
- struct perf_tool *tool = jd->session->tool;
+ const struct perf_tool *tool = jd->session->tool;
char *filename;
size_t size;
struct stat st;
@@ -710,7 +710,7 @@ jit_process_dump(struct jit_buf_desc *jd)
}
static int
-jit_inject(struct jit_buf_desc *jd, char *path)
+jit_inject(struct jit_buf_desc *jd, const char *path)
{
int ret;
@@ -737,7 +737,7 @@ jit_inject(struct jit_buf_desc *jd, char *path)
* as captured in the RECORD_MMAP record
*/
static int
-jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi)
+jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi)
{
char *p;
char *end = NULL;
@@ -821,7 +821,7 @@ int
jit_process(struct perf_session *session,
struct perf_data *output,
struct machine *machine,
- char *filename,
+ const char *filename,
pid_t pid,
pid_t tid,
u64 *nbytes)
diff --git a/tools/perf/util/llvm-c-helpers.cpp b/tools/perf/util/llvm-c-helpers.cpp
new file mode 100644
index 000000000000..663bcaba2041
--- /dev/null
+++ b/tools/perf/util/llvm-c-helpers.cpp
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Must come before the linux/compiler.h include, which defines several
+ * macros (e.g. noinline) that conflict with compiler builtins used
+ * by LLVM.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter" /* Needed for LLVM <= 15 */
+#include <llvm/DebugInfo/Symbolize/Symbolize.h>
+#include <llvm/Support/TargetSelect.h>
+#pragma GCC diagnostic pop
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <linux/compiler.h>
+extern "C" {
+#include <linux/zalloc.h>
+}
+#include "symbol_conf.h"
+#include "llvm-c-helpers.h"
+
+extern "C"
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
+
+using namespace llvm;
+using llvm::symbolize::LLVMSymbolizer;
+
+/*
+ * Allocate a static LLVMSymbolizer, which will live to the end of the program.
+ * Unlike the bfd paths, LLVMSymbolizer has its own cache, so we do not need
+ * to store anything in the dso struct.
+ */
+static LLVMSymbolizer *get_symbolizer()
+{
+ static LLVMSymbolizer *instance = nullptr;
+ if (instance == nullptr) {
+ LLVMSymbolizer::Options opts;
+ /*
+ * LLVM sometimes demangles slightly different from the rest
+ * of the code, and this mismatch can cause new_inline_sym()
+ * to get confused and mark non-inline symbol as inlined
+ * (since the name does not properly match up with base_sym).
+ * Thus, disable the demangling and let the rest of the code
+ * handle it.
+ */
+ opts.Demangle = false;
+ instance = new LLVMSymbolizer(opts);
+ }
+ return instance;
+}
+
+/* Returns 0 on error, 1 on success. */
+static int extract_file_and_line(const DILineInfo &line_info, char **file,
+ unsigned int *line)
+{
+ if (file) {
+ if (line_info.FileName == "<invalid>") {
+ /* Match the convention of libbfd. */
+ *file = nullptr;
+ } else {
+ /* The caller expects to get something it can free(). */
+ *file = strdup(line_info.FileName.c_str());
+ if (*file == nullptr)
+ return 0;
+ }
+ }
+ if (line)
+ *line = line_info.Line;
+ return 1;
+}
+
+extern "C"
+int llvm_addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line,
+ bool unwind_inlines,
+ llvm_a2l_frame **inline_frames)
+{
+ LLVMSymbolizer *symbolizer = get_symbolizer();
+ object::SectionedAddress sectioned_addr = {
+ addr,
+ object::SectionedAddress::UndefSection
+ };
+
+ if (unwind_inlines) {
+ Expected<DIInliningInfo> res_or_err =
+ symbolizer->symbolizeInlinedCode(dso_name,
+ sectioned_addr);
+ if (!res_or_err)
+ return 0;
+ unsigned num_frames = res_or_err->getNumberOfFrames();
+ if (num_frames == 0)
+ return 0;
+
+ if (extract_file_and_line(res_or_err->getFrame(0),
+ file, line) == 0)
+ return 0;
+
+ *inline_frames = (llvm_a2l_frame *)calloc(
+ num_frames, sizeof(**inline_frames));
+ if (*inline_frames == nullptr)
+ return 0;
+
+ for (unsigned i = 0; i < num_frames; ++i) {
+ const DILineInfo &src = res_or_err->getFrame(i);
+
+ llvm_a2l_frame &dst = (*inline_frames)[i];
+ if (src.FileName == "<invalid>")
+ /* Match the convention of libbfd. */
+ dst.filename = nullptr;
+ else
+ dst.filename = strdup(src.FileName.c_str());
+ dst.funcname = strdup(src.FunctionName.c_str());
+ dst.line = src.Line;
+
+ if (dst.filename == nullptr ||
+ dst.funcname == nullptr) {
+ for (unsigned j = 0; j <= i; ++j) {
+ zfree(&(*inline_frames)[j].filename);
+ zfree(&(*inline_frames)[j].funcname);
+ }
+ zfree(inline_frames);
+ return 0;
+ }
+ }
+
+ return num_frames;
+ } else {
+ if (inline_frames)
+ *inline_frames = nullptr;
+
+ Expected<DILineInfo> res_or_err =
+ symbolizer->symbolizeCode(dso_name, sectioned_addr);
+ if (!res_or_err)
+ return 0;
+ return extract_file_and_line(*res_or_err, file, line);
+ }
+}
+
+static char *
+make_symbol_relative_string(struct dso *dso, const char *sym_name,
+ u64 addr, u64 base_addr)
+{
+ if (!strcmp(sym_name, "<invalid>"))
+ return NULL;
+
+ char *demangled = dso__demangle_sym(dso, 0, sym_name);
+ if (base_addr && base_addr != addr) {
+ char buf[256];
+ snprintf(buf, sizeof(buf), "%s+0x%" PRIx64,
+ demangled ? demangled : sym_name, addr - base_addr);
+ free(demangled);
+ return strdup(buf);
+ } else {
+ if (demangled)
+ return demangled;
+ else
+ return strdup(sym_name);
+ }
+}
+
+extern "C"
+char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr)
+{
+ LLVMSymbolizer *symbolizer = get_symbolizer();
+ object::SectionedAddress sectioned_addr = {
+ addr,
+ object::SectionedAddress::UndefSection
+ };
+ Expected<DILineInfo> res_or_err =
+ symbolizer->symbolizeCode(dso_name, sectioned_addr);
+ if (!res_or_err) {
+ return NULL;
+ }
+ return make_symbol_relative_string(
+ dso, res_or_err->FunctionName.c_str(),
+ addr, res_or_err->StartAddress ? *res_or_err->StartAddress : 0);
+}
+
+extern "C"
+char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr)
+{
+ LLVMSymbolizer *symbolizer = get_symbolizer();
+ object::SectionedAddress sectioned_addr = {
+ addr,
+ object::SectionedAddress::UndefSection
+ };
+ Expected<DIGlobal> res_or_err =
+ symbolizer->symbolizeData(dso_name, sectioned_addr);
+ if (!res_or_err) {
+ return NULL;
+ }
+ return make_symbol_relative_string(
+ dso, res_or_err->Name.c_str(),
+ addr, res_or_err->Start);
+}
diff --git a/tools/perf/util/llvm-c-helpers.h b/tools/perf/util/llvm-c-helpers.h
new file mode 100644
index 000000000000..d2b99637a28a
--- /dev/null
+++ b/tools/perf/util/llvm-c-helpers.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_LLVM_C_HELPERS
+#define __PERF_LLVM_C_HELPERS 1
+
+/*
+ * Helpers to call into LLVM C++ code from C, for the parts that do not have
+ * C APIs.
+ */
+
+#include <linux/compiler.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct dso;
+
+struct llvm_a2l_frame {
+ char* filename;
+ char* funcname;
+ unsigned int line;
+};
+
+/*
+ * Implement addr2line() using libLLVM. LLVM is a C++ API, and
+ * many of the linux/ headers cannot be included in a C++ compile unit,
+ * so we need to make a little bridge code here. llvm_addr2line() will
+ * convert the inline frame information from LLVM's internal structures
+ * and put them into a flat array given in inline_frames. The caller
+ * is then responsible for taking that array and convert it into perf's
+ * regular inline frame structures (which depend on e.g. struct list_head).
+ *
+ * If the address could not be resolved, or an error occurred (e.g. OOM),
+ * returns 0. Otherwise, returns the number of inline frames (which means 1
+ * if the address was not part of an inlined function). If unwind_inlines
+ * is set and the return code is nonzero, inline_frames will be set to
+ * a newly allocated array with that length. The caller is then responsible
+ * for freeing both the strings and the array itself.
+ */
+int llvm_addr2line(const char* dso_name,
+ u64 addr,
+ char** file,
+ unsigned int* line,
+ bool unwind_inlines,
+ struct llvm_a2l_frame** inline_frames);
+
+/*
+ * Simple symbolizers for addresses; will convert something like
+ * 0x12345 to "func+0x123". Will return NULL if no symbol was found.
+ *
+ * The returned value must be freed by the caller, with free().
+ */
+char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr);
+char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __PERF_LLVM_C_HELPERS */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8477edefc299..fad227b625d1 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -642,8 +642,9 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
union perf_event *event, struct perf_sample *sample)
{
- dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "\n",
- sample->id, event->lost_samples.lost);
+ dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "%s\n",
+ sample->id, event->lost_samples.lost,
+ event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF ? " (BPF)" : "");
return 0;
}
@@ -2059,7 +2060,8 @@ static int add_callchain_ip(struct thread *thread,
bool branch,
struct branch_flags *flags,
struct iterations *iter,
- u64 branch_from)
+ u64 branch_from,
+ bool symbols)
{
struct map_symbol ms = {};
struct addr_location al;
@@ -2098,7 +2100,8 @@ static int add_callchain_ip(struct thread *thread,
}
goto out;
}
- thread__find_symbol(thread, *cpumode, ip, &al);
+ if (symbols)
+ thread__find_symbol(thread, *cpumode, ip, &al);
}
if (al.sym != NULL) {
@@ -2141,6 +2144,7 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
unsigned int i;
const struct branch_stack *bs = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
+ u64 *branch_stack_cntr = sample->branch_stack_cntr;
struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
if (!bi)
@@ -2150,6 +2154,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
ip__resolve_ams(al->thread, &bi[i].to, entries[i].to);
ip__resolve_ams(al->thread, &bi[i].from, entries[i].from);
bi[i].flags = entries[i].flags;
+ if (branch_stack_cntr)
+ bi[i].branch_stack_cntr = branch_stack_cntr[i];
}
return bi;
}
@@ -2224,7 +2230,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
u64 branch_from,
- bool callee, int end)
+ bool callee, int end,
+ bool symbols)
{
struct ip_callchain *chain = sample->callchain;
u8 cpumode = PERF_RECORD_MISC_USER;
@@ -2234,7 +2241,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
for (i = 0; i < end + 1; i++) {
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, chain->ips[i],
- false, NULL, NULL, branch_from);
+ false, NULL, NULL, branch_from,
+ symbols);
if (err)
return err;
}
@@ -2244,7 +2252,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
for (i = end; i >= 0; i--) {
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, chain->ips[i],
- false, NULL, NULL, branch_from);
+ false, NULL, NULL, branch_from,
+ symbols);
if (err)
return err;
}
@@ -2270,8 +2279,12 @@ static void save_lbr_cursor_node(struct thread *thread,
cursor->curr = cursor->first;
else
cursor->curr = cursor->curr->next;
+
+ map_symbol__exit(&lbr_stitch->prev_lbr_cursor[idx].ms);
memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
sizeof(struct callchain_cursor_node));
+ lbr_stitch->prev_lbr_cursor[idx].ms.maps = maps__get(cursor->curr->ms.maps);
+ lbr_stitch->prev_lbr_cursor[idx].ms.map = map__get(cursor->curr->ms.map);
lbr_stitch->prev_lbr_cursor[idx].valid = true;
cursor->pos++;
@@ -2283,7 +2296,8 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
u64 *branch_from,
- bool callee)
+ bool callee,
+ bool symbols)
{
struct branch_stack *lbr_stack = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -2316,7 +2330,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
- *branch_from);
+ *branch_from, symbols);
if (err)
return err;
@@ -2341,7 +2355,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
- *branch_from);
+ *branch_from, symbols);
if (err)
return err;
save_lbr_cursor_node(thread, cursor, i);
@@ -2356,7 +2370,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
- *branch_from);
+ *branch_from, symbols);
if (err)
return err;
save_lbr_cursor_node(thread, cursor, i);
@@ -2370,7 +2384,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
- *branch_from);
+ *branch_from, symbols);
if (err)
return err;
}
@@ -2482,6 +2496,9 @@ static bool has_stitched_lbr(struct thread *thread,
memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
sizeof(struct callchain_cursor_node));
+ stitch_node->cursor.ms.maps = maps__get(lbr_stitch->prev_lbr_cursor[i].ms.maps);
+ stitch_node->cursor.ms.map = map__get(lbr_stitch->prev_lbr_cursor[i].ms.map);
+
if (callee)
list_add(&stitch_node->node, &lbr_stitch->lists);
else
@@ -2505,6 +2522,8 @@ static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
if (!thread__lbr_stitch(thread)->prev_lbr_cursor)
goto free_lbr_stitch;
+ thread__lbr_stitch(thread)->prev_lbr_cursor_size = max_lbr + 1;
+
INIT_LIST_HEAD(&thread__lbr_stitch(thread)->lists);
INIT_LIST_HEAD(&thread__lbr_stitch(thread)->free_lists);
@@ -2532,7 +2551,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
int max_stack,
- unsigned int max_lbr)
+ unsigned int max_lbr,
+ bool symbols)
{
bool callee = (callchain_param.order == ORDER_CALLEE);
struct ip_callchain *chain = sample->callchain;
@@ -2560,8 +2580,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
max_lbr, callee);
if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
- list_replace_init(&lbr_stitch->lists,
- &lbr_stitch->free_lists);
+ struct stitch_list *stitch_node;
+
+ list_for_each_entry(stitch_node, &lbr_stitch->lists, node)
+ map_symbol__exit(&stitch_node->cursor.ms);
+
+ list_splice_init(&lbr_stitch->lists, &lbr_stitch->free_lists);
}
memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
}
@@ -2570,12 +2594,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
/* Add kernel ip */
err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
parent, root_al, branch_from,
- true, i);
+ true, i, symbols);
if (err)
goto error;
err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
- root_al, &branch_from, true);
+ root_al, &branch_from, true, symbols);
if (err)
goto error;
@@ -2592,14 +2616,14 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
goto error;
}
err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
- root_al, &branch_from, false);
+ root_al, &branch_from, false, symbols);
if (err)
goto error;
/* Add kernel ip */
err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
parent, root_al, branch_from,
- false, i);
+ false, i, symbols);
if (err)
goto error;
}
@@ -2613,7 +2637,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
struct callchain_cursor *cursor,
struct symbol **parent,
struct addr_location *root_al,
- u8 *cpumode, int ent)
+ u8 *cpumode, int ent, bool symbols)
{
int err = 0;
@@ -2623,7 +2647,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
if (ip >= PERF_CONTEXT_MAX) {
err = add_callchain_ip(thread, cursor, parent,
root_al, cpumode, ip,
- false, NULL, NULL, 0);
+ false, NULL, NULL, 0, symbols);
break;
}
}
@@ -2645,7 +2669,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
- int max_stack)
+ int max_stack,
+ bool symbols)
{
struct branch_stack *branch = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -2665,7 +2690,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
root_al, max_stack,
- !env ? 0 : env->max_branches);
+ !env ? 0 : env->max_branches,
+ symbols);
if (err)
return (err < 0) ? err : 0;
}
@@ -2730,13 +2756,14 @@ static int thread__resolve_callchain_sample(struct thread *thread,
root_al,
NULL, be[i].to,
true, &be[i].flags,
- NULL, be[i].from);
+ NULL, be[i].from, symbols);
- if (!err)
+ if (!err) {
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from,
true, &be[i].flags,
- &iter[i], 0);
+ &iter[i], 0, symbols);
+ }
if (err == -EINVAL)
break;
if (err)
@@ -2752,7 +2779,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
check_calls:
if (chain && callchain_param.order != ORDER_CALLEE) {
err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
- &cpumode, chain->nr - first_call);
+ &cpumode, chain->nr - first_call, symbols);
if (err)
return (err < 0) ? err : 0;
}
@@ -2774,7 +2801,7 @@ check_calls:
++nr_entries;
else if (callchain_param.order != ORDER_CALLEE) {
err = find_prev_cpumode(chain, thread, cursor, parent,
- root_al, &cpumode, j);
+ root_al, &cpumode, j, symbols);
if (err)
return (err < 0) ? err : 0;
continue;
@@ -2801,8 +2828,8 @@ check_calls:
if (leaf_frame_caller && leaf_frame_caller != ip) {
err = add_callchain_ip(thread, cursor, parent,
- root_al, &cpumode, leaf_frame_caller,
- false, NULL, NULL, 0);
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0, symbols);
if (err)
return (err < 0) ? err : 0;
}
@@ -2810,7 +2837,7 @@ check_calls:
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
- false, NULL, NULL, 0);
+ false, NULL, NULL, 0, symbols);
if (err)
return (err < 0) ? err : 0;
@@ -2890,7 +2917,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
struct perf_sample *sample,
- int max_stack)
+ int max_stack, bool symbols)
{
/* Can we do dwarf post unwind? */
if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -2902,17 +2929,21 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
(!sample->user_stack.size))
return 0;
+ if (!symbols)
+ pr_debug("Not resolving symbols with an unwinder isn't currently supported\n");
+
return unwind__get_entries(unwind_entry, cursor,
thread, sample, max_stack, false);
}
-int thread__resolve_callchain(struct thread *thread,
- struct callchain_cursor *cursor,
- struct evsel *evsel,
- struct perf_sample *sample,
- struct symbol **parent,
- struct addr_location *root_al,
- int max_stack)
+int __thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack,
+ bool symbols)
{
int ret = 0;
@@ -2925,22 +2956,22 @@ int thread__resolve_callchain(struct thread *thread,
ret = thread__resolve_callchain_sample(thread, cursor,
evsel, sample,
parent, root_al,
- max_stack);
+ max_stack, symbols);
if (ret)
return ret;
ret = thread__resolve_callchain_unwind(thread, cursor,
evsel, sample,
- max_stack);
+ max_stack, symbols);
} else {
ret = thread__resolve_callchain_unwind(thread, cursor,
evsel, sample,
- max_stack);
+ max_stack, symbols);
if (ret)
return ret;
ret = thread__resolve_callchain_sample(thread, cursor,
evsel, sample,
parent, root_al,
- max_stack);
+ max_stack, symbols);
}
return ret;
@@ -3112,7 +3143,8 @@ out:
return addr_cpumode;
}
-struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id)
+struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename,
+ const struct dso_id *id)
{
return dsos__findnew_id(&machine->dsos, filename, id);
}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 82a47bac8023..2e5a4cb342d8 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -178,13 +178,32 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
struct callchain_cursor;
-int thread__resolve_callchain(struct thread *thread,
- struct callchain_cursor *cursor,
- struct evsel *evsel,
- struct perf_sample *sample,
- struct symbol **parent,
- struct addr_location *root_al,
- int max_stack);
+int __thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack,
+ bool symbols);
+
+static inline int thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack)
+{
+ return __thread__resolve_callchain(thread,
+ cursor,
+ evsel,
+ sample,
+ parent,
+ root_al,
+ max_stack,
+ /*symbols=*/true);
+}
/*
* Default guest kernel is defined by parameter --guestkallsyms
@@ -207,7 +226,8 @@ int machine__nr_cpus_avail(struct machine *machine);
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
-struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id);
+struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename,
+ const struct dso_id *id);
struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
size_t machine__fprintf(struct machine *machine, FILE *fp);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e1d14936a60d..d729438b7d65 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -102,16 +102,21 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
return false;
}
-void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
+static void map__init(struct map *map, u64 start, u64 end, u64 pgoff,
+ struct dso *dso, u32 prot, u32 flags)
{
map__set_start(map, start);
map__set_end(map, end);
map__set_pgoff(map, pgoff);
- map__set_reloc(map, 0);
+ assert(map__reloc(map) == 0);
map__set_dso(map, dso__get(dso));
- map__set_mapping_type(map, MAPPING_TYPE__DSO);
- map__set_erange_warned(map, false);
refcount_set(map__refcnt(map), 1);
+ RC_CHK_ACCESS(map)->prot = prot;
+ RC_CHK_ACCESS(map)->flags = flags;
+ map__set_mapping_type(map, MAPPING_TYPE__DSO);
+ assert(map__erange_warned(map) == false);
+ assert(map__priv(map) == false);
+ assert(map__hit(map) == false);
}
struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -124,7 +129,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
struct nsinfo *nsi = NULL;
struct nsinfo *nnsi;
- map = malloc(sizeof(*map));
+ map = zalloc(sizeof(*map));
if (ADD_RC_CHK(result, map)) {
char newfilename[PATH_MAX];
struct dso *dso, *header_bid_dso;
@@ -134,8 +139,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
vdso = is_vdso_map(filename);
no_dso = is_no_dso_memory(filename);
- map->prot = prot;
- map->flags = flags;
nsi = nsinfo__get(thread__nsinfo(thread));
if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
@@ -169,7 +172,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
goto out_delete;
assert(!dso__kernel(dso));
- map__init(result, start, start + len, pgoff, dso);
+ map__init(result, start, start + len, pgoff, dso, prot, flags);
if (anon || no_dso) {
map->mapping_type = MAPPING_TYPE__IDENTITY;
@@ -223,10 +226,8 @@ struct map *map__new2(u64 start, struct dso *dso)
map = calloc(1, sizeof(*map) + (dso__kernel(dso) ? sizeof(struct kmap) : 0));
if (ADD_RC_CHK(result, map)) {
- /*
- * ->end will be filled after we load all the symbols
- */
- map__init(result, start, 0, 0, dso);
+ /* ->end will be filled after we load all the symbols. */
+ map__init(result, start, /*end=*/0, /*pgoff=*/0, dso, /*prot=*/0, /*flags=*/0);
}
return result;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 65e2609fa1b1..4262f5a143be 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -35,6 +35,7 @@ DECLARE_RC_STRUCT(map) {
enum mapping_type mapping_type:8;
bool erange_warned;
bool priv;
+ bool hit;
};
struct kmap;
@@ -83,6 +84,11 @@ static inline bool map__priv(const struct map *map)
return RC_CHK_ACCESS(map)->priv;
}
+static inline bool map__hit(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->hit;
+}
+
static inline refcount_t *map__refcnt(struct map *map)
{
return &RC_CHK_ACCESS(map)->refcnt;
@@ -166,9 +172,6 @@ struct thread;
#define map__for_each_symbol_by_name(map, sym_name, pos, idx) \
__map__for_each_symbol_by_name(map, sym_name, (pos), idx)
-void map__init(struct map *map,
- u64 start, u64 end, u64 pgoff, struct dso *dso);
-
struct dso_id;
struct build_id;
@@ -285,14 +288,19 @@ static inline void map__set_reloc(struct map *map, u64 reloc)
RC_CHK_ACCESS(map)->reloc = reloc;
}
-static inline void map__set_priv(struct map *map, int priv)
+static inline void map__set_priv(struct map *map)
+{
+ RC_CHK_ACCESS(map)->priv = true;
+}
+
+static inline void map__set_hit(struct map *map)
{
- RC_CHK_ACCESS(map)->priv = priv;
+ RC_CHK_ACCESS(map)->hit = true;
}
-static inline void map__set_erange_warned(struct map *map, bool erange_warned)
+static inline void map__set_erange_warned(struct map *map)
{
- RC_CHK_ACCESS(map)->erange_warned = erange_warned;
+ RC_CHK_ACCESS(map)->erange_warned = true;
}
static inline void map__set_dso(struct map *map, struct dso *dso)
diff --git a/tools/perf/util/map_symbol.c b/tools/perf/util/map_symbol.c
index bef5079f2403..6ad2960bc289 100644
--- a/tools/perf/util/map_symbol.c
+++ b/tools/perf/util/map_symbol.c
@@ -13,3 +13,21 @@ void addr_map_symbol__exit(struct addr_map_symbol *ams)
{
map_symbol__exit(&ams->ms);
}
+
+void map_symbol__copy(struct map_symbol *dst, struct map_symbol *src)
+{
+ dst->maps = maps__get(src->maps);
+ dst->map = map__get(src->map);
+ dst->sym = src->sym;
+}
+
+void addr_map_symbol__copy(struct addr_map_symbol *dst, struct addr_map_symbol *src)
+{
+ map_symbol__copy(&dst->ms, &src->ms);
+
+ dst->addr = src->addr;
+ dst->al_addr = src->al_addr;
+ dst->al_level = src->al_level;
+ dst->phys_addr = src->phys_addr;
+ dst->data_page_size = src->data_page_size;
+}
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h
index 72d5ed938ed6..e370bb32ed47 100644
--- a/tools/perf/util/map_symbol.h
+++ b/tools/perf/util/map_symbol.h
@@ -26,4 +26,7 @@ struct addr_map_symbol {
void map_symbol__exit(struct map_symbol *ms);
void addr_map_symbol__exit(struct addr_map_symbol *ams);
+void map_symbol__copy(struct map_symbol *dst, struct map_symbol *src);
+void addr_map_symbol__copy(struct addr_map_symbol *dst, struct addr_map_symbol *src);
+
#endif // __PERF_MAP_SYMBOL
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index be048bd02f36..051feb93ed8d 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -29,6 +29,8 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
};
#undef E
+bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 };
+
static char mem_loads_name[100];
static char mem_stores_name[100];
@@ -163,7 +165,7 @@ int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str)
continue;
if (strstr(e->tag, tok))
- e->record = found = true;
+ perf_mem_record[j] = found = true;
}
tok = strtok_r(NULL, ",", &saveptr);
@@ -192,7 +194,7 @@ static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu
return !stat(path, &st);
}
-int perf_pmu__mem_events_init(struct perf_pmu *pmu)
+static int __perf_pmu__mem_events_init(struct perf_pmu *pmu)
{
const char *mnt = sysfs__mount();
bool found = false;
@@ -219,6 +221,18 @@ int perf_pmu__mem_events_init(struct perf_pmu *pmu)
return found ? 0 : -ENOENT;
}
+int perf_pmu__mem_events_init(void)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
+ if (__perf_pmu__mem_events_init(pmu))
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
void perf_pmu__mem_events_list(struct perf_pmu *pmu)
{
int j;
@@ -249,7 +263,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr)
for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
e = perf_pmu__mem_events_ptr(pmu, j);
- if (!e->record)
+ if (!perf_mem_record[j])
continue;
if (!e->supported) {
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index ca31014d7934..8dc27db9fd52 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -6,7 +6,6 @@
#include <linux/types.h>
struct perf_mem_event {
- bool record;
bool supported;
bool ldlat;
u32 aux_event;
@@ -28,9 +27,10 @@ struct perf_pmu;
extern unsigned int perf_mem_events__loads_ldlat;
extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
+extern bool perf_mem_record[PERF_MEM_EVENTS__MAX];
int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str);
-int perf_pmu__mem_events_init(struct perf_pmu *pmu);
+int perf_pmu__mem_events_init(void);
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i);
struct perf_pmu *perf_mem_events_find_pmu(void);
diff --git a/tools/perf/util/mem-info.c b/tools/perf/util/mem-info.c
index 27d67721a695..d3efa9c139f2 100644
--- a/tools/perf/util/mem-info.c
+++ b/tools/perf/util/mem-info.c
@@ -33,3 +33,16 @@ struct mem_info *mem_info__new(void)
return result;
}
+
+struct mem_info *mem_info__clone(struct mem_info *mi)
+{
+ struct mem_info *result = mem_info__new();
+
+ if (result) {
+ addr_map_symbol__copy(mem_info__iaddr(result), mem_info__iaddr(mi));
+ addr_map_symbol__copy(mem_info__daddr(result), mem_info__daddr(mi));
+ mem_info__data_src(result)->val = mem_info__data_src(mi)->val;
+ }
+
+ return result;
+}
diff --git a/tools/perf/util/mem-info.h b/tools/perf/util/mem-info.h
index 0f68e29f311b..df75e94ed3d0 100644
--- a/tools/perf/util/mem-info.h
+++ b/tools/perf/util/mem-info.h
@@ -15,6 +15,7 @@ DECLARE_RC_STRUCT(mem_info) {
};
struct mem_info *mem_info__new(void);
+struct mem_info *mem_info__clone(struct mem_info *mi);
struct mem_info *mem_info__get(struct mem_info *mi);
void mem_info__put(struct mem_info *mi);
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 69f6a46402c3..4dff3e925a47 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1436,7 +1436,7 @@ err_out:
* parse_ids - Build the event string for the ids and parse them creating an
* evlist. The encoded metric_ids are decoded.
* @metric_no_merge: is metric sharing explicitly disabled.
- * @fake_pmu: used when testing metrics not supported by the current CPU.
+ * @fake_pmu: use a fake PMU when testing metrics not supported by the current CPU.
* @ids: the event identifiers parsed from a metric.
* @modifier: any modifiers added to the events.
* @group_events: should events be placed in a weak group.
@@ -1444,7 +1444,7 @@ err_out:
* the overall list of metrics.
* @out_evlist: the created list of events.
*/
-static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+static int parse_ids(bool metric_no_merge, bool fake_pmu,
struct expr_parse_ctx *ids, const char *modifier,
bool group_events, const bool tool_events[PERF_TOOL_MAX],
struct evlist **out_evlist)
@@ -1528,7 +1528,7 @@ static int parse_groups(struct evlist *perf_evlist,
bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide,
- struct perf_pmu *fake_pmu,
+ bool fake_pmu,
struct rblist *metric_events_list,
const struct pmu_metrics_table *table)
{
@@ -1703,7 +1703,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
metric_no_threshold, user_requested_cpu_list, system_wide,
- /*fake_pmu=*/NULL, metric_events, table);
+ /*fake_pmu=*/false, metric_events, table);
}
int metricgroup__parse_groups_test(struct evlist *evlist,
@@ -1717,7 +1717,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
/*metric_no_threshold=*/false,
/*user_requested_cpu_list=*/NULL,
/*system_wide=*/false,
- &perf_pmu__fake, metric_events, table);
+ /*fake_pmu=*/true, metric_events, table);
}
struct metricgroup__has_metric_data {
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 122ee198a86e..43b02293f1d2 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -230,9 +230,7 @@ void mmap__munmap(struct mmap *map)
{
bitmap_free(map->affinity_mask.bits);
-#ifndef PYTHON_PERF
zstd_fini(&map->zstd_data);
-#endif
perf_mmap__aio_munmap(map);
if (map->data != NULL) {
@@ -295,12 +293,10 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu
map->core.flush = mp->flush;
-#ifndef PYTHON_PERF
if (zstd_init(&map->zstd_data, mp->comp_level)) {
pr_debug2("failed to init mmap compressor, error %d\n", errno);
return -1;
}
-#endif
if (mp->comp_level && !perf_mmap__aio_enabled(map)) {
map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 321586fb5556..9a8be1e46d67 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -8,6 +8,7 @@
#include <sys/ioctl.h>
#include <sys/param.h>
#include "term.h"
+#include "env.h"
#include "evlist.h"
#include "evsel.h"
#include <subcmd/parse-options.h>
@@ -227,12 +228,12 @@ __add_event(struct list_head *list, int *idx,
bool init_attr,
const char *name, const char *metric_id, struct perf_pmu *pmu,
struct list_head *config_terms, bool auto_merge_stats,
- const char *cpu_list)
+ struct perf_cpu_map *cpu_list)
{
struct evsel *evsel;
- struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
- cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
+ struct perf_cpu_map *cpus = perf_cpu_map__is_empty(cpu_list) && pmu ? pmu->cpus : cpu_list;
+ cpus = perf_cpu_map__get(cpus);
if (pmu)
perf_pmu__warn_invalid_formats(pmu);
@@ -305,16 +306,17 @@ static int add_event_tool(struct list_head *list, int *idx,
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_DUMMY,
};
- const char *cpu_list = NULL;
+ struct perf_cpu_map *cpu_list = NULL;
if (tool_event == PERF_TOOL_DURATION_TIME) {
/* Duration time is gathered globally, pretend it is only on CPU0. */
- cpu_list = "0";
+ cpu_list = perf_cpu_map__new("0");
}
evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL,
/*metric_id=*/NULL, /*pmu=*/NULL,
/*config_terms=*/NULL, /*auto_merge_stats=*/false,
cpu_list);
+ perf_cpu_map__put(cpu_list);
if (!evsel)
return -ENOMEM;
evsel->tool_event = tool_event;
@@ -670,6 +672,26 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
}
#endif /* HAVE_LIBTRACEEVENT */
+size_t default_breakpoint_len(void)
+{
+#if defined(__i386__)
+ static int len;
+
+ if (len == 0) {
+ struct perf_env env = {};
+
+ perf_env__init(&env);
+ len = perf_env__kernel_is_64_bit(&env) ? sizeof(u64) : sizeof(long);
+ perf_env__exit(&env);
+ }
+ return len;
+#elif defined(__aarch64__)
+ return 4;
+#else
+ return sizeof(long);
+#endif
+}
+
static int
parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
{
@@ -728,7 +750,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state,
/* Provide some defaults if len is not specified */
if (!len) {
if (attr.bp_type == HW_BREAKPOINT_X)
- len = sizeof(long);
+ len = default_breakpoint_len();
else
len = HW_BREAKPOINT_LEN_4;
}
@@ -1478,8 +1500,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
}
/* Look for event names in the terms and rewrite into format based terms. */
- if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms,
- &info, &alias_rewrote_terms, err)) {
+ if (perf_pmu__check_alias(pmu, &parsed_terms,
+ &info, &alias_rewrote_terms, err)) {
parse_events_terms__exit(&parsed_terms);
return -EINVAL;
}
@@ -1515,8 +1537,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
return -ENOMEM;
}
- if (!parse_state->fake_pmu &&
- perf_pmu__config(pmu, &attr, &parsed_terms, parse_state->error)) {
+ if (perf_pmu__config(pmu, &attr, &parsed_terms, parse_state->error)) {
free_config_terms(&config_terms);
parse_events_terms__exit(&parsed_terms);
return -EINVAL;
@@ -1536,11 +1557,6 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel->percore = config_term_percore(&evsel->config_terms);
- if (parse_state->fake_pmu) {
- parse_events_terms__exit(&parsed_terms);
- return 0;
- }
-
parse_events_terms__exit(&parsed_terms);
free((char *)evsel->unit);
evsel->unit = strdup(info.unit);
@@ -1616,13 +1632,13 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
}
if (parse_state->fake_pmu) {
- if (!parse_events_add_pmu(parse_state, list, parse_state->fake_pmu, &parsed_terms,
+ if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms,
/*auto_merge_stats=*/true)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
parse_events_terms__to_strbuf(&parsed_terms, &sb);
- pr_debug("%s -> %s/%s/\n", event_name, "fake_pmu", sb.buf);
+ pr_debug("%s -> fake/%s/\n", event_name, sb.buf);
strbuf_release(&sb);
ok++;
}
@@ -1656,11 +1672,18 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
INIT_LIST_HEAD(*listp);
/* Attempt to add to list assuming event_or_pmu is a PMU name. */
- pmu = parse_state->fake_pmu ?: perf_pmus__find(event_or_pmu);
+ pmu = perf_pmus__find(event_or_pmu);
if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms,
/*auto_merge_stats=*/false))
return 0;
+ if (parse_state->fake_pmu) {
+ if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(),
+ const_parsed_terms,
+ /*auto_merge_stats=*/false))
+ return 0;
+ }
+
pmu = NULL;
/* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. */
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
@@ -1811,6 +1834,8 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state,
evsel->weak_group = true;
if (mod.bpf)
evsel->bpf_counter = true;
+ if (mod.retire_lat)
+ evsel->retire_lat = true;
}
return 0;
}
@@ -1959,8 +1984,8 @@ static int evsel__compute_group_pmu_name(struct evsel *evsel,
}
}
}
- /* Assign the actual name taking care that the fake PMU lacks a name. */
- evsel->group_pmu_name = strdup(group_pmu_name ?: "fake");
+ /* Record computed name. */
+ evsel->group_pmu_name = strdup(group_pmu_name);
return evsel->group_pmu_name ? 0 : -ENOMEM;
}
@@ -2122,7 +2147,7 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
}
int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter,
- struct parse_events_error *err, struct perf_pmu *fake_pmu,
+ struct parse_events_error *err, bool fake_pmu,
bool warn_if_reordered, bool fake_tp)
{
struct parse_events_state parse_state = {
@@ -2341,7 +2366,7 @@ int parse_events_option(const struct option *opt, const char *str,
parse_events_error__init(&err);
ret = __parse_events(*args->evlistp, str, args->pmu_filter, &err,
- /*fake_pmu=*/NULL, /*warn_if_reordered=*/true,
+ /*fake_pmu=*/false, /*warn_if_reordered=*/true,
/*fake_tp=*/false);
if (ret) {
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e13de2c8b706..10cc9c433116 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -31,14 +31,14 @@ int parse_events_option(const struct option *opt, const char *str, int unset);
int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
__attribute__((nonnull(1, 2, 4)))
int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter,
- struct parse_events_error *error, struct perf_pmu *fake_pmu,
+ struct parse_events_error *error, bool fake_pmu,
bool warn_if_reordered, bool fake_tp);
__attribute__((nonnull(1, 2, 3)))
static inline int parse_events(struct evlist *evlist, const char *str,
struct parse_events_error *err)
{
- return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/NULL,
+ return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/false,
/*warn_if_reordered=*/true, /*fake_tp=*/false);
}
@@ -150,8 +150,8 @@ struct parse_events_state {
struct parse_events_terms *terms;
/* Start token. */
int stoken;
- /* Special fake PMU marker for testing. */
- struct perf_pmu *fake_pmu;
+ /* Use the fake PMU marker for testing. */
+ bool fake_pmu;
/* Skip actual tracepoint processing for testing. */
bool fake_tp;
/* If non-null, when wildcard matching only match the given PMU. */
@@ -203,6 +203,7 @@ struct parse_events_modifier {
bool hypervisor : 1; /* 'h' */
bool guest : 1; /* 'G' */
bool host : 1; /* 'H' */
+ bool retire_lat : 1; /* 'R' */
};
int parse_events__modifier_event(struct parse_events_state *parse_state, void *loc,
@@ -285,4 +286,6 @@ static inline bool is_sdt_event(char *str __maybe_unused)
}
#endif /* HAVE_LIBELF_SUPPORT */
+size_t default_breakpoint_len(void);
+
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 16045c383ada..5a0bcd7f166a 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -209,6 +209,7 @@ static int modifiers(struct parse_events_state *parse_state, yyscan_t scanner)
CASE('W', weak);
CASE('e', exclusive);
CASE('b', bpf);
+ CASE('R', retire_lat);
default:
return PE_ERROR;
}
@@ -250,7 +251,7 @@ drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
* If you add a modifier you need to update check_modifier().
* Also, the letters in modifier_event must not be in modifier_bp.
*/
-modifier_event [ukhpPGHSDIWeb]{1,15}
+modifier_event [ukhpPGHSDIWebR]{1,16}
modifier_bp [rwx]{1,3}
lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node)
lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss)
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 986166bc7c78..61bdda01a05a 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -30,10 +30,6 @@
#include "util/evsel_config.h"
#include <regex.h>
-struct perf_pmu perf_pmu__fake = {
- .name = "fake",
-};
-
#define UNIT_MAX_LEN 31 /* max length for event unit name */
enum event_source {
@@ -367,8 +363,8 @@ error:
return -1;
}
-static int
-perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+static bool perf_pmu__parse_event_source_bool(const char *pmu_name, const char *event_name,
+ const char *suffix)
{
char path[PATH_MAX];
size_t len;
@@ -376,37 +372,36 @@ perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
if (!len)
- return 0;
- scnprintf(path + len, sizeof(path) - len, "%s/events/%s.per-pkg", pmu->name, alias->name);
+ return false;
+
+ scnprintf(path + len, sizeof(path) - len, "%s/events/%s.%s", pmu_name, event_name, suffix);
fd = open(path, O_RDONLY);
if (fd == -1)
- return -1;
+ return false;
- close(fd);
+#ifndef NDEBUG
+ {
+ char buf[8];
- alias->per_pkg = true;
- return 0;
+ len = read(fd, buf, sizeof(buf));
+ assert(len == 1 || len == 2);
+ assert(buf[0] == '1');
+ }
+#endif
+
+ close(fd);
+ return true;
}
-static int perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+static void perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
{
- char path[PATH_MAX];
- size_t len;
- int fd;
-
- len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
- if (!len)
- return 0;
- scnprintf(path + len, sizeof(path) - len, "%s/events/%s.snapshot", pmu->name, alias->name);
-
- fd = open(path, O_RDONLY);
- if (fd == -1)
- return -1;
+ alias->per_pkg = perf_pmu__parse_event_source_bool(pmu->name, alias->name, "per-pkg");
+}
- alias->snapshot = true;
- close(fd);
- return 0;
+static void perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+{
+ alias->snapshot = perf_pmu__parse_event_source_bool(pmu->name, alias->name, "snapshot");
}
/* Delete an alias entry. */
@@ -1173,6 +1168,11 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm
return pmu;
}
+static bool perf_pmu__is_fake(const struct perf_pmu *pmu)
+{
+ return pmu->type == PERF_PMU_TYPE_FAKE;
+}
+
void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
{
struct perf_pmu_format *format;
@@ -1183,7 +1183,7 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
pmu->formats_checked = true;
/* fake pmu doesn't have format list */
- if (pmu == &perf_pmu__fake)
+ if (perf_pmu__is_fake(pmu))
return;
list_for_each_entry(format, &pmu->format, list) {
@@ -1199,8 +1199,12 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
bool evsel__is_aux_event(const struct evsel *evsel)
{
- struct perf_pmu *pmu = evsel__find_pmu(evsel);
+ struct perf_pmu *pmu;
+ if (evsel->needs_auxtrace_mmap)
+ return true;
+
+ pmu = evsel__find_pmu(evsel);
return pmu && pmu->auxtrace;
}
@@ -1507,6 +1511,10 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
{
bool zero = !!pmu->perf_event_attr_init_default;
+ /* Fake PMU doesn't have proper terms so nothing to configure in attr. */
+ if (perf_pmu__is_fake(pmu))
+ return 0;
+
return perf_pmu__config_terms(pmu, attr, head_terms, zero, err);
}
@@ -1615,6 +1623,10 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
info->scale = 0.0;
info->snapshot = false;
+ /* Fake PMU doesn't rewrite terms. */
+ if (perf_pmu__is_fake(pmu))
+ goto out;
+
list_for_each_entry_safe(term, h, &head_terms->terms, list) {
alias = pmu_find_alias(pmu, term);
if (!alias)
@@ -1637,7 +1649,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
list_del_init(&term->list);
parse_events_term__delete(term);
}
-
+out:
/*
* if no unit or scale found in aliases, then
* set defaults as for evsel
@@ -1844,6 +1856,7 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
struct perf_pmu_alias *event;
struct pmu_event_info info = {
.pmu = pmu,
+ .event_type_desc = "Kernel PMU event",
};
int ret = 0;
struct strbuf sb;
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index b2d3fd291f02..4397c48ad569 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -36,6 +36,10 @@ struct perf_pmu_caps {
struct list_head list;
};
+enum {
+ PERF_PMU_TYPE_FAKE = 0xFFFFFFFF,
+};
+
/**
* struct perf_pmu
*/
@@ -173,9 +177,6 @@ struct perf_pmu {
struct perf_mem_event *mem_events;
};
-/** @perf_pmu__fake: A special global PMU used for testing. */
-extern struct perf_pmu perf_pmu__fake;
-
struct perf_pmu_info {
const char *unit;
double scale;
@@ -193,6 +194,7 @@ struct pmu_event_info {
const char *encoding_desc;
const char *topic;
const char *pmu_name;
+ const char *event_type_desc;
const char *str;
bool deprecated;
};
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index 3fcabfd8fca1..52109af5f2f1 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -69,7 +69,7 @@ size_t pmu_name_len_no_suffix(const char *str)
int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name)
{
- unsigned long lhs_num = 0, rhs_num = 0;
+ unsigned long long lhs_num = 0, rhs_num = 0;
size_t lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name);
size_t rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name);
int ret = strncmp(lhs_pmu_name, rhs_pmu_name,
@@ -79,9 +79,9 @@ int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name)
return ret;
if (lhs_pmu_name_len + 1 < strlen(lhs_pmu_name))
- lhs_num = strtoul(&lhs_pmu_name[lhs_pmu_name_len + 1], NULL, 16);
+ lhs_num = strtoull(&lhs_pmu_name[lhs_pmu_name_len + 1], NULL, 16);
if (rhs_pmu_name_len + 1 < strlen(rhs_pmu_name))
- rhs_num = strtoul(&rhs_pmu_name[rhs_pmu_name_len + 1], NULL, 16);
+ rhs_num = strtoull(&rhs_pmu_name[rhs_pmu_name_len + 1], NULL, 16);
return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 1 : 0);
}
@@ -371,6 +371,7 @@ struct sevent {
const char *encoding_desc;
const char *topic;
const char *pmu_name;
+ const char *event_type_desc;
bool deprecated;
};
@@ -444,6 +445,7 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
COPY_STR(encoding_desc);
COPY_STR(topic);
COPY_STR(pmu_name);
+ COPY_STR(event_type_desc);
#undef COPY_STR
s->deprecated = info->deprecated;
state->index++;
@@ -498,7 +500,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
aliases[j].alias,
aliases[j].scale_unit,
aliases[j].deprecated,
- "Kernel PMU event",
+ aliases[j].event_type_desc,
aliases[j].desc,
aliases[j].long_desc,
aliases[j].encoding_desc);
@@ -511,6 +513,7 @@ free:
zfree(&aliases[j].encoding_desc);
zfree(&aliases[j].topic);
zfree(&aliases[j].pmu_name);
+ zfree(&aliases[j].event_type_desc);
}
if (printed && pager_in_use())
printf("\n");
@@ -720,3 +723,14 @@ struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name)
*/
return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true);
}
+
+struct perf_pmu *perf_pmus__fake_pmu(void)
+{
+ static struct perf_pmu fake = {
+ .name = "fake",
+ .type = PERF_PMU_TYPE_FAKE,
+ .format = LIST_HEAD_INIT(fake.format),
+ };
+
+ return &fake;
+}
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
index bdbff02324bb..e1742b56eec7 100644
--- a/tools/perf/util/pmus.h
+++ b/tools/perf/util/pmus.h
@@ -30,5 +30,6 @@ bool perf_pmus__supports_extended_type(void);
char *perf_pmus__default_pmu_name(void);
struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name);
+struct perf_pmu *perf_pmus__fake_pmu(void);
#endif /* __PMUS_H */
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index 3f38c27f0157..81e0135cddf0 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -68,11 +68,12 @@ void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unus
struct dirent **sys_namelist = NULL;
int sys_items;
- put_tracing_file(events_path);
if (events_fd < 0) {
pr_err("Error: failed to open tracing events directory\n");
+ pr_err("%s: %s\n", events_path, strerror(errno));
return;
}
+ put_tracing_file(events_path);
sys_items = tracing_events__scandir_alphasort(&sys_namelist);
diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
index a950e9157d2d..a33a7726422d 100644
--- a/tools/perf/util/print_insn.c
+++ b/tools/perf/util/print_insn.c
@@ -32,7 +32,9 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
#ifdef HAVE_LIBCAPSTONE_SUPPORT
#include <capstone/capstone.h>
-static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
+int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style);
+
+int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style)
{
cs_arch arch;
cs_mode mode;
@@ -62,7 +64,13 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
}
if (machine__normalized_is(machine, "x86")) {
- cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
+ /*
+ * In case of using capstone_init while symbol__disassemble
+ * setting CS_OPT_SYNTAX_ATT depends if disassembler_style opts
+ * is set via annotation args
+ */
+ if (disassembler_style)
+ cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
/*
* Resolving address operands to symbols is implemented
* on x86 by investigating instruction details.
@@ -122,7 +130,7 @@ ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpum
int ret;
/* TODO: Try to initiate capstone only once but need a proper place. */
- ret = capstone_init(machine, &cs_handle, is64bit);
+ ret = capstone_init(machine, &cs_handle, is64bit, true);
if (ret < 0)
return ret;
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 3be882b2e845..31a223eaf8e6 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -20,6 +20,7 @@
#include "util/env.h"
#include "util/kvm-stat.h"
#include "util/kwork.h"
+#include "util/sample.h"
#include "util/lock-contention.h"
#include <internal/lib.h>
#include "../builtin.h"
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 6fe478b0b61b..73846b73d0cf 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -912,7 +912,7 @@ static int
s390_cpumsf_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct s390_cpumsf *sf = container_of(session->auxtrace,
struct s390_cpumsf,
@@ -952,15 +952,10 @@ s390_cpumsf_process_event(struct perf_session *session,
return err;
}
-struct s390_cpumsf_synth {
- struct perf_tool cpumsf_tool;
- struct perf_session *session;
-};
-
static int
s390_cpumsf_process_auxtrace_event(struct perf_session *session,
union perf_event *event __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct s390_cpumsf *sf = container_of(session->auxtrace,
struct s390_cpumsf,
@@ -1003,7 +998,7 @@ static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
}
static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index fb00f3ad6815..d7183134b669 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -762,6 +762,8 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch
}
}
+#define MAX_REG_SIZE 128
+
static int set_regs_in_dict(PyObject *dict,
struct perf_sample *sample,
struct evsel *evsel)
@@ -769,14 +771,7 @@ static int set_regs_in_dict(PyObject *dict,
struct perf_event_attr *attr = &evsel->core.attr;
const char *arch = perf_env__arch(evsel__env(evsel));
- /*
- * Here value 28 is a constant size which can be used to print
- * one register value and its corresponds to:
- * 16 chars is to specify 64 bit register in hexadecimal.
- * 2 chars is for appending "0x" to the hexadecimal value and
- * 10 chars is for register name.
- */
- int size = __sw_hweight64(attr->sample_regs_intr) * 28;
+ int size = (__sw_hweight64(attr->sample_regs_intr) * MAX_REG_SIZE) + 1;
char *bf = malloc(size);
if (!bf)
return -1;
@@ -888,6 +883,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
set_sample_read_in_dict(dict_sample, sample, evsel);
pydict_set_item_string_decref(dict_sample, "weight",
PyLong_FromUnsignedLongLong(sample->weight));
+ pydict_set_item_string_decref(dict_sample, "ins_lat",
+ PyLong_FromUnsignedLong(sample->ins_lat));
pydict_set_item_string_decref(dict_sample, "transaction",
PyLong_FromUnsignedLongLong(sample->transaction));
set_sample_datasrc_in_dict(dict_sample, sample);
@@ -1317,7 +1314,7 @@ static void python_export_sample_table(struct db_export *dbe,
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- t = tuple_new(27);
+ t = tuple_new(28);
tuple_set_d64(t, 0, es->db_id);
tuple_set_d64(t, 1, es->evsel->db_id);
@@ -1346,6 +1343,7 @@ static void python_export_sample_table(struct db_export *dbe,
tuple_set_s32(t, 24, es->sample->flags);
tuple_set_d64(t, 25, es->sample->id);
tuple_set_d64(t, 26, es->sample->stream_id);
+ tuple_set_u32(t, 27, es->sample->ins_lat);
call_object(tables->sample_handler, t, "sample_table");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5596bed1b8c8..dbaf07bf6c5f 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -36,81 +36,20 @@
#include "util.h"
#include "arch/common.h"
#include "units.h"
+#include "annotate.h"
#include <internal/lib.h>
-#ifdef HAVE_ZSTD_SUPPORT
-static int perf_session__process_compressed_event(struct perf_session *session,
- union perf_event *event, u64 file_offset,
- const char *file_path)
-{
- void *src;
- size_t decomp_size, src_size;
- u64 decomp_last_rem = 0;
- size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
- struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last;
-
- if (decomp_last) {
- decomp_last_rem = decomp_last->size - decomp_last->head;
- decomp_len += decomp_last_rem;
- }
-
- mmap_len = sizeof(struct decomp) + decomp_len;
- decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (decomp == MAP_FAILED) {
- pr_err("Couldn't allocate memory for decompression\n");
- return -1;
- }
-
- decomp->file_pos = file_offset;
- decomp->file_path = file_path;
- decomp->mmap_len = mmap_len;
- decomp->head = 0;
-
- if (decomp_last_rem) {
- memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
- decomp->size = decomp_last_rem;
- }
-
- src = (void *)event + sizeof(struct perf_record_compressed);
- src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
-
- decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size,
- &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
- if (!decomp_size) {
- munmap(decomp, mmap_len);
- pr_err("Couldn't decompress data\n");
- return -1;
- }
-
- decomp->size += decomp_size;
-
- if (session->active_decomp->decomp == NULL)
- session->active_decomp->decomp = decomp;
- else
- session->active_decomp->decomp_last->next = decomp;
-
- session->active_decomp->decomp_last = decomp;
-
- pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
-
- return 0;
-}
-#else /* !HAVE_ZSTD_SUPPORT */
-#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
-#endif
-
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
u64 file_offset,
const char *file_path);
-static int perf_session__open(struct perf_session *session, int repipe_fd)
+static int perf_session__open(struct perf_session *session)
{
struct perf_data *data = session->data;
- if (perf_session__read_header(session, repipe_fd) < 0) {
+ if (perf_session__read_header(session) < 0) {
pr_err("incompatible file format (rerun with -v to learn more)\n");
return -1;
}
@@ -196,8 +135,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
}
struct perf_session *__perf_session__new(struct perf_data *data,
- bool repipe, int repipe_fd,
- struct perf_tool *tool)
+ struct perf_tool *tool,
+ bool trace_event_repipe)
{
int ret = -ENOMEM;
struct perf_session *session = zalloc(sizeof(*session));
@@ -205,7 +144,7 @@ struct perf_session *__perf_session__new(struct perf_data *data,
if (!session)
goto out;
- session->repipe = repipe;
+ session->trace_event_repipe = trace_event_repipe;
session->tool = tool;
session->decomp_data.zstd_decomp = &session->zstd_data;
session->active_decomp = &session->decomp_data;
@@ -223,7 +162,7 @@ struct perf_session *__perf_session__new(struct perf_data *data,
session->data = data;
if (perf_data__is_read(data)) {
- ret = perf_session__open(session, repipe_fd);
+ ret = perf_session__open(session);
if (ret < 0)
goto out_delete;
@@ -304,6 +243,7 @@ void perf_session__delete(struct perf_session *session)
return;
auxtrace__free(session);
auxtrace_index__free(&session->auxtrace_index);
+ debuginfo_cache__delete();
perf_session__destroy_kernel_maps(session);
perf_decomp__release_events(session->decomp_data.decomp);
perf_env__exit(&session->header.env);
@@ -319,251 +259,6 @@ void perf_session__delete(struct perf_session *session)
free(session);
}
-static int process_event_synth_tracing_data_stub(struct perf_session *session
- __maybe_unused,
- union perf_event *event
- __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct evlist **pevlist
- __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct evlist **pevlist
- __maybe_unused)
-{
- if (dump_trace)
- perf_event__fprintf_event_update(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample __maybe_unused,
- struct evsel *evsel __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
- union perf_event *event __maybe_unused,
- struct ordered_events *oe __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int skipn(int fd, off_t n)
-{
- char buf[4096];
- ssize_t ret;
-
- while (n > 0) {
- ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
- if (ret <= 0)
- return ret;
- n -= ret;
- }
-
- return 0;
-}
-
-static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
- union perf_event *event)
-{
- dump_printf(": unhandled!\n");
- if (perf_data__is_pipe(session->data))
- skipn(perf_data__fd(session->data), event->auxtrace.size);
- return event->auxtrace.size;
-}
-
-static int process_event_op2_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-
-static
-int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- if (dump_trace)
- perf_event__fprintf_thread_map(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static
-int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- if (dump_trace)
- perf_event__fprintf_cpu_map(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static
-int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- if (dump_trace)
- perf_event__fprintf_stat_config(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
- union perf_event *event)
-{
- if (dump_trace)
- perf_event__fprintf_stat(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
- union perf_event *event)
-{
- if (dump_trace)
- perf_event__fprintf_stat_round(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused,
- union perf_event *event)
-{
- if (dump_trace)
- perf_event__fprintf_time_conv(event, stdout);
-
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused,
- u64 file_offset __maybe_unused,
- const char *file_path __maybe_unused)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-void perf_tool__fill_defaults(struct perf_tool *tool)
-{
- if (tool->sample == NULL)
- tool->sample = process_event_sample_stub;
- if (tool->mmap == NULL)
- tool->mmap = process_event_stub;
- if (tool->mmap2 == NULL)
- tool->mmap2 = process_event_stub;
- if (tool->comm == NULL)
- tool->comm = process_event_stub;
- if (tool->namespaces == NULL)
- tool->namespaces = process_event_stub;
- if (tool->cgroup == NULL)
- tool->cgroup = process_event_stub;
- if (tool->fork == NULL)
- tool->fork = process_event_stub;
- if (tool->exit == NULL)
- tool->exit = process_event_stub;
- if (tool->lost == NULL)
- tool->lost = perf_event__process_lost;
- if (tool->lost_samples == NULL)
- tool->lost_samples = perf_event__process_lost_samples;
- if (tool->aux == NULL)
- tool->aux = perf_event__process_aux;
- if (tool->itrace_start == NULL)
- tool->itrace_start = perf_event__process_itrace_start;
- if (tool->context_switch == NULL)
- tool->context_switch = perf_event__process_switch;
- if (tool->ksymbol == NULL)
- tool->ksymbol = perf_event__process_ksymbol;
- if (tool->bpf == NULL)
- tool->bpf = perf_event__process_bpf;
- if (tool->text_poke == NULL)
- tool->text_poke = perf_event__process_text_poke;
- if (tool->aux_output_hw_id == NULL)
- tool->aux_output_hw_id = perf_event__process_aux_output_hw_id;
- if (tool->read == NULL)
- tool->read = process_event_sample_stub;
- if (tool->throttle == NULL)
- tool->throttle = process_event_stub;
- if (tool->unthrottle == NULL)
- tool->unthrottle = process_event_stub;
- if (tool->attr == NULL)
- tool->attr = process_event_synth_attr_stub;
- if (tool->event_update == NULL)
- tool->event_update = process_event_synth_event_update_stub;
- if (tool->tracing_data == NULL)
- tool->tracing_data = process_event_synth_tracing_data_stub;
- if (tool->build_id == NULL)
- tool->build_id = process_event_op2_stub;
- if (tool->finished_round == NULL) {
- if (tool->ordered_events)
- tool->finished_round = perf_event__process_finished_round;
- else
- tool->finished_round = process_finished_round_stub;
- }
- if (tool->id_index == NULL)
- tool->id_index = process_event_op2_stub;
- if (tool->auxtrace_info == NULL)
- tool->auxtrace_info = process_event_op2_stub;
- if (tool->auxtrace == NULL)
- tool->auxtrace = process_event_auxtrace_stub;
- if (tool->auxtrace_error == NULL)
- tool->auxtrace_error = process_event_op2_stub;
- if (tool->thread_map == NULL)
- tool->thread_map = process_event_thread_map_stub;
- if (tool->cpu_map == NULL)
- tool->cpu_map = process_event_cpu_map_stub;
- if (tool->stat_config == NULL)
- tool->stat_config = process_event_stat_config_stub;
- if (tool->stat == NULL)
- tool->stat = process_stat_stub;
- if (tool->stat_round == NULL)
- tool->stat_round = process_stat_round_stub;
- if (tool->time_conv == NULL)
- tool->time_conv = process_event_time_conv_stub;
- if (tool->feature == NULL)
- tool->feature = process_event_op2_stub;
- if (tool->compressed == NULL)
- tool->compressed = perf_session__process_compressed_event;
- if (tool->finished_init == NULL)
- tool->finished_init = process_event_op2_stub;
-}
-
static void swap_sample_id_all(union perf_event *event, void *data)
{
void *end = (void *) event + event->header.size;
@@ -1076,7 +771,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
* Flush every events below timestamp 7
* etc...
*/
-int perf_event__process_finished_round(struct perf_tool *tool __maybe_unused,
+int perf_event__process_finished_round(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct ordered_events *oe)
{
@@ -1161,7 +856,6 @@ static void branch_stack__printf(struct perf_sample *sample,
struct branch_entry *entries = perf_sample__branch_entries(sample);
bool callstack = evsel__has_branch_callstack(evsel);
u64 *branch_stack_cntr = sample->branch_stack_cntr;
- struct perf_env *env = evsel__env(evsel);
uint64_t i;
if (!callstack) {
@@ -1205,8 +899,11 @@ static void branch_stack__printf(struct perf_sample *sample,
}
if (branch_stack_cntr) {
+ unsigned int br_cntr_width, br_cntr_nr;
+
+ perf_env__find_br_cntr_info(evsel__env(evsel), &br_cntr_nr, &br_cntr_width);
printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n",
- sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr);
+ sample->branch_stack->nr, br_cntr_width, br_cntr_nr);
for (i = 0; i < sample->branch_stack->nr; i++)
printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]);
}
@@ -1470,7 +1167,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
}
static int deliver_sample_value(struct evlist *evlist,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct sample_read_value *v,
@@ -1502,7 +1199,7 @@ static int deliver_sample_value(struct evlist *evlist,
}
static int deliver_sample_group(struct evlist *evlist,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine,
@@ -1511,6 +1208,9 @@ static int deliver_sample_group(struct evlist *evlist,
int ret = -EINVAL;
struct sample_read_value *v = sample->read.group.values;
+ if (tool->dont_split_sample_group)
+ return deliver_sample_value(evlist, tool, event, sample, v, machine);
+
sample_read_group__for_each(v, sample->read.group.nr, read_format) {
ret = deliver_sample_value(evlist, tool, event, sample, v,
machine);
@@ -1521,7 +1221,7 @@ static int deliver_sample_group(struct evlist *evlist,
return ret;
}
-static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
+static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool *tool,
union perf_event *event, struct perf_sample *sample,
struct evsel *evsel, struct machine *machine)
{
@@ -1546,7 +1246,7 @@ static int machines__deliver_event(struct machines *machines,
struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool, u64 file_offset,
+ const struct perf_tool *tool, u64 file_offset,
const char *file_path)
{
struct evsel *evsel;
@@ -1592,8 +1292,9 @@ static int machines__deliver_event(struct machines *machines,
evlist->stats.total_lost += event->lost.lost;
return tool->lost(tool, event, sample, machine);
case PERF_RECORD_LOST_SAMPLES:
- if (tool->lost_samples == perf_event__process_lost_samples &&
- !(event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF))
+ if (event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF)
+ evlist->stats.total_dropped_samples += event->lost_samples.lost;
+ else if (tool->lost_samples == perf_event__process_lost_samples)
evlist->stats.total_lost_samples += event->lost_samples.lost;
return tool->lost_samples(tool, event, sample, machine);
case PERF_RECORD_READ:
@@ -1634,7 +1335,7 @@ static int machines__deliver_event(struct machines *machines,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
u64 file_offset,
const char *file_path)
{
@@ -1667,13 +1368,12 @@ static s64 perf_session__process_user_event(struct perf_session *session,
const char *file_path)
{
struct ordered_events *oe = &session->ordered_events;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct perf_sample sample = { .time = 0, };
int fd = perf_data__fd(session->data);
int err;
- if (event->header.type != PERF_RECORD_COMPRESSED ||
- tool->compressed == perf_session__process_compressed_event_stub)
+ if (event->header.type != PERF_RECORD_COMPRESSED || perf_tool__compressed_is_stub(tool))
dump_event(session->evlist, event, file_offset, &sample, file_path);
/* These events are processed right away */
@@ -1754,7 +1454,7 @@ int perf_session__deliver_synth_event(struct perf_session *session,
struct perf_sample *sample)
{
struct evlist *evlist = session->evlist;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
events_stats__inc(&evlist->stats, event->header.type);
@@ -1764,6 +1464,30 @@ int perf_session__deliver_synth_event(struct perf_session *session,
return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0, NULL);
}
+int perf_session__deliver_synth_attr_event(struct perf_session *session,
+ const struct perf_event_attr *attr,
+ u64 id)
+{
+ union {
+ struct {
+ struct perf_record_header_attr attr;
+ u64 ids[1];
+ } attr_id;
+ union perf_event ev;
+ } ev = {
+ .attr_id.attr.header.type = PERF_RECORD_HEADER_ATTR,
+ .attr_id.attr.header.size = sizeof(ev.attr_id),
+ .attr_id.ids[0] = id,
+ };
+
+ if (attr->size != sizeof(ev.attr_id.attr.attr)) {
+ pr_debug("Unexpected perf_event_attr size\n");
+ return -EINVAL;
+ }
+ ev.attr_id.attr.attr = *attr;
+ return perf_session__deliver_synth_event(session, &ev.ev, NULL);
+}
+
static void event_swap(union perf_event *event, bool sample_id_all)
{
perf_event__swap_op swap;
@@ -1862,7 +1586,7 @@ static s64 perf_session__process_event(struct perf_session *session,
const char *file_path)
{
struct evlist *evlist = session->evlist;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
int ret;
if (session->header.needs_swap)
@@ -2049,7 +1773,7 @@ static int __perf_session__process_decomp_events(struct perf_session *session);
static int __perf_session__process_pipe_events(struct perf_session *session)
{
struct ordered_events *oe = &session->ordered_events;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct ui_progress prog;
union perf_event *event;
uint32_t size, cur_size = 0;
@@ -2060,8 +1784,6 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
void *p;
bool update_prog = false;
- perf_tool__fill_defaults(tool);
-
/*
* If it's from a file saving pipe data (by redirection), it would have
* a file name other than "-". Then we can get the total size and show
@@ -2481,12 +2203,10 @@ static int __perf_session__process_events(struct perf_session *session)
.in_place_update = session->data->in_place_update,
};
struct ordered_events *oe = &session->ordered_events;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
struct ui_progress prog;
int err;
- perf_tool__fill_defaults(tool);
-
if (rd.data_size == 0)
return -1;
@@ -2533,14 +2253,12 @@ out_err:
static int __perf_session__process_dir_events(struct perf_session *session)
{
struct perf_data *data = session->data;
- struct perf_tool *tool = session->tool;
+ const struct perf_tool *tool = session->tool;
int i, ret, readers, nr_readers;
struct ui_progress prog;
u64 total_size = perf_data__size(session->data);
struct reader *rd;
- perf_tool__fill_defaults(tool);
-
ui_progress__init_size(&prog, total_size, "Processing events...");
nr_readers = 1;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 4c29dc86956f..bcf1bcf06959 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -26,26 +26,68 @@ struct decomp_data {
struct zstd_data *zstd_decomp;
};
+/**
+ * struct perf_session- A Perf session holds the main state when the program is
+ * working with live perf events or reading data from an input file.
+ *
+ * The rough organization of a perf_session is:
+ * ```
+ * +--------------+ +-----------+ +------------+
+ * | Session |1..* ----->| Machine |1..* ----->| Thread |
+ * +--------------+ +-----------+ +------------+
+ * ```
+ */
struct perf_session {
+ /**
+ * @header: The read version of a perf_file_header, or captures global
+ * information from a live session.
+ */
struct perf_header header;
+ /** @machines: Machines within the session a host and 0 or more guests. */
struct machines machines;
+ /** @evlist: List of evsels/events of the session. */
struct evlist *evlist;
- struct auxtrace *auxtrace;
+ /** @auxtrace: callbacks to allow AUX area data decoding. */
+ const struct auxtrace *auxtrace;
+ /** @itrace_synth_opts: AUX area tracing synthesis options. */
struct itrace_synth_opts *itrace_synth_opts;
+ /** @auxtrace_index: index of AUX area tracing events within a perf.data file. */
struct list_head auxtrace_index;
#ifdef HAVE_LIBTRACEEVENT
+ /** @tevent: handles for libtraceevent and plugins. */
struct trace_event tevent;
#endif
+ /** @time_conv: Holds contents of last PERF_RECORD_TIME_CONV event. */
struct perf_record_time_conv time_conv;
- bool repipe;
+ /** @trace_event_repipe: When set causes read trace events to be written to stdout. */
+ bool trace_event_repipe;
+ /**
+ * @one_mmap: The reader will use a single mmap by default. There may be
+ * multiple data files in particular for aux events. If this is true
+ * then the single big mmap for the data file can be assumed.
+ */
bool one_mmap;
+ /** @one_mmap_addr: Address of initial perf data file reader mmap. */
void *one_mmap_addr;
+ /** @one_mmap_offset: File offset in perf.data file when mapped. */
u64 one_mmap_offset;
+ /** @ordered_events: Used to turn unordered events into ordered ones. */
struct ordered_events ordered_events;
+ /** @data: Optional perf data file being read from. */
struct perf_data *data;
- struct perf_tool *tool;
+ /** @tool: callbacks for event handling. */
+ const struct perf_tool *tool;
+ /**
+ * @bytes_transferred: Used by perf record to count written bytes before
+ * compression.
+ */
u64 bytes_transferred;
+ /**
+ * @bytes_compressed: Used by perf record to count written bytes after
+ * compression.
+ */
u64 bytes_compressed;
+ /** @zstd_data: Owner of global compression state, buffers, etc. */
struct zstd_data zstd_data;
struct decomp_data decomp_data;
struct decomp_data *active_decomp;
@@ -64,13 +106,13 @@ struct decomp {
struct perf_tool;
struct perf_session *__perf_session__new(struct perf_data *data,
- bool repipe, int repipe_fd,
- struct perf_tool *tool);
+ struct perf_tool *tool,
+ bool trace_event_repipe);
static inline struct perf_session *perf_session__new(struct perf_data *data,
struct perf_tool *tool)
{
- return __perf_session__new(data, false, -1, tool);
+ return __perf_session__new(data, tool, /*trace_event_repipe=*/false);
}
void perf_session__delete(struct perf_session *session);
@@ -92,8 +134,6 @@ int perf_session__process_events(struct perf_session *session);
int perf_session__queue_event(struct perf_session *s, union perf_event *event,
u64 timestamp, u64 file_offset, const char *file_path);
-void perf_tool__fill_defaults(struct perf_tool *tool);
-
int perf_session__resolve_callchain(struct perf_session *session,
struct evsel *evsel,
struct thread *thread,
@@ -154,13 +194,16 @@ extern volatile int session_done;
int perf_session__deliver_synth_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample);
+int perf_session__deliver_synth_attr_event(struct perf_session *session,
+ const struct perf_event_attr *attr,
+ u64 id);
int perf_session__dsos_hit_all(struct perf_session *session);
int perf_event__process_id_index(struct perf_session *session,
union perf_event *event);
-int perf_event__process_finished_round(struct perf_tool *tool,
+int perf_event__process_finished_round(const struct perf_tool *tool,
union perf_event *event,
struct ordered_events *oe);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 142e9d447ce7..649550e9b7aa 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -17,7 +17,7 @@ src_feature_tests = getenv('srctree') + '/tools/build/feature'
def clang_has_option(option):
cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
- return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
+ return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o) or (b"unknown warning option" in o))] == [ ]
if cc_is_clang:
from sysconfig import get_config_vars
@@ -63,6 +63,8 @@ cflags = getenv('CFLAGS', '').split()
cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
if cc_is_clang:
cflags += ["-Wno-unused-command-line-argument" ]
+ if clang_has_option("-Wno-cast-function-type-mismatch"):
+ cflags += ["-Wno-cast-function-type-mismatch" ]
else:
cflags += ['-Wno-cast-function-type' ]
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index ab7c7ff35f9b..013020f33ece 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2312,7 +2312,7 @@ static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf,
he->mem_type_off, true);
buf[4095] = '\0';
- return repsep_snprintf(bf, size, "%s %+d (%s)", he_type->self.type_name,
+ return repsep_snprintf(bf, size, "%s +%#x (%s)", he_type->self.type_name,
he->mem_type_off, buf);
}
@@ -2326,6 +2326,57 @@ struct sort_entry sort_type_offset = {
.se_width_idx = HISTC_TYPE_OFFSET,
};
+/* --sort typecln */
+
+/* TODO: use actual value in the system */
+#define TYPE_CACHELINE_SIZE 64
+
+static int64_t
+sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ struct annotated_data_type *left_type = left->mem_type;
+ struct annotated_data_type *right_type = right->mem_type;
+ int64_t left_cln, right_cln;
+ int64_t ret;
+
+ if (!left_type) {
+ sort__type_init(left);
+ left_type = left->mem_type;
+ }
+
+ if (!right_type) {
+ sort__type_init(right);
+ right_type = right->mem_type;
+ }
+
+ ret = strcmp(left_type->self.type_name, right_type->self.type_name);
+ if (ret)
+ return ret;
+
+ left_cln = left->mem_type_off / TYPE_CACHELINE_SIZE;
+ right_cln = right->mem_type_off / TYPE_CACHELINE_SIZE;
+ return left_cln - right_cln;
+}
+
+static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width __maybe_unused)
+{
+ struct annotated_data_type *he_type = he->mem_type;
+
+ return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
+ he->mem_type_off / TYPE_CACHELINE_SIZE);
+}
+
+struct sort_entry sort_type_cacheline = {
+ .se_header = "Data Type Cacheline",
+ .se_cmp = sort__type_cmp,
+ .se_collapse = sort__typecln_sort,
+ .se_sort = sort__typecln_sort,
+ .se_init = sort__type_init,
+ .se_snprintf = hist_entry__typecln_snprintf,
+ .se_width_idx = HISTC_TYPE_CACHELINE,
+};
+
struct sort_dimension {
const char *name;
@@ -2384,6 +2435,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_ANNOTATE_DATA_TYPE, "type", sort_type),
DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset),
DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset),
+ DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline),
};
#undef DIM
@@ -3960,7 +4012,7 @@ static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int
add_key(sb, s[i].name, llen);
}
-char *sort_help(const char *prefix)
+char *sort_help(const char *prefix, enum sort_mode mode)
{
struct strbuf sb;
char *s;
@@ -3972,10 +4024,12 @@ char *sort_help(const char *prefix)
ARRAY_SIZE(hpp_sort_dimensions), &len);
add_sort_string(&sb, common_sort_dimensions,
ARRAY_SIZE(common_sort_dimensions), &len);
- add_sort_string(&sb, bstack_sort_dimensions,
- ARRAY_SIZE(bstack_sort_dimensions), &len);
- add_sort_string(&sb, memory_sort_dimensions,
- ARRAY_SIZE(memory_sort_dimensions), &len);
+ if (mode == SORT_MODE__NORMAL || mode == SORT_MODE__BRANCH)
+ add_sort_string(&sb, bstack_sort_dimensions,
+ ARRAY_SIZE(bstack_sort_dimensions), &len);
+ if (mode == SORT_MODE__NORMAL || mode == SORT_MODE__MEMORY)
+ add_sort_string(&sb, memory_sort_dimensions,
+ ARRAY_SIZE(memory_sort_dimensions), &len);
s = strbuf_detach(&sb, NULL);
strbuf_release(&sb);
return s;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 0bd0ee3ae76b..9ff68c6786e7 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -71,6 +71,7 @@ enum sort_type {
SORT_ANNOTATE_DATA_TYPE,
SORT_ANNOTATE_DATA_TYPE_OFFSET,
SORT_SYM_OFFSET,
+ SORT_ANNOTATE_DATA_TYPE_CACHELINE,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -130,7 +131,7 @@ void reset_output_field(void);
void sort__setup_elide(FILE *fp);
void perf_hpp__set_elide(int idx, bool elide);
-char *sort_help(const char *prefix);
+char *sort_help(const char *prefix, enum sort_mode mode);
int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 760742fd4a7d..f32d0d4f4bc9 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -6,6 +6,7 @@
#include <string.h>
#include <sys/types.h>
+#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/zalloc.h>
@@ -16,6 +17,9 @@
#include "util/debug.h"
#include "util/callchain.h"
#include "util/symbol_conf.h"
+#ifdef HAVE_LIBLLVM_SUPPORT
+#include "util/llvm-c-helpers.h"
+#endif
#include "srcline.h"
#include "string2.h"
#include "symbol.h"
@@ -130,7 +134,60 @@ static struct symbol *new_inline_sym(struct dso *dso,
#define MAX_INLINE_NEST 1024
-#ifdef HAVE_LIBBFD_SUPPORT
+#ifdef HAVE_LIBLLVM_SUPPORT
+
+static void free_llvm_inline_frames(struct llvm_a2l_frame *inline_frames,
+ int num_frames)
+{
+ if (inline_frames != NULL) {
+ for (int i = 0; i < num_frames; ++i) {
+ zfree(&inline_frames[i].filename);
+ zfree(&inline_frames[i].funcname);
+ }
+ zfree(&inline_frames);
+ }
+}
+
+static int addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line, struct dso *dso,
+ bool unwind_inlines, struct inline_node *node,
+ struct symbol *sym)
+{
+ struct llvm_a2l_frame *inline_frames = NULL;
+ int num_frames = llvm_addr2line(dso_name, addr, file, line,
+ node && unwind_inlines, &inline_frames);
+
+ if (num_frames == 0 || !inline_frames) {
+ /* Error, or we didn't want inlines. */
+ return num_frames;
+ }
+
+ for (int i = 0; i < num_frames; ++i) {
+ struct symbol *inline_sym =
+ new_inline_sym(dso, sym, inline_frames[i].funcname);
+ char *srcline = NULL;
+
+ if (inline_frames[i].filename) {
+ srcline =
+ srcline_from_fileline(inline_frames[i].filename,
+ inline_frames[i].line);
+ }
+ if (inline_list__append(inline_sym, srcline, node) != 0) {
+ free_llvm_inline_frames(inline_frames, num_frames);
+ return 0;
+ }
+ }
+ free_llvm_inline_frames(inline_frames, num_frames);
+
+ return num_frames;
+}
+
+void dso__free_a2l(struct dso *dso __maybe_unused)
+{
+ /* Nothing to free. */
+}
+
+#elif defined(HAVE_LIBBFD_SUPPORT)
/*
* Implement addr2line using libbfd.
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index c38bcb6f4c78..ea96e4ebad8c 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1237,7 +1237,8 @@ static void print_metric_headers(struct perf_stat_config *config,
/* Print metrics headers only */
evlist__for_each_entry(evlist, counter) {
- if (config->aggr_mode != AGGR_NONE && counter->metric_leader != counter)
+ if (!config->iostat_run &&
+ config->aggr_mode != AGGR_NONE && counter->metric_leader != counter)
continue;
os.evsel = counter;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 6bb975e46de3..99376c12dd8e 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -380,7 +380,7 @@ static int prepare_metric(const struct metric_expr *mexp,
struct stats *stats;
double scale;
- switch (metric_events[i]->tool_event) {
+ switch (evsel__tool_event(metric_events[i])) {
case PERF_TOOL_DURATION_TIME:
stats = &walltime_nsecs_stats;
scale = 1e-9;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 19eb623e0826..3bbf173ad822 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1931,6 +1931,9 @@ int dso__load(struct dso *dso, struct map *map)
if (next_slot) {
ss_pos++;
+ if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND)
+ dso__set_binary_type(dso, symtab_type);
+
if (syms_ss && runtime_ss)
break;
} else {
@@ -2425,14 +2428,14 @@ static bool symbol__read_kptr_restrict(void)
{
bool value = false;
FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
+ bool used_root;
+ bool cap_syslog = perf_cap__capable(CAP_SYSLOG, &used_root);
if (fp != NULL) {
char line[8];
if (fgets(line, sizeof(line), fp) != NULL)
- value = perf_cap__capable(CAP_SYSLOG) ?
- (atoi(line) >= 2) :
- (atoi(line) != 0);
+ value = cap_syslog ? (atoi(line) >= 2) : (atoi(line) != 0);
fclose(fp);
}
@@ -2440,7 +2443,7 @@ static bool symbol__read_kptr_restrict(void)
/* Per kernel/kallsyms.c:
* we also restrict when perf_event_paranoid > 1 w/o CAP_SYSLOG
*/
- if (perf_event_paranoid() > 1 && !perf_cap__capable(CAP_SYSLOG))
+ if (perf_event_paranoid() > 1 && !cap_syslog)
value = true;
return value;
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index 657cfa5af43c..a9c51acc722f 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -64,7 +64,7 @@ struct symbol_conf {
*sym_list_str,
*col_width_list_str,
*bt_stop_list_str;
- char *addr2line_path;
+ const char *addr2line_path;
unsigned long time_quantum;
struct strlist *dso_list,
*comm_list,
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 5498048f56ea..a58444c4aed1 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-License-Identifier: GPL-2.0-only
#include "util/cgroup.h"
#include "util/data.h"
@@ -47,7 +47,7 @@
unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT;
-int perf_tool__process_synth_event(struct perf_tool *tool,
+int perf_tool__process_synth_event(const struct perf_tool *tool,
union perf_event *event,
struct machine *machine,
perf_event__handler_t process)
@@ -187,7 +187,7 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid, pid_t ti
return 0;
}
-pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+pid_t perf_event__synthesize_comm(const struct perf_tool *tool,
union perf_event *event, pid_t pid,
perf_event__handler_t process,
struct machine *machine)
@@ -218,7 +218,7 @@ static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
}
}
-int perf_event__synthesize_namespaces(struct perf_tool *tool,
+int perf_event__synthesize_namespaces(const struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
@@ -257,7 +257,7 @@ int perf_event__synthesize_namespaces(struct perf_tool *tool,
return 0;
}
-static int perf_event__synthesize_fork(struct perf_tool *tool,
+static int perf_event__synthesize_fork(const struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid, pid_t ppid,
perf_event__handler_t process,
@@ -418,7 +418,7 @@ out:
dso__put(dso);
}
-int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+int perf_event__synthesize_mmap_events(const struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
@@ -542,7 +542,7 @@ out:
}
#ifdef HAVE_FILE_HANDLE
-static int perf_event__synthesize_cgroup(struct perf_tool *tool,
+static int perf_event__synthesize_cgroup(const struct perf_tool *tool,
union perf_event *event,
char *path, size_t mount_len,
perf_event__handler_t process,
@@ -582,7 +582,7 @@ static int perf_event__synthesize_cgroup(struct perf_tool *tool,
return 0;
}
-static int perf_event__walk_cgroup_tree(struct perf_tool *tool,
+static int perf_event__walk_cgroup_tree(const struct perf_tool *tool,
union perf_event *event,
char *path, size_t mount_len,
perf_event__handler_t process,
@@ -630,7 +630,7 @@ static int perf_event__walk_cgroup_tree(struct perf_tool *tool,
return ret;
}
-int perf_event__synthesize_cgroups(struct perf_tool *tool,
+int perf_event__synthesize_cgroups(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
@@ -657,7 +657,7 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool,
return 0;
}
#else
-int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
+int perf_event__synthesize_cgroups(const struct perf_tool *tool __maybe_unused,
perf_event__handler_t process __maybe_unused,
struct machine *machine __maybe_unused)
{
@@ -666,7 +666,7 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
#endif
struct perf_event__synthesize_modules_maps_cb_args {
- struct perf_tool *tool;
+ const struct perf_tool *tool;
perf_event__handler_t process;
struct machine *machine;
union perf_event *event;
@@ -717,7 +717,7 @@ static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data)
return 0;
}
-int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
+int perf_event__synthesize_modules(const struct perf_tool *tool, perf_event__handler_t process,
struct machine *machine)
{
int rc;
@@ -763,7 +763,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
union perf_event *fork_event,
union perf_event *namespaces_event,
pid_t pid, int full, perf_event__handler_t process,
- struct perf_tool *tool, struct machine *machine,
+ const struct perf_tool *tool, struct machine *machine,
bool needs_mmap, bool mmap_data)
{
char filename[PATH_MAX];
@@ -852,7 +852,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
return rc;
}
-int perf_event__synthesize_thread_map(struct perf_tool *tool,
+int perf_event__synthesize_thread_map(const struct perf_tool *tool,
struct perf_thread_map *threads,
perf_event__handler_t process,
struct machine *machine,
@@ -929,7 +929,7 @@ out:
return err;
}
-static int __perf_event__synthesize_threads(struct perf_tool *tool,
+static int __perf_event__synthesize_threads(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
bool needs_mmap,
@@ -993,7 +993,7 @@ out:
}
struct synthesize_threads_arg {
- struct perf_tool *tool;
+ const struct perf_tool *tool;
perf_event__handler_t process;
struct machine *machine;
bool needs_mmap;
@@ -1015,7 +1015,7 @@ static void *synthesize_threads_worker(void *arg)
return NULL;
}
-int perf_event__synthesize_threads(struct perf_tool *tool,
+int perf_event__synthesize_threads(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
bool needs_mmap, bool mmap_data,
@@ -1104,14 +1104,14 @@ free_dirent:
return err;
}
-int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+int __weak perf_event__synthesize_extra_kmaps(const struct perf_tool *tool __maybe_unused,
perf_event__handler_t process __maybe_unused,
struct machine *machine __maybe_unused)
{
return 0;
}
-static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+static int __perf_event__synthesize_kernel_mmap(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
@@ -1183,7 +1183,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
return err;
}
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+int perf_event__synthesize_kernel_mmap(const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
@@ -1196,7 +1196,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
return perf_event__synthesize_extra_kmaps(tool, process, machine);
}
-int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+int perf_event__synthesize_thread_map2(const struct perf_tool *tool,
struct perf_thread_map *threads,
perf_event__handler_t process,
struct machine *machine)
@@ -1346,7 +1346,7 @@ static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map
}
-int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+int perf_event__synthesize_cpu_map(const struct perf_tool *tool,
const struct perf_cpu_map *map,
perf_event__handler_t process,
struct machine *machine)
@@ -1364,7 +1364,7 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool,
return err;
}
-int perf_event__synthesize_stat_config(struct perf_tool *tool,
+int perf_event__synthesize_stat_config(const struct perf_tool *tool,
struct perf_stat_config *config,
perf_event__handler_t process,
struct machine *machine)
@@ -1403,7 +1403,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool,
return err;
}
-int perf_event__synthesize_stat(struct perf_tool *tool,
+int perf_event__synthesize_stat(const struct perf_tool *tool,
struct perf_cpu cpu, u32 thread, u64 id,
struct perf_counts_values *count,
perf_event__handler_t process,
@@ -1425,7 +1425,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool,
return process(tool, (union perf_event *) &event, NULL, machine);
}
-int perf_event__synthesize_stat_round(struct perf_tool *tool,
+int perf_event__synthesize_stat_round(const struct perf_tool *tool,
u64 evtime, u64 type,
perf_event__handler_t process,
struct machine *machine)
@@ -1826,7 +1826,7 @@ int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_s
return (void *)array - (void *)start;
}
-int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+int __perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process,
struct evlist *evlist, struct machine *machine, size_t from)
{
union perf_event *ev;
@@ -1918,13 +1918,13 @@ out_err:
return err;
}
-int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+int perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process,
struct evlist *evlist, struct machine *machine)
{
return __perf_event__synthesize_id_index(tool, process, evlist, machine, 0);
}
-int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+int __machine__synthesize_threads(struct machine *machine, const struct perf_tool *tool,
struct target *target, struct perf_thread_map *threads,
perf_event__handler_t process, bool needs_mmap,
bool data_mmap, unsigned int nr_threads_synthesize)
@@ -1985,7 +1985,7 @@ static struct perf_record_event_update *event_update_event__new(size_t size, u64
return ev;
}
-int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel,
+int perf_event__synthesize_event_update_unit(const struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
size_t size = strlen(evsel->unit);
@@ -2002,7 +2002,7 @@ int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evse
return err;
}
-int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel,
+int perf_event__synthesize_event_update_scale(const struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
struct perf_record_event_update *ev;
@@ -2019,7 +2019,7 @@ int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evs
return err;
}
-int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel,
+int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
struct perf_record_event_update *ev;
@@ -2036,7 +2036,7 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse
return err;
}
-int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel,
+int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus };
@@ -2059,7 +2059,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evse
return err;
}
-int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist,
+int perf_event__synthesize_attrs(const struct perf_tool *tool, struct evlist *evlist,
perf_event__handler_t process)
{
struct evsel *evsel;
@@ -2087,7 +2087,7 @@ static bool has_scale(struct evsel *evsel)
return evsel->scale != 1;
}
-int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list,
+int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlist *evsel_list,
perf_event__handler_t process, bool is_pipe)
{
struct evsel *evsel;
@@ -2143,7 +2143,7 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evs
return 0;
}
-int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr,
+int perf_event__synthesize_attr(const struct perf_tool *tool, struct perf_event_attr *attr,
u32 ids, u64 *id, perf_event__handler_t process)
{
union perf_event *ev;
@@ -2177,7 +2177,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *
}
#ifdef HAVE_LIBTRACEEVENT
-int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist,
+int perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist,
perf_event__handler_t process)
{
union perf_event ev;
@@ -2200,7 +2200,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e
if (!tdata)
return -1;
- memset(&ev, 0, sizeof(ev));
+ memset(&ev, 0, sizeof(ev.tracing_data));
ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
size = tdata->size;
@@ -2225,31 +2225,108 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e
}
#endif
-int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc,
- perf_event__handler_t process, struct machine *machine)
+int perf_event__synthesize_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ perf_event__handler_t process,
+ const struct evsel *evsel,
+ __u16 misc,
+ const struct build_id *bid,
+ const char *filename)
{
union perf_event ev;
size_t len;
- if (!dso__hit(pos))
- return 0;
+ len = sizeof(ev.build_id) + strlen(filename) + 1;
+ len = PERF_ALIGN(len, sizeof(u64));
- memset(&ev, 0, sizeof(ev));
+ memset(&ev, 0, len);
- len = dso__long_name_len(pos) + 1;
- len = PERF_ALIGN(len, NAME_ALIGN);
- ev.build_id.size = min(dso__bid(pos)->size, sizeof(dso__bid(pos)->data));
- memcpy(&ev.build_id.build_id, dso__bid(pos)->data, ev.build_id.size);
+ ev.build_id.size = min(bid->size, sizeof(ev.build_id.build_id));
+ memcpy(ev.build_id.build_id, bid->data, ev.build_id.size);
ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE;
ev.build_id.pid = machine->pid;
- ev.build_id.header.size = sizeof(ev.build_id) + len;
- memcpy(&ev.build_id.filename, dso__long_name(pos), dso__long_name_len(pos));
+ ev.build_id.header.size = len;
+ strcpy(ev.build_id.filename, filename);
+
+ if (evsel) {
+ void *array = &ev;
+ int ret;
+
+ array += ev.header.size;
+ ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
+ if (ret < 0)
+ return ret;
+
+ if (ret & 7) {
+ pr_err("Bad id sample size %d\n", ret);
+ return -EINVAL;
+ }
+
+ ev.header.size += ret;
+ }
+
+ return process(tool, &ev, sample, machine);
+}
+
+int perf_event__synthesize_mmap2_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ perf_event__handler_t process,
+ const struct evsel *evsel,
+ __u16 misc,
+ __u32 pid, __u32 tid,
+ __u64 start, __u64 len, __u64 pgoff,
+ const struct build_id *bid,
+ __u32 prot, __u32 flags,
+ const char *filename)
+{
+ union perf_event ev;
+ size_t ev_len;
+ void *array;
+ int ret;
+
+ ev_len = sizeof(ev.mmap2) - sizeof(ev.mmap2.filename) + strlen(filename) + 1;
+ ev_len = PERF_ALIGN(ev_len, sizeof(u64));
+
+ memset(&ev, 0, ev_len);
+
+ ev.mmap2.header.type = PERF_RECORD_MMAP2;
+ ev.mmap2.header.misc = misc | PERF_RECORD_MISC_MMAP_BUILD_ID;
+ ev.mmap2.header.size = ev_len;
+
+ ev.mmap2.pid = pid;
+ ev.mmap2.tid = tid;
+ ev.mmap2.start = start;
+ ev.mmap2.len = len;
+ ev.mmap2.pgoff = pgoff;
+
+ ev.mmap2.build_id_size = min(bid->size, sizeof(ev.mmap2.build_id));
+ memcpy(ev.mmap2.build_id, bid->data, ev.mmap2.build_id_size);
+
+ ev.mmap2.prot = prot;
+ ev.mmap2.flags = flags;
+
+ memcpy(ev.mmap2.filename, filename, min(strlen(filename), sizeof(ev.mmap.filename)));
+
+ array = &ev;
+ array += ev.header.size;
+ ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
+ if (ret < 0)
+ return ret;
+
+ if (ret & 7) {
+ pr_err("Bad id sample size %d\n", ret);
+ return -EINVAL;
+ }
+
+ ev.header.size += ret;
- return process(tool, &ev, NULL, machine);
+ return process(tool, &ev, sample, machine);
}
-int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool,
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, const struct perf_tool *tool,
struct evlist *evlist, perf_event__handler_t process, bool attrs)
{
int err;
@@ -2286,7 +2363,7 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p
extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
-int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session,
+int perf_event__synthesize_features(const struct perf_tool *tool, struct perf_session *session,
struct evlist *evlist, perf_event__handler_t process)
{
struct perf_header *header = &session->header;
@@ -2349,7 +2426,7 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session
return ret;
}
-int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+int perf_event__synthesize_for_pipe(const struct perf_tool *tool,
struct perf_session *session,
struct perf_data *data,
perf_event__handler_t process)
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index 53737d1619a4..b9c936b5cfeb 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -9,6 +9,7 @@
#include <perf/cpumap.h>
struct auxtrace_record;
+struct build_id;
struct dso;
struct evlist;
struct evsel;
@@ -40,45 +41,63 @@ enum perf_record_synth {
int parse_synth_opt(char *str);
-typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event,
+typedef int (*perf_event__handler_t)(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine);
-int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process);
-int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process);
-int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_cpu_map(struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
-int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
-int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
-int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
-int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe);
-int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process);
-int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine);
-int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from);
+int perf_event__synthesize_attrs(const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_attr(const struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process);
+int perf_event__synthesize_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ perf_event__handler_t process,
+ const struct evsel *evsel,
+ __u16 misc,
+ const struct build_id *bid,
+ const char *filename);
+int perf_event__synthesize_mmap2_build_id(const struct perf_tool *tool,
+ struct perf_sample *sample,
+ struct machine *machine,
+ perf_event__handler_t process,
+ const struct evsel *evsel,
+ __u16 misc,
+ __u32 pid, __u32 tid,
+ __u64 start, __u64 len, __u64 pgoff,
+ const struct build_id *bid,
+ __u32 prot, __u32 flags,
+ const char *filename);
+int perf_event__synthesize_cpu_map(const struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe);
+int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_features(const struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine);
+int __perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from);
int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_sample *sample);
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
-int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_mmap_events(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
+int perf_event__synthesize_modules(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_namespaces(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cgroups(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
-int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
-int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
-int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
-int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
-int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
-pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
-
-int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
+int perf_event__synthesize_stat_config(const struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
+int perf_event__synthesize_stat_round(const struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat(const struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map2(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
+int perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
+int perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+pid_t perf_event__synthesize_comm(const struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
+
+int perf_tool__process_synth_event(const struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format);
-int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+int __machine__synthesize_threads(struct machine *machine, const struct perf_tool *tool,
struct target *target, struct perf_thread_map *threads,
perf_event__handler_t process, bool needs_mmap, bool data_mmap,
unsigned int nr_threads_synthesize);
@@ -87,7 +106,7 @@ int machine__synthesize_threads(struct machine *machine, struct target *target,
unsigned int nr_threads_synthesize);
#ifdef HAVE_AUXTRACE_SUPPORT
-int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool,
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, const struct perf_tool *tool,
struct perf_session *session, perf_event__handler_t process);
#else // HAVE_AUXTRACE_SUPPORT
@@ -96,7 +115,7 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct per
static inline int
perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
- struct perf_tool *tool __maybe_unused,
+ const struct perf_tool *tool __maybe_unused,
struct perf_session *session __maybe_unused,
perf_event__handler_t process __maybe_unused)
{
@@ -117,7 +136,7 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session
}
#endif // HAVE_LIBBPF_SUPPORT
-int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+int perf_event__synthesize_for_pipe(const struct perf_tool *tool,
struct perf_session *session,
struct perf_data *data,
perf_event__handler_t process);
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 0dd26b991b3f..7c15dec6900d 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -18,6 +18,10 @@
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_x86_64;
+#elif defined(__i386__)
+#include <asm/syscalls_32.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_x86_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_x86;
#elif defined(__s390x__)
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 87c59aa9fe38..0ffdd52d86d7 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -476,6 +476,7 @@ void thread__free_stitch_list(struct thread *thread)
return;
list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+ map_symbol__exit(&pos->cursor.ms);
list_del_init(&pos->node);
free(pos);
}
@@ -485,6 +486,9 @@ void thread__free_stitch_list(struct thread *thread)
free(pos);
}
+ for (unsigned int i = 0 ; i < lbr_stitch->prev_lbr_cursor_size; i++)
+ map_symbol__exit(&lbr_stitch->prev_lbr_cursor[i].ms);
+
zfree(&lbr_stitch->prev_lbr_cursor);
free(thread__lbr_stitch(thread));
thread__set_lbr_stitch(thread, NULL);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 8b4a3c69bad1..6cbf6eb2812e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -26,6 +26,7 @@ struct lbr_stitch {
struct list_head free_lists;
struct perf_sample prev_sample;
struct callchain_cursor_node *prev_lbr_cursor;
+ unsigned int prev_lbr_cursor_size;
};
DECLARE_RC_STRUCT(thread) {
diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
index 302443921681..1b91ccd4d523 100644
--- a/tools/perf/util/time-utils.c
+++ b/tools/perf/util/time-utils.c
@@ -20,7 +20,7 @@ int parse_nsec_time(const char *str, u64 *ptime)
u64 time_sec, time_nsec;
char *end;
- time_sec = strtoul(str, &end, 10);
+ time_sec = strtoull(str, &end, 10);
if (*end != '.' && *end != '\0')
return -1;
@@ -38,7 +38,7 @@ int parse_nsec_time(const char *str, u64 *ptime)
for (i = strlen(nsec_buf); i < 9; i++)
nsec_buf[i] = '0';
- time_nsec = strtoul(nsec_buf, &end, 10);
+ time_nsec = strtoull(nsec_buf, &end, 10);
if (*end != '\0')
return -1;
} else
diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
new file mode 100644
index 000000000000..3b7f390f26eb
--- /dev/null
+++ b/tools/perf/util/tool.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "data.h"
+#include "debug.h"
+#include "header.h"
+#include "session.h"
+#include "stat.h"
+#include "tool.h"
+#include "tsc.h"
+#include <sys/mman.h>
+#include <unistd.h>
+
+#ifdef HAVE_ZSTD_SUPPORT
+static int perf_session__process_compressed_event(struct perf_session *session,
+ union perf_event *event, u64 file_offset,
+ const char *file_path)
+{
+ void *src;
+ size_t decomp_size, src_size;
+ u64 decomp_last_rem = 0;
+ size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
+ struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last;
+
+ if (decomp_last) {
+ decomp_last_rem = decomp_last->size - decomp_last->head;
+ decomp_len += decomp_last_rem;
+ }
+
+ mmap_len = sizeof(struct decomp) + decomp_len;
+ decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (decomp == MAP_FAILED) {
+ pr_err("Couldn't allocate memory for decompression\n");
+ return -1;
+ }
+
+ decomp->file_pos = file_offset;
+ decomp->file_path = file_path;
+ decomp->mmap_len = mmap_len;
+ decomp->head = 0;
+
+ if (decomp_last_rem) {
+ memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
+ decomp->size = decomp_last_rem;
+ }
+
+ src = (void *)event + sizeof(struct perf_record_compressed);
+ src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
+
+ decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size,
+ &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
+ if (!decomp_size) {
+ munmap(decomp, mmap_len);
+ pr_err("Couldn't decompress data\n");
+ return -1;
+ }
+
+ decomp->size += decomp_size;
+
+ if (session->active_decomp->decomp == NULL)
+ session->active_decomp->decomp = decomp;
+ else
+ session->active_decomp->decomp_last->next = decomp;
+
+ session->active_decomp->decomp_last = decomp;
+
+ pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
+
+ return 0;
+}
+#endif
+
+static int process_event_synth_tracing_data_stub(struct perf_session *session
+ __maybe_unused,
+ union perf_event *event
+ __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_synth_attr_stub(const struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct evlist **pevlist
+ __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_synth_event_update_stub(const struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct evlist **pevlist
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_event_update(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+int process_event_sample_stub(const struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_stub(const struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_finished_round_stub(const struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct ordered_events *oe __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int skipn(int fd, off_t n)
+{
+ char buf[4096];
+ ssize_t ret;
+
+ while (n > 0) {
+ ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
+ if (ret <= 0)
+ return ret;
+ n -= ret;
+ }
+
+ return 0;
+}
+
+static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event)
+{
+ dump_printf(": unhandled!\n");
+ if (perf_data__is_pipe(session->data))
+ skipn(perf_data__fd(session->data), event->auxtrace.size);
+ return event->auxtrace.size;
+}
+
+static int process_event_op2_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+
+static
+int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_thread_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_cpu_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_config(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_round(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_time_conv(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ u64 file_offset __maybe_unused,
+ const char *file_path __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+void perf_tool__init(struct perf_tool *tool, bool ordered_events)
+{
+ tool->ordered_events = ordered_events;
+ tool->ordering_requires_timestamps = false;
+ tool->namespace_events = false;
+ tool->cgroup_events = false;
+ tool->no_warn = false;
+ tool->show_feat_hdr = SHOW_FEAT_NO_HEADER;
+
+ tool->sample = process_event_sample_stub;
+ tool->mmap = process_event_stub;
+ tool->mmap2 = process_event_stub;
+ tool->comm = process_event_stub;
+ tool->namespaces = process_event_stub;
+ tool->cgroup = process_event_stub;
+ tool->fork = process_event_stub;
+ tool->exit = process_event_stub;
+ tool->lost = perf_event__process_lost;
+ tool->lost_samples = perf_event__process_lost_samples;
+ tool->aux = perf_event__process_aux;
+ tool->itrace_start = perf_event__process_itrace_start;
+ tool->context_switch = perf_event__process_switch;
+ tool->ksymbol = perf_event__process_ksymbol;
+ tool->bpf = perf_event__process_bpf;
+ tool->text_poke = perf_event__process_text_poke;
+ tool->aux_output_hw_id = perf_event__process_aux_output_hw_id;
+ tool->read = process_event_sample_stub;
+ tool->throttle = process_event_stub;
+ tool->unthrottle = process_event_stub;
+ tool->attr = process_event_synth_attr_stub;
+ tool->event_update = process_event_synth_event_update_stub;
+ tool->tracing_data = process_event_synth_tracing_data_stub;
+ tool->build_id = process_event_op2_stub;
+
+ if (ordered_events)
+ tool->finished_round = perf_event__process_finished_round;
+ else
+ tool->finished_round = process_finished_round_stub;
+
+ tool->id_index = process_event_op2_stub;
+ tool->auxtrace_info = process_event_op2_stub;
+ tool->auxtrace = process_event_auxtrace_stub;
+ tool->auxtrace_error = process_event_op2_stub;
+ tool->thread_map = process_event_thread_map_stub;
+ tool->cpu_map = process_event_cpu_map_stub;
+ tool->stat_config = process_event_stat_config_stub;
+ tool->stat = process_stat_stub;
+ tool->stat_round = process_stat_round_stub;
+ tool->time_conv = process_event_time_conv_stub;
+ tool->feature = process_event_op2_stub;
+#ifdef HAVE_ZSTD_SUPPORT
+ tool->compressed = perf_session__process_compressed_event;
+#else
+ tool->compressed = perf_session__process_compressed_event_stub;
+#endif
+ tool->finished_init = process_event_op2_stub;
+}
+
+bool perf_tool__compressed_is_stub(const struct perf_tool *tool)
+{
+ return tool->compressed == perf_session__process_compressed_event_stub;
+}
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index c957fb849ac6..db1c7642b0d1 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -15,14 +15,14 @@ struct perf_tool;
struct machine;
struct ordered_events;
-typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
+typedef int (*event_sample)(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel, struct machine *machine);
-typedef int (*event_op)(struct perf_tool *tool, union perf_event *event,
+typedef int (*event_op)(const struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine);
-typedef int (*event_attr_op)(struct perf_tool *tool,
+typedef int (*event_attr_op)(const struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist);
@@ -31,7 +31,7 @@ typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data,
const char *str);
-typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
+typedef int (*event_oe)(const struct perf_tool *tool, union perf_event *event,
struct ordered_events *oe);
enum show_feature_header {
@@ -85,7 +85,18 @@ struct perf_tool {
bool namespace_events;
bool cgroup_events;
bool no_warn;
+ bool dont_split_sample_group;
enum show_feature_header show_feat_hdr;
};
+void perf_tool__init(struct perf_tool *tool, bool ordered_events);
+
+bool perf_tool__compressed_is_stub(const struct perf_tool *tool);
+
+int process_event_sample_stub(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine);
+
#endif /* __PERF_TOOL_H */
diff --git a/tools/perf/util/trace_augment.h b/tools/perf/util/trace_augment.h
new file mode 100644
index 000000000000..57a3e5045937
--- /dev/null
+++ b/tools/perf/util/trace_augment.h
@@ -0,0 +1,6 @@
+#ifndef TRACE_AUGMENT_H
+#define TRACE_AUGMENT_H
+
+#define TRACE_AUG_MAX_BUF 32 /* for buffer augmentation in perf trace */
+
+#endif
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
index f19791d46e99..2e33a20e1e1b 100644
--- a/tools/perf/util/tsc.c
+++ b/tools/perf/util/tsc.c
@@ -72,7 +72,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
}
int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
- struct perf_tool *tool,
+ const struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine)
{
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index f6a6f6a91030..16c2b03831f3 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -413,7 +413,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
__func__,
dso__symsrc_filename(dso),
debuglink);
- zfree(&dso__symsrc_filename(dso));
+ dso__free_symsrc_filename(dso);
}
dso__set_symsrc_filename(dso, debuglink);
} else {
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 4f561e5e4162..9d55a13787ce 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -325,9 +325,15 @@ int perf_event_paranoid(void)
bool perf_event_paranoid_check(int max_level)
{
- return perf_cap__capable(CAP_SYS_ADMIN) ||
- perf_cap__capable(CAP_PERFMON) ||
- perf_event_paranoid() <= max_level;
+ bool used_root;
+
+ if (perf_cap__capable(CAP_SYS_ADMIN, &used_root))
+ return true;
+
+ if (!used_root && perf_cap__capable(CAP_PERFMON, &used_root))
+ return true;
+
+ return perf_event_paranoid() <= max_level;
}
static int
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 1b6f8f6db7aa..c12f5d8c4bf6 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -308,8 +308,10 @@ static struct dso *machine__find_vdso(struct machine *machine,
if (!dso) {
dso = dsos__find(&machine->dsos, DSO__NAME_VDSO,
true);
- if (dso && dso_type != dso__type(dso, machine))
+ if (dso && dso_type != dso__type(dso, machine)) {
+ dso__put(dso);
dso = NULL;
+ }
}
break;
case DSO__TYPE_X32BIT: