summaryrefslogtreecommitdiff
path: root/tools/perf/pmu-events
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/pmu-events')
-rw-r--r--tools/perf/pmu-events/Build6
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json3
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json120
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json362
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json12
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json8
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json18
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json155
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json45
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/fp_operation.json22
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json10
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json143
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json54
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1i_cache.json14
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json50
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json22
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json10
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json39
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json365
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json23
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json30
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json12
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json110
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json30
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/sve.json50
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json66
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json27
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json373
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json20
-rw-r--r--tools/perf/pmu-events/arch/arm64/sbsa.json24
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/cache.json47
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/floating_point.json66
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/frontend.json197
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/marked.json224
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/memory.json93
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/metrics.json89
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/others.json210
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pipeline.json292
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pmc.json198
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/translation.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv4
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/cache.json165
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/frontend.json56
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/memory.json80
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/other.json16
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json159
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/other.json18
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json10
-rw-r--r--tools/perf/pmu-events/empty-pmu-events.c49
-rwxr-xr-xtools/perf/pmu-events/jevents.py330
-rw-r--r--tools/perf/pmu-events/metric.py17
-rw-r--r--tools/perf/pmu-events/pmu-events.h15
60 files changed, 3195 insertions, 1468 deletions
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 150765f2baee..1d18bb89402e 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -35,3 +35,9 @@ $(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_L
$(call rule_mkdir)
$(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@
endif
+
+# pmu-events.c file is generated in the OUTPUT directory so it needs a
+# separate rule to depend on it properly
+$(OUTPUT)pmu-events/pmu-events.o: $(PMU_EVENTS_C)
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
index fc0633054211..7a2b7b200f14 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
@@ -93,9 +93,6 @@
"ArchStdEvent": "L1D_CACHE_LMISS_RD"
},
{
- "ArchStdEvent": "L1D_CACHE_LMISS"
- },
- {
"ArchStdEvent": "L1I_CACHE_LMISS"
},
{
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
index 95c30243f2b2..88b23b85e33c 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
@@ -534,66 +534,6 @@
"BriefDescription": "L2D OTB allocate"
},
{
- "PublicDescription": "DTLB Translation cache hit on S1L2 walk cache entry",
- "EventCode": "0xD801",
- "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK",
- "BriefDescription": "DTLB Translation cache hit on S1L2 walk cache entry"
- },
- {
- "PublicDescription": "DTLB Translation cache hit on S1L1 walk cache entry",
- "EventCode": "0xD802",
- "EventName": "MMU_D_TRANS_CACHE_HIT_S1L1_WALK",
- "BriefDescription": "DTLB Translation cache hit on S1L1 walk cache entry"
- },
- {
- "PublicDescription": "DTLB Translation cache hit on S1L0 walk cache entry",
- "EventCode": "0xD803",
- "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK",
- "BriefDescription": "DTLB Translation cache hit on S1L0 walk cache entry"
- },
- {
- "PublicDescription": "DTLB Translation cache hit on S2L2 walk cache entry",
- "EventCode": "0xD804",
- "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK",
- "BriefDescription": "DTLB Translation cache hit on S2L2 walk cache entry"
- },
- {
- "PublicDescription": "DTLB Translation cache hit on S2L1 walk cache entry",
- "EventCode": "0xD805",
- "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK",
- "BriefDescription": "DTLB Translation cache hit on S2L1 walk cache entry"
- },
- {
- "PublicDescription": "DTLB Translation cache hit on S2L0 walk cache entry",
- "EventCode": "0xD806",
- "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK",
- "BriefDescription": "DTLB Translation cache hit on S2L0 walk cache entry"
- },
- {
- "PublicDescription": "D-side S1 Page walk cache lookup",
- "EventCode": "0xD807",
- "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP",
- "BriefDescription": "D-side S1 Page walk cache lookup"
- },
- {
- "PublicDescription": "D-side S1 Page walk cache refill",
- "EventCode": "0xD808",
- "EventName": "MMU_D_S1_WALK_CACHE_REFILL",
- "BriefDescription": "D-side S1 Page walk cache refill"
- },
- {
- "PublicDescription": "D-side S2 Page walk cache lookup",
- "EventCode": "0xD809",
- "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP",
- "BriefDescription": "D-side S2 Page walk cache lookup"
- },
- {
- "PublicDescription": "D-side S2 Page walk cache refill",
- "EventCode": "0xD80A",
- "EventName": "MMU_D_S2_WALK_CACHE_REFILL",
- "BriefDescription": "D-side S2 Page walk cache refill"
- },
- {
"PublicDescription": "D-side Stage1 tablewalk fault",
"EventCode": "0xD80B",
"EventName": "MMU_D_S1_WALK_FAULT",
@@ -618,66 +558,6 @@
"BriefDescription": "L2I OTB allocate"
},
{
- "PublicDescription": "ITLB Translation cache hit on S1L2 walk cache entry",
- "EventCode": "0xD901",
- "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK",
- "BriefDescription": "ITLB Translation cache hit on S1L2 walk cache entry"
- },
- {
- "PublicDescription": "ITLB Translation cache hit on S1L1 walk cache entry",
- "EventCode": "0xD902",
- "EventName": "MMU_I_TRANS_CACHE_HIT_S1L1_WALK",
- "BriefDescription": "ITLB Translation cache hit on S1L1 walk cache entry"
- },
- {
- "PublicDescription": "ITLB Translation cache hit on S1L0 walk cache entry",
- "EventCode": "0xD903",
- "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK",
- "BriefDescription": "ITLB Translation cache hit on S1L0 walk cache entry"
- },
- {
- "PublicDescription": "ITLB Translation cache hit on S2L2 walk cache entry",
- "EventCode": "0xD904",
- "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK",
- "BriefDescription": "ITLB Translation cache hit on S2L2 walk cache entry"
- },
- {
- "PublicDescription": "ITLB Translation cache hit on S2L1 walk cache entry",
- "EventCode": "0xD905",
- "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK",
- "BriefDescription": "ITLB Translation cache hit on S2L1 walk cache entry"
- },
- {
- "PublicDescription": "ITLB Translation cache hit on S2L0 walk cache entry",
- "EventCode": "0xD906",
- "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK",
- "BriefDescription": "ITLB Translation cache hit on S2L0 walk cache entry"
- },
- {
- "PublicDescription": "I-side S1 Page walk cache lookup",
- "EventCode": "0xD907",
- "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP",
- "BriefDescription": "I-side S1 Page walk cache lookup"
- },
- {
- "PublicDescription": "I-side S1 Page walk cache refill",
- "EventCode": "0xD908",
- "EventName": "MMU_I_S1_WALK_CACHE_REFILL",
- "BriefDescription": "I-side S1 Page walk cache refill"
- },
- {
- "PublicDescription": "I-side S2 Page walk cache lookup",
- "EventCode": "0xD909",
- "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP",
- "BriefDescription": "I-side S2 Page walk cache lookup"
- },
- {
- "PublicDescription": "I-side S2 Page walk cache refill",
- "EventCode": "0xD90A",
- "EventName": "MMU_I_S2_WALK_CACHE_REFILL",
- "BriefDescription": "I-side S2 Page walk cache refill"
- },
- {
"PublicDescription": "I-side Stage1 tablewalk fault",
"EventCode": "0xD90B",
"EventName": "MMU_I_S1_WALK_FAULT",
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
new file mode 100644
index 000000000000..1e7e8901a445
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
@@ -0,0 +1,362 @@
+[
+ {
+ "MetricExpr": "BR_MIS_PRED / BR_PRED",
+ "BriefDescription": "Branch predictor misprediction rate. May not count branches that are never resolved because they are in the misprediction shadow of an earlier branch",
+ "MetricGroup": "Branch Prediction",
+ "MetricName": "Misprediction"
+ },
+ {
+ "MetricExpr": "BR_MIS_PRED_RETIRED / BR_RETIRED",
+ "BriefDescription": "Branch predictor misprediction rate",
+ "MetricGroup": "Branch Prediction",
+ "MetricName": "Misprediction (retired)"
+ },
+ {
+ "MetricExpr": "BUS_ACCESS / ( BUS_CYCLES * 1)",
+ "BriefDescription": "Core-to-uncore bus utilization",
+ "MetricGroup": "Bus",
+ "MetricName": "Bus utilization"
+ },
+ {
+ "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE",
+ "BriefDescription": "L1D cache miss rate",
+ "MetricGroup": "Cache",
+ "MetricName": "L1D cache miss"
+ },
+ {
+ "MetricExpr": "L1D_CACHE_LMISS_RD / L1D_CACHE_RD",
+ "BriefDescription": "L1D cache read miss rate",
+ "MetricGroup": "Cache",
+ "MetricName": "L1D cache read miss"
+ },
+ {
+ "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE",
+ "BriefDescription": "L1I cache miss rate",
+ "MetricGroup": "Cache",
+ "MetricName": "L1I cache miss"
+ },
+ {
+ "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE",
+ "BriefDescription": "L2 cache miss rate",
+ "MetricGroup": "Cache",
+ "MetricName": "L2 cache miss"
+ },
+ {
+ "MetricExpr": "L1I_CACHE_LMISS / L1I_CACHE",
+ "BriefDescription": "L1I cache read miss rate",
+ "MetricGroup": "Cache",
+ "MetricName": "L1I cache read miss"
+ },
+ {
+ "MetricExpr": "L2D_CACHE_LMISS_RD / L2D_CACHE_RD",
+ "BriefDescription": "L2 cache read miss rate",
+ "MetricGroup": "Cache",
+ "MetricName": "L2 cache read miss"
+ },
+ {
+ "MetricExpr": "(L1D_CACHE_LMISS_RD * 1000) / INST_RETIRED",
+ "BriefDescription": "Misses per thousand instructions (data)",
+ "MetricGroup": "Cache",
+ "MetricName": "MPKI data"
+ },
+ {
+ "MetricExpr": "(L1I_CACHE_LMISS * 1000) / INST_RETIRED",
+ "BriefDescription": "Misses per thousand instructions (instruction)",
+ "MetricGroup": "Cache",
+ "MetricName": "MPKI instruction"
+ },
+ {
+ "MetricExpr": "ASE_SPEC / OP_SPEC",
+ "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) operations",
+ "MetricGroup": "Instruction",
+ "MetricName": "ASE mix"
+ },
+ {
+ "MetricExpr": "CRYPTO_SPEC / OP_SPEC",
+ "BriefDescription": "Proportion of crypto data processing operations",
+ "MetricGroup": "Instruction",
+ "MetricName": "Crypto mix"
+ },
+ {
+ "MetricExpr": "VFP_SPEC / (duration_time *1000000000)",
+ "BriefDescription": "Giga-floating point operations per second",
+ "MetricGroup": "Instruction",
+ "MetricName": "GFLOPS_ISSUED"
+ },
+ {
+ "MetricExpr": "DP_SPEC / OP_SPEC",
+ "BriefDescription": "Proportion of integer data processing operations",
+ "MetricGroup": "Instruction",
+ "MetricName": "Integer mix"
+ },
+ {
+ "MetricExpr": "INST_RETIRED / CPU_CYCLES",
+ "BriefDescription": "Instructions per cycle",
+ "MetricGroup": "Instruction",
+ "MetricName": "IPC"
+ },
+ {
+ "MetricExpr": "LD_SPEC / OP_SPEC",
+ "BriefDescription": "Proportion of load operations",
+ "MetricGroup": "Instruction",
+ "MetricName": "Load mix"
+ },
+ {
+ "MetricExpr": "LDST_SPEC/ OP_SPEC",
+ "BriefDescription": "Proportion of load & store operations",
+ "MetricGroup": "Instruction",
+ "MetricName": "Load-store mix"
+ },
+ {
+ "MetricExpr": "INST_RETIRED / (duration_time * 1000000)",
+ "BriefDescription": "Millions of instructions per second",
+ "MetricGroup": "Instruction",
+ "MetricName": "MIPS_RETIRED"
+ },
+ {
+ "MetricExpr": "INST_SPEC / (duration_time * 1000000)",
+ "BriefDescription": "Millions of instructions per second",
+ "MetricGroup": "Instruction",
+ "MetricName": "MIPS_UTILIZATION"
+ },
+ {
+ "MetricExpr": "PC_WRITE_SPEC / OP_SPEC",
+ "BriefDescription": "Proportion of software change of PC operations",
+ "MetricGroup": "Instruction",
+ "MetricName": "PC write mix"
+ },
+ {
+ "MetricExpr": "ST_SPEC / OP_SPEC",
+ "BriefDescription": "Proportion of store operations",
+ "MetricGroup": "Instruction",
+ "MetricName": "Store mix"
+ },
+ {
+ "MetricExpr": "VFP_SPEC / OP_SPEC",
+ "BriefDescription": "Proportion of FP operations",
+ "MetricGroup": "Instruction",
+ "MetricName": "VFP mix"
+ },
+ {
+ "MetricExpr": "1 - (OP_RETIRED/ (CPU_CYCLES * 4))",
+ "BriefDescription": "Proportion of slots lost",
+ "MetricGroup": "Speculation / TDA",
+ "MetricName": "CPU lost"
+ },
+ {
+ "MetricExpr": "OP_RETIRED/ (CPU_CYCLES * 4)",
+ "BriefDescription": "Proportion of slots retiring",
+ "MetricGroup": "Speculation / TDA",
+ "MetricName": "CPU utilization"
+ },
+ {
+ "MetricExpr": "OP_RETIRED - OP_SPEC",
+ "BriefDescription": "Operations lost due to misspeculation",
+ "MetricGroup": "Speculation / TDA",
+ "MetricName": "Operations lost"
+ },
+ {
+ "MetricExpr": "1 - (OP_RETIRED / OP_SPEC)",
+ "BriefDescription": "Proportion of operations lost",
+ "MetricGroup": "Speculation / TDA",
+ "MetricName": "Operations lost (ratio)"
+ },
+ {
+ "MetricExpr": "OP_RETIRED / OP_SPEC",
+ "BriefDescription": "Proportion of operations retired",
+ "MetricGroup": "Speculation / TDA",
+ "MetricName": "Operations retired"
+ },
+ {
+ "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES",
+ "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and cache miss",
+ "MetricGroup": "Stall",
+ "MetricName": "Stall backend cache cycles"
+ },
+ {
+ "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES",
+ "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and resource full",
+ "MetricGroup": "Stall",
+ "MetricName": "Stall backend resource cycles"
+ },
+ {
+ "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES",
+ "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and TLB miss",
+ "MetricGroup": "Stall",
+ "MetricName": "Stall backend tlb cycles"
+ },
+ {
+ "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES",
+ "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss",
+ "MetricGroup": "Stall",
+ "MetricName": "Stall frontend cache cycles"
+ },
+ {
+ "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES",
+ "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss",
+ "MetricGroup": "Stall",
+ "MetricName": "Stall frontend tlb cycles"
+ },
+ {
+ "MetricExpr": "DTLB_WALK / L1D_TLB",
+ "BriefDescription": "D-side walk per d-side translation request",
+ "MetricGroup": "TLB",
+ "MetricName": "DTLB walks"
+ },
+ {
+ "MetricExpr": "ITLB_WALK / L1I_TLB",
+ "BriefDescription": "I-side walk per i-side translation request",
+ "MetricGroup": "TLB",
+ "MetricName": "ITLB walks"
+ },
+ {
+ "MetricExpr": "STALL_SLOT_BACKEND / (CPU_CYCLES * 4)",
+ "BriefDescription": "Fraction of slots backend bound",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "backend"
+ },
+ {
+ "MetricExpr": "1 - (retiring + lost + backend)",
+ "BriefDescription": "Fraction of slots frontend bound",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "frontend"
+ },
+ {
+ "MetricExpr": "((OP_SPEC - OP_RETIRED) / (CPU_CYCLES * 4))",
+ "BriefDescription": "Fraction of slots lost due to misspeculation",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "lost"
+ },
+ {
+ "MetricExpr": "(OP_RETIRED / (CPU_CYCLES * 4))",
+ "BriefDescription": "Fraction of slots retiring, useful work",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "retiring"
+ },
+ {
+ "MetricExpr": "backend - backend_memory",
+ "BriefDescription": "Fraction of slots the CPU was stalled due to backend non-memory subsystem issues",
+ "MetricGroup": "TopDownL2",
+ "MetricName": "backend_core"
+ },
+ {
+ "MetricExpr": "(STALL_BACKEND_TLB + STALL_BACKEND_CACHE + STALL_BACKEND_MEM) / CPU_CYCLES ",
+ "BriefDescription": "Fraction of slots the CPU was stalled due to backend memory subsystem issues (cache/tlb miss)",
+ "MetricGroup": "TopDownL2",
+ "MetricName": "backend_memory"
+ },
+ {
+ "MetricExpr": " (BR_MIS_PRED_RETIRED / GPC_FLUSH) * lost",
+ "BriefDescription": "Fraction of slots lost due to branch misprediciton",
+ "MetricGroup": "TopDownL2",
+ "MetricName": "branch_mispredict"
+ },
+ {
+ "MetricExpr": "frontend - frontend_latency",
+ "BriefDescription": "Fraction of slots the CPU did not dispatch at full bandwidth - able to dispatch partial slots only (1, 2, or 3 uops)",
+ "MetricGroup": "TopDownL2",
+ "MetricName": "frontend_bandwidth"
+ },
+ {
+ "MetricExpr": "(STALL_FRONTEND - ((STALL_SLOT_FRONTEND - (frontend * CPU_CYCLES * 4)) / 4)) / CPU_CYCLES",
+ "BriefDescription": "Fraction of slots the CPU was stalled due to frontend latency issues (cache/tlb miss); nothing to dispatch",
+ "MetricGroup": "TopDownL2",
+ "MetricName": "frontend_latency"
+ },
+ {
+ "MetricExpr": "lost - branch_mispredict",
+ "BriefDescription": "Fraction of slots lost due to other/non-branch misprediction misspeculation",
+ "MetricGroup": "TopDownL2",
+ "MetricName": "other_clears"
+ },
+ {
+ "MetricExpr": "(IXU_NUM_UOPS_ISSUED + FSU_ISSUED) / (CPU_CYCLES * 6)",
+ "BriefDescription": "Fraction of execute slots utilized",
+ "MetricGroup": "TopDownL2",
+ "MetricName": "pipe_utilization"
+ },
+ {
+ "MetricExpr": "STALL_BACKEND_MEM / CPU_CYCLES",
+ "BriefDescription": "Fraction of cycles the CPU was stalled due to data L2 cache miss",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "d_cache_l2_miss"
+ },
+ {
+ "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES",
+ "BriefDescription": "Fraction of cycles the CPU was stalled due to data cache miss",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "d_cache_miss"
+ },
+ {
+ "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES",
+ "BriefDescription": "Fraction of cycles the CPU was stalled due to data TLB miss",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "d_tlb_miss"
+ },
+ {
+ "MetricExpr": "FSU_ISSUED / (CPU_CYCLES * 2)",
+ "BriefDescription": "Fraction of FSU execute slots utilized",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "fsu_pipe_utilization"
+ },
+ {
+ "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES",
+ "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction cache miss",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "i_cache_miss"
+ },
+ {
+ "MetricExpr": " STALL_FRONTEND_TLB / CPU_CYCLES ",
+ "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction TLB miss",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "i_tlb_miss"
+ },
+ {
+ "MetricExpr": "IXU_NUM_UOPS_ISSUED / (CPU_CYCLES / 4)",
+ "BriefDescription": "Fraction of IXU execute slots utilized",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "ixu_pipe_utilization"
+ },
+ {
+ "MetricExpr": "IDR_STALL_FLUSH / CPU_CYCLES",
+ "BriefDescription": "Fraction of cycles the CPU was stalled due to flush recovery",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "recovery"
+ },
+ {
+ "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES",
+ "BriefDescription": "Fraction of cycles the CPU was stalled due to core resource shortage",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "resource"
+ },
+ {
+ "MetricExpr": "IDR_STALL_FSU_SCHED / CPU_CYCLES ",
+ "BriefDescription": "Fraction of cycles the CPU was stalled and FSU was full",
+ "MetricGroup": "TopDownL4",
+ "MetricName": "stall_fsu_sched"
+ },
+ {
+ "MetricExpr": "IDR_STALL_IXU_SCHED / CPU_CYCLES ",
+ "BriefDescription": "Fraction of cycles the CPU was stalled and IXU was full",
+ "MetricGroup": "TopDownL4",
+ "MetricName": "stall_ixu_sched"
+ },
+ {
+ "MetricExpr": "IDR_STALL_LOB_ID / CPU_CYCLES ",
+ "BriefDescription": "Fraction of cycles the CPU was stalled and LOB was full",
+ "MetricGroup": "TopDownL4",
+ "MetricName": "stall_lob_id"
+ },
+ {
+ "MetricExpr": "IDR_STALL_ROB_ID / CPU_CYCLES",
+ "BriefDescription": "Fraction of cycles the CPU was stalled and ROB was full",
+ "MetricGroup": "TopDownL4",
+ "MetricName": "stall_rob_id"
+ },
+ {
+ "MetricExpr": "IDR_STALL_SOB_ID / CPU_CYCLES ",
+ "BriefDescription": "Fraction of cycles the CPU was stalled and SOB was full",
+ "MetricGroup": "TopDownL4",
+ "MetricName": "stall_sob_id"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json
index f9fae15f7555..711028377f3e 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json
@@ -1,18 +1,24 @@
[
{
- "ArchStdEvent": "STALL_FRONTEND"
+ "ArchStdEvent": "STALL_FRONTEND",
+ "Errata": "Errata AC03_CPU_29",
+ "BriefDescription": "Impacted by errata, use metrics instead -"
},
{
"ArchStdEvent": "STALL_BACKEND"
},
{
- "ArchStdEvent": "STALL"
+ "ArchStdEvent": "STALL",
+ "Errata": "Errata AC03_CPU_29",
+ "BriefDescription": "Impacted by errata, use metrics instead -"
},
{
"ArchStdEvent": "STALL_SLOT_BACKEND"
},
{
- "ArchStdEvent": "STALL_SLOT_FRONTEND"
+ "ArchStdEvent": "STALL_SLOT_FRONTEND",
+ "Errata": "Errata AC03_CPU_29",
+ "BriefDescription": "Impacted by errata, use metrics instead -"
},
{
"ArchStdEvent": "STALL_SLOT"
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json
deleted file mode 100644
index 79f2016c53b0..000000000000
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json
+++ /dev/null
@@ -1,8 +0,0 @@
-[
- {
- "ArchStdEvent": "BR_MIS_PRED"
- },
- {
- "ArchStdEvent": "BR_PRED"
- }
-]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json
index 579c1c993d17..2e11a8c4a484 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json
@@ -1,20 +1,18 @@
[
{
- "ArchStdEvent": "CPU_CYCLES"
+ "ArchStdEvent": "BUS_ACCESS",
+ "PublicDescription": "Counts memory transactions issued by the CPU to the external bus, including snoop requests and snoop responses. Each beat of data is counted individually."
},
{
- "ArchStdEvent": "BUS_ACCESS"
+ "ArchStdEvent": "BUS_CYCLES",
+ "PublicDescription": "Counts bus cycles in the CPU. Bus cycles represent a clock cycle in which a transaction could be sent or received on the interface from the CPU to the external bus. Since that interface is driven at the same clock speed as the CPU, this event is a duplicate of CPU_CYCLES."
},
{
- "ArchStdEvent": "BUS_CYCLES"
+ "ArchStdEvent": "BUS_ACCESS_RD",
+ "PublicDescription": "Counts memory read transactions seen on the external bus. Each beat of data is counted individually."
},
{
- "ArchStdEvent": "BUS_ACCESS_RD"
- },
- {
- "ArchStdEvent": "BUS_ACCESS_WR"
- },
- {
- "ArchStdEvent": "CNT_CYCLES"
+ "ArchStdEvent": "BUS_ACCESS_WR",
+ "PublicDescription": "Counts memory write transactions seen on the external bus. Each beat of data is counted individually."
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json
deleted file mode 100644
index 0141f749bff3..000000000000
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json
+++ /dev/null
@@ -1,155 +0,0 @@
-[
- {
- "ArchStdEvent": "L1I_CACHE_REFILL"
- },
- {
- "ArchStdEvent": "L1I_TLB_REFILL"
- },
- {
- "ArchStdEvent": "L1D_CACHE_REFILL"
- },
- {
- "ArchStdEvent": "L1D_CACHE"
- },
- {
- "ArchStdEvent": "L1D_TLB_REFILL"
- },
- {
- "ArchStdEvent": "L1I_CACHE"
- },
- {
- "ArchStdEvent": "L1D_CACHE_WB"
- },
- {
- "ArchStdEvent": "L2D_CACHE"
- },
- {
- "ArchStdEvent": "L2D_CACHE_REFILL"
- },
- {
- "ArchStdEvent": "L2D_CACHE_WB"
- },
- {
- "ArchStdEvent": "L2D_CACHE_ALLOCATE"
- },
- {
- "ArchStdEvent": "L1D_TLB"
- },
- {
- "ArchStdEvent": "L1I_TLB"
- },
- {
- "ArchStdEvent": "L3D_CACHE_ALLOCATE"
- },
- {
- "ArchStdEvent": "L3D_CACHE_REFILL"
- },
- {
- "ArchStdEvent": "L3D_CACHE"
- },
- {
- "ArchStdEvent": "L2D_TLB_REFILL"
- },
- {
- "ArchStdEvent": "L2D_TLB"
- },
- {
- "ArchStdEvent": "DTLB_WALK"
- },
- {
- "ArchStdEvent": "ITLB_WALK"
- },
- {
- "ArchStdEvent": "LL_CACHE_RD"
- },
- {
- "ArchStdEvent": "LL_CACHE_MISS_RD"
- },
- {
- "ArchStdEvent": "L1D_CACHE_LMISS_RD"
- },
- {
- "ArchStdEvent": "L1D_CACHE_RD"
- },
- {
- "ArchStdEvent": "L1D_CACHE_WR"
- },
- {
- "ArchStdEvent": "L1D_CACHE_REFILL_RD"
- },
- {
- "ArchStdEvent": "L1D_CACHE_REFILL_WR"
- },
- {
- "ArchStdEvent": "L1D_CACHE_REFILL_INNER"
- },
- {
- "ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
- },
- {
- "ArchStdEvent": "L1D_CACHE_WB_VICTIM"
- },
- {
- "ArchStdEvent": "L1D_CACHE_WB_CLEAN"
- },
- {
- "ArchStdEvent": "L1D_CACHE_INVAL"
- },
- {
- "ArchStdEvent": "L1D_TLB_REFILL_RD"
- },
- {
- "ArchStdEvent": "L1D_TLB_REFILL_WR"
- },
- {
- "ArchStdEvent": "L1D_TLB_RD"
- },
- {
- "ArchStdEvent": "L1D_TLB_WR"
- },
- {
- "ArchStdEvent": "L2D_CACHE_RD"
- },
- {
- "ArchStdEvent": "L2D_CACHE_WR"
- },
- {
- "ArchStdEvent": "L2D_CACHE_REFILL_RD"
- },
- {
- "ArchStdEvent": "L2D_CACHE_REFILL_WR"
- },
- {
- "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
- },
- {
- "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
- },
- {
- "ArchStdEvent": "L2D_CACHE_INVAL"
- },
- {
- "ArchStdEvent": "L2D_TLB_REFILL_RD"
- },
- {
- "ArchStdEvent": "L2D_TLB_REFILL_WR"
- },
- {
- "ArchStdEvent": "L2D_TLB_RD"
- },
- {
- "ArchStdEvent": "L2D_TLB_WR"
- },
- {
- "ArchStdEvent": "L3D_CACHE_RD"
- },
- {
- "ArchStdEvent": "L1I_CACHE_LMISS"
- },
- {
- "ArchStdEvent": "L2D_CACHE_LMISS_RD"
- },
- {
- "ArchStdEvent": "L3D_CACHE_LMISS_RD"
- }
-]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
index 344a2d552ad5..4404b8e91690 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
@@ -1,47 +1,62 @@
[
{
- "ArchStdEvent": "EXC_TAKEN"
+ "ArchStdEvent": "EXC_TAKEN",
+ "PublicDescription": "Counts any taken architecturally visible exceptions such as IRQ, FIQ, SError, and other synchronous exceptions. Exceptions are counted whether or not they are taken locally."
},
{
- "ArchStdEvent": "MEMORY_ERROR"
+ "ArchStdEvent": "EXC_RETURN",
+ "PublicDescription": "Counts any architecturally executed exception return instructions. Eg: AArch64: ERET"
},
{
- "ArchStdEvent": "EXC_UNDEF"
+ "ArchStdEvent": "EXC_UNDEF",
+ "PublicDescription": "Counts the number of synchronous exceptions which are taken locally that are due to attempting to execute an instruction that is UNDEFINED. Attempting to execute instruction bit patterns that have not been allocated. Attempting to execute instructions when they are disabled. Attempting to execute instructions at an inappropriate Exception level. Attempting to execute an instruction when the value of PSTATE.IL is 1."
},
{
- "ArchStdEvent": "EXC_SVC"
+ "ArchStdEvent": "EXC_SVC",
+ "PublicDescription": "Counts SVC exceptions taken locally."
},
{
- "ArchStdEvent": "EXC_PABORT"
+ "ArchStdEvent": "EXC_PABORT",
+ "PublicDescription": "Counts synchronous exceptions that are taken locally and caused by Instruction Aborts."
},
{
- "ArchStdEvent": "EXC_DABORT"
+ "ArchStdEvent": "EXC_DABORT",
+ "PublicDescription": "Counts exceptions that are taken locally and are caused by data aborts or SErrors. Conditions that could cause those exceptions are attempting to read or write memory where the MMU generates a fault, attempting to read or write memory with a misaligned address, interrupts from the nSEI inputs and internally generated SErrors."
},
{
- "ArchStdEvent": "EXC_IRQ"
+ "ArchStdEvent": "EXC_IRQ",
+ "PublicDescription": "Counts IRQ exceptions including the virtual IRQs that are taken locally."
},
{
- "ArchStdEvent": "EXC_FIQ"
+ "ArchStdEvent": "EXC_FIQ",
+ "PublicDescription": "Counts FIQ exceptions including the virtual FIQs that are taken locally."
},
{
- "ArchStdEvent": "EXC_SMC"
+ "ArchStdEvent": "EXC_SMC",
+ "PublicDescription": "Counts SMC exceptions take to EL3."
},
{
- "ArchStdEvent": "EXC_HVC"
+ "ArchStdEvent": "EXC_HVC",
+ "PublicDescription": "Counts HVC exceptions taken to EL2."
},
{
- "ArchStdEvent": "EXC_TRAP_PABORT"
+ "ArchStdEvent": "EXC_TRAP_PABORT",
+ "PublicDescription": "Counts exceptions which are traps not taken locally and are caused by Instruction Aborts. For example, attempting to execute an instruction with a misaligned PC."
},
{
- "ArchStdEvent": "EXC_TRAP_DABORT"
+ "ArchStdEvent": "EXC_TRAP_DABORT",
+ "PublicDescription": "Counts exceptions which are traps not taken locally and are caused by Data Aborts or SError interrupts. Conditions that could cause those exceptions are:\n\n1. Attempting to read or write memory where the MMU generates a fault,\n2. Attempting to read or write memory with a misaligned address,\n3. Interrupts from the SEI input.\n4. internally generated SErrors."
},
{
- "ArchStdEvent": "EXC_TRAP_OTHER"
+ "ArchStdEvent": "EXC_TRAP_OTHER",
+ "PublicDescription": "Counts the number of synchronous trap exceptions which are not taken locally and are not SVC, SMC, HVC, data aborts, Instruction Aborts, or interrupts."
},
{
- "ArchStdEvent": "EXC_TRAP_IRQ"
+ "ArchStdEvent": "EXC_TRAP_IRQ",
+ "PublicDescription": "Counts IRQ exceptions including the virtual IRQs that are not taken locally."
},
{
- "ArchStdEvent": "EXC_TRAP_FIQ"
+ "ArchStdEvent": "EXC_TRAP_FIQ",
+ "PublicDescription": "Counts FIQs which are not taken locally but taken from EL0, EL1,\n or EL2 to EL3 (which would be the normal behavior for FIQs when not executing\n in EL3)."
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/fp_operation.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/fp_operation.json
new file mode 100644
index 000000000000..cec3435ac766
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/fp_operation.json
@@ -0,0 +1,22 @@
+[
+ {
+ "ArchStdEvent": "FP_HP_SPEC",
+ "PublicDescription": "Counts speculatively executed half precision floating point operations."
+ },
+ {
+ "ArchStdEvent": "FP_SP_SPEC",
+ "PublicDescription": "Counts speculatively executed single precision floating point operations."
+ },
+ {
+ "ArchStdEvent": "FP_DP_SPEC",
+ "PublicDescription": "Counts speculatively executed double precision floating point operations."
+ },
+ {
+ "ArchStdEvent": "FP_SCALE_OPS_SPEC",
+ "PublicDescription": "Counts speculatively executed scalable single precision floating point operations."
+ },
+ {
+ "ArchStdEvent": "FP_FIXED_OPS_SPEC",
+ "PublicDescription": "Counts speculatively executed non-scalable single precision floating point operations."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json
new file mode 100644
index 000000000000..428810f855b8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json
@@ -0,0 +1,10 @@
+[
+ {
+ "ArchStdEvent": "CPU_CYCLES",
+ "PublicDescription": "Counts CPU clock cycles (not timer cycles). The clock measured by this event is defined as the physical clock driving the CPU logic."
+ },
+ {
+ "ArchStdEvent": "CNT_CYCLES",
+ "PublicDescription": "Counts constant frequency cycles"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json
deleted file mode 100644
index e57cd55937c6..000000000000
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json
+++ /dev/null
@@ -1,143 +0,0 @@
-[
- {
- "ArchStdEvent": "SW_INCR"
- },
- {
- "ArchStdEvent": "INST_RETIRED"
- },
- {
- "ArchStdEvent": "EXC_RETURN"
- },
- {
- "ArchStdEvent": "CID_WRITE_RETIRED"
- },
- {
- "ArchStdEvent": "INST_SPEC"
- },
- {
- "ArchStdEvent": "TTBR_WRITE_RETIRED"
- },
- {
- "ArchStdEvent": "BR_RETIRED"
- },
- {
- "ArchStdEvent": "BR_MIS_PRED_RETIRED"
- },
- {
- "ArchStdEvent": "OP_RETIRED"
- },
- {
- "ArchStdEvent": "OP_SPEC"
- },
- {
- "ArchStdEvent": "LDREX_SPEC"
- },
- {
- "ArchStdEvent": "STREX_PASS_SPEC"
- },
- {
- "ArchStdEvent": "STREX_FAIL_SPEC"
- },
- {
- "ArchStdEvent": "STREX_SPEC"
- },
- {
- "ArchStdEvent": "LD_SPEC"
- },
- {
- "ArchStdEvent": "ST_SPEC"
- },
- {
- "ArchStdEvent": "DP_SPEC"
- },
- {
- "ArchStdEvent": "ASE_SPEC"
- },
- {
- "ArchStdEvent": "VFP_SPEC"
- },
- {
- "ArchStdEvent": "PC_WRITE_SPEC"
- },
- {
- "ArchStdEvent": "CRYPTO_SPEC"
- },
- {
- "ArchStdEvent": "BR_IMMED_SPEC"
- },
- {
- "ArchStdEvent": "BR_RETURN_SPEC"
- },
- {
- "ArchStdEvent": "BR_INDIRECT_SPEC"
- },
- {
- "ArchStdEvent": "ISB_SPEC"
- },
- {
- "ArchStdEvent": "DSB_SPEC"
- },
- {
- "ArchStdEvent": "DMB_SPEC"
- },
- {
- "ArchStdEvent": "RC_LD_SPEC"
- },
- {
- "ArchStdEvent": "RC_ST_SPEC"
- },
- {
- "ArchStdEvent": "ASE_INST_SPEC"
- },
- {
- "ArchStdEvent": "SVE_INST_SPEC"
- },
- {
- "ArchStdEvent": "FP_HP_SPEC"
- },
- {
- "ArchStdEvent": "FP_SP_SPEC"
- },
- {
- "ArchStdEvent": "FP_DP_SPEC"
- },
- {
- "ArchStdEvent": "SVE_PRED_SPEC"
- },
- {
- "ArchStdEvent": "SVE_PRED_EMPTY_SPEC"
- },
- {
- "ArchStdEvent": "SVE_PRED_FULL_SPEC"
- },
- {
- "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC"
- },
- {
- "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC"
- },
- {
- "ArchStdEvent": "SVE_LDFF_SPEC"
- },
- {
- "ArchStdEvent": "SVE_LDFF_FAULT_SPEC"
- },
- {
- "ArchStdEvent": "FP_SCALE_OPS_SPEC"
- },
- {
- "ArchStdEvent": "FP_FIXED_OPS_SPEC"
- },
- {
- "ArchStdEvent": "ASE_SVE_INT8_SPEC"
- },
- {
- "ArchStdEvent": "ASE_SVE_INT16_SPEC"
- },
- {
- "ArchStdEvent": "ASE_SVE_INT32_SPEC"
- },
- {
- "ArchStdEvent": "ASE_SVE_INT64_SPEC"
- }
-]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json
new file mode 100644
index 000000000000..da7c129f2569
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json
@@ -0,0 +1,54 @@
+[
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL",
+ "PublicDescription": "Counts level 1 data cache refills caused by speculatively executed load or store operations that missed in the level 1 data cache. This event only counts one event per cache line. This event does not count cache line allocations from preload instructions or from hardware cache prefetching."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE",
+ "PublicDescription": "Counts level 1 data cache accesses from any load/store operations. Atomic operations that resolve in the CPUs caches (near atomic operations) count as both a write access and read access. Each access to a cache line is counted including the multiple accesses caused by single instructions such as LDM or STM. Each access to other level 1 data or unified memory structures, for example refill buffers, write buffers, and write-back buffers, are also counted."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB",
+ "PublicDescription": "Counts write-backs of dirty data from the L1 data cache to the L2 cache. This occurs when either a dirty cache line is evicted from L1 data cache and allocated in the L2 cache or dirty data is written to the L2 and possibly to the next level of cache. This event counts both victim cache line evictions and cache write-backs from snoops or cache maintenance operations. The following cache operations are not counted:\n\n1. Invalidations which do not result in data being transferred out of the L1 (such as evictions of clean data),\n2. Full line writes which write to L2 without writing L1, such as write streaming mode."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_LMISS_RD",
+ "PublicDescription": "Counts cache line refills into the level 1 data cache from any memory read operations, that incurred additional latency."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_RD",
+ "PublicDescription": "Counts level 1 data cache accesses from any load operation. Atomic load operations that resolve in the CPUs caches count as both a write access and read access."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WR",
+ "PublicDescription": "Counts level 1 data cache accesses generated by store operations. This event also counts accesses caused by a DC ZVA (data cache zero, specified by virtual address) instruction. Near atomic operations that resolve in the CPUs caches count as a write access and read access."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+ "PublicDescription": "Counts level 1 data cache refills caused by speculatively executed load instructions where the memory read operation misses in the level 1 data cache. This event only counts one event per cache line."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_WR",
+ "PublicDescription": "Counts level 1 data cache refills caused by speculatively executed store instructions where the memory write operation misses in the level 1 data cache. This event only counts one event per cache line."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_INNER",
+ "PublicDescription": "Counts level 1 data cache refills where the cache line data came from caches inside the immediate cluster of the core."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_OUTER",
+ "PublicDescription": "Counts level 1 data cache refills for which the cache line data came from outside the immediate cluster of the core, like an SLC in the system interconnect or DRAM."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_VICTIM",
+ "PublicDescription": "Counts dirty cache line evictions from the level 1 data cache caused by a new cache line allocation. This event does not count evictions caused by cache maintenance operations."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_CLEAN",
+ "PublicDescription": "Counts write-backs from the level 1 data cache that are a result of a coherency operation made by another CPU. Event count includes cache maintenance operations."
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_INVAL",
+ "PublicDescription": "Counts each explicit invalidation of a cache line in the level 1 data cache caused by:\n\n- Cache Maintenance Operations (CMO) that operate by a virtual address.\n- Broadcast cache coherency operations from another CPU in the system.\n\nThis event does not count for the following conditions:\n\n1. A cache refill invalidates a cache line.\n2. A CMO which is executed on that CPU and invalidates a cache line specified by set/way.\n\nNote that CMOs that operate by set/way cannot be broadcast from one CPU to another."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1i_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1i_cache.json
new file mode 100644
index 000000000000..633f1030359d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1i_cache.json
@@ -0,0 +1,14 @@
+[
+ {
+ "ArchStdEvent": "L1I_CACHE_REFILL",
+ "PublicDescription": "Counts cache line refills in the level 1 instruction cache caused by a missed instruction fetch. Instruction fetches may include accessing multiple instructions, but the single cache line allocation is counted once."
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE",
+ "PublicDescription": "Counts instruction fetches which access the level 1 instruction cache. Instruction cache accesses caused by cache maintenance operations are not counted."
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE_LMISS",
+ "PublicDescription": "Counts cache line refills into the level 1 instruction cache, that incurred additional latency."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json
new file mode 100644
index 000000000000..0e31d0daf88b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json
@@ -0,0 +1,50 @@
+[
+ {
+ "ArchStdEvent": "L2D_CACHE",
+ "PublicDescription": "Counts level 2 cache accesses. level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the first level caches or translation resolutions due to accesses. This event also counts write back of dirty data from level 1 data cache to the L2 cache."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL",
+ "PublicDescription": "Counts cache line refills into the level 2 cache. level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB",
+ "PublicDescription": "Counts write-backs of data from the L2 cache to outside the CPU. This includes snoops to the L2 (from other CPUs) which return data even if the snoops cause an invalidation. L2 cache line invalidations which do not write data outside the CPU and snoops which return data from an L1 cache are not counted. Data would not be written outside the cache when invalidating a clean cache line."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_ALLOCATE",
+ "PublicDescription": "TBD"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_RD",
+ "PublicDescription": "Counts level 2 cache accesses due to memory read operations. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WR",
+ "PublicDescription": "Counts level 2 cache accesses due to memory write operations. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_RD",
+ "PublicDescription": "Counts refills for memory accesses due to memory read operation counted by L2D_CACHE_RD. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_WR",
+ "PublicDescription": "Counts refills for memory accesses due to memory write operation counted by L2D_CACHE_WR. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_VICTIM",
+ "PublicDescription": "Counts evictions from the level 2 cache because of a line being allocated into the L2 cache."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_CLEAN",
+ "PublicDescription": "Counts write-backs from the level 2 cache that are a result of either:\n\n1. Cache maintenance operations,\n\n2. Snoop responses or,\n\n3. Direct cache transfers to another CPU due to a forwarding snoop request."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_INVAL",
+ "PublicDescription": "Counts each explicit invalidation of a cache line in the level 2 cache by cache maintenance operations that operate by a virtual address, or by external coherency operations. This event does not count if either:\n\n1. A cache refill invalidates a cache line or,\n2. A Cache Maintenance Operation (CMO), which invalidates a cache line specified by set/way, is executed on that CPU.\n\nCMOs that operate by set/way cannot be broadcast from one CPU to another."
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_LMISS_RD",
+ "PublicDescription": "Counts cache line refills into the level 2 unified cache from any memory read operations that incurred additional latency."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json
new file mode 100644
index 000000000000..45bfba532df7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json
@@ -0,0 +1,22 @@
+[
+ {
+ "ArchStdEvent": "L3D_CACHE_ALLOCATE",
+ "PublicDescription": "Counts level 3 cache line allocates that do not fetch data from outside the level 3 data or unified cache. For example, allocates due to streaming stores."
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_REFILL",
+ "PublicDescription": "Counts level 3 accesses that receive data from outside the L3 cache."
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE",
+ "PublicDescription": "Counts level 3 cache accesses. level 3 cache is a unified cache for data and instruction accesses. Accesses are for misses in the lower level caches or translation resolutions due to accesses."
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_RD",
+ "PublicDescription": "TBD"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_LMISS_RD",
+ "PublicDescription": "Counts any cache line refill into the level 3 cache from memory read operations that incurred additional latency."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json
new file mode 100644
index 000000000000..bb712d57d58a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json
@@ -0,0 +1,10 @@
+[
+ {
+ "ArchStdEvent": "LL_CACHE_RD",
+ "PublicDescription": "Counts read transactions that were returned from outside the core cluster. This event counts when the system register CPUECTLR.EXTLLC bit is set. This event counts read transactions returned from outside the core if those transactions are either hit in the system level cache or missed in the SLC and are returned from any other external sources."
+ },
+ {
+ "ArchStdEvent": "LL_CACHE_MISS_RD",
+ "PublicDescription": "Counts read transactions that were returned from outside the core cluster but missed in the system level cache. This event counts when the system register CPUECTLR.EXTLLC bit is set. This event counts read transactions returned from outside the core if those transactions are missed in the System level Cache. The data source of the transaction is indicated by a field in the CHI transaction returning to the CPU. This event does not count reads caused by cache maintenance operations."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
index 7b2b21ac150f..106a97f8b2e7 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
@@ -1,41 +1,46 @@
[
{
- "ArchStdEvent": "MEM_ACCESS"
+ "ArchStdEvent": "MEM_ACCESS",
+ "PublicDescription": "Counts memory accesses issued by the CPU load store unit, where those accesses are issued due to load or store operations. This event counts memory accesses no matter whether the data is received from any level of cache hierarchy or external memory. If memory accesses are broken up into smaller transactions than what were specified in the load or store instructions, then the event counts those smaller memory transactions."
},
{
- "ArchStdEvent": "REMOTE_ACCESS"
+ "ArchStdEvent": "MEMORY_ERROR",
+ "PublicDescription": "Counts any detected correctable or uncorrectable physical memory errors (ECC or parity) in protected CPUs RAMs. On the core, this event counts errors in the caches (including data and tag rams). Any detected memory error (from either a speculative and abandoned access, or an architecturally executed access) is counted. Note that errors are only detected when the actual protected memory is accessed by an operation."
},
{
- "ArchStdEvent": "MEM_ACCESS_RD"
+ "ArchStdEvent": "REMOTE_ACCESS",
+ "PublicDescription": "Counts accesses to another chip, which is implemented as a different CMN mesh in the system. If the CHI bus response back to the core indicates that the data source is from another chip (mesh), then the counter is updated. If no data is returned, even if the system snoops another chip/mesh, then the counter is not updated."
},
{
- "ArchStdEvent": "MEM_ACCESS_WR"
+ "ArchStdEvent": "MEM_ACCESS_RD",
+ "PublicDescription": "Counts memory accesses issued by the CPU due to load operations. The event counts any memory load access, no matter whether the data is received from any level of cache hierarchy or external memory. The event also counts atomic load operations. If memory accesses are broken up by the load/store unit into smaller transactions that are issued by the bus interface, then the event counts those smaller transactions."
},
{
- "ArchStdEvent": "UNALIGNED_LD_SPEC"
+ "ArchStdEvent": "MEM_ACCESS_WR",
+ "PublicDescription": "Counts memory accesses issued by the CPU due to store operations. The event counts any memory store access, no matter whether the data is located in any level of cache or external memory. The event also counts atomic load and store operations. If memory accesses are broken up by the load/store unit into smaller transactions that are issued by the bus interface, then the event counts those smaller transactions."
},
{
- "ArchStdEvent": "UNALIGNED_ST_SPEC"
+ "ArchStdEvent": "LDST_ALIGN_LAT",
+ "PublicDescription": "Counts the number of memory read and write accesses in a cycle that incurred additional latency, due to the alignment of the address and the size of data being accessed, which results in store crossing a single cache line."
},
{
- "ArchStdEvent": "UNALIGNED_LDST_SPEC"
+ "ArchStdEvent": "LD_ALIGN_LAT",
+ "PublicDescription": "Counts the number of memory read accesses in a cycle that incurred additional latency, due to the alignment of the address and size of data being accessed, which results in load crossing a single cache line."
},
{
- "ArchStdEvent": "LDST_ALIGN_LAT"
+ "ArchStdEvent": "ST_ALIGN_LAT",
+ "PublicDescription": "Counts the number of memory write access in a cycle that incurred additional latency, due to the alignment of the address and size of data being accessed incurred additional latency."
},
{
- "ArchStdEvent": "LD_ALIGN_LAT"
+ "ArchStdEvent": "MEM_ACCESS_CHECKED",
+ "PublicDescription": "Counts the number of memory read and write accesses in a cycle that are tag checked by the Memory Tagging Extension (MTE)."
},
{
- "ArchStdEvent": "ST_ALIGN_LAT"
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_RD",
+ "PublicDescription": "Counts the number of memory read accesses in a cycle that are tag checked by the Memory Tagging Extension (MTE)."
},
{
- "ArchStdEvent": "MEM_ACCESS_CHECKED"
- },
- {
- "ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
- },
- {
- "ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_WR",
+ "PublicDescription": "Counts the number of memory write accesses in a cycle that is tag checked by the Memory Tagging Extension (MTE)."
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json
index 8ad15b726dca..5f449270b448 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json
@@ -1,272 +1,303 @@
[
{
- "ArchStdEvent": "FRONTEND_BOUND",
- "MetricExpr": "((stall_slot_frontend) if (#slots - 5) else (stall_slot_frontend - cpu_cycles)) / (#slots * cpu_cycles)"
+ "ArchStdEvent": "backend_bound",
+ "MetricExpr": "(100 * ((STALL_SLOT_BACKEND / (CPU_CYCLES * #slots)) - ((BR_MIS_PRED * 3) / CPU_CYCLES)))"
},
{
- "ArchStdEvent": "BAD_SPECULATION",
- "MetricExpr": "(1 - op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))"
+ "MetricName": "backend_stalled_cycles",
+ "MetricExpr": "((STALL_BACKEND / CPU_CYCLES) * 100)",
+ "BriefDescription": "This metric is the percentage of cycles that were stalled due to resource constraints in the backend unit of the processor.",
+ "MetricGroup": "Cycle_Accounting",
+ "ScaleUnit": "1percent of cycles"
},
{
- "ArchStdEvent": "RETIRING",
- "MetricExpr": "(op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))"
+ "ArchStdEvent": "bad_speculation",
+ "MetricExpr": "(100 * (((1 - (OP_RETIRED / OP_SPEC)) * (1 - (((STALL_SLOT) if (strcmp_cpuid_str(0x410fd493) | strcmp_cpuid_str(0x410fd490) ^ 1) else (STALL_SLOT - CPU_CYCLES)) / (CPU_CYCLES * #slots)))) + ((BR_MIS_PRED * 4) / CPU_CYCLES)))"
},
{
- "ArchStdEvent": "BACKEND_BOUND"
+ "MetricName": "branch_misprediction_ratio",
+ "MetricExpr": "(BR_MIS_PRED_RETIRED / BR_RETIRED)",
+ "BriefDescription": "This metric measures the ratio of branches mispredicted to the total number of branches architecturally executed. This gives an indication of the effectiveness of the branch prediction unit.",
+ "MetricGroup": "Miss_Ratio;Branch_Effectiveness",
+ "ScaleUnit": "1per branch"
},
{
- "MetricExpr": "L1D_TLB_REFILL / L1D_TLB",
- "BriefDescription": "The rate of L1D TLB refill to the overall L1D TLB lookups",
- "MetricGroup": "TLB",
- "MetricName": "l1d_tlb_miss_rate",
- "ScaleUnit": "100%"
+ "MetricName": "branch_mpki",
+ "MetricExpr": "((BR_MIS_PRED_RETIRED / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of branch mispredictions per thousand instructions executed.",
+ "MetricGroup": "MPKI;Branch_Effectiveness",
+ "ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "L1I_TLB_REFILL / L1I_TLB",
- "BriefDescription": "The rate of L1I TLB refill to the overall L1I TLB lookups",
- "MetricGroup": "TLB",
- "MetricName": "l1i_tlb_miss_rate",
- "ScaleUnit": "100%"
+ "MetricName": "branch_percentage",
+ "MetricExpr": "(((BR_IMMED_SPEC + BR_INDIRECT_SPEC) / INST_SPEC) * 100)",
+ "BriefDescription": "This metric measures branch operations as a percentage of operations speculatively executed.",
+ "MetricGroup": "Operation_Mix",
+ "ScaleUnit": "1percent of operations"
},
{
- "MetricExpr": "L2D_TLB_REFILL / L2D_TLB",
- "BriefDescription": "The rate of L2D TLB refill to the overall L2D TLB lookups",
- "MetricGroup": "TLB",
- "MetricName": "l2_tlb_miss_rate",
- "ScaleUnit": "100%"
+ "MetricName": "crypto_percentage",
+ "MetricExpr": "((CRYPTO_SPEC / INST_SPEC) * 100)",
+ "BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.",
+ "MetricGroup": "Operation_Mix",
+ "ScaleUnit": "1percent of operations"
},
{
- "MetricExpr": "DTLB_WALK / INST_RETIRED * 1000",
- "BriefDescription": "The rate of TLB Walks per kilo instructions for data accesses",
- "MetricGroup": "TLB",
"MetricName": "dtlb_mpki",
+ "MetricExpr": "((DTLB_WALK / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of data TLB Walks per thousand instructions executed.",
+ "MetricGroup": "MPKI;DTLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "DTLB_WALK / L1D_TLB",
- "BriefDescription": "The rate of DTLB Walks to the overall L1D TLB lookups",
- "MetricGroup": "TLB",
- "MetricName": "dtlb_walk_rate",
- "ScaleUnit": "100%"
+ "MetricName": "dtlb_walk_ratio",
+ "MetricExpr": "(DTLB_WALK / L1D_TLB)",
+ "BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.",
+ "MetricGroup": "Miss_Ratio;DTLB_Effectiveness",
+ "ScaleUnit": "1per TLB access"
},
{
- "MetricExpr": "ITLB_WALK / INST_RETIRED * 1000",
- "BriefDescription": "The rate of TLB Walks per kilo instructions for instruction accesses",
- "MetricGroup": "TLB",
- "MetricName": "itlb_mpki",
- "ScaleUnit": "1MPKI"
+ "ArchStdEvent": "frontend_bound",
+ "MetricExpr": "(100 * ((((STALL_SLOT_FRONTEND) if (strcmp_cpuid_str(0x410fd493) | strcmp_cpuid_str(0x410fd490) ^ 1) else (STALL_SLOT_FRONTEND - CPU_CYCLES)) / (CPU_CYCLES * #slots)) - (BR_MIS_PRED / CPU_CYCLES)))"
},
{
- "MetricExpr": "ITLB_WALK / L1I_TLB",
- "BriefDescription": "The rate of ITLB Walks to the overall L1I TLB lookups",
- "MetricGroup": "TLB",
- "MetricName": "itlb_walk_rate",
- "ScaleUnit": "100%"
+ "MetricName": "frontend_stalled_cycles",
+ "MetricExpr": "((STALL_FRONTEND / CPU_CYCLES) * 100)",
+ "BriefDescription": "This metric is the percentage of cycles that were stalled due to resource constraints in the frontend unit of the processor.",
+ "MetricGroup": "Cycle_Accounting",
+ "ScaleUnit": "1percent of cycles"
},
{
- "MetricExpr": "L1I_CACHE_REFILL / INST_RETIRED * 1000",
- "BriefDescription": "The rate of L1 I-Cache misses per kilo instructions",
- "MetricGroup": "Cache",
- "MetricName": "l1i_cache_mpki",
+ "MetricName": "integer_dp_percentage",
+ "MetricExpr": "((DP_SPEC / INST_SPEC) * 100)",
+ "BriefDescription": "This metric measures scalar integer operations as a percentage of operations speculatively executed.",
+ "MetricGroup": "Operation_Mix",
+ "ScaleUnit": "1percent of operations"
+ },
+ {
+ "MetricName": "ipc",
+ "MetricExpr": "(INST_RETIRED / CPU_CYCLES)",
+ "BriefDescription": "This metric measures the number of instructions retired per cycle.",
+ "MetricGroup": "General",
+ "ScaleUnit": "1per cycle"
+ },
+ {
+ "MetricName": "itlb_mpki",
+ "MetricExpr": "((ITLB_WALK / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of instruction TLB Walks per thousand instructions executed.",
+ "MetricGroup": "MPKI;ITLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE",
- "BriefDescription": "The rate of L1 I-Cache misses to the overall L1 I-Cache",
- "MetricGroup": "Cache",
- "MetricName": "l1i_cache_miss_rate",
- "ScaleUnit": "100%"
+ "MetricName": "itlb_walk_ratio",
+ "MetricExpr": "(ITLB_WALK / L1I_TLB)",
+ "BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. This gives an indication of the effectiveness of the instruction TLB accesses.",
+ "MetricGroup": "Miss_Ratio;ITLB_Effectiveness",
+ "ScaleUnit": "1per TLB access"
+ },
+ {
+ "MetricName": "l1d_cache_miss_ratio",
+ "MetricExpr": "(L1D_CACHE_REFILL / L1D_CACHE)",
+ "BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. This gives an indication of the effectiveness of the level 1 data cache.",
+ "MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness",
+ "ScaleUnit": "1per cache access"
},
{
- "MetricExpr": "L1D_CACHE_REFILL / INST_RETIRED * 1000",
- "BriefDescription": "The rate of L1 D-Cache misses per kilo instructions",
- "MetricGroup": "Cache",
"MetricName": "l1d_cache_mpki",
+ "MetricExpr": "((L1D_CACHE_REFILL / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of level 1 data cache accesses missed per thousand instructions executed.",
+ "MetricGroup": "MPKI;L1D_Cache_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE",
- "BriefDescription": "The rate of L1 D-Cache misses to the overall L1 D-Cache",
- "MetricGroup": "Cache",
- "MetricName": "l1d_cache_miss_rate",
- "ScaleUnit": "100%"
+ "MetricName": "l1d_tlb_miss_ratio",
+ "MetricExpr": "(L1D_TLB_REFILL / L1D_TLB)",
+ "BriefDescription": "This metric measures the ratio of level 1 data TLB accesses missed to the total number of level 1 data TLB accesses. This gives an indication of the effectiveness of the level 1 data TLB.",
+ "MetricGroup": "Miss_Ratio;DTLB_Effectiveness",
+ "ScaleUnit": "1per TLB access"
},
{
- "MetricExpr": "L2D_CACHE_REFILL / INST_RETIRED * 1000",
- "BriefDescription": "The rate of L2 D-Cache misses per kilo instructions",
- "MetricGroup": "Cache",
- "MetricName": "l2d_cache_mpki",
+ "MetricName": "l1d_tlb_mpki",
+ "MetricExpr": "((L1D_TLB_REFILL / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of level 1 instruction TLB accesses missed per thousand instructions executed.",
+ "MetricGroup": "MPKI;DTLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE",
- "BriefDescription": "The rate of L2 D-Cache misses to the overall L2 D-Cache",
- "MetricGroup": "Cache",
- "MetricName": "l2d_cache_miss_rate",
- "ScaleUnit": "100%"
+ "MetricName": "l1i_cache_miss_ratio",
+ "MetricExpr": "(L1I_CACHE_REFILL / L1I_CACHE)",
+ "BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. This gives an indication of the effectiveness of the level 1 instruction cache.",
+ "MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness",
+ "ScaleUnit": "1per cache access"
},
{
- "MetricExpr": "L3D_CACHE_REFILL / INST_RETIRED * 1000",
- "BriefDescription": "The rate of L3 D-Cache misses per kilo instructions",
- "MetricGroup": "Cache",
- "MetricName": "l3d_cache_mpki",
+ "MetricName": "l1i_cache_mpki",
+ "MetricExpr": "((L1I_CACHE_REFILL / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of level 1 instruction cache accesses missed per thousand instructions executed.",
+ "MetricGroup": "MPKI;L1I_Cache_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "L3D_CACHE_REFILL / L3D_CACHE",
- "BriefDescription": "The rate of L3 D-Cache misses to the overall L3 D-Cache",
- "MetricGroup": "Cache",
- "MetricName": "l3d_cache_miss_rate",
- "ScaleUnit": "100%"
+ "MetricName": "l1i_tlb_miss_ratio",
+ "MetricExpr": "(L1I_TLB_REFILL / L1I_TLB)",
+ "BriefDescription": "This metric measures the ratio of level 1 instruction TLB accesses missed to the total number of level 1 instruction TLB accesses. This gives an indication of the effectiveness of the level 1 instruction TLB.",
+ "MetricGroup": "Miss_Ratio;ITLB_Effectiveness",
+ "ScaleUnit": "1per TLB access"
},
{
- "MetricExpr": "LL_CACHE_MISS_RD / INST_RETIRED * 1000",
- "BriefDescription": "The rate of LL Cache read misses per kilo instructions",
- "MetricGroup": "Cache",
- "MetricName": "ll_cache_read_mpki",
+ "MetricName": "l1i_tlb_mpki",
+ "MetricExpr": "((L1I_TLB_REFILL / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of level 1 instruction TLB accesses missed per thousand instructions executed.",
+ "MetricGroup": "MPKI;ITLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "LL_CACHE_MISS_RD / LL_CACHE_RD",
- "BriefDescription": "The rate of LL Cache read misses to the overall LL Cache read",
- "MetricGroup": "Cache",
- "MetricName": "ll_cache_read_miss_rate",
- "ScaleUnit": "100%"
+ "MetricName": "l2_cache_miss_ratio",
+ "MetricExpr": "(L2D_CACHE_REFILL / L2D_CACHE)",
+ "BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.",
+ "MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness",
+ "ScaleUnit": "1per cache access"
},
{
- "MetricExpr": "(LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD",
- "BriefDescription": "The rate of LL Cache read hit to the overall LL Cache read",
- "MetricGroup": "Cache",
- "MetricName": "ll_cache_read_hit_rate",
- "ScaleUnit": "100%"
+ "MetricName": "l2_cache_mpki",
+ "MetricExpr": "((L2D_CACHE_REFILL / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of level 2 unified cache accesses missed per thousand instructions executed. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.",
+ "MetricGroup": "MPKI;L2_Cache_Effectiveness",
+ "ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "BR_MIS_PRED_RETIRED / INST_RETIRED * 1000",
- "BriefDescription": "The rate of branches mis-predicted per kilo instructions",
- "MetricGroup": "Branch",
- "MetricName": "branch_mpki",
+ "MetricName": "l2_tlb_miss_ratio",
+ "MetricExpr": "(L2D_TLB_REFILL / L2D_TLB)",
+ "BriefDescription": "This metric measures the ratio of level 2 unified TLB accesses missed to the total number of level 2 unified TLB accesses. This gives an indication of the effectiveness of the level 2 TLB.",
+ "MetricGroup": "Miss_Ratio;ITLB_Effectiveness;DTLB_Effectiveness",
+ "ScaleUnit": "1per TLB access"
+ },
+ {
+ "MetricName": "l2_tlb_mpki",
+ "MetricExpr": "((L2D_TLB_REFILL / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of level 2 unified TLB accesses missed per thousand instructions executed.",
+ "MetricGroup": "MPKI;ITLB_Effectiveness;DTLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "BR_RETIRED / INST_RETIRED * 1000",
- "BriefDescription": "The rate of branches retired per kilo instructions",
- "MetricGroup": "Branch",
- "MetricName": "branch_pki",
- "ScaleUnit": "1PKI"
+ "MetricName": "ll_cache_read_hit_ratio",
+ "MetricExpr": "((LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD)",
+ "BriefDescription": "This metric measures the ratio of last level cache read accesses hit in the cache to the total number of last level cache accesses. This gives an indication of the effectiveness of the last level cache for read traffic. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a system level cache.",
+ "MetricGroup": "LL_Cache_Effectiveness",
+ "ScaleUnit": "1per cache access"
},
{
- "MetricExpr": "BR_MIS_PRED_RETIRED / BR_RETIRED",
- "BriefDescription": "The rate of branches mis-predited to the overall branches",
- "MetricGroup": "Branch",
- "MetricName": "branch_miss_pred_rate",
- "ScaleUnit": "100%"
+ "MetricName": "ll_cache_read_miss_ratio",
+ "MetricExpr": "(LL_CACHE_MISS_RD / LL_CACHE_RD)",
+ "BriefDescription": "This metric measures the ratio of last level cache read accesses missed to the total number of last level cache accesses. This gives an indication of the effectiveness of the last level cache for read traffic. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a system level cache.",
+ "MetricGroup": "Miss_Ratio;LL_Cache_Effectiveness",
+ "ScaleUnit": "1per cache access"
},
{
- "MetricExpr": "instructions / CPU_CYCLES",
- "BriefDescription": "The average number of instructions executed for each cycle.",
- "MetricGroup": "PEutilization",
- "MetricName": "ipc"
+ "MetricName": "ll_cache_read_mpki",
+ "MetricExpr": "((LL_CACHE_MISS_RD / INST_RETIRED) * 1000)",
+ "BriefDescription": "This metric measures the number of last level cache read accesses missed per thousand instructions executed.",
+ "MetricGroup": "MPKI;LL_Cache_Effectiveness",
+ "ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "ipc / 5",
- "BriefDescription": "IPC percentage of peak. The peak of IPC is 5.",
- "MetricGroup": "PEutilization",
- "MetricName": "ipc_rate",
- "ScaleUnit": "100%"
+ "MetricName": "load_percentage",
+ "MetricExpr": "((LD_SPEC / INST_SPEC) * 100)",
+ "BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.",
+ "MetricGroup": "Operation_Mix",
+ "ScaleUnit": "1percent of operations"
},
{
- "MetricExpr": "INST_RETIRED / CPU_CYCLES",
- "BriefDescription": "Architecturally executed Instructions Per Cycle (IPC)",
- "MetricGroup": "PEutilization",
- "MetricName": "retired_ipc"
+ "ArchStdEvent": "retiring",
+ "MetricExpr": "(100 * ((OP_RETIRED / OP_SPEC) * (1 - (((STALL_SLOT) if (strcmp_cpuid_str(0x410fd493) | strcmp_cpuid_str(0x410fd490) ^ 1) else (STALL_SLOT - CPU_CYCLES)) / (CPU_CYCLES * #slots)))))"
},
{
- "MetricExpr": "INST_SPEC / CPU_CYCLES",
- "BriefDescription": "Speculatively executed Instructions Per Cycle (IPC)",
- "MetricGroup": "PEutilization",
- "MetricName": "spec_ipc"
+ "MetricName": "scalar_fp_percentage",
+ "MetricExpr": "((VFP_SPEC / INST_SPEC) * 100)",
+ "BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.",
+ "MetricGroup": "Operation_Mix",
+ "ScaleUnit": "1percent of operations"
},
{
- "MetricExpr": "OP_RETIRED / OP_SPEC",
- "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)",
- "MetricGroup": "PEutilization",
- "MetricName": "retired_rate",
- "ScaleUnit": "100%"
+ "MetricName": "simd_percentage",
+ "MetricExpr": "((ASE_SPEC / INST_SPEC) * 100)",
+ "BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.",
+ "MetricGroup": "Operation_Mix",
+ "ScaleUnit": "1percent of operations"
},
{
- "MetricExpr": "1 - OP_RETIRED / OP_SPEC",
- "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)",
- "MetricGroup": "PEutilization",
- "MetricName": "wasted_rate",
- "ScaleUnit": "100%"
+ "MetricName": "store_percentage",
+ "MetricExpr": "((ST_SPEC / INST_SPEC) * 100)",
+ "BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.",
+ "MetricGroup": "Operation_Mix",
+ "ScaleUnit": "1percent of operations"
},
{
- "MetricExpr": "OP_RETIRED / OP_SPEC * (1 - (STALL_SLOT if (#slots - 5) else (STALL_SLOT - CPU_CYCLES)) / (#slots * CPU_CYCLES))",
- "BriefDescription": "The truly effective ratio of micro-operations executed by the CPU, which means that misprediction and stall are not included",
- "MetricGroup": "PEutilization",
- "MetricName": "cpu_utilization",
- "ScaleUnit": "100%"
+ "MetricExpr": "L3D_CACHE_REFILL / INST_RETIRED * 1000",
+ "BriefDescription": "The rate of L3 D-Cache misses per kilo instructions",
+ "MetricGroup": "MPKI;L3_Cache_Effectiveness",
+ "MetricName": "l3d_cache_mpki",
+ "ScaleUnit": "1MPKI"
},
{
- "MetricExpr": "LD_SPEC / INST_SPEC",
- "BriefDescription": "The rate of load instructions speculatively executed to overall instructions speclatively executed",
- "MetricGroup": "InstructionMix",
- "MetricName": "load_spec_rate",
+ "MetricExpr": "L3D_CACHE_REFILL / L3D_CACHE",
+ "BriefDescription": "The rate of L3 D-Cache misses to the overall L3 D-Cache",
+ "MetricGroup": "Miss_Ratio;L3_Cache_Effectiveness",
+ "MetricName": "l3d_cache_miss_rate",
"ScaleUnit": "100%"
},
{
- "MetricExpr": "ST_SPEC / INST_SPEC",
- "BriefDescription": "The rate of store instructions speculatively executed to overall instructions speclatively executed",
- "MetricGroup": "InstructionMix",
- "MetricName": "store_spec_rate",
- "ScaleUnit": "100%"
+ "MetricExpr": "BR_RETIRED / INST_RETIRED * 1000",
+ "BriefDescription": "The rate of branches retired per kilo instructions",
+ "MetricGroup": "MPKI;Branch_Effectiveness",
+ "MetricName": "branch_pki",
+ "ScaleUnit": "1PKI"
},
{
- "MetricExpr": "DP_SPEC / INST_SPEC",
- "BriefDescription": "The rate of integer data-processing instructions speculatively executed to overall instructions speclatively executed",
- "MetricGroup": "InstructionMix",
- "MetricName": "data_process_spec_rate",
+ "MetricExpr": "ipc / #slots",
+ "BriefDescription": "IPC percentage of peak. The peak of IPC is the number of slots.",
+ "MetricGroup": "General",
+ "MetricName": "ipc_rate",
"ScaleUnit": "100%"
},
{
- "MetricExpr": "ASE_SPEC / INST_SPEC",
- "BriefDescription": "The rate of advanced SIMD instructions speculatively executed to overall instructions speclatively executed",
- "MetricGroup": "InstructionMix",
- "MetricName": "advanced_simd_spec_rate",
- "ScaleUnit": "100%"
+ "MetricExpr": "INST_SPEC / CPU_CYCLES",
+ "BriefDescription": "Speculatively executed Instructions Per Cycle (IPC)",
+ "MetricGroup": "General",
+ "MetricName": "spec_ipc"
},
{
- "MetricExpr": "VFP_SPEC / INST_SPEC",
- "BriefDescription": "The rate of floating point instructions speculatively executed to overall instructions speclatively executed",
- "MetricGroup": "InstructionMix",
- "MetricName": "float_point_spec_rate",
+ "MetricExpr": "OP_RETIRED / OP_SPEC",
+ "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)",
+ "MetricGroup": "General",
+ "MetricName": "retired_rate",
"ScaleUnit": "100%"
},
{
- "MetricExpr": "CRYPTO_SPEC / INST_SPEC",
- "BriefDescription": "The rate of crypto instructions speculatively executed to overall instructions speclatively executed",
- "MetricGroup": "InstructionMix",
- "MetricName": "crypto_spec_rate",
+ "MetricExpr": "1 - OP_RETIRED / OP_SPEC",
+ "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)",
+ "MetricGroup": "General",
+ "MetricName": "wasted_rate",
"ScaleUnit": "100%"
},
{
"MetricExpr": "BR_IMMED_SPEC / INST_SPEC",
- "BriefDescription": "The rate of branch immediate instructions speculatively executed to overall instructions speclatively executed",
- "MetricGroup": "InstructionMix",
+ "BriefDescription": "The rate of branch immediate instructions speculatively executed to overall instructions speculatively executed",
+ "MetricGroup": "Operation_Mix",
"MetricName": "branch_immed_spec_rate",
"ScaleUnit": "100%"
},
{
"MetricExpr": "BR_RETURN_SPEC / INST_SPEC",
- "BriefDescription": "The rate of procedure return instructions speculatively executed to overall instructions speclatively executed",
- "MetricGroup": "InstructionMix",
+ "BriefDescription": "The rate of procedure return instructions speculatively executed to overall instructions speculatively executed",
+ "MetricGroup": "Operation_Mix",
"MetricName": "branch_return_spec_rate",
"ScaleUnit": "100%"
},
{
"MetricExpr": "BR_INDIRECT_SPEC / INST_SPEC",
- "BriefDescription": "The rate of indirect branch instructions speculatively executed to overall instructions speclatively executed",
- "MetricGroup": "InstructionMix",
+ "BriefDescription": "The rate of indirect branch instructions speculatively executed to overall instructions speculatively executed",
+ "MetricGroup": "Operation_Mix",
"MetricName": "branch_indirect_spec_rate",
"ScaleUnit": "100%"
}
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json
deleted file mode 100644
index f9fae15f7555..000000000000
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json
+++ /dev/null
@@ -1,23 +0,0 @@
-[
- {
- "ArchStdEvent": "STALL_FRONTEND"
- },
- {
- "ArchStdEvent": "STALL_BACKEND"
- },
- {
- "ArchStdEvent": "STALL"
- },
- {
- "ArchStdEvent": "STALL_SLOT_BACKEND"
- },
- {
- "ArchStdEvent": "STALL_SLOT_FRONTEND"
- },
- {
- "ArchStdEvent": "STALL_SLOT"
- },
- {
- "ArchStdEvent": "STALL_BACKEND_MEM"
- }
-]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json
new file mode 100644
index 000000000000..f297b049b62f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json
@@ -0,0 +1,30 @@
+[
+ {
+ "ArchStdEvent": "SW_INCR",
+ "PublicDescription": "Counts software writes to the PMSWINC_EL0 (software PMU increment) register. The PMSWINC_EL0 register is a manually updated counter for use by application software.\n\nThis event could be used to measure any user program event, such as accesses to a particular data structure (by writing to the PMSWINC_EL0 register each time the data structure is accessed).\n\nTo use the PMSWINC_EL0 register and event, developers must insert instructions that write to the PMSWINC_EL0 register into the source code.\n\nSince the SW_INCR event records writes to the PMSWINC_EL0 register, there is no need to do a read/increment/write sequence to the PMSWINC_EL0 register."
+ },
+ {
+ "ArchStdEvent": "INST_RETIRED",
+ "PublicDescription": "Counts instructions that have been architecturally executed."
+ },
+ {
+ "ArchStdEvent": "CID_WRITE_RETIRED",
+ "PublicDescription": "Counts architecturally executed writes to the CONTEXTIDR register, which usually contain the kernel PID and can be output with hardware trace."
+ },
+ {
+ "ArchStdEvent": "TTBR_WRITE_RETIRED",
+ "PublicDescription": "Counts architectural writes to TTBR0/1_EL1. If virtualization host extensions are enabled (by setting the HCR_EL2.E2H bit to 1), then accesses to TTBR0/1_EL1 that are redirected to TTBR0/1_EL2, or accesses to TTBR0/1_EL12, are counted. TTBRn registers are typically updated when the kernel is swapping user-space threads or applications."
+ },
+ {
+ "ArchStdEvent": "BR_RETIRED",
+ "PublicDescription": "Counts architecturally executed branches, whether the branch is taken or not. Instructions that explicitly write to the PC are also counted."
+ },
+ {
+ "ArchStdEvent": "BR_MIS_PRED_RETIRED",
+ "PublicDescription": "Counts branches counted by BR_RETIRED which were mispredicted and caused a pipeline flush."
+ },
+ {
+ "ArchStdEvent": "OP_RETIRED",
+ "PublicDescription": "Counts micro-operations that are architecturally executed. This is a count of number of micro-operations retired from the commit queue in a single cycle."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json
index 20f2165c85fe..5de8b0f3a440 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json
@@ -1,14 +1,18 @@
[
{
- "ArchStdEvent": "SAMPLE_POP"
+ "ArchStdEvent": "SAMPLE_POP",
+ "PublicDescription": "Counts statistical profiling sample population, the count of all operations that could be sampled but may or may not be chosen for sampling."
},
{
- "ArchStdEvent": "SAMPLE_FEED"
+ "ArchStdEvent": "SAMPLE_FEED",
+ "PublicDescription": "Counts statistical profiling samples taken for sampling."
},
{
- "ArchStdEvent": "SAMPLE_FILTRATE"
+ "ArchStdEvent": "SAMPLE_FILTRATE",
+ "PublicDescription": "Counts statistical profiling samples taken which are not removed by filtering."
},
{
- "ArchStdEvent": "SAMPLE_COLLISION"
+ "ArchStdEvent": "SAMPLE_COLLISION",
+ "PublicDescription": "Counts statistical profiling samples that have collided with a previous sample and so therefore not taken."
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json
new file mode 100644
index 000000000000..1af961f8a6c8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json
@@ -0,0 +1,110 @@
+[
+ {
+ "ArchStdEvent": "BR_MIS_PRED",
+ "PublicDescription": "Counts branches which are speculatively executed and mispredicted."
+ },
+ {
+ "ArchStdEvent": "BR_PRED",
+ "PublicDescription": "Counts branches speculatively executed and were predicted right."
+ },
+ {
+ "ArchStdEvent": "INST_SPEC",
+ "PublicDescription": "Counts operations that have been speculatively executed."
+ },
+ {
+ "ArchStdEvent": "OP_SPEC",
+ "PublicDescription": "Counts micro-operations speculatively executed. This is the count of the number of micro-operations dispatched in a cycle."
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LD_SPEC",
+ "PublicDescription": "Counts unaligned memory read operations issued by the CPU. This event counts unaligned accesses (as defined by the actual instruction), even if they are subsequently issued as multiple aligned accesses. The event does not count preload operations (PLD, PLI)."
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_ST_SPEC",
+ "PublicDescription": "Counts unaligned memory write operations issued by the CPU. This event counts unaligned accesses (as defined by the actual instruction), even if they are subsequently issued as multiple aligned accesses."
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LDST_SPEC",
+ "PublicDescription": "Counts unaligned memory operations issued by the CPU. This event counts unaligned accesses (as defined by the actual instruction), even if they are subsequently issued as multiple aligned accesses."
+ },
+ {
+ "ArchStdEvent": "LDREX_SPEC",
+ "PublicDescription": "Counts Load-Exclusive operations that have been speculatively executed. Eg: LDREX, LDX"
+ },
+ {
+ "ArchStdEvent": "STREX_PASS_SPEC",
+ "PublicDescription": "Counts store-exclusive operations that have been speculatively executed and have successfully completed the store operation."
+ },
+ {
+ "ArchStdEvent": "STREX_FAIL_SPEC",
+ "PublicDescription": "Counts store-exclusive operations that have been speculatively executed and have not successfully completed the store operation."
+ },
+ {
+ "ArchStdEvent": "STREX_SPEC",
+ "PublicDescription": "Counts store-exclusive operations that have been speculatively executed."
+ },
+ {
+ "ArchStdEvent": "LD_SPEC",
+ "PublicDescription": "Counts speculatively executed load operations including Single Instruction Multiple Data (SIMD) load operations."
+ },
+ {
+ "ArchStdEvent": "ST_SPEC",
+ "PublicDescription": "Counts speculatively executed store operations including Single Instruction Multiple Data (SIMD) store operations."
+ },
+ {
+ "ArchStdEvent": "DP_SPEC",
+ "PublicDescription": "Counts speculatively executed logical or arithmetic instructions such as MOV/MVN operations."
+ },
+ {
+ "ArchStdEvent": "ASE_SPEC",
+ "PublicDescription": "Counts speculatively executed Advanced SIMD operations excluding load, store and move micro-operations that move data to or from SIMD (vector) registers."
+ },
+ {
+ "ArchStdEvent": "VFP_SPEC",
+ "PublicDescription": "Counts speculatively executed floating point operations. This event does not count operations that move data to or from floating point (vector) registers."
+ },
+ {
+ "ArchStdEvent": "PC_WRITE_SPEC",
+ "PublicDescription": "Counts speculatively executed operations which cause software changes of the PC. Those operations include all taken branch operations."
+ },
+ {
+ "ArchStdEvent": "CRYPTO_SPEC",
+ "PublicDescription": "Counts speculatively executed cryptographic operations except for PMULL and VMULL operations."
+ },
+ {
+ "ArchStdEvent": "BR_IMMED_SPEC",
+ "PublicDescription": "Counts immediate branch operations which are speculatively executed."
+ },
+ {
+ "ArchStdEvent": "BR_RETURN_SPEC",
+ "PublicDescription": "Counts procedure return operations (RET) which are speculatively executed."
+ },
+ {
+ "ArchStdEvent": "BR_INDIRECT_SPEC",
+ "PublicDescription": "Counts indirect branch operations including procedure returns, which are speculatively executed. This includes operations that force a software change of the PC, other than exception-generating operations. Eg: BR Xn, RET"
+ },
+ {
+ "ArchStdEvent": "ISB_SPEC",
+ "PublicDescription": "Counts ISB operations that are executed."
+ },
+ {
+ "ArchStdEvent": "DSB_SPEC",
+ "PublicDescription": "Counts DSB operations that are speculatively issued to Load/Store unit in the CPU."
+ },
+ {
+ "ArchStdEvent": "DMB_SPEC",
+ "PublicDescription": "Counts DMB operations that are speculatively issued to the Load/Store unit in the CPU. This event does not count implied barriers from load acquire/store release operations."
+ },
+ {
+ "ArchStdEvent": "RC_LD_SPEC",
+ "PublicDescription": "Counts any load acquire operations that are speculatively executed. Eg: LDAR, LDARH, LDARB"
+ },
+ {
+ "ArchStdEvent": "RC_ST_SPEC",
+ "PublicDescription": "Counts any store release operations that are speculatively executed. Eg: STLR, STLRH, STLRB'"
+ },
+ {
+ "ArchStdEvent": "ASE_INST_SPEC",
+ "PublicDescription": "Counts speculatively executed Advanced SIMD operations."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json
new file mode 100644
index 000000000000..bbbebc805034
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json
@@ -0,0 +1,30 @@
+[
+ {
+ "ArchStdEvent": "STALL_FRONTEND",
+ "PublicDescription": "Counts cycles when frontend could not send any micro-operations to the rename stage because of frontend resource stalls caused by fetch memory latency or branch prediction flow stalls. All the frontend slots were empty during the cycle when this event counts."
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND",
+ "PublicDescription": "Counts cycles whenever the rename unit is unable to send any micro-operations to the backend of the pipeline because of backend resource constraints. Backend resource constraints can include issue stage fullness, execution stage fullness, or other internal pipeline resource fullness. All the backend slots were empty during the cycle when this event counts."
+ },
+ {
+ "ArchStdEvent": "STALL",
+ "PublicDescription": "Counts cycles when no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)."
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_BACKEND",
+ "PublicDescription": "Counts slots per cycle in which no operations are sent from the rename unit to the backend due to backend resource constraints."
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_FRONTEND",
+ "PublicDescription": "Counts slots per cycle in which no operations are sent to the rename unit from the frontend due to frontend resource constraints."
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT",
+ "PublicDescription": "Counts slots per cycle in which no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)."
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND_MEM",
+ "PublicDescription": "Counts cycles when the backend is stalled because there is a pending demand load request in progress in the last level core cache."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/sve.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/sve.json
new file mode 100644
index 000000000000..51dab48cb2ba
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/sve.json
@@ -0,0 +1,50 @@
+[
+ {
+ "ArchStdEvent": "SVE_INST_SPEC",
+ "PublicDescription": "Counts speculatively executed operations that are SVE operations."
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_SPEC",
+ "PublicDescription": "Counts speculatively executed predicated SVE operations."
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_EMPTY_SPEC",
+ "PublicDescription": "Counts speculatively executed predicated SVE operations with no active predicate elements."
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_FULL_SPEC",
+ "PublicDescription": "Counts speculatively executed predicated SVE operations with all predicate elements active."
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC",
+ "PublicDescription": "Counts speculatively executed predicated SVE operations with at least one but not all active predicate elements."
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC",
+ "PublicDescription": "Counts speculatively executed predicated SVE operations with at least one non active predicate elements."
+ },
+ {
+ "ArchStdEvent": "SVE_LDFF_SPEC",
+ "PublicDescription": "Counts speculatively executed SVE first fault or non-fault load operations."
+ },
+ {
+ "ArchStdEvent": "SVE_LDFF_FAULT_SPEC",
+ "PublicDescription": "Counts speculatively executed SVE first fault or non-fault load operations that clear at least one bit in the FFR."
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT8_SPEC",
+ "PublicDescription": "Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type an 8-bit integer."
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT16_SPEC",
+ "PublicDescription": "Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type a 16-bit integer."
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT32_SPEC",
+ "PublicDescription": "Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type a 32-bit integer."
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT64_SPEC",
+ "PublicDescription": "Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type a 64-bit integer."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json
new file mode 100644
index 000000000000..b550af1831f5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json
@@ -0,0 +1,66 @@
+[
+ {
+ "ArchStdEvent": "L1I_TLB_REFILL",
+ "PublicDescription": "Counts level 1 instruction TLB refills from any Instruction fetch. If there are multiple misses in the TLB that are resolved by the refill, then this event only counts once. This event will not count if the translation table walk results in a fault (such as a translation or access fault), since there is no new translation created for the TLB."
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL",
+ "PublicDescription": "Counts level 1 data TLB accesses that resulted in TLB refills. If there are multiple misses in the TLB that are resolved by the refill, then this event only counts once. This event counts for refills caused by preload instructions or hardware prefetch accesses. This event counts regardless of whether the miss hits in L2 or results in a translation table walk. This event will not count if the translation table walk results in a fault (such as a translation or access fault), since there is no new translation created for the TLB. This event will not count on an access from an AT(address translation) instruction."
+ },
+ {
+ "ArchStdEvent": "L1D_TLB",
+ "PublicDescription": "Counts level 1 data TLB accesses caused by any memory load or store operation. Note that load or store instructions can be broken up into multiple memory operations. This event does not count TLB maintenance operations."
+ },
+ {
+ "ArchStdEvent": "L1I_TLB",
+ "PublicDescription": "Counts level 1 instruction TLB accesses, whether the access hits or misses in the TLB. This event counts both demand accesses and prefetch or preload generated accesses."
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL",
+ "PublicDescription": "Counts level 2 TLB refills caused by memory operations from both data and instruction fetch, except for those caused by TLB maintenance operations and hardware prefetches."
+ },
+ {
+ "ArchStdEvent": "L2D_TLB",
+ "PublicDescription": "Counts level 2 TLB accesses except those caused by TLB maintenance operations."
+ },
+ {
+ "ArchStdEvent": "DTLB_WALK",
+ "PublicDescription": "Counts data memory translation table walks caused by a miss in the L2 TLB driven by a memory access. Note that partial translations that also cause a table walk are counted. This event does not count table walks caused by TLB maintenance operations."
+ },
+ {
+ "ArchStdEvent": "ITLB_WALK",
+ "PublicDescription": "Counts instruction memory translation table walks caused by a miss in the L2 TLB driven by a memory access. Partial translations that also cause a table walk are counted. This event does not count table walks caused by TLB maintenance operations."
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_RD",
+ "PublicDescription": "Counts level 1 data TLB refills caused by memory read operations. If there are multiple misses in the TLB that are resolved by the refill, then this event only counts once. This event counts for refills caused by preload instructions or hardware prefetch accesses. This event counts regardless of whether the miss hits in L2 or results in a translation table walk. This event will not count if the translation table walk results in a fault (such as a translation or access fault), since there is no new translation created for the TLB. This event will not count on an access from an Address Translation (AT) instruction."
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_WR",
+ "PublicDescription": "Counts level 1 data TLB refills caused by data side memory write operations. If there are multiple misses in the TLB that are resolved by the refill, then this event only counts once. This event counts for refills caused by preload instructions or hardware prefetch accesses. This event counts regardless of whether the miss hits in L2 or results in a translation table walk. This event will not count if the table walk results in a fault (such as a translation or access fault), since there is no new translation created for the TLB. This event will not count with an access from an Address Translation (AT) instruction."
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_RD",
+ "PublicDescription": "Counts level 1 data TLB accesses caused by memory read operations. This event counts whether the access hits or misses in the TLB. This event does not count TLB maintenance operations."
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_WR",
+ "PublicDescription": "Counts any L1 data side TLB accesses caused by memory write operations. This event counts whether the access hits or misses in the TLB. This event does not count TLB maintenance operations."
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_RD",
+ "PublicDescription": "Counts level 2 TLB refills caused by memory read operations from both data and instruction fetch except for those caused by TLB maintenance operations or hardware prefetches."
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_WR",
+ "PublicDescription": "Counts level 2 TLB refills caused by memory write operations from both data and instruction fetch except for those caused by TLB maintenance operations."
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_RD",
+ "PublicDescription": "Counts level 2 TLB accesses caused by memory read operations from both data and instruction fetch except for those caused by TLB maintenance operations."
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_WR",
+ "PublicDescription": "Counts level 2 TLB accesses caused by memory write operations from both data and instruction fetch except for those caused by TLB maintenance operations."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json
index 3116135c59e2..98f6fabfebc7 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json
@@ -1,29 +1,38 @@
[
{
- "ArchStdEvent": "TRB_WRAP"
+ "ArchStdEvent": "TRB_WRAP",
+ "PublicDescription": "This event is generated each time the current write pointer is wrapped to the base pointer."
},
{
- "ArchStdEvent": "TRCEXTOUT0"
+ "ArchStdEvent": "TRCEXTOUT0",
+ "PublicDescription": "This event is generated each time an event is signaled by ETE external event 0."
},
{
- "ArchStdEvent": "TRCEXTOUT1"
+ "ArchStdEvent": "TRCEXTOUT1",
+ "PublicDescription": "This event is generated each time an event is signaled by ETE external event 1."
},
{
- "ArchStdEvent": "TRCEXTOUT2"
+ "ArchStdEvent": "TRCEXTOUT2",
+ "PublicDescription": "This event is generated each time an event is signaled by ETE external event 2."
},
{
- "ArchStdEvent": "TRCEXTOUT3"
+ "ArchStdEvent": "TRCEXTOUT3",
+ "PublicDescription": "This event is generated each time an event is signaled by ETE external event 3."
},
{
- "ArchStdEvent": "CTI_TRIGOUT4"
+ "ArchStdEvent": "CTI_TRIGOUT4",
+ "PublicDescription": "This event is generated each time an event is signaled on CTI output trigger 4."
},
{
- "ArchStdEvent": "CTI_TRIGOUT5"
+ "ArchStdEvent": "CTI_TRIGOUT5",
+ "PublicDescription": "This event is generated each time an event is signaled on CTI output trigger 5."
},
{
- "ArchStdEvent": "CTI_TRIGOUT6"
+ "ArchStdEvent": "CTI_TRIGOUT6",
+ "PublicDescription": "This event is generated each time an event is signaled on CTI output trigger 6."
},
{
- "ArchStdEvent": "CTI_TRIGOUT7"
+ "ArchStdEvent": "CTI_TRIGOUT7",
+ "PublicDescription": "This event is generated each time an event is signaled on CTI output trigger 7."
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json
new file mode 100644
index 000000000000..e21c469a8ef0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json
@@ -0,0 +1,373 @@
+[
+ {
+ "BriefDescription": "A Write or Read Op at HIF interface. The unit is 64B.",
+ "ConfigCode": "0x0",
+ "EventName": "hif_rd_or_wr",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Write Op at HIF interface. The unit is 64B.",
+ "ConfigCode": "0x1",
+ "EventName": "hif_wr",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Read Op at HIF interface. The unit is 64B.",
+ "ConfigCode": "0x2",
+ "EventName": "hif_rd",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Read-Modify-Write Op at HIF interface. The unit is 64B.",
+ "ConfigCode": "0x3",
+ "EventName": "hif_rmw",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A high priority Read at HIF interface. The unit is 64B.",
+ "ConfigCode": "0x4",
+ "EventName": "hif_hi_pri_rd",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A write data cycle at DFI interface (to DRAM).",
+ "ConfigCode": "0x7",
+ "EventName": "dfi_wr_data_cycles",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A read data cycle at DFI interface (to DRAM).",
+ "ConfigCode": "0x8",
+ "EventName": "dfi_rd_data_cycles",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A high priority read becomes critical.",
+ "ConfigCode": "0x9",
+ "EventName": "hpr_xact_when_critical",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A low priority read becomes critical.",
+ "ConfigCode": "0xA",
+ "EventName": "lpr_xact_when_critical",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A write becomes critical.",
+ "ConfigCode": "0xB",
+ "EventName": "wr_xact_when_critical",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "An Activate(ACT) command to DRAM.",
+ "ConfigCode": "0xC",
+ "EventName": "op_is_activate",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Read or Write CAS command to DRAM.",
+ "ConfigCode": "0xD",
+ "EventName": "op_is_rd_or_wr",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "An Activate(ACT) command for read to DRAM.",
+ "ConfigCode": "0xE",
+ "EventName": "op_is_rd_activate",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Read CAS command to DRAM.",
+ "ConfigCode": "0xF",
+ "EventName": "op_is_rd",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Write CAS command to DRAM.",
+ "ConfigCode": "0x10",
+ "EventName": "op_is_wr",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Masked Write command to DRAM.",
+ "ConfigCode": "0x11",
+ "EventName": "op_is_mwr",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Precharge(PRE) command to DRAM.",
+ "ConfigCode": "0x12",
+ "EventName": "op_is_precharge",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Precharge(PRE) required by read or write.",
+ "ConfigCode": "0x13",
+ "EventName": "precharge_for_rdwr",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Precharge(PRE) required by other conditions.",
+ "ConfigCode": "0x14",
+ "EventName": "precharge_for_other",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A read-write turnaround.",
+ "ConfigCode": "0x15",
+ "EventName": "rdwr_transitions",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A write combine(merge) in write data buffer.",
+ "ConfigCode": "0x16",
+ "EventName": "write_combine",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Write-After-Read hazard.",
+ "ConfigCode": "0x17",
+ "EventName": "war_hazard",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Read-After-Write hazard.",
+ "ConfigCode": "0x18",
+ "EventName": "raw_hazard",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Write-After-Write hazard.",
+ "ConfigCode": "0x19",
+ "EventName": "waw_hazard",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "Rank0 enters self-refresh(SRE).",
+ "ConfigCode": "0x1A",
+ "EventName": "op_is_enter_selfref_rk0",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "Rank1 enters self-refresh(SRE).",
+ "ConfigCode": "0x1B",
+ "EventName": "op_is_enter_selfref_rk1",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "Rank2 enters self-refresh(SRE).",
+ "ConfigCode": "0x1C",
+ "EventName": "op_is_enter_selfref_rk2",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "Rank3 enters self-refresh(SRE).",
+ "ConfigCode": "0x1D",
+ "EventName": "op_is_enter_selfref_rk3",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "Rank0 enters power-down(PDE).",
+ "ConfigCode": "0x1E",
+ "EventName": "op_is_enter_powerdown_rk0",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "Rank1 enters power-down(PDE).",
+ "ConfigCode": "0x1F",
+ "EventName": "op_is_enter_powerdown_rk1",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "Rank2 enters power-down(PDE).",
+ "ConfigCode": "0x20",
+ "EventName": "op_is_enter_powerdown_rk2",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "Rank3 enters power-down(PDE).",
+ "ConfigCode": "0x21",
+ "EventName": "op_is_enter_powerdown_rk3",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A cycle that Rank0 stays in self-refresh mode.",
+ "ConfigCode": "0x26",
+ "EventName": "selfref_mode_rk0",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A cycle that Rank1 stays in self-refresh mode.",
+ "ConfigCode": "0x27",
+ "EventName": "selfref_mode_rk1",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A cycle that Rank2 stays in self-refresh mode.",
+ "ConfigCode": "0x28",
+ "EventName": "selfref_mode_rk2",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A cycle that Rank3 stays in self-refresh mode.",
+ "ConfigCode": "0x29",
+ "EventName": "selfref_mode_rk3",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "An auto-refresh(REF) command to DRAM.",
+ "ConfigCode": "0x2A",
+ "EventName": "op_is_refresh",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A critical auto-refresh(REF) command to DRAM.",
+ "ConfigCode": "0x2B",
+ "EventName": "op_is_crit_ref",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "An MRR or MRW command to DRAM.",
+ "ConfigCode": "0x2D",
+ "EventName": "op_is_load_mode",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A ZQCal command to DRAM.",
+ "ConfigCode": "0x2E",
+ "EventName": "op_is_zqcl",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "At least one entry in read queue reaches the visible window limit.",
+ "ConfigCode": "0x30",
+ "EventName": "visible_window_limit_reached_rd",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "At least one entry in write queue reaches the visible window limit.",
+ "ConfigCode": "0x31",
+ "EventName": "visible_window_limit_reached_wr",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A DQS Oscillator MPC command to DRAM.",
+ "ConfigCode": "0x34",
+ "EventName": "op_is_dqsosc_mpc",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A DQS Oscillator MRR command to DRAM.",
+ "ConfigCode": "0x35",
+ "EventName": "op_is_dqsosc_mrr",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A Temperature Compensated Refresh(TCR) MRR command to DRAM.",
+ "ConfigCode": "0x36",
+ "EventName": "op_is_tcr_mrr",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A ZQCal Start command to DRAM.",
+ "ConfigCode": "0x37",
+ "EventName": "op_is_zqstart",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A ZQCal Latch command to DRAM.",
+ "ConfigCode": "0x38",
+ "EventName": "op_is_zqlatch",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A packet at CHI TXREQ interface (request).",
+ "ConfigCode": "0x39",
+ "EventName": "chi_txreq",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A packet at CHI TXDAT interface (read data).",
+ "ConfigCode": "0x3A",
+ "EventName": "chi_txdat",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A packet at CHI RXDAT interface (write data).",
+ "ConfigCode": "0x3B",
+ "EventName": "chi_rxdat",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A packet at CHI RXRSP interface.",
+ "ConfigCode": "0x3C",
+ "EventName": "chi_rxrsp",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "A violation detected in TZC.",
+ "ConfigCode": "0x3D",
+ "EventName": "tsz_vio",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "BriefDescription": "The ddr cycles.",
+ "ConfigCode": "0x80",
+ "EventName": "ddr_cycles",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json
new file mode 100644
index 000000000000..bc865b374b6a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json
@@ -0,0 +1,20 @@
+[
+ {
+ "MetricName": "ddr_read_bandwidth.all",
+ "BriefDescription": "The ddr read bandwidth(MB/s).",
+ "MetricGroup": "ali_drw",
+ "MetricExpr": "hif_rd * 64 / 1e6 / duration_time",
+ "ScaleUnit": "1MB/s",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ },
+ {
+ "MetricName": "ddr_write_bandwidth.all",
+ "BriefDescription": "The ddr write bandwidth(MB/s).",
+ "MetricGroup": "ali_drw",
+ "MetricExpr": "(hif_wr + hif_rmw) * 64 / 1e6 / duration_time",
+ "ScaleUnit": "1MB/s",
+ "Unit": "ali_drw",
+ "Compat": "ali_drw_pmu"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/sbsa.json b/tools/perf/pmu-events/arch/arm64/sbsa.json
index f90b338261ac..4eed79a28f6e 100644
--- a/tools/perf/pmu-events/arch/arm64/sbsa.json
+++ b/tools/perf/pmu-events/arch/arm64/sbsa.json
@@ -1,34 +1,34 @@
[
{
- "MetricExpr": "stall_slot_frontend / (#slots * cpu_cycles)",
- "BriefDescription": "Frontend bound L1 topdown metric",
+ "MetricExpr": "100 * (stall_slot_frontend / (#slots * cpu_cycles))",
+ "BriefDescription": "This metric is the percentage of total slots that were stalled due to resource constraints in the frontend of the processor.",
"DefaultMetricgroupName": "TopdownL1",
"MetricGroup": "Default;TopdownL1",
"MetricName": "frontend_bound",
- "ScaleUnit": "100%"
+ "ScaleUnit": "1percent of slots"
},
{
- "MetricExpr": "(1 - op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))",
- "BriefDescription": "Bad speculation L1 topdown metric",
+ "MetricExpr": "100 * ((1 - op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles)))",
+ "BriefDescription": "This metric is the percentage of total slots that executed operations and didn't retire due to a pipeline flush.\nThis indicates cycles that were utilized but inefficiently.",
"DefaultMetricgroupName": "TopdownL1",
"MetricGroup": "Default;TopdownL1",
"MetricName": "bad_speculation",
- "ScaleUnit": "100%"
+ "ScaleUnit": "1percent of slots"
},
{
- "MetricExpr": "(op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))",
- "BriefDescription": "Retiring L1 topdown metric",
+ "MetricExpr": "100 * ((op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles)))",
+ "BriefDescription": "This metric is the percentage of total slots that retired operations, which indicates cycles that were utilized efficiently.",
"DefaultMetricgroupName": "TopdownL1",
"MetricGroup": "Default;TopdownL1",
"MetricName": "retiring",
- "ScaleUnit": "100%"
+ "ScaleUnit": "1percent of slots"
},
{
- "MetricExpr": "stall_slot_backend / (#slots * cpu_cycles)",
- "BriefDescription": "Backend Bound L1 topdown metric",
+ "MetricExpr": "100 * (stall_slot_backend / (#slots * cpu_cycles))",
+ "BriefDescription": "This metric is the percentage of total slots that were stalled due to resource constraints in the backend of the processor.",
"DefaultMetricgroupName": "TopdownL1",
"MetricGroup": "Default;TopdownL1",
"MetricName": "backend_bound",
- "ScaleUnit": "100%"
+ "ScaleUnit": "1percent of slots"
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
index 605be14f441c..839ae26945fb 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
@@ -1,53 +1,8 @@
[
{
- "EventCode": "0x1003C",
- "EventName": "PM_EXEC_STALL_DMISS_L2L3",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
- },
- {
- "EventCode": "0x1E054",
- "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
- },
- {
- "EventCode": "0x34054",
- "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
- },
- {
- "EventCode": "0x34056",
- "EventName": "PM_EXEC_STALL_LOAD_FINISH",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
- },
- {
- "EventCode": "0x3006C",
- "EventName": "PM_RUN_CYC_SMT2_MODE",
- "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
- },
- {
"EventCode": "0x300F4",
"EventName": "PM_RUN_INST_CMPL_CONC",
- "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set."
- },
- {
- "EventCode": "0x4C016",
- "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
- },
- {
- "EventCode": "0x4D014",
- "EventName": "PM_EXEC_STALL_LOAD",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
- },
- {
- "EventCode": "0x4D016",
- "EventName": "PM_EXEC_STALL_PTESYNC",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
- },
- {
- "EventCode": "0x401EA",
- "EventName": "PM_THRESH_EXC_128",
- "BriefDescription": "Threshold counter exceeded a value of 128."
+ "BriefDescription": "PowerPC instruction completed by this thread when all threads in the core had the run-latch set."
},
{
"EventCode": "0x400F6",
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
index 54acb55e2c8c..e816cd10c129 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
@@ -1,7 +1,67 @@
[
{
- "EventCode": "0x4016E",
- "EventName": "PM_THRESH_NOT_MET",
- "BriefDescription": "Threshold counter did not meet threshold."
+ "EventCode": "0x100F4",
+ "EventName": "PM_FLOP_CMPL",
+ "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
+ },
+ {
+ "EventCode": "0x45050",
+ "EventName": "PM_1FLOP_CMPL",
+ "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+ },
+ {
+ "EventCode": "0x45052",
+ "EventName": "PM_4FLOP_CMPL",
+ "BriefDescription": "Four floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+ },
+ {
+ "EventCode": "0x45054",
+ "EventName": "PM_FMA_CMPL",
+ "BriefDescription": "Two floating point instruction completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
+ },
+ {
+ "EventCode": "0x45056",
+ "EventName": "PM_SCALAR_FLOP_CMPL",
+ "BriefDescription": "Scalar floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4505A",
+ "EventName": "PM_SP_FLOP_CMPL",
+ "BriefDescription": "Single Precision floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4505C",
+ "EventName": "PM_MATH_FLOP_CMPL",
+ "BriefDescription": "Math floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4D052",
+ "EventName": "PM_2FLOP_CMPL",
+ "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
+ },
+ {
+ "EventCode": "0x4D054",
+ "EventName": "PM_8FLOP_CMPL",
+ "BriefDescription": "Four Double Precision vector instruction completed."
+ },
+ {
+ "EventCode": "0x4D056",
+ "EventName": "PM_NON_FMA_FLOP_CMPL",
+ "BriefDescription": "Non FMA instruction completed."
+ },
+ {
+ "EventCode": "0x4D058",
+ "EventName": "PM_VECTOR_FLOP_CMPL",
+ "BriefDescription": "Vector floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4D05A",
+ "EventName": "PM_NON_MATH_FLOP_CMPL",
+ "BriefDescription": "Non Math instruction completed."
+ },
+ {
+ "EventCode": "0x4D05C",
+ "EventName": "PM_DPP_FLOP_CMPL",
+ "BriefDescription": "Double-Precision or Quad-Precision instruction completed."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
index 558f9530f54e..5977f5e64212 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
@@ -1,43 +1,13 @@
[
{
- "EventCode": "0x10004",
- "EventName": "PM_EXEC_STALL_TRANSLATION",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
+ "EventCode": "0x1D054",
+ "EventName": "PM_DTLB_HIT_2M",
+ "BriefDescription": "Data TLB hit (DERAT reload) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x10006",
- "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
- },
- {
- "EventCode": "0x10010",
- "EventName": "PM_PMC4_OVERFLOW",
- "BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
- },
- {
- "EventCode": "0x10020",
- "EventName": "PM_PMC4_REWIND",
- "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
- },
- {
- "EventCode": "0x10038",
- "EventName": "PM_DISP_STALL_TRANSLATION",
- "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
- },
- {
- "EventCode": "0x1003A",
- "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
- },
- {
- "EventCode": "0x1D05E",
- "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
- },
- {
- "EventCode": "0x1E050",
- "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
+ "EventCode": "0x1D058",
+ "EventName": "PM_ITLB_HIT_64K",
+ "BriefDescription": "Instruction TLB hit (IERAT reload) page size 64K. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
},
{
"EventCode": "0x1F054",
@@ -45,21 +15,6 @@
"BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT."
},
{
- "EventCode": "0x10064",
- "EventName": "PM_DISP_STALL_IC_L2",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
- },
- {
- "EventCode": "0x101E8",
- "EventName": "PM_THRESH_EXC_256",
- "BriefDescription": "Threshold counter exceeded a count of 256."
- },
- {
- "EventCode": "0x101EC",
- "EventName": "PM_THRESH_MET",
- "BriefDescription": "Threshold exceeded."
- },
- {
"EventCode": "0x100F2",
"EventName": "PM_1PLUS_PPC_CMPL",
"BriefDescription": "Cycles in which at least one instruction is completed by this thread."
@@ -67,57 +22,7 @@
{
"EventCode": "0x100F6",
"EventName": "PM_IERAT_MISS",
- "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event."
- },
- {
- "EventCode": "0x100F8",
- "EventName": "PM_DISP_STALL_CYC",
- "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
- },
- {
- "EventCode": "0x20006",
- "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
- },
- {
- "EventCode": "0x20114",
- "EventName": "PM_MRK_L2_RC_DISP",
- "BriefDescription": "Marked instruction RC dispatched in L2."
- },
- {
- "EventCode": "0x2C010",
- "EventName": "PM_EXEC_STALL_LSU",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
- },
- {
- "EventCode": "0x2C016",
- "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
- "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
- },
- {
- "EventCode": "0x2C01E",
- "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
- },
- {
- "EventCode": "0x2D01A",
- "EventName": "PM_DISP_STALL_IC_MISS",
- "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss."
- },
- {
- "EventCode": "0x2E018",
- "EventName": "PM_DISP_STALL_FETCH",
- "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
- },
- {
- "EventCode": "0x2E01A",
- "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full."
- },
- {
- "EventCode": "0x2C142",
- "EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
- "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+ "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event. This event only counts instruction demand access."
},
{
"EventCode": "0x24050",
@@ -135,11 +40,6 @@
"BriefDescription": "Branch Taken instruction completed."
},
{
- "EventCode": "0x30004",
- "EventName": "PM_DISP_STALL_FLUSH",
- "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
- },
- {
"EventCode": "0x3000A",
"EventName": "PM_DISP_STALL_ITLB_MISS",
"BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss."
@@ -150,59 +50,24 @@
"BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush."
},
{
- "EventCode": "0x30014",
- "EventName": "PM_EXEC_STALL_STORE",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
- },
- {
- "EventCode": "0x30018",
- "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
- },
- {
- "EventCode": "0x30026",
- "EventName": "PM_EXEC_STALL_STORE_MISS",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
- },
- {
- "EventCode": "0x3012A",
- "EventName": "PM_MRK_L2_RC_DONE",
- "BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
- },
- {
"EventCode": "0x3F046",
"EventName": "PM_ITLB_HIT_1G",
"BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x34058",
- "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
- "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
- },
- {
- "EventCode": "0x3D05C",
- "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
- },
- {
- "EventCode": "0x3E052",
- "EventName": "PM_DISP_STALL_IC_L3",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
+ "EventCode": "0x3C05A",
+ "EventName": "PM_DTLB_HIT_64K",
+ "BriefDescription": "Data TLB hit (DERAT reload) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
"EventCode": "0x3E054",
"EventName": "PM_LD_MISS_L1",
- "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
- },
- {
- "EventCode": "0x301EA",
- "EventName": "PM_THRESH_EXC_1024",
- "BriefDescription": "Threshold counter exceeded a value of 1024."
+ "BriefDescription": "Load missed L1, counted at finish time. LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
},
{
"EventCode": "0x300FA",
"EventName": "PM_INST_FROM_L3MISS",
- "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
+ "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss."
},
{
"EventCode": "0x40006",
@@ -210,38 +75,18 @@
"BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group."
},
{
- "EventCode": "0x40116",
- "EventName": "PM_MRK_LARX_FIN",
- "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
- },
- {
- "EventCode": "0x4C010",
- "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
- },
- {
- "EventCode": "0x4D01E",
- "EventName": "PM_DISP_STALL_BR_MPRED",
- "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
- },
- {
- "EventCode": "0x4E010",
- "EventName": "PM_DISP_STALL_IC_L3MISS",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
- },
- {
- "EventCode": "0x4E01A",
- "EventName": "PM_DISP_STALL_HELD_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason."
+ "EventCode": "0x44056",
+ "EventName": "PM_VECTOR_ST_CMPL",
+ "BriefDescription": "Vector store instruction completed."
},
{
- "EventCode": "0x4003C",
- "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+ "EventCode": "0x4E054",
+ "EventName": "PM_DTLB_HIT_1G",
+ "BriefDescription": "Data TLB hit (DERAT reload) page size 1G. Implies radix translation. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x44056",
- "EventName": "PM_VECTOR_ST_CMPL",
- "BriefDescription": "Vector store instructions completed."
+ "EventCode": "0x400FC",
+ "EventName": "PM_ITLB_MISS",
+ "BriefDescription": "Instruction TLB reload (after a miss), all page sizes. Includes only demand misses."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
index 58b5dfe3a273..78f71a9eadfd 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
@@ -1,15 +1,35 @@
[
{
- "EventCode": "0x1002C",
- "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
- "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
- },
- {
"EventCode": "0x10132",
"EventName": "PM_MRK_INST_ISSUED",
"BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction."
},
{
+ "EventCode": "0x10134",
+ "EventName": "PM_MRK_ST_DONE_L2",
+ "BriefDescription": "Marked store completed in L2."
+ },
+ {
+ "EventCode": "0x1C142",
+ "EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
+ "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+ },
+ {
+ "EventCode": "0x1C144",
+ "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
+ "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
+ },
+ {
+ "EventCode": "0x1D15C",
+ "EventName": "PM_MRK_DTLB_MISS_1G",
+ "BriefDescription": "Marked Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+ },
+ {
+ "EventCode": "0x1F150",
+ "EventName": "PM_MRK_ST_L2_CYC",
+ "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
+ },
+ {
"EventCode": "0x101E0",
"EventName": "PM_MRK_INST_DISP",
"BriefDescription": "The thread has dispatched a randomly sampled marked instruction."
@@ -20,14 +40,39 @@
"BriefDescription": "Marked Branch Taken instruction completed."
},
{
- "EventCode": "0x20112",
- "EventName": "PM_MRK_NTF_FIN",
- "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch."
+ "EventCode": "0x101E4",
+ "EventName": "PM_MRK_L1_ICACHE_MISS",
+ "BriefDescription": "Marked instruction suffered an instruction cache miss."
+ },
+ {
+ "EventCode": "0x101EA",
+ "EventName": "PM_MRK_L1_RELOAD_VALID",
+ "BriefDescription": "Marked demand reload."
},
{
- "EventCode": "0x2C01C",
- "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
+ "EventCode": "0x20114",
+ "EventName": "PM_MRK_L2_RC_DISP",
+ "BriefDescription": "Marked instruction RC dispatched in L2."
+ },
+ {
+ "EventCode": "0x2011C",
+ "EventName": "PM_MRK_NTF_CYC",
+ "BriefDescription": "Cycles in which the marked instruction is the oldest in the pipeline (next-to-finish or next-to-complete)."
+ },
+ {
+ "EventCode": "0x20130",
+ "EventName": "PM_MRK_INST_DECODED",
+ "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
+ },
+ {
+ "EventCode": "0x20132",
+ "EventName": "PM_MRK_DFU_ISSUE",
+ "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
+ },
+ {
+ "EventCode": "0x20134",
+ "EventName": "PM_MRK_FXU_ISSUE",
+ "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
},
{
"EventCode": "0x20138",
@@ -40,6 +85,16 @@
"BriefDescription": "Marked Branch instruction finished."
},
{
+ "EventCode": "0x2013C",
+ "EventName": "PM_MRK_FX_LSU_FIN",
+ "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
+ },
+ {
+ "EventCode": "0x2C142",
+ "EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
+ "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+ },
+ {
"EventCode": "0x2C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]."
@@ -60,19 +115,54 @@
"BriefDescription": "A marked branch completed. All branches are included."
},
{
- "EventCode": "0x200FD",
- "EventName": "PM_L1_ICACHE_MISS",
- "BriefDescription": "Demand iCache Miss."
+ "EventCode": "0x2D154",
+ "EventName": "PM_MRK_DERAT_MISS_64K",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+ },
+ {
+ "EventCode": "0x201E0",
+ "EventName": "PM_MRK_DATA_FROM_MEMORY",
+ "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
},
{
- "EventCode": "0x30130",
- "EventName": "PM_MRK_INST_FIN",
- "BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU."
+ "EventCode": "0x201E2",
+ "EventName": "PM_MRK_LD_MISS_L1",
+ "BriefDescription": "Marked demand data load miss counted at finish time."
+ },
+ {
+ "EventCode": "0x201E4",
+ "EventName": "PM_MRK_DATA_FROM_L3MISS",
+ "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
+ },
+ {
+ "EventCode": "0x3012A",
+ "EventName": "PM_MRK_L2_RC_DONE",
+ "BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
+ },
+ {
+ "EventCode": "0x3012E",
+ "EventName": "PM_MRK_DTLB_MISS_2M",
+ "BriefDescription": "Marked Data TLB reload (after a miss) page size 2M, which implies Radix Page Table translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+ },
+ {
+ "EventCode": "0x30132",
+ "EventName": "PM_MRK_VSU_FIN",
+ "BriefDescription": "VSU marked instruction finished. Excludes simple FX instructions issued to the Store Unit."
},
{
"EventCode": "0x34146",
"EventName": "PM_MRK_LD_CMPL",
- "BriefDescription": "Marked loads completed."
+ "BriefDescription": "Marked load instruction completed."
+ },
+ {
+ "EventCode": "0x3C142",
+ "EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
+ "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+ },
+ {
+ "EventCode": "0x3C144",
+ "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
+ "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
},
{
"EventCode": "0x3E158",
@@ -82,12 +172,22 @@
{
"EventCode": "0x3E15A",
"EventName": "PM_MRK_ST_FIN",
- "BriefDescription": "The marked instruction was a store of any kind."
+ "BriefDescription": "Marked store instruction finished."
+ },
+ {
+ "EventCode": "0x3F150",
+ "EventName": "PM_MRK_ST_DRAIN_CYC",
+ "BriefDescription": "Cycles in which the marked store drained from the core to the L2."
},
{
- "EventCode": "0x30068",
- "EventName": "PM_L1_ICACHE_RELOADED_PREF",
- "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)."
+ "EventCode": "0x30162",
+ "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
+ "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
+ },
+ {
+ "EventCode": "0x301E2",
+ "EventName": "PM_MRK_ST_CMPL",
+ "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
},
{
"EventCode": "0x301E4",
@@ -95,48 +195,78 @@
"BriefDescription": "Marked Branch Mispredicted. Includes direction and target."
},
{
- "EventCode": "0x300F6",
- "EventName": "PM_LD_DEMAND_MISS_L1",
- "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
+ "EventCode": "0x301E6",
+ "EventName": "PM_MRK_DERAT_MISS",
+ "BriefDescription": "Marked Erat Miss (Data TLB Access) All page sizes. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+ },
+ {
+ "EventCode": "0x4010E",
+ "EventName": "PM_MRK_TLBIE_FIN",
+ "BriefDescription": "Marked TLBIE instruction finished. Includes TLBIE and TLBIEL instructions."
+ },
+ {
+ "EventCode": "0x40116",
+ "EventName": "PM_MRK_LARX_FIN",
+ "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
+ },
+ {
+ "EventCode": "0x40132",
+ "EventName": "PM_MRK_LSU_FIN",
+ "BriefDescription": "LSU marked instruction finish."
+ },
+ {
+ "EventCode": "0x44146",
+ "EventName": "PM_MRK_STCX_CORE_CYC",
+ "BriefDescription": "Cycles spent in the core portion of a marked STCX instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
},
{
- "EventCode": "0x300FE",
- "EventName": "PM_DATA_FROM_L3MISS",
- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
+ "EventCode": "0x4C142",
+ "EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
+ "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "0x40012",
- "EventName": "PM_L1_ICACHE_RELOADED_ALL",
- "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
+ "EventCode": "0x4C144",
+ "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
+ "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
},
{
- "EventCode": "0x40134",
- "EventName": "PM_MRK_INST_TIMEO",
- "BriefDescription": "Marked instruction finish timeout (instruction was lost)."
+ "EventCode": "0x4C15C",
+ "EventName": "PM_MRK_DERAT_MISS_1G",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 1G for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x4505A",
- "EventName": "PM_SP_FLOP_CMPL",
- "BriefDescription": "Single Precision floating point instructions completed."
+ "EventCode": "0x4C15E",
+ "EventName": "PM_MRK_DTLB_MISS_64K",
+ "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x4D058",
- "EventName": "PM_VECTOR_FLOP_CMPL",
- "BriefDescription": "Vector floating point instructions completed."
+ "EventCode": "0x4E15E",
+ "EventName": "PM_MRK_INST_FLUSHED",
+ "BriefDescription": "The marked instruction was flushed."
},
{
- "EventCode": "0x4D05A",
- "EventName": "PM_NON_MATH_FLOP_CMPL",
- "BriefDescription": "Non Math instructions completed."
+ "EventCode": "0x40164",
+ "EventName": "PM_MRK_DERAT_MISS_2M",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
"EventCode": "0x401E0",
"EventName": "PM_MRK_INST_CMPL",
- "BriefDescription": "marked instruction completed."
+ "BriefDescription": "Marked instruction completed."
+ },
+ {
+ "EventCode": "0x401E4",
+ "EventName": "PM_MRK_DTLB_MISS",
+ "BriefDescription": "The DPTEG required for the marked load/store instruction in execution was missing from the TLB. This event only counts for demand misses."
+ },
+ {
+ "EventCode": "0x401E6",
+ "EventName": "PM_MRK_INST_FROM_L3MISS",
+ "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
},
{
- "EventCode": "0x400FE",
- "EventName": "PM_DATA_FROM_MEMORY",
- "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
+ "EventCode": "0x401E8",
+ "EventName": "PM_MRK_DATA_FROM_L2MISS",
+ "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
index 843b51f531e9..885262957beb 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
@@ -1,25 +1,10 @@
[
{
- "EventCode": "0x1000A",
- "EventName": "PM_PMC3_REWIND",
- "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
- },
- {
"EventCode": "0x1C040",
"EventName": "PM_XFER_FROM_SRC_PMC1",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "0x1C142",
- "EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
- "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
- },
- {
- "EventCode": "0x1C144",
- "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
- "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
- },
- {
"EventCode": "0x1C056",
"EventName": "PM_DERAT_MISS_4K",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
@@ -35,24 +20,9 @@
"BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x1E056",
- "EventName": "PM_EXEC_STALL_STORE_PIPE",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
- },
- {
- "EventCode": "0x1F150",
- "EventName": "PM_MRK_ST_L2_CYC",
- "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
- },
- {
"EventCode": "0x10062",
"EventName": "PM_LD_L3MISS_PEND_CYC",
- "BriefDescription": "Cycles L3 miss was pending for this thread."
- },
- {
- "EventCode": "0x20010",
- "EventName": "PM_PMC1_OVERFLOW",
- "BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
+ "BriefDescription": "Cycles in which an L3 miss was pending for this thread."
},
{
"EventCode": "0x2001A",
@@ -80,9 +50,9 @@
"BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x2D154",
- "EventName": "PM_MRK_DERAT_MISS_64K",
- "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+ "EventCode": "0x2C05A",
+ "EventName": "PM_DERAT_MISS_1G",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
"EventCode": "0x200F6",
@@ -90,9 +60,9 @@
"BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x30016",
- "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve."
+ "EventCode": "0x34044",
+ "EventName": "PM_DERAT_MISS_PREF",
+ "BriefDescription": "DERAT miss (TLB access) while servicing a data prefetch."
},
{
"EventCode": "0x3C040",
@@ -100,16 +70,6 @@
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "0x3C142",
- "EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
- "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
- },
- {
- "EventCode": "0x3C144",
- "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
- "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
- },
- {
"EventCode": "0x3C054",
"EventName": "PM_DERAT_MISS_16M",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
@@ -125,24 +85,14 @@
"BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "0x301E2",
- "EventName": "PM_MRK_ST_CMPL",
- "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
- },
- {
"EventCode": "0x300FC",
"EventName": "PM_DTLB_MISS",
- "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity."
- },
- {
- "EventCode": "0x4D02C",
- "EventName": "PM_PMC1_REWIND",
- "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
+ "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. This event only counts for demand misses."
},
{
"EventCode": "0x4003E",
"EventName": "PM_LD_CMPL",
- "BriefDescription": "Loads completed."
+ "BriefDescription": "Load instruction completed."
},
{
"EventCode": "0x4C040",
@@ -150,16 +100,6 @@
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "0x4C142",
- "EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
- "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
- },
- {
- "EventCode": "0x4C144",
- "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
- "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
- },
- {
"EventCode": "0x4C056",
"EventName": "PM_DTLB_MISS_16M",
"BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
@@ -168,20 +108,5 @@
"EventCode": "0x4C05A",
"EventName": "PM_DTLB_MISS_1G",
"BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
- },
- {
- "EventCode": "0x4C15E",
- "EventName": "PM_MRK_DTLB_MISS_64K",
- "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
- },
- {
- "EventCode": "0x4D056",
- "EventName": "PM_NON_FMA_FLOP_CMPL",
- "BriefDescription": "Non FMA instruction completed."
- },
- {
- "EventCode": "0x40164",
- "EventName": "PM_MRK_DERAT_MISS_2M",
- "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
index 6f53583a0c62..4d66b75c6ad5 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
@@ -16,133 +16,139 @@
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled for any reason",
"MetricExpr": "PM_DISP_STALL_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI;CPI_STALL_RATIO",
- "MetricName": "DISPATCHED_CPI"
+ "MetricName": "DISPATCH_STALL_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled because there was a flush",
"MetricExpr": "PM_DISP_STALL_FLUSH / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_FLUSH_CPI"
+ "MetricName": "DISPATCH_STALL_FLUSH_CPI"
+ },
+ {
+ "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because Fetch was being held, so there was nothing in the pipeline for this thread",
+ "MetricExpr": "PM_DISP_STALL_FETCH / PM_RUN_INST_CMPL",
+ "MetricGroup": "CPI",
+ "MetricName": "DISPATCH_STALL_FETCH_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled because the MMU was handling a translation miss",
"MetricExpr": "PM_DISP_STALL_TRANSLATION / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_TRANSLATION_CPI"
+ "MetricName": "DISPATCH_STALL_TRANSLATION_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction ERAT miss",
"MetricExpr": "PM_DISP_STALL_IERAT_ONLY_MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IERAT_ONLY_MISS_CPI"
+ "MetricName": "DISPATCH_STALL_IERAT_ONLY_MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction TLB miss",
"MetricExpr": "PM_DISP_STALL_ITLB_MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_ITLB_MISS_CPI"
+ "MetricName": "DISPATCH_STALL_ITLB_MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss",
"MetricExpr": "PM_DISP_STALL_IC_MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IC_MISS_CPI"
+ "MetricName": "DISPATCH_STALL_IC_MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L2",
"MetricExpr": "PM_DISP_STALL_IC_L2 / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IC_L2_CPI"
+ "MetricName": "DISPATCH_STALL_IC_L2_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L3",
"MetricExpr": "PM_DISP_STALL_IC_L3 / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IC_L3_CPI"
+ "MetricName": "DISPATCH_STALL_IC_L3_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from any source beyond the local L3",
"MetricExpr": "PM_DISP_STALL_IC_L3MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IC_L3MISS_CPI"
+ "MetricName": "DISPATCH_STALL_IC_L3MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss after a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED_ICMISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_ICMISS_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_ICMISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L2 after suffering a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L2 / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_IC_L2_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L2_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L3 after suffering a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3 / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_IC_L3_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L3_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from any source beyond the local L3 after suffering a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_IC_L3MISS_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L3MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any reason",
"MetricExpr": "PM_DISP_STALL_HELD_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch",
"MetricExpr": "PM_DISP_STALL_HELD_SYNC_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISP_HELD_STALL_SYNC_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_SYNC_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch while waiting on the scoreboard",
"MetricExpr": "PM_DISP_STALL_HELD_SCOREBOARD_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISP_HELD_STALL_SCOREBOARD_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_SCOREBOARD_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch due to issue queue full",
"MetricExpr": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISP_HELD_STALL_ISSQ_FULL_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_ISSQ_FULL_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the mapper/SRB was full",
"MetricExpr": "PM_DISP_STALL_HELD_RENAME_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_RENAME_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_RENAME_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the STF mapper/SRB was full",
"MetricExpr": "PM_DISP_STALL_HELD_STF_MAPPER_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_STF_MAPPER_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_STF_MAPPER_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the XVFC mapper/SRB was full",
"MetricExpr": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_XVFC_MAPPER_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_XVFC_MAPPER_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any other reason",
"MetricExpr": "PM_DISP_STALL_HELD_OTHER_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_OTHER_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_OTHER_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction has been dispatched but not issued for any reason",
@@ -352,13 +358,13 @@
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled because fetch was being held, so there was nothing in the pipeline for this thread",
"MetricExpr": "PM_DISP_STALL_FETCH / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_FETCH_CPI"
+ "MetricName": "DISPATCH_STALL_FETCH_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of power management",
"MetricExpr": "PM_DISP_STALL_HELD_HALT_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_HALT_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_HALT_CPI"
},
{
"BriefDescription": "Percentage of flushes per completed instruction",
@@ -395,6 +401,13 @@
"ScaleUnit": "1%"
},
{
+ "BriefDescription": "Percentage of completed instructions that were stores that missed the L1",
+ "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL",
+ "MetricGroup": "Others",
+ "MetricName": "L1_ST_MISS_RATE",
+ "ScaleUnit": "1%"
+ },
+ {
"BriefDescription": "Percentage of completed instructions when the DPTEG required for the load/store instruction in execution was missing from the TLB",
"MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL * 100",
"MetricGroup": "Others",
@@ -454,12 +467,6 @@
"MetricName": "LOADS_PER_INST"
},
{
- "BriefDescription": "Average number of finished stores per completed instruction",
- "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
- "MetricGroup": "General",
- "MetricName": "STORES_PER_INST"
- },
- {
"BriefDescription": "Percentage of demand loads that reloaded from beyond the L2 per completed instruction",
"MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100",
"MetricGroup": "dL1_Reloads",
@@ -474,6 +481,13 @@
"ScaleUnit": "1%"
},
{
+ "BriefDescription": "Percentage of ITLB misses per completed run instruction",
+ "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL * 100",
+ "MetricGroup": "General",
+ "MetricName": "ITLB_MISS_RATE",
+ "ScaleUnit": "1%"
+ },
+ {
"BriefDescription": "Percentage of DERAT misses with 4k page size per completed instruction",
"MetricExpr": "PM_DERAT_MISS_4K / PM_RUN_INST_CMPL * 100",
"MetricGroup": "Translation",
@@ -566,7 +580,7 @@
"BriefDescription": "Average number of STCX instructions finshed per completed instruction",
"MetricExpr": "PM_STCX_FIN / PM_RUN_INST_CMPL",
"MetricGroup": "General",
- "MetricName": "STXC_PER_INST"
+ "MetricName": "STCX_PER_INST"
},
{
"BriefDescription": "Average number of LARX instructions finshed per completed instruction",
@@ -629,6 +643,13 @@
"ScaleUnit": "1%"
},
{
+ "BriefDescription": "Percentage of DERAT misses with 1G page size per completed run instruction",
+ "MetricExpr": "PM_DERAT_MISS_1G * 100 / PM_RUN_INST_CMPL",
+ "MetricGroup": "Translation",
+ "MetricName": "DERAT_1G_MISS_RATE",
+ "ScaleUnit": "1%"
+ },
+ {
"BriefDescription": "DERAT miss ratio for 4K page size",
"MetricExpr": "PM_DERAT_MISS_4K / PM_DERAT_MISS",
"MetricGroup": "Translation",
@@ -647,6 +668,12 @@
"MetricName": "DERAT_16M_MISS_RATIO"
},
{
+ "BriefDescription": "DERAT miss ratio for 1G page size",
+ "MetricExpr": "PM_DERAT_MISS_1G / PM_DERAT_MISS",
+ "MetricGroup": "Translation",
+ "MetricName": "DERAT_1G_MISS_RATIO"
+ },
+ {
"BriefDescription": "DERAT miss ratio for 64K page size",
"MetricExpr": "PM_DERAT_MISS_64K / PM_DERAT_MISS",
"MetricGroup": "Translation",
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
index a771e4b6bec5..0e21e7ba1959 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
@@ -1,28 +1,13 @@
[
{
- "EventCode": "0x10016",
- "EventName": "PM_VSU0_ISSUE",
- "BriefDescription": "VSU instructions issued to VSU pipe 0."
- },
- {
- "EventCode": "0x1001C",
- "EventName": "PM_ULTRAVISOR_INST_CMPL",
- "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state."
- },
- {
- "EventCode": "0x100F0",
- "EventName": "PM_CYC",
- "BriefDescription": "Processor cycles."
- },
- {
- "EventCode": "0x10134",
- "EventName": "PM_MRK_ST_DONE_L2",
- "BriefDescription": "Marked stores completed in L2 (RC machine done)."
+ "EventCode": "0x1002C",
+ "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
+ "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
},
{
"EventCode": "0x1505E",
"EventName": "PM_LD_HIT_L1",
- "BriefDescription": "Loads that finished without experiencing an L1 miss."
+ "BriefDescription": "Load finished without experiencing an L1 miss."
},
{
"EventCode": "0x1F056",
@@ -30,9 +15,9 @@
"BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
},
{
- "EventCode": "0x1F15C",
- "EventName": "PM_MRK_STCX_L2_CYC",
- "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)."
+ "EventCode": "0x1F05A",
+ "EventName": "PM_DISP_HELD_SYNC_CYC",
+ "BriefDescription": "Cycles dispatch is held because of a synchronizing instruction that requires the ICT to be empty before dispatch."
},
{
"EventCode": "0x10066",
@@ -40,39 +25,14 @@
"BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011."
},
{
- "EventCode": "0x101E4",
- "EventName": "PM_MRK_L1_ICACHE_MISS",
- "BriefDescription": "Marked Instruction suffered an icache Miss."
- },
- {
- "EventCode": "0x101EA",
- "EventName": "PM_MRK_L1_RELOAD_VALID",
- "BriefDescription": "Marked demand reload."
- },
- {
- "EventCode": "0x100F4",
- "EventName": "PM_FLOP_CMPL",
- "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
- },
- {
- "EventCode": "0x100FA",
- "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
- "BriefDescription": "Cycles when at least one thread has the run latch set."
- },
- {
"EventCode": "0x100FC",
"EventName": "PM_LD_REF_L1",
"BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included."
},
{
- "EventCode": "0x2000C",
- "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
- "BriefDescription": "Cycles when the run latch is set for all threads."
- },
- {
"EventCode": "0x2E010",
"EventName": "PM_ADJUNCT_INST_CMPL",
- "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state."
+ "BriefDescription": "PowerPC instruction completed while the thread was in Adjunct state."
},
{
"EventCode": "0x2E014",
@@ -80,26 +40,6 @@
"BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "0x20130",
- "EventName": "PM_MRK_INST_DECODED",
- "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
- },
- {
- "EventCode": "0x20132",
- "EventName": "PM_MRK_DFU_ISSUE",
- "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
- },
- {
- "EventCode": "0x20134",
- "EventName": "PM_MRK_FXU_ISSUE",
- "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
- },
- {
- "EventCode": "0x2505C",
- "EventName": "PM_VSU_ISSUE",
- "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
- },
- {
"EventCode": "0x2F054",
"EventName": "PM_DISP_SS1_2_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions."
@@ -110,39 +50,14 @@
"BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions."
},
{
- "EventCode": "0x2006C",
- "EventName": "PM_RUN_CYC_SMT4_MODE",
- "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
- },
- {
- "EventCode": "0x201E0",
- "EventName": "PM_MRK_DATA_FROM_MEMORY",
- "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
- },
- {
- "EventCode": "0x201E4",
- "EventName": "PM_MRK_DATA_FROM_L3MISS",
- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
- },
- {
- "EventCode": "0x201E8",
- "EventName": "PM_THRESH_EXC_512",
- "BriefDescription": "Threshold counter exceeded a value of 512."
- },
- {
"EventCode": "0x200F2",
"EventName": "PM_INST_DISP",
- "BriefDescription": "PowerPC instructions dispatched."
- },
- {
- "EventCode": "0x30132",
- "EventName": "PM_MRK_VSU_FIN",
- "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit."
+ "BriefDescription": "PowerPC instruction dispatched."
},
{
- "EventCode": "0x30038",
- "EventName": "PM_EXEC_STALL_DMISS_LMEM",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory."
+ "EventCode": "0x200FD",
+ "EventName": "PM_L1_ICACHE_MISS",
+ "BriefDescription": "Demand instruction cache miss."
},
{
"EventCode": "0x3F04A",
@@ -152,12 +67,7 @@
{
"EventCode": "0x3405A",
"EventName": "PM_PRIVILEGED_INST_CMPL",
- "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state."
- },
- {
- "EventCode": "0x3F150",
- "EventName": "PM_MRK_ST_DRAIN_CYC",
- "BriefDescription": "cycles to drain st from core to L2."
+ "BriefDescription": "PowerPC instruction completed while the thread was in Privileged state."
},
{
"EventCode": "0x3F054",
@@ -170,74 +80,29 @@
"BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions."
},
{
- "EventCode": "0x30162",
- "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
- "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
- },
- {
- "EventCode": "0x40114",
- "EventName": "PM_MRK_START_PROBE_NOP_DISP",
- "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
- },
- {
- "EventCode": "0x4001C",
- "EventName": "PM_VSU_FIN",
- "BriefDescription": "VSU instructions finished."
- },
- {
- "EventCode": "0x4C01A",
- "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
- },
- {
- "EventCode": "0x4D012",
- "EventName": "PM_PMC3_SAVED",
- "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
- },
- {
- "EventCode": "0x4D022",
- "EventName": "PM_HYPERVISOR_INST_CMPL",
- "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state."
- },
- {
- "EventCode": "0x4D026",
- "EventName": "PM_ULTRAVISOR_CYC",
- "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
+ "EventCode": "0x30068",
+ "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+ "BriefDescription": "Counts all instruction cache prefetch reloads (includes demand turned into prefetch)."
},
{
- "EventCode": "0x4D028",
- "EventName": "PM_PRIVILEGED_CYC",
- "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
+ "EventCode": "0x300F6",
+ "EventName": "PM_LD_DEMAND_MISS_L1",
+ "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
},
{
- "EventCode": "0x40030",
- "EventName": "PM_INST_FIN",
- "BriefDescription": "Instructions finished."
+ "EventCode": "0x300FE",
+ "EventName": "PM_DATA_FROM_L3MISS",
+ "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
},
{
- "EventCode": "0x44146",
- "EventName": "PM_MRK_STCX_CORE_CYC",
- "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
+ "EventCode": "0x40012",
+ "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+ "BriefDescription": "Counts all instruction cache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
},
{
"EventCode": "0x44054",
"EventName": "PM_VECTOR_LD_CMPL",
- "BriefDescription": "Vector load instructions completed."
- },
- {
- "EventCode": "0x45054",
- "EventName": "PM_FMA_CMPL",
- "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
- },
- {
- "EventCode": "0x45056",
- "EventName": "PM_SCALAR_FLOP_CMPL",
- "BriefDescription": "Scalar floating point instructions completed."
- },
- {
- "EventCode": "0x4505C",
- "EventName": "PM_MATH_FLOP_CMPL",
- "BriefDescription": "Math floating point instructions completed."
+ "BriefDescription": "Vector load instruction completed."
},
{
"EventCode": "0x4D05E",
@@ -245,28 +110,13 @@
"BriefDescription": "A branch completed. All branches are included."
},
{
- "EventCode": "0x4E15E",
- "EventName": "PM_MRK_INST_FLUSHED",
- "BriefDescription": "The marked instruction was flushed."
- },
- {
- "EventCode": "0x401E6",
- "EventName": "PM_MRK_INST_FROM_L3MISS",
- "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction."
- },
- {
- "EventCode": "0x401E8",
- "EventName": "PM_MRK_DATA_FROM_L2MISS",
- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load."
- },
- {
"EventCode": "0x400F0",
"EventName": "PM_LD_DEMAND_MISS_L1_FIN",
- "BriefDescription": "Load Missed L1, counted at finish time."
+ "BriefDescription": "Load missed L1, counted at finish time."
},
{
- "EventCode": "0x500FA",
- "EventName": "PM_RUN_INST_CMPL",
- "BriefDescription": "Completed PowerPC instructions gated by the run latch."
+ "EventCode": "0x400FE",
+ "EventName": "PM_DATA_FROM_MEMORY",
+ "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
index b8aded6045fa..21b23bb55d0d 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
@@ -1,8 +1,13 @@
[
{
- "EventCode": "0x100FE",
- "EventName": "PM_INST_CMPL",
- "BriefDescription": "PowerPC instructions completed."
+ "EventCode": "0x10004",
+ "EventName": "PM_EXEC_STALL_TRANSLATION",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
+ },
+ {
+ "EventCode": "0x10006",
+ "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any other reason."
},
{
"EventCode": "0x1000C",
@@ -12,7 +17,7 @@
{
"EventCode": "0x1000E",
"EventName": "PM_MMA_ISSUED",
- "BriefDescription": "MMA instructions issued."
+ "BriefDescription": "MMA instruction issued."
},
{
"EventCode": "0x10012",
@@ -30,14 +35,24 @@
"BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss."
},
{
- "EventCode": "0x10022",
- "EventName": "PM_PMC2_SAVED",
- "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
+ "EventCode": "0x10028",
+ "EventName": "PM_NTC_FLUSH",
+ "BriefDescription": "The instruction was flushed after becoming next-to-complete (NTC)."
+ },
+ {
+ "EventCode": "0x10038",
+ "EventName": "PM_DISP_STALL_TRANSLATION",
+ "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
+ },
+ {
+ "EventCode": "0x1003A",
+ "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
},
{
- "EventCode": "0x10024",
- "EventName": "PM_PMC5_OVERFLOW",
- "BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
+ "EventCode": "0x1003C",
+ "EventName": "PM_EXEC_STALL_DMISS_L2L3",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
},
{
"EventCode": "0x10058",
@@ -55,11 +70,41 @@
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
+ "EventCode": "0x1D05E",
+ "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of power management."
+ },
+ {
+ "EventCode": "0x1E050",
+ "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
+ },
+ {
+ "EventCode": "0x1E054",
+ "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
+ },
+ {
+ "EventCode": "0x1E056",
+ "EventName": "PM_EXEC_STALL_STORE_PIPE",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
+ },
+ {
"EventCode": "0x1E05A",
"EventName": "PM_CMPL_STALL_LWSYNC",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
},
{
+ "EventCode": "0x1F058",
+ "EventName": "PM_DISP_HELD_CYC",
+ "BriefDescription": "Cycles dispatch is held."
+ },
+ {
+ "EventCode": "0x10064",
+ "EventName": "PM_DISP_STALL_IC_L2",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+ },
+ {
"EventCode": "0x10068",
"EventName": "PM_BR_FIN",
"BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional."
@@ -70,9 +115,9 @@
"BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time."
},
{
- "EventCode": "0x1006C",
- "EventName": "PM_RUN_CYC_ST_MODE",
- "BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
+ "EventCode": "0x100F8",
+ "EventName": "PM_DISP_STALL_CYC",
+ "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
},
{
"EventCode": "0x20004",
@@ -80,9 +125,9 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet."
},
{
- "EventCode": "0x2000A",
- "EventName": "PM_HYPERVISOR_CYC",
- "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
+ "EventCode": "0x20006",
+ "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
},
{
"EventCode": "0x2000E",
@@ -90,24 +135,59 @@
"BriefDescription": "LSU Finished an internal operation in LD1 port."
},
{
+ "EventCode": "0x2C010",
+ "EventName": "PM_EXEC_STALL_LSU",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
+ },
+ {
"EventCode": "0x2C014",
"EventName": "PM_CMPL_STALL_SPECIAL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing."
},
{
+ "EventCode": "0x2C016",
+ "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
+ "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
+ },
+ {
"EventCode": "0x2C018",
"EventName": "PM_EXEC_STALL_DMISS_L3MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3."
},
{
+ "EventCode": "0x2C01C",
+ "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
+ },
+ {
+ "EventCode": "0x2C01E",
+ "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
+ },
+ {
"EventCode": "0x2D010",
"EventName": "PM_LSU_ST1_FIN",
"BriefDescription": "LSU Finished an internal operation in ST1 port."
},
{
+ "EventCode": "0x10016",
+ "EventName": "PM_VSU0_ISSUE",
+ "BriefDescription": "VSU instruction issued to VSU pipe 0."
+ },
+ {
"EventCode": "0x2D012",
"EventName": "PM_VSU1_ISSUE",
- "BriefDescription": "VSU instructions issued to VSU pipe 1."
+ "BriefDescription": "VSU instruction issued to VSU pipe 1."
+ },
+ {
+ "EventCode": "0x2505C",
+ "EventName": "PM_VSU_ISSUE",
+ "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
+ },
+ {
+ "EventCode": "0x4001C",
+ "EventName": "PM_VSU_FIN",
+ "BriefDescription": "VSU instruction finished."
},
{
"EventCode": "0x2D018",
@@ -115,19 +195,34 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)."
},
{
+ "EventCode": "0x2D01A",
+ "EventName": "PM_DISP_STALL_IC_MISS",
+ "BriefDescription": "Cycles when dispatch was stalled for this thread due to an instruction cache miss."
+ },
+ {
"EventCode": "0x2D01C",
"EventName": "PM_CMPL_STALL_STCX",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
},
{
- "EventCode": "0x2E01E",
- "EventName": "PM_EXEC_STALL_NTC_FLUSH",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch."
+ "EventCode": "0x2E018",
+ "EventName": "PM_DISP_STALL_FETCH",
+ "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
+ },
+ {
+ "EventCode": "0x2E01A",
+ "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the XVFC mapper/SRB was full."
+ },
+ {
+ "EventCode": "0x2E01C",
+ "EventName": "PM_EXEC_STALL_TLBIE",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
},
{
- "EventCode": "0x2013C",
- "EventName": "PM_MRK_FX_LSU_FIN",
- "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
+ "EventCode": "0x2E01E",
+ "EventName": "PM_EXEC_STALL_NTC_FLUSH",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous next-to-finish (NTF) instruction is still completing and the new NTF instruction is stalled at dispatch."
},
{
"EventCode": "0x2405A",
@@ -135,14 +230,19 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status."
},
{
- "EventCode": "0x201E2",
- "EventName": "PM_MRK_LD_MISS_L1",
- "BriefDescription": "Marked DL1 Demand Miss counted at finish time."
+ "EventCode": "0x20066",
+ "EventName": "PM_DISP_HELD_OTHER_CYC",
+ "BriefDescription": "Cycles dispatch is held for any other reason."
+ },
+ {
+ "EventCode": "0x2006A",
+ "EventName": "PM_DISP_HELD_STF_MAPPER_CYC",
+ "BriefDescription": "Cycles dispatch is held because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
},
{
- "EventCode": "0x200F4",
- "EventName": "PM_RUN_CYC",
- "BriefDescription": "Processor cycles gated by the run latch."
+ "EventCode": "0x30004",
+ "EventName": "PM_DISP_STALL_FLUSH",
+ "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet next-to-complete (NTC). PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
},
{
"EventCode": "0x30008",
@@ -150,29 +250,34 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category."
},
{
- "EventCode": "0x3001A",
- "EventName": "PM_LSU_ST2_FIN",
- "BriefDescription": "LSU Finished an internal operation in ST2 port."
+ "EventCode": "0x30014",
+ "EventName": "PM_EXEC_STALL_STORE",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
+ },
+ {
+ "EventCode": "0x30016",
+ "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve."
},
{
- "EventCode": "0x30020",
- "EventName": "PM_PMC2_REWIND",
- "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
+ "EventCode": "0x30018",
+ "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
},
{
- "EventCode": "0x30022",
- "EventName": "PM_PMC4_SAVED",
- "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
+ "EventCode": "0x3001A",
+ "EventName": "PM_LSU_ST2_FIN",
+ "BriefDescription": "LSU Finished an internal operation in ST2 port."
},
{
- "EventCode": "0x30024",
- "EventName": "PM_PMC6_OVERFLOW",
- "BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
+ "EventCode": "0x30026",
+ "EventName": "PM_EXEC_STALL_STORE_MISS",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
},
{
"EventCode": "0x30028",
"EventName": "PM_CMPL_STALL_MEM_ECC",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC."
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a STCX waiting for its result or a load waiting for non-critical sectors of data and ECC."
},
{
"EventCode": "0x30036",
@@ -180,6 +285,11 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit."
},
{
+ "EventCode": "0x30038",
+ "EventName": "PM_EXEC_STALL_DMISS_LMEM",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCAPI cache, or local OpenCAPI memory."
+ },
+ {
"EventCode": "0x3003A",
"EventName": "PM_CMPL_STALL_EXCEPTION",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete."
@@ -187,17 +297,42 @@
{
"EventCode": "0x3F044",
"EventName": "PM_VSU2_ISSUE",
- "BriefDescription": "VSU instructions issued to VSU pipe 2."
+ "BriefDescription": "VSU instruction issued to VSU pipe 2."
},
{
"EventCode": "0x30058",
"EventName": "PM_TLBIE_FIN",
- "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
+ "BriefDescription": "TLBIE instruction finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
},
{
- "EventCode": "0x3D058",
- "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE",
- "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)."
+ "EventCode": "0x34054",
+ "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
+ },
+ {
+ "EventCode": "0x34056",
+ "EventName": "PM_EXEC_STALL_LOAD_FINISH",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the next-to-finish (NTF) instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
+ },
+ {
+ "EventCode": "0x34058",
+ "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
+ "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
+ },
+ {
+ "EventCode": "0x3D05C",
+ "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
+ },
+ {
+ "EventCode": "0x3E052",
+ "EventName": "PM_DISP_STALL_IC_L3",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
+ },
+ {
+ "EventCode": "0x30060",
+ "EventName": "PM_DISP_HELD_XVFC_MAPPER_CYC",
+ "BriefDescription": "Cycles dispatch is held because the XVFC mapper/SRB was full."
},
{
"EventCode": "0x30066",
@@ -215,9 +350,9 @@
"BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline."
},
{
- "EventCode": "0x40010",
- "EventName": "PM_PMC3_OVERFLOW",
- "BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
+ "EventCode": "0x4C010",
+ "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
},
{
"EventCode": "0x4C012",
@@ -225,16 +360,36 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it resolve."
},
{
+ "EventCode": "0x4C016",
+ "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
+ },
+ {
"EventCode": "0x4C018",
"EventName": "PM_CMPL_STALL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason."
},
{
+ "EventCode": "0x4C01A",
+ "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
+ },
+ {
"EventCode": "0x4C01E",
"EventName": "PM_LSU_ST3_FIN",
"BriefDescription": "LSU Finished an internal operation in ST3 port."
},
{
+ "EventCode": "0x4D014",
+ "EventName": "PM_EXEC_STALL_LOAD",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
+ },
+ {
+ "EventCode": "0x4D016",
+ "EventName": "PM_EXEC_STALL_PTESYNC",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
+ },
+ {
"EventCode": "0x4D018",
"EventName": "PM_EXEC_STALL_BRU",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit."
@@ -250,9 +405,24 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get set to the nest."
},
{
+ "EventCode": "0x4D01E",
+ "EventName": "PM_DISP_STALL_BR_MPRED",
+ "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
+ },
+ {
+ "EventCode": "0x4E010",
+ "EventName": "PM_DISP_STALL_IC_L3MISS",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
+ },
+ {
"EventCode": "0x4E012",
"EventName": "PM_EXEC_STALL_UNKNOWN",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together."
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the next-to-finish (NTF) instruction finishes and completions came too close together."
+ },
+ {
+ "EventCode": "0x4E01A",
+ "EventName": "PM_DISP_STALL_HELD_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any reason."
},
{
"EventCode": "0x4D020",
@@ -260,24 +430,24 @@
"BriefDescription": "VSU instruction was issued to VSU pipe 3."
},
{
- "EventCode": "0x40132",
- "EventName": "PM_MRK_LSU_FIN",
- "BriefDescription": "LSU marked instruction finish."
+ "EventCode": "0x4003C",
+ "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
},
{
"EventCode": "0x45058",
"EventName": "PM_IC_MISS_CMPL",
- "BriefDescription": "Non-speculative icache miss, counted at completion."
+ "BriefDescription": "Non-speculative instruction cache miss, counted at completion."
},
{
- "EventCode": "0x4D050",
- "EventName": "PM_VSU_NON_FLOP_CMPL",
- "BriefDescription": "Non-floating point VSU instructions completed."
+ "EventCode": "0x40060",
+ "EventName": "PM_DISP_HELD_SCOREBOARD_CYC",
+ "BriefDescription": "Cycles dispatch is held while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
},
{
- "EventCode": "0x4D052",
- "EventName": "PM_2FLOP_CMPL",
- "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
+ "EventCode": "0x40062",
+ "EventName": "PM_DISP_HELD_RENAME_CYC",
+ "BriefDescription": "Cycles dispatch is held because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
},
{
"EventCode": "0x400F2",
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
index b5d1bd39cfb2..c606ae03cd27 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
@@ -1,22 +1,202 @@
[
{
+ "EventCode": "0x100FE",
+ "EventName": "PM_INST_CMPL",
+ "BriefDescription": "PowerPC instruction completed."
+ },
+ {
+ "EventCode": "0x1000A",
+ "EventName": "PM_PMC3_REWIND",
+ "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
+ },
+ {
+ "EventCode": "0x10010",
+ "EventName": "PM_PMC4_OVERFLOW",
+ "BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x1001C",
+ "EventName": "PM_ULTRAVISOR_INST_CMPL",
+ "BriefDescription": "PowerPC instruction completed while the thread was in ultravisor state."
+ },
+ {
+ "EventCode": "0x100F0",
+ "EventName": "PM_CYC",
+ "BriefDescription": "Processor cycles."
+ },
+ {
+ "EventCode": "0x10020",
+ "EventName": "PM_PMC4_REWIND",
+ "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
+ },
+ {
+ "EventCode": "0x10022",
+ "EventName": "PM_PMC2_SAVED",
+ "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
+ },
+ {
+ "EventCode": "0x10024",
+ "EventName": "PM_PMC5_OVERFLOW",
+ "BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x1002A",
+ "EventName": "PM_PMC3_HELD_CYC",
+ "BriefDescription": "Cycles when the speculative counter for PMC3 is frozen."
+ },
+ {
+ "EventCode": "0x1F15E",
+ "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
+ "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
+ },
+ {
+ "EventCode": "0x1006C",
+ "EventName": "PM_RUN_CYC_ST_MODE",
+ "BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
+ },
+ {
+ "EventCode": "0x101E8",
+ "EventName": "PM_THRESH_EXC_256",
+ "BriefDescription": "Threshold counter exceeded a count of 256."
+ },
+ {
+ "EventCode": "0x101EC",
+ "EventName": "PM_THRESH_MET",
+ "BriefDescription": "Threshold exceeded."
+ },
+ {
+ "EventCode": "0x100FA",
+ "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
+ "BriefDescription": "Cycles when at least one thread has the run latch set."
+ },
+ {
+ "EventCode": "0x2000A",
+ "EventName": "PM_HYPERVISOR_CYC",
+ "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
+ },
+ {
+ "EventCode": "0x2000C",
+ "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
+ "BriefDescription": "Cycles when the run latch is set for all threads."
+ },
+ {
+ "EventCode": "0x20010",
+ "EventName": "PM_PMC1_OVERFLOW",
+ "BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x2006C",
+ "EventName": "PM_RUN_CYC_SMT4_MODE",
+ "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
+ },
+ {
+ "EventCode": "0x201E6",
+ "EventName": "PM_THRESH_EXC_32",
+ "BriefDescription": "Threshold counter exceeded a value of 32."
+ },
+ {
+ "EventCode": "0x201E8",
+ "EventName": "PM_THRESH_EXC_512",
+ "BriefDescription": "Threshold counter exceeded a value of 512."
+ },
+ {
+ "EventCode": "0x200F4",
+ "EventName": "PM_RUN_CYC",
+ "BriefDescription": "Processor cycles gated by the run latch."
+ },
+ {
+ "EventCode": "0x30010",
+ "EventName": "PM_PMC2_OVERFLOW",
+ "BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x30020",
+ "EventName": "PM_PMC2_REWIND",
+ "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
+ },
+ {
+ "EventCode": "0x30022",
+ "EventName": "PM_PMC4_SAVED",
+ "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
+ },
+ {
+ "EventCode": "0x30024",
+ "EventName": "PM_PMC6_OVERFLOW",
+ "BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x3006C",
+ "EventName": "PM_RUN_CYC_SMT2_MODE",
+ "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
+ },
+ {
"EventCode": "0x301E8",
"EventName": "PM_THRESH_EXC_64",
"BriefDescription": "Threshold counter exceeded a value of 64."
},
{
- "EventCode": "0x45050",
- "EventName": "PM_1FLOP_CMPL",
- "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+ "EventCode": "0x301EA",
+ "EventName": "PM_THRESH_EXC_1024",
+ "BriefDescription": "Threshold counter exceeded a value of 1024."
+ },
+ {
+ "EventCode": "0x40010",
+ "EventName": "PM_PMC3_OVERFLOW",
+ "BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x40114",
+ "EventName": "PM_MRK_START_PROBE_NOP_DISP",
+ "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
+ },
+ {
+ "EventCode": "0x4D010",
+ "EventName": "PM_PMC1_SAVED",
+ "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
+ },
+ {
+ "EventCode": "0x4D012",
+ "EventName": "PM_PMC3_SAVED",
+ "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
+ },
+ {
+ "EventCode": "0x4D022",
+ "EventName": "PM_HYPERVISOR_INST_CMPL",
+ "BriefDescription": "PowerPC instruction completed while the thread was in hypervisor state."
+ },
+ {
+ "EventCode": "0x4D026",
+ "EventName": "PM_ULTRAVISOR_CYC",
+ "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
+ },
+ {
+ "EventCode": "0x4D028",
+ "EventName": "PM_PRIVILEGED_CYC",
+ "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
+ },
+ {
+ "EventCode": "0x4D02C",
+ "EventName": "PM_PMC1_REWIND",
+ "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
+ },
+ {
+ "EventCode": "0x40030",
+ "EventName": "PM_INST_FIN",
+ "BriefDescription": "Instruction finished."
+ },
+ {
+ "EventCode": "0x40134",
+ "EventName": "PM_MRK_INST_TIMEO",
+ "BriefDescription": "Marked instruction finish timeout (instruction was lost)."
},
{
- "EventCode": "0x45052",
- "EventName": "PM_4FLOP_CMPL",
- "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+ "EventCode": "0x401EA",
+ "EventName": "PM_THRESH_EXC_128",
+ "BriefDescription": "Threshold counter exceeded a value of 128."
},
{
- "EventCode": "0x4D054",
- "EventName": "PM_8FLOP_CMPL",
- "BriefDescription": "Four Double Precision vector instructions completed."
+ "EventCode": "0x400FA",
+ "EventName": "PM_RUN_INST_CMPL",
+ "BriefDescription": "PowerPC instruction completed while the run latch is set."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
index db3766dca07c..ea73900d248a 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
@@ -1,35 +1,10 @@
[
{
- "EventCode": "0x1F15E",
- "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
- "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
- },
- {
- "EventCode": "0x20016",
- "EventName": "PM_ST_FIN",
- "BriefDescription": "Store finish count. Includes speculative activity."
- },
- {
"EventCode": "0x20018",
"EventName": "PM_ST_FWD",
"BriefDescription": "Store forwards that finished."
},
{
- "EventCode": "0x2011C",
- "EventName": "PM_MRK_NTF_CYC",
- "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)."
- },
- {
- "EventCode": "0x2E01C",
- "EventName": "PM_EXEC_STALL_TLBIE",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
- },
- {
- "EventCode": "0x201E6",
- "EventName": "PM_THRESH_EXC_32",
- "BriefDescription": "Threshold counter exceeded a value of 32."
- },
- {
"EventCode": "0x200F0",
"EventName": "PM_ST_CMPL",
"BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)."
@@ -37,21 +12,11 @@
{
"EventCode": "0x200FE",
"EventName": "PM_DATA_FROM_L2MISS",
- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss."
- },
- {
- "EventCode": "0x30010",
- "EventName": "PM_PMC2_OVERFLOW",
- "BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
- },
- {
- "EventCode": "0x4D010",
- "EventName": "PM_PMC1_SAVED",
- "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
+ "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss."
},
{
- "EventCode": "0x4D05C",
- "EventName": "PM_DPP_FLOP_CMPL",
- "BriefDescription": "Double-Precision or Quad-Precision instructions completed."
+ "EventCode": "0x300F0",
+ "EventName": "PM_ST_MISS_L1",
+ "BriefDescription": "Store Missed L1."
}
]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index daf9458f0b77..c6780d5c456b 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -558,6 +558,7 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -800,6 +801,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
@@ -1058,7 +1060,6 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -1230,6 +1231,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -1267,6 +1269,7 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -1355,7 +1358,6 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_core_core_clks",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc",
@@ -1363,7 +1365,6 @@
},
{
"BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu_core@FP_ARITH_DISPATCHED.PORT_0@ + cpu_core@FP_ARITH_DISPATCHED.PORT_1@ + cpu_core@FP_ARITH_DISPATCHED.PORT_5@) / (2 * tma_info_core_core_clks)",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_core_fp_arith_utilization",
@@ -1769,7 +1770,6 @@
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_retiring * tma_info_thread_slots / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "tma_info_pipeline_retire",
@@ -2002,6 +2002,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -2375,6 +2376,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -2405,6 +2407,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
index 0f1628d698da..06e67e34e1bf 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -466,6 +466,7 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -682,6 +683,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
index 8fcc05c4e0a1..a6eed0d9a26d 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
@@ -85,6 +85,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -319,7 +320,6 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -464,6 +464,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -497,6 +498,7 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -574,14 +576,12 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc"
},
{
"BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_core_fp_arith_utilization",
@@ -927,7 +927,6 @@
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "tma_info_pipeline_retire"
@@ -1100,6 +1099,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1419,6 +1419,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1446,6 +1447,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
index 9bb7e3f20f7f..7082ad5ba961 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
@@ -289,6 +289,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -523,7 +524,6 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -668,6 +668,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -701,6 +702,7 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -778,14 +780,12 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc"
},
{
"BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_core_fp_arith_utilization",
@@ -1144,7 +1144,6 @@
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "tma_info_pipeline_retire"
@@ -1369,6 +1368,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1715,6 +1715,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1742,6 +1743,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 6650100830c4..3a8770e29fe8 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -19,12 +19,12 @@ GenuineIntel-6-3A,v24,ivybridge,core
GenuineIntel-6-3E,v23,ivytown,core
GenuineIntel-6-2D,v23,jaketown,core
GenuineIntel-6-(57|85),v10,knightslanding,core
-GenuineIntel-6-A[AC],v1.03,meteorlake,core
+GenuineIntel-6-A[AC],v1.04,meteorlake,core
GenuineIntel-6-1[AEF],v3,nehalemep,core
GenuineIntel-6-2E,v3,nehalemex,core
GenuineIntel-6-A7,v1.01,rocketlake,core
GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-(8F|CF),v1.14,sapphirerapids,core
+GenuineIntel-6-(8F|CF),v1.15,sapphirerapids,core
GenuineIntel-6-AF,v1.00,sierraforest,core
GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
index e1ae7c92f38e..1de0200b32f6 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
@@ -37,6 +37,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.L2_STALLS",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Number of L1D misses that are outstanding",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
@@ -261,6 +270,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cycles when L1D is locked",
+ "EventCode": "0x42",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
@@ -515,6 +533,17 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
"Data_LA": "1",
"EventCode": "0xd2",
@@ -731,6 +760,14 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
+ "EventCode": "0x44",
+ "EventName": "MEM_STORE_RETIRED.L2_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of load ops retired.",
"Data_LA": "1",
"EventCode": "0xd0",
@@ -978,6 +1015,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cacheable and Non-Cacheable code read requests",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Demand Data Read requests sent to uncore",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
@@ -996,6 +1042,89 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 outstanding demand data read request is pending.",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterMask": "6",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Store Read transactions pending for off-core. Highly correlated.",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
@@ -1005,6 +1134,42 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+ "EventCode": "0x40",
+ "EventName": "SW_PREFETCH_ACCESS.NTA",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHW instructions executed.",
+ "EventCode": "0x40",
+ "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+ "EventCode": "0x40",
+ "EventName": "SW_PREFETCH_ACCESS.T0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "EventCode": "0x40",
+ "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an icache miss",
"EventCode": "0x71",
"EventName": "TOPDOWN_FE_BOUND.ICACHE",
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
index 616489f0974a..f66506ee37ef 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
@@ -42,6 +42,14 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.PORT_5",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
index 0f064518d1c0..8264419500a5 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
@@ -44,6 +44,14 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "DSB_FILL.FB_STALL_OT",
+ "EventCode": "0x62",
+ "EventName": "DSB_FILL.FB_STALL_OT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired ANT branches",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.ANY_ANT",
@@ -56,6 +64,30 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired Instructions who experienced DSB miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x1",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced a critical DSB miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x11",
+ "PEBS": "1",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.ITLB_MISS",
@@ -89,6 +121,18 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.L2_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x13",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
@@ -244,6 +288,18 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x15",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
index 67e949b4c789..2605e1d0ba9f 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
@@ -67,6 +67,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Number of machine clears due to memory ordering conflicts.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
"CounterMask": "3",
"EventCode": "0x47",
@@ -96,6 +105,35 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "MEMORY_ORDERING.MD_NUKE",
+ "EventCode": "0x09",
+ "EventName": "MEMORY_ORDERING.MD_NUKE",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of memory ordering machine clears due to memory renaming.",
+ "EventCode": "0x09",
+ "EventName": "MEMORY_ORDERING.MRN_NUKE",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x400",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "53",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
"Data_LA": "1",
"EventCode": "0xcd",
@@ -122,6 +160,19 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x800",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "23",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
"Data_LA": "1",
"EventCode": "0xcd",
@@ -235,5 +286,34 @@
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where data return is pending for a Demand Data Read request who miss L3 cache.",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where the core is waiting on at least 6 outstanding demand data read requests known to have missed the L3 cache.",
+ "CounterMask": "6",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
+ "PublicDescription": "Cycles where the core is waiting on at least 6 outstanding demand data read requests known to have missed the L3 cache. Note that this event does not capture all elapsed cycles while the requests are outstanding - only cycles from when the requests were known to have missed the L3 cache.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/other.json b/tools/perf/pmu-events/arch/x86/meteorlake/other.json
index 2ec57f487525..f4c603599df4 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/other.json
@@ -1,5 +1,13 @@
[
{
+ "BriefDescription": "ASSISTS.PAGE_FAULT",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.PAGE_FAULT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts streaming stores that have any type of response.",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
@@ -31,6 +39,14 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "RS.EMPTY_RESOURCE",
+ "EventCode": "0xa5",
+ "EventName": "RS.EMPTY_RESOURCE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)",
"EventCode": "0x75",
"EventName": "SERIALIZATION.C01_MS_SCB",
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
index eeaa7a97f71c..352c5efafc06 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
@@ -312,6 +312,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.RET",
+ "PEBS": "1",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RETURN",
@@ -330,6 +340,33 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.C01",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.C02",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.C0_WAIT",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x70",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles",
"EventName": "CPU_CLK_UNHALTED.CORE",
"SampleAfterValue": "2000003",
@@ -362,6 +399,24 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "CPU_CLK_UNHALTED.PAUSE",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.PAUSE",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
@@ -603,6 +658,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired NOP instructions.",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.NOP",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Precise instruction retired with PEBS precise-distribution",
"EventName": "INST_RETIRED.PREC_DIST",
"PEBS": "1",
@@ -612,6 +676,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Iterations of Repeat string retired instructions.",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.REP_ITERATION",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
"CounterMask": "1",
"EventCode": "0xad",
@@ -622,6 +695,17 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Clears speculative count",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.CLEARS_COUNT",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
@@ -631,6 +715,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.RAT_STALLS",
+ "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
@@ -734,6 +827,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.DATA_UNKNOWN",
@@ -743,6 +845,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x88",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
@@ -752,6 +863,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x82",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
"CounterMask": "1",
"EventCode": "0xa8",
@@ -824,6 +944,24 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "LFENCE instructions retired",
+ "EventCode": "0xe0",
+ "EventName": "MISC2_RETIRED.LFENCE",
+ "PublicDescription": "number of LFENCE retired instructions",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
@@ -1261,6 +1399,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cycles with retired uop(s).",
+ "CounterMask": "1",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.CYCLES",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired uops except the last uop of each instruction.",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
@@ -1307,6 +1455,17 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.STALLS",
+ "Invert": "1",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Cycles with less than 10 actually retired uops.",
"CounterMask": "10",
"EventCode": "0xc2",
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
index 1bb9cededa56..a0191c8b708d 100644
--- a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
@@ -85,6 +85,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -319,7 +320,6 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -464,6 +464,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -497,6 +498,7 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -574,14 +576,12 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc"
},
{
"BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_core_fp_arith_utilization",
@@ -933,7 +933,6 @@
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "tma_info_pipeline_retire"
@@ -1126,6 +1125,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1445,6 +1445,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1472,6 +1473,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json
index 31b6be9fb8c7..442ef3807a9d 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json
@@ -77,6 +77,24 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by PMM attached to this socket, whether or not in Sub NUMA Cluster(SNC) Mode. In SNC Mode counts PMM accesses that are controlled by the close or distant SNC Cluster.",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.LOCAL_SOCKET_PMM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x700C00001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were supplied by PMM.",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.PMM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703C00001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts demand data reads that were supplied by DRAM attached to another socket.",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.REMOTE_DRAM",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
index c207c851a9f9..222212abd811 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -553,7 +553,6 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector + tma_fp_amx",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -717,6 +716,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -750,6 +750,7 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -827,14 +828,12 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR_HALF + 2 * (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF) + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * (FP_ARITH_INST_RETIRED2.128B_PACKED_HALF + cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@) + 16 * (FP_ARITH_INST_RETIRED2.256B_PACKED_HALF + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) + 32 * FP_ARITH_INST_RETIRED2.512B_PACKED_HALF + 4 * AMX_OPS_RETIRED.BF16",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc"
},
{
"BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(FP_ARITH_DISPATCHED.PORT_0 + FP_ARITH_DISPATCHED.PORT_1 + FP_ARITH_DISPATCHED.PORT_5) / (2 * tma_info_core_core_clks)",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_core_fp_arith_utilization",
@@ -1216,7 +1215,6 @@
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "tma_info_pipeline_retire"
@@ -1467,6 +1465,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1841,6 +1840,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1868,6 +1868,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index 94cb38540b5a..2795a404bb58 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -923,7 +923,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
- "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.DATA_READ / UNC_ARB_TRK_OCCUPANCY.DATA_READ@thresh\\=1@",
+ "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.DATA_READ / UNC_ARB_TRK_OCCUPANCY.DATA_READ@cmask\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
index c7c2d6ab1a93..fab084e1bc69 100644
--- a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
@@ -79,6 +79,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -313,7 +314,6 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -458,6 +458,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -491,6 +492,7 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -568,14 +570,12 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc"
},
{
"BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_core_fp_arith_utilization",
@@ -927,7 +927,6 @@
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "tma_info_pipeline_retire"
@@ -1114,6 +1113,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1433,6 +1433,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1460,6 +1461,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c
index a630c617e879..12bd043a05e3 100644
--- a/tools/perf/pmu-events/empty-pmu-events.c
+++ b/tools/perf/pmu-events/empty-pmu-events.c
@@ -266,19 +266,53 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = {
},
};
-int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn,
- void *data)
+int pmu_events_table__for_each_event(const struct pmu_events_table *table, struct perf_pmu *pmu,
+ pmu_event_iter_fn fn, void *data)
{
for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
- int ret = fn(pe, table, data);
+ int ret;
+ if (pmu && !pmu__name_match(pmu, pe->pmu))
+ continue;
+
+ ret = fn(pe, table, data);
if (ret)
return ret;
}
return 0;
}
-int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
+int pmu_events_table__find_event(const struct pmu_events_table *table,
+ struct perf_pmu *pmu,
+ const char *name,
+ pmu_event_iter_fn fn,
+ void *data)
+{
+ for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
+ if (pmu && !pmu__name_match(pmu, pe->pmu))
+ continue;
+
+ if (!strcasecmp(pe->name, name))
+ return fn(pe, table, data);
+ }
+ return -1000;
+}
+
+size_t pmu_events_table__num_events(const struct pmu_events_table *table,
+ struct perf_pmu *pmu)
+{
+ size_t count = 0;
+
+ for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
+ if (pmu && !pmu__name_match(pmu, pe->pmu))
+ continue;
+
+ count++;
+ }
+ return count;
+}
+
+int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
void *data)
{
for (const struct pmu_metric *pm = &table->entries[0]; pm->metric_expr; pm++) {
@@ -371,7 +405,8 @@ const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const
int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
{
for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) {
- int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data);
+ int ret = pmu_events_table__for_each_event(&tables->event_table,
+ /*pmu=*/ NULL, fn, data);
if (ret)
return ret;
@@ -384,7 +419,7 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
for (const struct pmu_events_map *tables = &pmu_events_map[0];
tables->arch;
tables++) {
- int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data);
+ int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
if (ret)
return ret;
@@ -408,7 +443,7 @@ int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
tables->name;
tables++) {
- int ret = pmu_events_table_for_each_event(&tables->table, fn, data);
+ int ret = pmu_events_table__for_each_event(&tables->table, /*pmu=*/ NULL, fn, data);
if (ret)
return ret;
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 12e80bb7939b..72ba4a9239c6 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -42,7 +42,7 @@ _metricgroups = {}
# Order specific JsonEvent attributes will be visited.
_json_event_attributes = [
# cmp_sevent related attributes.
- 'name', 'pmu', 'topic', 'desc',
+ 'name', 'topic', 'desc',
# Seems useful, put it early.
'event',
# Short things in alphabetical order.
@@ -53,7 +53,7 @@ _json_event_attributes = [
# Attributes that are in pmu_metric rather than pmu_event.
_json_metric_attributes = [
- 'pmu', 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
+ 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
'default_metricgroup_name', 'aggr_mode', 'event_grouping'
]
@@ -113,13 +113,24 @@ class BigCString:
strings: Set[str]
big_string: Sequence[str]
offsets: Dict[str, int]
+ insert_number: int
+ insert_point: Dict[str, int]
+ metrics: Set[str]
def __init__(self):
self.strings = set()
+ self.insert_number = 0;
+ self.insert_point = {}
+ self.metrics = set()
- def add(self, s: str) -> None:
+ def add(self, s: str, metric: bool) -> None:
"""Called to add to the big string."""
- self.strings.add(s)
+ if s not in self.strings:
+ self.strings.add(s)
+ self.insert_point[s] = self.insert_number
+ self.insert_number += 1
+ if metric:
+ self.metrics.add(s)
def compute(self) -> None:
"""Called once all strings are added to compute the string and offsets."""
@@ -160,8 +171,11 @@ class BigCString:
self.big_string = []
self.offsets = {}
+ def string_cmp_key(s: str) -> Tuple[bool, int, str]:
+ return (s in self.metrics, self.insert_point[s], s)
+
# Emit all strings that aren't folded in a sorted manner.
- for s in sorted(self.strings):
+ for s in sorted(self.strings, key=string_cmp_key):
if s not in folded_strings:
self.offsets[s] = big_string_offset
self.big_string.append(f'/* offset={big_string_offset} */ "')
@@ -252,7 +266,7 @@ class JsonEvent:
def unit_to_pmu(unit: str) -> Optional[str]:
"""Convert a JSON Unit to Linux PMU name."""
if not unit:
- return None
+ return 'default_core'
# Comment brought over from jevents.c:
# it's not realistic to keep adding these, we need something more scalable ...
table = {
@@ -274,6 +288,7 @@ class JsonEvent:
'DFPMC': 'amd_df',
'cpu_core': 'cpu_core',
'cpu_atom': 'cpu_atom',
+ 'ali_drw': 'ali_drw',
}
return table[unit] if unit in table else f'uncore_{unit.lower()}'
@@ -342,16 +357,15 @@ class JsonEvent:
self.desc += extra_desc
if self.long_desc and extra_desc:
self.long_desc += extra_desc
- if self.pmu:
- if self.desc and not self.desc.endswith('. '):
- self.desc += '. '
- self.desc = (self.desc if self.desc else '') + ('Unit: ' + self.pmu + ' ')
- if arch_std and arch_std.lower() in _arch_std_events:
- event = _arch_std_events[arch_std.lower()].event
- # Copy from the architecture standard event to self for undefined fields.
- for attr, value in _arch_std_events[arch_std.lower()].__dict__.items():
- if hasattr(self, attr) and not getattr(self, attr):
- setattr(self, attr, value)
+ if arch_std:
+ if arch_std.lower() in _arch_std_events:
+ event = _arch_std_events[arch_std.lower()].event
+ # Copy from the architecture standard event to self for undefined fields.
+ for attr, value in _arch_std_events[arch_std.lower()].__dict__.items():
+ if hasattr(self, attr) and not getattr(self, attr):
+ setattr(self, attr, value)
+ else:
+ raise argparse.ArgumentTypeError('Cannot find arch std event:', arch_std)
self.event = real_event(self.name, event)
@@ -433,13 +447,13 @@ def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
def print_pending_events() -> None:
"""Optionally close events table."""
- def event_cmp_key(j: JsonEvent) -> Tuple[bool, str, str, str, str]:
+ def event_cmp_key(j: JsonEvent) -> Tuple[str, str, bool, str, str]:
def fix_none(s: Optional[str]) -> str:
if s is None:
return ''
return s
- return (j.desc is not None, fix_none(j.topic), fix_none(j.name), fix_none(j.pmu),
+ return (fix_none(j.pmu).replace(',','_'), fix_none(j.name), j.desc is not None, fix_none(j.topic),
fix_none(j.metric_name))
global _pending_events
@@ -454,13 +468,36 @@ def print_pending_events() -> None:
global event_tables
_event_tables.append(_pending_events_tblname)
- _args.output_file.write(
- f'static const struct compact_pmu_event {_pending_events_tblname}[] = {{\n')
-
+ first = True
+ last_pmu = None
+ pmus = set()
for event in sorted(_pending_events, key=event_cmp_key):
+ if event.pmu != last_pmu:
+ if not first:
+ _args.output_file.write('};\n')
+ pmu_name = event.pmu.replace(',', '_')
+ _args.output_file.write(
+ f'static const struct compact_pmu_event {_pending_events_tblname}_{pmu_name}[] = {{\n')
+ first = False
+ last_pmu = event.pmu
+ pmus.add((event.pmu, pmu_name))
+
_args.output_file.write(event.to_c_string(metric=False))
_pending_events = []
+ _args.output_file.write(f"""
+}};
+
+const struct pmu_table_entry {_pending_events_tblname}[] = {{
+""")
+ for (pmu, tbl_pmu) in sorted(pmus):
+ pmu_name = f"{pmu}\\000"
+ _args.output_file.write(f"""{{
+ .entries = {_pending_events_tblname}_{tbl_pmu},
+ .num_entries = ARRAY_SIZE({_pending_events_tblname}_{tbl_pmu}),
+ .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
+}},
+""")
_args.output_file.write('};\n\n')
def print_pending_metrics() -> None:
@@ -486,13 +523,36 @@ def print_pending_metrics() -> None:
global metric_tables
_metric_tables.append(_pending_metrics_tblname)
- _args.output_file.write(
- f'static const struct compact_pmu_event {_pending_metrics_tblname}[] = {{\n')
-
+ first = True
+ last_pmu = None
+ pmus = set()
for metric in sorted(_pending_metrics, key=metric_cmp_key):
+ if metric.pmu != last_pmu:
+ if not first:
+ _args.output_file.write('};\n')
+ pmu_name = metric.pmu.replace(',', '_')
+ _args.output_file.write(
+ f'static const struct compact_pmu_event {_pending_metrics_tblname}_{pmu_name}[] = {{\n')
+ first = False
+ last_pmu = metric.pmu
+ pmus.add((metric.pmu, pmu_name))
+
_args.output_file.write(metric.to_c_string(metric=True))
_pending_metrics = []
+ _args.output_file.write(f"""
+}};
+
+const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
+""")
+ for (pmu, tbl_pmu) in sorted(pmus):
+ pmu_name = f"{pmu}\\000"
+ _args.output_file.write(f"""{{
+ .entries = {_pending_metrics_tblname}_{tbl_pmu},
+ .num_entries = ARRAY_SIZE({_pending_metrics_tblname}_{tbl_pmu}),
+ .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
+}},
+""")
_args.output_file.write('};\n\n')
def get_topic(topic: str) -> str:
@@ -521,17 +581,20 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
assert len(mgroup) > 1, parents
description = f"{metricgroup_descriptions[mgroup]}\\000"
mgroup = f"{mgroup}\\000"
- _bcs.add(mgroup)
- _bcs.add(description)
+ _bcs.add(mgroup, metric=True)
+ _bcs.add(description, metric=True)
_metricgroups[mgroup] = description
return
topic = get_topic(item.name)
for event in read_json_events(item.path, topic):
+ pmu_name = f"{event.pmu}\\000"
if event.name:
- _bcs.add(event.build_c_string(metric=False))
+ _bcs.add(pmu_name, metric=False)
+ _bcs.add(event.build_c_string(metric=False), metric=False)
if event.metric_name:
- _bcs.add(event.build_c_string(metric=True))
+ _bcs.add(pmu_name, metric=True)
+ _bcs.add(event.build_c_string(metric=True), metric=True)
def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
"""Process a JSON file during the main walk."""
@@ -573,14 +636,14 @@ def print_mapping_table(archs: Sequence[str]) -> None:
_args.output_file.write("""
/* Struct used to make the PMU event table implementation opaque to callers. */
struct pmu_events_table {
- const struct compact_pmu_event *entries;
- size_t length;
+ const struct pmu_table_entry *pmus;
+ uint32_t num_pmus;
};
/* Struct used to make the PMU metric table implementation opaque to callers. */
struct pmu_metrics_table {
- const struct compact_pmu_event *entries;
- size_t length;
+ const struct pmu_table_entry *pmus;
+ uint32_t num_pmus;
};
/*
@@ -610,12 +673,12 @@ const struct pmu_events_map pmu_events_map[] = {
\t.arch = "testarch",
\t.cpuid = "testcpu",
\t.event_table = {
-\t\t.entries = pmu_events__test_soc_cpu,
-\t\t.length = ARRAY_SIZE(pmu_events__test_soc_cpu),
+\t\t.pmus = pmu_events__test_soc_cpu,
+\t\t.num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu),
\t},
\t.metric_table = {
-\t\t.entries = pmu_metrics__test_soc_cpu,
-\t\t.length = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
+\t\t.pmus = pmu_metrics__test_soc_cpu,
+\t\t.num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
\t}
},
""")
@@ -645,12 +708,12 @@ const struct pmu_events_map pmu_events_map[] = {
\t.arch = "{arch}",
\t.cpuid = "{cpuid}",
\t.event_table = {{
-\t\t.entries = {event_tblname},
-\t\t.length = {event_size}
+\t\t.pmus = {event_tblname},
+\t\t.num_pmus = {event_size}
\t}},
\t.metric_table = {{
-\t\t.entries = {metric_tblname},
-\t\t.length = {metric_size}
+\t\t.pmus = {metric_tblname},
+\t\t.num_pmus = {metric_size}
\t}}
}},
""")
@@ -681,15 +744,15 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = {
for tblname in _sys_event_tables:
_args.output_file.write(f"""\t{{
\t\t.event_table = {{
-\t\t\t.entries = {tblname},
-\t\t\t.length = ARRAY_SIZE({tblname})
+\t\t\t.pmus = {tblname},
+\t\t\t.num_pmus = ARRAY_SIZE({tblname})
\t\t}},""")
metric_tblname = _sys_event_table_to_metric_table_mapping[tblname]
if metric_tblname in _sys_metric_tables:
_args.output_file.write(f"""
\t\t.metric_table = {{
-\t\t\t.entries = {metric_tblname},
-\t\t\t.length = ARRAY_SIZE({metric_tblname})
+\t\t\t.pmus = {metric_tblname},
+\t\t\t.num_pmus = ARRAY_SIZE({metric_tblname})
\t\t}},""")
printed_metric_tables.append(metric_tblname)
_args.output_file.write(f"""
@@ -749,15 +812,18 @@ static void decompress_metric(int offset, struct pmu_metric *pm)
_args.output_file.write('\twhile (*p++);')
_args.output_file.write("""}
-int pmu_events_table_for_each_event(const struct pmu_events_table *table,
- pmu_event_iter_fn fn,
- void *data)
+static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
+ const struct pmu_table_entry *pmu,
+ pmu_event_iter_fn fn,
+ void *data)
{
- for (size_t i = 0; i < table->length; i++) {
- struct pmu_event pe;
- int ret;
+ int ret;
+ struct pmu_event pe = {
+ .pmu = &big_c_string[pmu->pmu_name.offset],
+ };
- decompress_event(table->entries[i].offset, &pe);
+ for (uint32_t i = 0; i < pmu->num_entries; i++) {
+ decompress_event(pmu->entries[i].offset, &pe);
if (!pe.name)
continue;
ret = fn(&pe, table, data);
@@ -765,17 +831,119 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table,
return ret;
}
return 0;
+ }
+
+static int pmu_events_table__find_event_pmu(const struct pmu_events_table *table,
+ const struct pmu_table_entry *pmu,
+ const char *name,
+ pmu_event_iter_fn fn,
+ void *data)
+{
+ struct pmu_event pe = {
+ .pmu = &big_c_string[pmu->pmu_name.offset],
+ };
+ int low = 0, high = pmu->num_entries - 1;
+
+ while (low <= high) {
+ int cmp, mid = (low + high) / 2;
+
+ decompress_event(pmu->entries[mid].offset, &pe);
+
+ if (!pe.name && !name)
+ goto do_call;
+
+ if (!pe.name && name) {
+ low = mid + 1;
+ continue;
+ }
+ if (pe.name && !name) {
+ high = mid - 1;
+ continue;
+ }
+
+ cmp = strcasecmp(pe.name, name);
+ if (cmp < 0) {
+ low = mid + 1;
+ continue;
+ }
+ if (cmp > 0) {
+ high = mid - 1;
+ continue;
+ }
+ do_call:
+ return fn ? fn(&pe, table, data) : 0;
+ }
+ return -1000;
}
-int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table,
- pmu_metric_iter_fn fn,
- void *data)
+int pmu_events_table__for_each_event(const struct pmu_events_table *table,
+ struct perf_pmu *pmu,
+ pmu_event_iter_fn fn,
+ void *data)
+{
+ for (size_t i = 0; i < table->num_pmus; i++) {
+ const struct pmu_table_entry *table_pmu = &table->pmus[i];
+ const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
+ int ret;
+
+ if (pmu && !pmu__name_match(pmu, pmu_name))
+ continue;
+
+ ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data);
+ if (pmu || ret)
+ return ret;
+ }
+ return 0;
+}
+
+int pmu_events_table__find_event(const struct pmu_events_table *table,
+ struct perf_pmu *pmu,
+ const char *name,
+ pmu_event_iter_fn fn,
+ void *data)
{
- for (size_t i = 0; i < table->length; i++) {
- struct pmu_metric pm;
+ for (size_t i = 0; i < table->num_pmus; i++) {
+ const struct pmu_table_entry *table_pmu = &table->pmus[i];
+ const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
int ret;
- decompress_metric(table->entries[i].offset, &pm);
+ if (!pmu__name_match(pmu, pmu_name))
+ continue;
+
+ ret = pmu_events_table__find_event_pmu(table, table_pmu, name, fn, data);
+ if (ret != -1000)
+ return ret;
+ }
+ return -1000;
+}
+
+size_t pmu_events_table__num_events(const struct pmu_events_table *table,
+ struct perf_pmu *pmu)
+{
+ size_t count = 0;
+
+ for (size_t i = 0; i < table->num_pmus; i++) {
+ const struct pmu_table_entry *table_pmu = &table->pmus[i];
+ const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
+
+ if (pmu__name_match(pmu, pmu_name))
+ count += table_pmu->num_entries;
+ }
+ return count;
+}
+
+static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table,
+ const struct pmu_table_entry *pmu,
+ pmu_metric_iter_fn fn,
+ void *data)
+{
+ int ret;
+ struct pmu_metric pm = {
+ .pmu = &big_c_string[pmu->pmu_name.offset],
+ };
+
+ for (uint32_t i = 0; i < pmu->num_entries; i++) {
+ decompress_metric(pmu->entries[i].offset, &pm);
if (!pm.metric_expr)
continue;
ret = fn(&pm, table, data);
@@ -785,11 +953,25 @@ int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table,
return 0;
}
+int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
+ pmu_metric_iter_fn fn,
+ void *data)
+{
+ for (size_t i = 0; i < table->num_pmus; i++) {
+ int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
+ fn, data);
+
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
{
const struct pmu_events_table *table = NULL;
char *cpuid = perf_pmu__getcpuid(pmu);
- int i;
+ size_t i;
/* on some platforms which uses cpus map, cpuid can be NULL for
* PMUs other than CORE PMUs.
@@ -809,7 +991,17 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
}
}
free(cpuid);
- return table;
+ if (!pmu || !table)
+ return table;
+
+ for (i = 0; i < table->num_pmus; i++) {
+ const struct pmu_table_entry *table_pmu = &table->pmus[i];
+ const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
+
+ if (pmu__name_match(pmu, pmu_name))
+ return table;
+ }
+ return NULL;
}
const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
@@ -866,7 +1058,8 @@ int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
for (const struct pmu_events_map *tables = &pmu_events_map[0];
tables->arch;
tables++) {
- int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data);
+ int ret = pmu_events_table__for_each_event(&tables->event_table,
+ /*pmu=*/ NULL, fn, data);
if (ret)
return ret;
@@ -879,7 +1072,7 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
for (const struct pmu_events_map *tables = &pmu_events_map[0];
tables->arch;
tables++) {
- int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data);
+ int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
if (ret)
return ret;
@@ -903,7 +1096,8 @@ int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
tables->name;
tables++) {
- int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data);
+ int ret = pmu_events_table__for_each_event(&tables->event_table,
+ /*pmu=*/ NULL, fn, data);
if (ret)
return ret;
@@ -916,7 +1110,7 @@ int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
tables->name;
tables++) {
- int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data);
+ int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
if (ret)
return ret;
@@ -999,14 +1193,20 @@ such as "arm/cortex-a34".''',
_args = ap.parse_args()
_args.output_file.write("""
-#include "pmu-events/pmu-events.h"
+#include <pmu-events/pmu-events.h>
#include "util/header.h"
#include "util/pmu.h"
#include <string.h>
#include <stddef.h>
struct compact_pmu_event {
- int offset;
+ int offset;
+};
+
+struct pmu_table_entry {
+ const struct compact_pmu_event *entries;
+ uint32_t num_entries;
+ struct compact_pmu_event pmu_name;
};
""")
diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py
index 85a3545f5b6a..3e673f25d5fd 100644
--- a/tools/perf/pmu-events/metric.py
+++ b/tools/perf/pmu-events/metric.py
@@ -413,6 +413,10 @@ def has_event(event: Event) -> Function:
# pylint: disable=invalid-name
return Function('has_event', event)
+def strcmp_cpuid_str(cpuid: Event) -> Function:
+ # pylint: disable=redefined-builtin
+ # pylint: disable=invalid-name
+ return Function('strcmp_cpuid_str', cpuid)
class Metric:
"""An individual metric that will specifiable on the perf command line."""
@@ -541,14 +545,23 @@ def ParsePerfJson(orig: str) -> Expression:
"""
# pylint: disable=eval-used
py = orig.strip()
+ # First try to convert everything that looks like a string (event name) into Event(r"EVENT_NAME").
+ # This isn't very selective so is followed up by converting some unwanted conversions back again
py = re.sub(r'([a-zA-Z][^-+/\* \\\(\),]*(?:\\.[^-+/\* \\\(\),]*)*)',
r'Event(r"\1")', py)
+ # If it started with a # it should have been a literal, rather than an event name
py = re.sub(r'#Event\(r"([^"]*)"\)', r'Literal("#\1")', py)
+ # Convert accidentally converted hex constants ("0Event(r"xDEADBEEF)"") back to a constant,
+ # but keep it wrapped in Event(), otherwise Python drops the 0x prefix and it gets interpreted as
+ # a double by the Bison parser
+ py = re.sub(r'0Event\(r"[xX]([0-9a-fA-F]*)"\)', r'Event("0x\1")', py)
+ # Convert accidentally converted scientific notation constants back
py = re.sub(r'([0-9]+)Event\(r"(e[0-9]+)"\)', r'\1\2', py)
- keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count', 'has_event']
+ # Convert all the known keywords back from events to just the keyword
+ keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count', 'has_event', 'strcmp_cpuid_str',
+ 'cpuid_not_more_than']
for kw in keywords:
py = re.sub(rf'Event\(r"{kw}"\)', kw, py)
-
try:
parsed = ast.parse(py, mode='eval')
except SyntaxError as e:
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index caf59f23cd64..f5aa96f1685c 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -3,6 +3,7 @@
#define PMU_EVENTS_H
#include <stdbool.h>
+#include <stddef.h>
struct perf_pmu;
@@ -77,9 +78,19 @@ typedef int (*pmu_metric_iter_fn)(const struct pmu_metric *pm,
const struct pmu_metrics_table *table,
void *data);
-int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn,
+int pmu_events_table__for_each_event(const struct pmu_events_table *table,
+ struct perf_pmu *pmu,
+ pmu_event_iter_fn fn,
void *data);
-int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
+int pmu_events_table__find_event(const struct pmu_events_table *table,
+ struct perf_pmu *pmu,
+ const char *name,
+ pmu_event_iter_fn fn,
+ void *data);
+size_t pmu_events_table__num_events(const struct pmu_events_table *table,
+ struct perf_pmu *pmu);
+
+int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
void *data);
const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu);