summaryrefslogtreecommitdiff
path: root/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json')
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json91
1 files changed, 55 insertions, 36 deletions
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index 554f87c03c05..cb1420df3768 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -65,7 +65,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ((INT_MISC.RECOVERY_CYCLES_ANY / 2) if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / SLOTS",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / SLOTS",
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
@@ -73,7 +73,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "(BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * tma_bad_speculation",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * tma_bad_speculation",
"MetricGroup": "BadSpec;BrMispredicts;TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_branch_mispredicts",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES",
@@ -97,7 +97,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if (IPC > 1.8) else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@ - RS_EVENTS.EMPTY_CYCLES if (tma_fetch_latency > 0.1) else RESOURCE_STALLS.SB)) * tma_backend_bound",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if IPC > 1.8 else (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@ - RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else RESOURCE_STALLS.SB)) * tma_backend_bound",
"MetricGroup": "Backend;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
@@ -113,7 +113,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricExpr": "(MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / CLKS",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / CLKS",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core. Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS",
@@ -121,7 +121,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
- "MetricExpr": "(1 - (MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS))) * CYCLE_ACTIVITY.STALLS_L2_PENDING / CLKS",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / CLKS",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"PublicDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads. Better caching can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_MISS_PS",
@@ -169,7 +169,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
- "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if (IPC > 1.8) else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@ - RS_EVENTS.EMPTY_CYCLES if (tma_fetch_latency > 0.1) else RESOURCE_STALLS.SB) - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING)) / CLKS",
+ "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if IPC > 1.8 else (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@ - RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else RESOURCE_STALLS.SB)) - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING)) / CLKS",
"MetricGroup": "PortsUtil;TopdownL3;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"PublicDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related). Two distinct categories can be attributed into this metric: (1) heavy data-dependency among contiguous instructions would manifest in this metric - such cases are often referred to as low Instruction Level Parallelism (ILP). (2) Contention on some hardware execution unit other than Divider. For example; when there are too many multiply operations.",
@@ -233,7 +233,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "(UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) * IDQ.MS_UOPS / SLOTS",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / SLOTS",
"MetricGroup": "MicroSeq;TopdownL3;tma_heavy_operations_group",
"MetricName": "tma_microcode_sequencer",
"PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit. The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS",
@@ -284,19 +284,19 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(1 * (FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / CORE_CLKS",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / CORE_CLKS",
"MetricGroup": "Flops;Ret",
"MetricName": "FLOPc"
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_DISPATCHED.THREAD / ((cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / (cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
"MetricName": "ILP"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "((CPU_CLK_UNHALTED.THREAD / 2) * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK)) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2) if #SMT_on else CLKS",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else CLKS))",
"MetricGroup": "SMT",
"MetricName": "CORE_CLKS"
},
@@ -314,25 +314,25 @@
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / ((IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS))",
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW",
"MetricName": "DSB_Coverage"
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
"MetricGroup": "HPC;Summary",
"MetricName": "CPU_Utilization"
},
{
"BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "Turbo_Utilization * msr@tsc@ / 1000000000 / duration_time",
+ "MetricExpr": "Turbo_Utilization * TSC / 1e9 / duration_time",
"MetricGroup": "Power;Summary",
"MetricName": "Average_Frequency"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "((1 * (FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1000000000) / duration_time",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "GFLOPs",
"PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
@@ -345,7 +345,7 @@
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
- "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0",
+ "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
"MetricName": "SMT_2T_Utilization"
},
@@ -363,11 +363,23 @@
},
{
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "(64 * (uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@) / 1000000000) / duration_time",
+ "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
"MetricGroup": "HPC;Mem;MemoryBW;SoC",
"MetricName": "DRAM_BW_Use"
},
{
+ "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
+ "MetricExpr": "1e9 * (UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_INSERTS.MISS_OPCODE@filter_opc\\=0x182@) / (Socket_CLKS / duration_time)",
+ "MetricGroup": "Mem;MemoryLat;SoC",
+ "MetricName": "MEM_Read_Latency"
+ },
+ {
+ "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
+ "MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
+ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "MEM_Parallel_Reads"
+ },
+ {
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cbox_0@event\\=0x0@",
"MetricGroup": "SoC",
@@ -380,51 +392,58 @@
"MetricName": "IpFarBranch"
},
{
+ "BriefDescription": "Uncore frequency per die [GHZ]",
+ "MetricExpr": "Socket_CLKS / #num_dies / duration_time / 1e9",
+ "MetricGroup": "SoC",
+ "MetricName": "UNCORE_FREQ"
+ },
+ {
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C3_Core_Residency"
+ "MetricName": "C3_Core_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C6_Core_Residency"
+ "MetricName": "C6_Core_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C7_Core_Residency"
+ "MetricName": "C7_Core_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C2_Pkg_Residency"
+ "MetricName": "C2_Pkg_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C3_Pkg_Residency"
+ "MetricName": "C3_Pkg_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C6_Pkg_Residency"
+ "MetricName": "C6_Pkg_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C7_Pkg_Residency"
- },
- {
- "BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "Socket_CLKS / #num_dies / duration_time / 1000000000",
- "MetricGroup": "SoC",
- "MetricName": "UNCORE_FREQ"
+ "MetricName": "C7_Pkg_Residency",
+ "ScaleUnit": "100%"
}
]