summaryrefslogtreecommitdiff
path: root/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json')
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json9
1 files changed, 5 insertions, 4 deletions
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
index c207c851a9f9..222212abd811 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -553,7 +553,6 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector + tma_fp_amx",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -717,6 +716,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -750,6 +750,7 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -827,14 +828,12 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR_HALF + 2 * (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF) + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * (FP_ARITH_INST_RETIRED2.128B_PACKED_HALF + cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@) + 16 * (FP_ARITH_INST_RETIRED2.256B_PACKED_HALF + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) + 32 * FP_ARITH_INST_RETIRED2.512B_PACKED_HALF + 4 * AMX_OPS_RETIRED.BF16",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc"
},
{
"BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(FP_ARITH_DISPATCHED.PORT_0 + FP_ARITH_DISPATCHED.PORT_1 + FP_ARITH_DISPATCHED.PORT_5) / (2 * tma_info_core_core_clks)",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_core_fp_arith_utilization",
@@ -1216,7 +1215,6 @@
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "tma_info_pipeline_retire"
@@ -1467,6 +1465,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1841,6 +1840,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1868,6 +1868,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",