diff options
Diffstat (limited to 'arch/x86/events/intel/ds.c')
-rw-r--r-- | arch/x86/events/intel/ds.c | 138 |
1 files changed, 95 insertions, 43 deletions
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 376cc3d66094..de1f55d51784 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -94,15 +94,40 @@ void __init intel_pmu_pebs_data_source_nhm(void) pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); } -void __init intel_pmu_pebs_data_source_skl(bool pmem) +static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source) { u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4); - pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT); - pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT); - pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); - pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD); - pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM); + data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT); + data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT); + data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); + data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD); + data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM); +} + +void __init intel_pmu_pebs_data_source_skl(bool pmem) +{ + __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source); +} + +static void __init intel_pmu_pebs_data_source_grt(u64 *data_source) +{ + data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); + data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); + data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); +} + +void __init intel_pmu_pebs_data_source_adl(void) +{ + u64 *data_source; + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_skl(false, data_source); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + intel_pmu_pebs_data_source_grt(data_source); } static u64 precise_store_data(u64 status) @@ -171,7 +196,50 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status) return dse.val; } -static u64 load_latency_data(u64 status) +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Retrieve the latency data for e-core of ADL */ +u64 adl_latency_data_small(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + u64 val; + + WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); + + dse.val = status; + + val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; + + /* + * For the atom core on ADL, + * bit 4: lock, bit 5: TLB access. + */ + pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss); + + if (dse.ld_data_blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +static u64 load_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; u64 val; @@ -181,7 +249,7 @@ static u64 load_latency_data(u64 status) /* * use the mapping table for bit 0-3 */ - val = pebs_data_source[dse.ld_dse]; + val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; /* * Nehalem models do not support TLB, Lock infos @@ -190,21 +258,8 @@ static u64 load_latency_data(u64 status) val |= P(TLB, NA) | P(LOCK, NA); return val; } - /* - * bit 4: TLB access - * 0 = did not miss 2nd level TLB - * 1 = missed 2nd level TLB - */ - if (dse.ld_stlb_miss) - val |= P(TLB, MISS) | P(TLB, L2); - else - val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); - /* - * bit 5: locked prefix - */ - if (dse.ld_locked) - val |= P(LOCK, LOCKED); + pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked); /* * Ice Lake and earlier models do not support block infos. @@ -233,9 +288,10 @@ static u64 load_latency_data(u64 status) return val; } -static u64 store_latency_data(u64 status) +static u64 store_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; + union perf_mem_data_src src; u64 val; dse.val = status; @@ -243,27 +299,20 @@ static u64 store_latency_data(u64 status) /* * use the mapping table for bit 0-3 */ - val = pebs_data_source[dse.st_lat_dse]; + val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse]; - /* - * bit 4: TLB access - * 0 = did not miss 2nd level TLB - * 1 = missed 2nd level TLB - */ - if (dse.st_lat_stlb_miss) - val |= P(TLB, MISS) | P(TLB, L2); - else - val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked); + + val |= P(BLK, NA); /* - * bit 5: locked prefix + * the pebs_data_source table is only for loads + * so override the mem_op to say STORE instead */ - if (dse.st_lat_locked) - val |= P(LOCK, LOCKED); + src.val = val; + src.mem_op = P(OP,STORE); - val |= P(BLK, NA); - - return val; + return src.val; } struct pebs_record_core { @@ -781,8 +830,8 @@ struct event_constraint intel_glm_pebs_event_constraints[] = { struct event_constraint intel_grt_pebs_event_constraints[] = { /* Allow all events as PEBS with no flags */ - INTEL_PLD_CONSTRAINT(0x5d0, 0xf), - INTEL_PSD_CONSTRAINT(0x6d0, 0xf), + INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3), + INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), EVENT_CONSTRAINT_END }; @@ -1443,9 +1492,11 @@ static u64 get_data_src(struct perf_event *event, u64 aux) bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); if (fl & PERF_X86_EVENT_PEBS_LDLAT) - val = load_latency_data(aux); + val = load_latency_data(event, aux); else if (fl & PERF_X86_EVENT_PEBS_STLAT) - val = store_latency_data(aux); + val = store_latency_data(event, aux); + else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID) + val = x86_pmu.pebs_latency_data(event, aux); else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) val = precise_datala_hsw(event, aux); else if (fst) @@ -2219,6 +2270,7 @@ void __init intel_ds_init(void) PERF_SAMPLE_BRANCH_STACK | PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; + x86_pmu.pebs_capable = ~0ULL; pebs_qual = "-baseline"; x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { |