From bd27568117664b8b3e259721393df420ed51f57b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Sat, 8 Oct 2022 11:54:24 +0530
Subject: perf: Rewrite core context handling

There have been various issues and limitations with the way perf uses
(task) contexts to track events. Most notable is the single hardware
PMU task context, which has resulted in a number of yucky things (both
proposed and merged). Notably:

 - HW breakpoint PMU
 - ARM big.little PMU / Intel ADL PMU
 - Intel Branch Monitoring PMU
 - AMD IBS PMU
 - S390 cpum_cf PMU
 - PowerPC trace_imc PMU

*Current design:*

Currently we have a per task and per cpu perf_event_contexts:

  task_struct::perf_events_ctxp[] <-> perf_event_context <-> perf_cpu_context
       ^                               |    ^     |           ^
       `-------------------------------'    |     `--> pmu ---'
                                            v           ^
                                        perf_event -----'

Each task has an array of pointers to a perf_event_context. Each
perf_event_context has a direct relation to a PMU and a group of
events for that PMU. The task-related perf_event_contexts have a
pointer back to that task.

Each PMU has a per-cpu pointer to a per-cpu perf_cpu_context, which
includes a perf_event_context, which again has a direct relation to
that PMU, and a group of events for that PMU.

The perf_cpu_context also tracks which task context is currently
associated with that CPU and includes a few other things like the
hrtimer for rotation etc.

Each perf_event is then associated with its PMU and one
perf_event_context.

*Proposed design:*

The new design proposed by this patch reduces this to a single task
context and a single CPU context, but adds some intermediate data
structures:

  task_struct::perf_event_ctxp -> perf_event_context <- perf_cpu_context
       ^                          |   ^ ^
       `--------------------------'   | |
                                      | |    perf_cpu_pmu_context <--.
                                      | `----.    ^                  |
                                      |      |    |                  |
                                      |      v    v                  |
                                      | ,--> perf_event_pmu_context  |
                                      | |                            |
                                      | |                            |
                                      v v                            |
                                   perf_event ---> pmu --------------'

With the new design, perf_event_context will hold all events for all
pmus in the (respective pinned/flexible) rbtrees. This can be achieved
by adding the pmu to the rbtree key: {cpu, pmu, cgroup, group_index}.

Each perf_event_context carries a list of perf_event_pmu_context
structures, which hold per-pmu-per-context state. For example, a
perf_event_pmu_context keeps track of the currently active events for
that pmu, a pmu-specific task_ctx_data, a flag telling whether
rotation is required, etc.

Additionally, perf_cpu_pmu_context holds per-pmu-per-cpu state such as
the hrtimer details that drive event rotation, a pointer to the
perf_event_pmu_context of the currently running task, and some other
ancillary information.

Each perf_event is associated with its pmu, perf_event_context and
perf_event_pmu_context.

Further optimizations to the current implementation are possible. For
example, ctx_resched() can be optimized to reschedule only the events
of a single pmu.

Much thanks to Ravi for picking this up and pushing it towards
completion.
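To make the new layering concrete, the following is a simplified,
self-contained C sketch of the relationships described above. The type
and field names follow the changelog text but are deliberately trimmed;
they are not the kernel's actual definitions, and the key comparison
(cgroup handling omitted) only illustrates the {cpu, pmu, cgroup,
group_index} ordering of the shared rbtree.

    /* Simplified stand-ins; not the real <linux/perf_event.h> layout. */
    struct pmu;
    struct perf_event_context;

    /* Per-pmu-per-context state: active events, task_ctx_data, rotation flag. */
    struct perf_event_pmu_context {
            struct pmu                      *pmu;
            struct perf_event_context      *ctx;
            void                            *task_ctx_data;
            int                              rotate_necessary;
            struct perf_event_pmu_context  *next;  /* list link in the context, simplified */
    };

    /* Per-pmu-per-cpu state: rotation hrtimer lives here in the real code,
     * plus a pointer to the epc of the currently running task. */
    struct perf_cpu_pmu_context {
            struct perf_event_pmu_context    epc;
            struct perf_event_pmu_context   *task_epc;
    };

    /*
     * All pmus now share one rbtree per context, so the sort key gains a
     * pmu component. Pointer identity is enough here because the tree only
     * needs a stable ordering, not a meaningful one.
     */
    struct perf_event_key {
            int              cpu;
            struct pmu      *pmu;
            unsigned long    group_index;
    };

    static int perf_event_key_cmp(const struct perf_event_key *a,
                                  const struct perf_event_key *b)
    {
            if (a->cpu != b->cpu)
                    return a->cpu < b->cpu ? -1 : 1;
            if (a->pmu != b->pmu)
                    return a->pmu < b->pmu ? -1 : 1;
            if (a->group_index != b->group_index)
                    return a->group_index < b->group_index ? -1 : 1;
            return 0;
    }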
Signed-off-by: Peter Zijlstra (Intel) Co-developed-by: Ravi Bangoria Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221008062424.313-1-ravi.bangoria@amd.com --- arch/arm64/kernel/perf_event.c | 18 ++++++++++------ arch/powerpc/perf/core-book3s.c | 8 +++---- arch/s390/kernel/perf_pai_crypto.c | 2 +- arch/s390/kernel/perf_pai_ext.c | 2 +- arch/x86/events/amd/brs.c | 2 +- arch/x86/events/amd/lbr.c | 6 +++--- arch/x86/events/core.c | 44 ++++++++++++-------------------------- arch/x86/events/intel/core.c | 23 +++++++++----------- arch/x86/events/intel/ds.c | 4 ++-- arch/x86/events/intel/lbr.c | 30 +++++++++++++------------- arch/x86/events/perf_event.h | 30 ++++++++++++-------------- 11 files changed, 76 insertions(+), 93 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 7b0643fe2f13..54186542a969 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -806,10 +806,14 @@ static void armv8pmu_disable_event(struct perf_event *event) static void armv8pmu_start(struct arm_pmu *cpu_pmu) { - struct perf_event_context *task_ctx = - this_cpu_ptr(cpu_pmu->pmu.pmu_cpu_context)->task_ctx; + struct perf_event_context *ctx; + int nr_user = 0; - if (sysctl_perf_user_access && task_ctx && task_ctx->nr_user) + ctx = perf_cpu_task_ctx(); + if (ctx) + nr_user = ctx->nr_user; + + if (sysctl_perf_user_access && nr_user) armv8pmu_enable_user_access(cpu_pmu); else armv8pmu_disable_user_access(); @@ -1019,10 +1023,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } -static int armv8pmu_filter_match(struct perf_event *event) +static bool armv8pmu_filter(struct pmu *pmu, int cpu) { - unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; - return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; + struct arm_pmu *armpmu = to_arm_pmu(pmu); + return !cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus); } static void armv8pmu_reset(void *info) @@ -1253,7 +1257,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, cpu_pmu->stop = armv8pmu_stop; cpu_pmu->reset = armv8pmu_reset; cpu_pmu->set_event_filter = armv8pmu_set_event_filter; - cpu_pmu->filter_match = armv8pmu_filter_match; + cpu_pmu->filter = armv8pmu_filter; cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 942aa830e110..bf318dd9b709 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -132,7 +132,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} -static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} +static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) {} static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } #endif /* CONFIG_PPC32 */ @@ -424,7 +424,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) cpuhw->bhrb_context = event->ctx; } cpuhw->bhrb_users++; - perf_sched_cb_inc(event->ctx->pmu); + perf_sched_cb_inc(event->pmu); } static void power_pmu_bhrb_disable(struct perf_event *event) @@ -436,7 +436,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event) WARN_ON_ONCE(!cpuhw->bhrb_users); 
cpuhw->bhrb_users--; - perf_sched_cb_dec(event->ctx->pmu); + perf_sched_cb_dec(event->pmu); if (!cpuhw->disabled && !cpuhw->bhrb_users) { /* BHRB cannot be turned off when other @@ -451,7 +451,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event) /* Called from ctxsw to prevent one process's branch entries to * mingle with the other process's entries during context switch. */ -static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) +static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { if (!ppmu->bhrb_nr) return; diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 6826e2a69a21..f747137f39ae 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -379,7 +379,7 @@ static int paicrypt_push_sample(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ -static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in) +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, clear values. diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index d5c7c1e30c17..9547798594f9 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -471,7 +471,7 @@ static int paiext_push_sample(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ -static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in) +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, clear values. 
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c index f1bff153d945..58461fa18b6f 100644 --- a/arch/x86/events/amd/brs.c +++ b/arch/x86/events/amd/brs.c @@ -384,7 +384,7 @@ static void amd_brs_poison_buffer(void) * On ctxswin, sched_in = true, called after the PMU has started * On ctxswout, sched_in = false, called before the PMU is stopped */ -void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in) +void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 38a75216c12c..eb31f850841a 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -352,7 +352,7 @@ void amd_pmu_lbr_add(struct perf_event *event) cpuc->br_sel = reg->reg; } - perf_sched_cb_inc(event->ctx->pmu); + perf_sched_cb_inc(event->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) amd_pmu_lbr_reset(); @@ -370,10 +370,10 @@ void amd_pmu_lbr_del(struct perf_event *event) cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); - perf_sched_cb_dec(event->ctx->pmu); + perf_sched_cb_dec(event->pmu); } -void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) +void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index b30b8bbcd1e2..337a99afe8ae 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -90,6 +90,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); +DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter); + /* * This one is magic, it will get called even when PMU init fails (because * there is no PMU), in which case it should simply return NULL. @@ -2031,6 +2033,7 @@ static void x86_pmu_static_call_update(void) static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases); static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs); + static_call_update(x86_pmu_filter, x86_pmu.filter); } static void _x86_pmu_read(struct perf_event *event) @@ -2052,23 +2055,6 @@ void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, pr_info("... event mask: %016Lx\n", intel_ctrl); } -/* - * The generic code is not hybrid friendly. The hybrid_pmu->pmu - * of the first registered PMU is unconditionally assigned to - * each possible cpuctx->ctx.pmu. - * Update the correct hybrid PMU to the cpuctx->ctx.pmu. - */ -void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu) -{ - struct perf_cpu_context *cpuctx; - - if (!pmu->pmu_cpu_context) - return; - - cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); - cpuctx->ctx.pmu = pmu; -} - static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -2195,9 +2181,6 @@ static int __init init_hw_perf_events(void) (hybrid_pmu->cpu_type == hybrid_big) ? 
PERF_TYPE_RAW : -1); if (err) break; - - if (cpu_type == hybrid_pmu->cpu_type) - x86_pmu_update_cpu_context(&hybrid_pmu->pmu, raw_smp_processor_id()); } if (i < x86_pmu.num_hybrid_pmus) { @@ -2646,15 +2629,15 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { NULL, }; -static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) +static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { - static_call_cond(x86_pmu_sched_task)(ctx, sched_in); + static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in); } -static void x86_pmu_swap_task_ctx(struct perf_event_context *prev, - struct perf_event_context *next) +static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc, + struct perf_event_pmu_context *next_epc) { - static_call_cond(x86_pmu_swap_task_ctx)(prev, next); + static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc); } void perf_check_microcode(void) @@ -2689,12 +2672,13 @@ static int x86_pmu_aux_output_match(struct perf_event *event) return 0; } -static int x86_pmu_filter_match(struct perf_event *event) +static bool x86_pmu_filter(struct pmu *pmu, int cpu) { - if (x86_pmu.filter_match) - return x86_pmu.filter_match(event); + bool ret = false; - return 1; + static_call_cond(x86_pmu_filter)(pmu, cpu, &ret); + + return ret; } static struct pmu pmu = { @@ -2725,7 +2709,7 @@ static struct pmu pmu = { .aux_output_match = x86_pmu_aux_output_match, - .filter_match = x86_pmu_filter_match, + .filter = x86_pmu_filter, }; void arch_perf_update_userpage(struct perf_event *event, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a646a5f9a235..d8af75466ee9 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4536,8 +4536,6 @@ end: cpumask_set_cpu(cpu, &pmu->supported_cpus); cpuc->pmu = &pmu->pmu; - x86_pmu_update_cpu_context(&pmu->pmu, cpu); - return true; } @@ -4671,17 +4669,17 @@ static void intel_pmu_cpu_dead(int cpu) cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus); } -static void intel_pmu_sched_task(struct perf_event_context *ctx, +static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { - intel_pmu_pebs_sched_task(ctx, sched_in); - intel_pmu_lbr_sched_task(ctx, sched_in); + intel_pmu_pebs_sched_task(pmu_ctx, sched_in); + intel_pmu_lbr_sched_task(pmu_ctx, sched_in); } -static void intel_pmu_swap_task_ctx(struct perf_event_context *prev, - struct perf_event_context *next) +static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc, + struct perf_event_pmu_context *next_epc) { - intel_pmu_lbr_swap_task_ctx(prev, next); + intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc); } static int intel_pmu_check_period(struct perf_event *event, u64 value) @@ -4705,12 +4703,11 @@ static int intel_pmu_aux_output_match(struct perf_event *event) return is_intel_pt_event(event); } -static int intel_pmu_filter_match(struct perf_event *event) +static void intel_pmu_filter(struct pmu *pmu, int cpu, bool *ret) { - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - unsigned int cpu = smp_processor_id(); + struct x86_hybrid_pmu *hpmu = hybrid_pmu(pmu); - return cpumask_test_cpu(cpu, &pmu->supported_cpus); + *ret = !cpumask_test_cpu(cpu, &hpmu->supported_cpus); } PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); @@ -6412,7 +6409,7 @@ __init int intel_pmu_init(void) static_call_update(intel_pmu_set_topdown_event_period, &adl_set_topdown_event_period); - x86_pmu.filter_match = intel_pmu_filter_match; + x86_pmu.filter = 
intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; x86_pmu.limit_period = spr_limit_period; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7839507b3844..f141cc7b8847 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1059,7 +1059,7 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc) return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs); } -void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in) +void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1167,7 +1167,7 @@ static void pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct perf_event *event, bool add) { - struct pmu *pmu = event->ctx->pmu; + struct pmu *pmu = event->pmu; /* * Make sure we get updated with the first PEBS * event. It will trigger also during removal, but diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 8259d725054d..017baba56b01 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -515,21 +515,21 @@ static void __intel_pmu_lbr_save(void *ctx) cpuc->last_log_id = ++task_context_opt(ctx)->log_id; } -void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev, - struct perf_event_context *next) +void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, + struct perf_event_pmu_context *next_epc) { void *prev_ctx_data, *next_ctx_data; - swap(prev->task_ctx_data, next->task_ctx_data); + swap(prev_epc->task_ctx_data, next_epc->task_ctx_data); /* - * Architecture specific synchronization makes sense in - * case both prev->task_ctx_data and next->task_ctx_data + * Architecture specific synchronization makes sense in case + * both prev_epc->task_ctx_data and next_epc->task_ctx_data * pointers are allocated. */ - prev_ctx_data = next->task_ctx_data; - next_ctx_data = prev->task_ctx_data; + prev_ctx_data = next_epc->task_ctx_data; + next_ctx_data = prev_epc->task_ctx_data; if (!prev_ctx_data || !next_ctx_data) return; @@ -538,7 +538,7 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev, task_context_opt(next_ctx_data)->lbr_callstack_users); } -void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) +void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); void *task_ctx; @@ -551,7 +551,7 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) * the task was scheduled out, restore the stack. Otherwise flush * the LBR stack. */ - task_ctx = ctx ? ctx->task_ctx_data : NULL; + task_ctx = pmu_ctx ? 
pmu_ctx->task_ctx_data : NULL; if (task_ctx) { if (sched_in) __intel_pmu_lbr_restore(task_ctx); @@ -587,8 +587,8 @@ void intel_pmu_lbr_add(struct perf_event *event) cpuc->br_sel = event->hw.branch_reg.reg; - if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) - task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++; + if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data) + task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++; /* * Request pmu::sched_task() callback, which will fire inside the @@ -611,7 +611,7 @@ void intel_pmu_lbr_add(struct perf_event *event) */ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) cpuc->lbr_pebs_users++; - perf_sched_cb_inc(event->ctx->pmu); + perf_sched_cb_inc(event->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) intel_pmu_lbr_reset(); } @@ -664,8 +664,8 @@ void intel_pmu_lbr_del(struct perf_event *event) return; if (branch_user_callstack(cpuc->br_sel) && - event->ctx->task_ctx_data) - task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--; + event->pmu_ctx->task_ctx_data) + task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--; if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) cpuc->lbr_select = 0; @@ -675,7 +675,7 @@ void intel_pmu_lbr_del(struct perf_event *event) cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); WARN_ON_ONCE(cpuc->lbr_pebs_users < 0); - perf_sched_cb_dec(event->ctx->pmu); + perf_sched_cb_dec(event->pmu); } static inline bool vlbr_exclude_host(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 332d2e6d8ae4..6a44aed7b100 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -811,7 +811,7 @@ struct x86_pmu { void (*cpu_dead)(int cpu); void (*check_microcode)(void); - void (*sched_task)(struct perf_event_context *ctx, + void (*sched_task)(struct perf_event_pmu_context *pmu_ctx, bool sched_in); /* @@ -894,12 +894,12 @@ struct x86_pmu { int num_topdown_events; /* - * perf task context (i.e. struct perf_event_context::task_ctx_data) + * perf task context (i.e. struct perf_event_pmu_context::task_ctx_data) * switch helper to bridge calls from perf/core to perf/x86. 
* See struct pmu::swap_task_ctx() usage for examples; */ - void (*swap_task_ctx)(struct perf_event_context *prev, - struct perf_event_context *next); + void (*swap_task_ctx)(struct perf_event_pmu_context *prev_epc, + struct perf_event_pmu_context *next_epc); /* * AMD bits @@ -925,7 +925,7 @@ struct x86_pmu { int (*aux_output_match) (struct perf_event *event); - int (*filter_match)(struct perf_event *event); + void (*filter)(struct pmu *pmu, int cpu, bool *ret); /* * Hybrid support * @@ -1180,8 +1180,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs); void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, u64 intel_ctrl); -void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu); - extern struct event_constraint emptyconstraint; extern struct event_constraint unconstrained; @@ -1306,7 +1304,7 @@ void amd_pmu_lbr_reset(void); void amd_pmu_lbr_read(void); void amd_pmu_lbr_add(struct perf_event *event); void amd_pmu_lbr_del(struct perf_event *event); -void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); void amd_pmu_lbr_enable_all(void); void amd_pmu_lbr_disable_all(void); int amd_pmu_lbr_hw_config(struct perf_event *event); @@ -1330,7 +1328,7 @@ static inline void amd_pmu_brs_add(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - perf_sched_cb_inc(event->ctx->pmu); + perf_sched_cb_inc(event->pmu); cpuc->lbr_users++; /* * No need to reset BRS because it is reset @@ -1345,10 +1343,10 @@ static inline void amd_pmu_brs_del(struct perf_event *event) cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); - perf_sched_cb_dec(event->ctx->pmu); + perf_sched_cb_dec(event->pmu); } -void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in); +void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); #else static inline int amd_brs_init(void) { @@ -1373,7 +1371,7 @@ static inline void amd_pmu_brs_del(struct perf_event *event) { } -static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in) +static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { } @@ -1533,7 +1531,7 @@ void intel_pmu_pebs_enable_all(void); void intel_pmu_pebs_disable_all(void); -void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); +void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); void intel_pmu_auto_reload_read(struct perf_event *event); @@ -1541,10 +1539,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); void intel_ds_init(void); -void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev, - struct perf_event_context *next); +void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, + struct perf_event_pmu_context *next_epc); -void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); u64 lbr_from_signext_quirk_wr(u64 val); -- cgit v1.2.3 From 8e356858be2989355aafcc96af541fedf9fb486f Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Fri, 28 Oct 2022 17:30:05 -0300 Subject: perf/x86: Remove unused variable 'cpu_type' Since the removal of function x86_pmu_update_cpu_context() by commit 983bd8543b5a ("perf: Rewrite core context handling"), there is no need to query the type of the hybrid CPU inside function init_hw_perf_events(). 
Fixes: 983bd8543b5a ("perf: Rewrite core context handling") Signed-off-by: Rafael Mendonca Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221028203006.976831-1-rafaelmendsr@gmail.com --- arch/x86/events/core.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 337a99afe8ae..85a63a41c471 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2161,13 +2161,9 @@ static int __init init_hw_perf_events(void) if (err) goto out2; } else { - u8 cpu_type = get_this_hybrid_cpu_type(); struct x86_hybrid_pmu *hybrid_pmu; int i, j; - if (!cpu_type && x86_pmu.get_hybrid_cpu_type) - cpu_type = x86_pmu.get_hybrid_cpu_type(); - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { hybrid_pmu = &x86_pmu.hybrid_pmu[i]; -- cgit v1.2.3 From 634a9d5ec78ae2899725950dfb97b98ce7a2f0e4 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 8 Nov 2022 18:41:17 +0800 Subject: perf/x86/amd: Remove the repeated declaration The function 'amd_brs_disable_all' is declared twice in commit ada543459cab ("perf/x86/amd: Add AMD Fam19h Branch Sampling support"). Remove one of them. Signed-off-by: Shaokun Zhang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221108104117.46642-1-zhangshaokun@hisilicon.com --- arch/x86/events/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 6a44aed7b100..0e849f28a5c1 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1320,7 +1320,6 @@ void amd_brs_enable_all(void); void amd_brs_disable_all(void); void amd_brs_drain(void); void amd_brs_lopwr_init(void); -void amd_brs_disable_all(void); int amd_brs_hw_config(struct perf_event *event); void amd_brs_reset(void); -- cgit v1.2.3 From 30093056f7b2f759ff180d3a86d29f68315e469b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 15 Nov 2022 15:09:04 +0530 Subject: perf/amd/ibs: Make IBS a core pmu So far, only one pmu was allowed to be registered as core pmu and thus IBS pmus were being registered as uncore. However, with the event context rewrite, that limitation no longer exists and thus IBS pmus can also be registered as core pmu. 
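For illustration only, here is a minimal module sketch of what that
registration difference amounts to: a pmu registered with
task_ctx_nr = perf_invalid_context is treated as an uncore pmu whose
events are system-wide only, while perf_hw_context makes it a core pmu
whose events can sit in a task's context. The "demo" name and the
empty callbacks are made up; the only point being illustrated is the
.task_ctx_nr value, mirroring the two-line change in the diff below.

    #include <linux/module.h>
    #include <linux/perf_event.h>

    static int demo_event_init(struct perf_event *event)
    {
            /* A real driver validates event->attr here; reject everything. */
            return -ENOENT;
    }

    static int  demo_add(struct perf_event *event, int flags)   { return 0; }
    static void demo_del(struct perf_event *event, int flags)   { }
    static void demo_start(struct perf_event *event, int flags) { }
    static void demo_stop(struct perf_event *event, int flags)  { }
    static void demo_read(struct perf_event *event)             { }

    static struct pmu demo_pmu = {
            /*
             * perf_invalid_context: "uncore" pmu, system-wide events only.
             * perf_hw_context:      core pmu, events can follow a task --
             *                       now possible for more than one hw pmu.
             */
            .task_ctx_nr    = perf_hw_context,
            .event_init     = demo_event_init,
            .add            = demo_add,
            .del            = demo_del,
            .start          = demo_start,
            .stop           = demo_stop,
            .read           = demo_read,
    };

    static int __init demo_init(void)
    {
            return perf_pmu_register(&demo_pmu, "demo", -1);
    }

    static void __exit demo_exit(void)
    {
            perf_pmu_unregister(&demo_pmu);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");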
This makes IBS much more usable, for ex, user will be able to do per-process precise monitoring on AMD: Before patch: $ sudo perf record -e cycles:pp ls Error: Invalid event (cycles:pp) in per-thread mode, enable system wide with '-a' After patch: $ sudo perf record -e cycles:pp ls [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data (33 samples) ] Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ian Rogers Link: https://lkml.kernel.org/r/20221115093904.1799-1-ravi.bangoria@amd.com --- arch/x86/events/amd/ibs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 3271735f0070..fbc2ce86f4b8 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -631,7 +631,7 @@ static const struct attribute_group *op_attr_update[] = { static struct perf_ibs perf_ibs_fetch = { .pmu = { - .task_ctx_nr = perf_invalid_context, + .task_ctx_nr = perf_hw_context, .event_init = perf_ibs_init, .add = perf_ibs_add, @@ -655,7 +655,7 @@ static struct perf_ibs perf_ibs_fetch = { static struct perf_ibs perf_ibs_op = { .pmu = { - .task_ctx_nr = perf_invalid_context, + .task_ctx_nr = perf_hw_context, .event_init = perf_ibs_init, .add = perf_ibs_add, -- cgit v1.2.3 From 4d13be8ab5d4aa6106331baa9674632a75b884b0 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:23 +0000 Subject: perf/x86/intel/uncore: Generalize IIO topology support Current implementation of uncore mapping doesn't support different types of uncore PMUs which have its own topology context. This patch generalizes Intel uncore topology implementation to be able easily introduce support for new uncore blocks. Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221117122833.3103580-2-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore.h | 14 +++- arch/x86/events/intel/uncore_snbep.c | 152 +++++++++++++++++++++++++---------- 2 files changed, 122 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 2adeaf4de4df..917cff1e7815 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -88,7 +88,7 @@ struct intel_uncore_type { * to identify which platform component each PMON block of that type is * supposed to monitor. 
*/ - struct intel_uncore_topology *topology; + struct intel_uncore_topology **topology; /* * Optional callbacks for managing mapping of Uncore units to PMONs */ @@ -178,11 +178,19 @@ struct freerunning_counters { unsigned *box_offsets; }; -struct intel_uncore_topology { - u64 configuration; +struct uncore_iio_topology { + int pci_bus_no; int segment; }; +struct intel_uncore_topology { + int pmu_idx; + union { + void *untyped; + struct uncore_iio_topology *iio; + }; +}; + struct pci2phy_map { struct list_head list; int segment; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index ed869443efb2..469ff889ea08 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3699,10 +3699,16 @@ static struct intel_uncore_ops skx_uncore_iio_ops = { .read_counter = uncore_msr_read_counter, }; -static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die) +static struct intel_uncore_topology *pmu_topology(struct intel_uncore_pmu *pmu, int die) { - return pmu->type->topology[die].configuration >> - (pmu->pmu_idx * BUS_NUM_STRIDE); + int idx; + + for (idx = 0; idx < pmu->type->num_boxes; idx++) { + if (pmu->type->topology[die][idx].pmu_idx == pmu->pmu_idx) + return &pmu->type->topology[die][idx]; + } + + return NULL; } static umode_t @@ -3710,8 +3716,9 @@ pmu_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die, int zero_bus_pmu) { struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj)); + struct intel_uncore_topology *pmut = pmu_topology(pmu, die); - return (!skx_iio_stack(pmu, die) && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode; + return (pmut && !pmut->iio->pci_bus_no && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode; } static umode_t @@ -3727,9 +3734,10 @@ static ssize_t skx_iio_mapping_show(struct device *dev, struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev); struct dev_ext_attribute *ea = to_dev_ext_attribute(attr); long die = (long)ea->var; + struct intel_uncore_topology *pmut = pmu_topology(pmu, die); - return sprintf(buf, "%04x:%02x\n", pmu->type->topology[die].segment, - skx_iio_stack(pmu, die)); + return sprintf(buf, "%04x:%02x\n", pmut ? pmut->iio->segment : 0, + pmut ? 
pmut->iio->pci_bus_no : 0); } static int skx_msr_cpu_bus_read(int cpu, u64 *topology) @@ -3764,18 +3772,77 @@ static int die_to_cpu(int die) return res; } +enum { + IIO_TOPOLOGY_TYPE, + TOPOLOGY_MAX +}; + +static const size_t topology_size[TOPOLOGY_MAX] = { + sizeof(*((struct intel_uncore_topology *)NULL)->iio) +}; + +static int pmu_alloc_topology(struct intel_uncore_type *type, int topology_type) +{ + int die, idx; + struct intel_uncore_topology **topology; + + if (!type->num_boxes) + return -EPERM; + + topology = kcalloc(uncore_max_dies(), sizeof(*topology), GFP_KERNEL); + if (!topology) + goto err; + + for (die = 0; die < uncore_max_dies(); die++) { + topology[die] = kcalloc(type->num_boxes, sizeof(**topology), GFP_KERNEL); + if (!topology[die]) + goto clear; + for (idx = 0; idx < type->num_boxes; idx++) { + topology[die][idx].untyped = kcalloc(type->num_boxes, + topology_size[topology_type], + GFP_KERNEL); + if (!topology[die][idx].untyped) + goto clear; + } + } + + type->topology = topology; + + return 0; +clear: + for (; die >= 0; die--) { + for (idx = 0; idx < type->num_boxes; idx++) + kfree(topology[die][idx].untyped); + kfree(topology[die]); + } + kfree(topology); +err: + return -ENOMEM; +} + +static void pmu_free_topology(struct intel_uncore_type *type) +{ + int die, idx; + + if (type->topology) { + for (die = 0; die < uncore_max_dies(); die++) { + for (idx = 0; idx < type->num_boxes; idx++) + kfree(type->topology[die][idx].untyped); + kfree(type->topology[die]); + } + kfree(type->topology); + type->topology = NULL; + } +} + static int skx_iio_get_topology(struct intel_uncore_type *type) { int die, ret = -EPERM; - - type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology), - GFP_KERNEL); - if (!type->topology) - return -ENOMEM; + u64 configuration; + int idx; for (die = 0; die < uncore_max_dies(); die++) { - ret = skx_msr_cpu_bus_read(die_to_cpu(die), - &type->topology[die].configuration); + ret = skx_msr_cpu_bus_read(die_to_cpu(die), &configuration); if (ret) break; @@ -3783,12 +3850,12 @@ static int skx_iio_get_topology(struct intel_uncore_type *type) if (ret < 0) break; - type->topology[die].segment = ret; - } - - if (ret < 0) { - kfree(type->topology); - type->topology = NULL; + for (idx = 0; idx < type->num_boxes; idx++) { + type->topology[die][idx].pmu_idx = idx; + type->topology[die][idx].iio->segment = ret; + type->topology[die][idx].iio->pci_bus_no = + (configuration >> (idx * BUS_NUM_STRIDE)) & 0xff; + } } return ret; @@ -3804,7 +3871,9 @@ static const struct attribute_group *skx_iio_attr_update[] = { }; static int -pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) +pmu_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag, + ssize_t (*show)(struct device*, struct device_attribute*, char*), + int topology_type) { char buf[64]; int ret; @@ -3812,10 +3881,14 @@ pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) struct attribute **attrs = NULL; struct dev_ext_attribute *eas = NULL; - ret = type->get_topology(type); + ret = pmu_alloc_topology(type, topology_type); if (ret < 0) goto clear_attr_update; + ret = type->get_topology(type); + if (ret < 0) + goto clear_topology; + ret = -ENOMEM; /* One more for NULL. 
*/ @@ -3828,13 +3901,13 @@ pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) goto clear_attrs; for (die = 0; die < uncore_max_dies(); die++) { - sprintf(buf, "die%ld", die); + snprintf(buf, sizeof(buf), "die%ld", die); sysfs_attr_init(&eas[die].attr.attr); eas[die].attr.attr.name = kstrdup(buf, GFP_KERNEL); if (!eas[die].attr.attr.name) goto err; eas[die].attr.attr.mode = 0444; - eas[die].attr.show = skx_iio_mapping_show; + eas[die].attr.show = show; eas[die].attr.store = NULL; eas[die].var = (void *)die; attrs[die] = &eas[die].attr.attr; @@ -3849,14 +3922,14 @@ err: clear_attrs: kfree(attrs); clear_topology: - kfree(type->topology); + pmu_free_topology(type); clear_attr_update: type->attr_update = NULL; return ret; } static void -pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag) +pmu_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag) { struct attribute **attr = ag->attrs; @@ -3868,7 +3941,13 @@ pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group * kfree(attr_to_ext_attr(*ag->attrs)); kfree(ag->attrs); ag->attrs = NULL; - kfree(type->topology); + pmu_free_topology(type); +} + +static int +pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) +{ + return pmu_set_mapping(type, ag, skx_iio_mapping_show, IIO_TOPOLOGY_TYPE); } static int skx_iio_set_mapping(struct intel_uncore_type *type) @@ -3878,7 +3957,7 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type) static void skx_iio_cleanup_mapping(struct intel_uncore_type *type) { - pmu_iio_cleanup_mapping(type, &skx_iio_mapping_group); + pmu_cleanup_mapping(type, &skx_iio_mapping_group); } static struct intel_uncore_type skx_uncore_iio = { @@ -4461,11 +4540,6 @@ static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_map int die, stack_id, ret = -EPERM; struct pci_dev *dev = NULL; - type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology), - GFP_KERNEL); - if (!type->topology) - return -ENOMEM; - while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, SNR_ICX_MESH2IIO_MMAP_DID, dev))) { ret = pci_read_config_dword(dev, SNR_ICX_SAD_CONTROL_CFG, &sad_cfg); if (ret) { @@ -4483,13 +4557,9 @@ static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_map /* Convert stack id from SAD_CONTROL to PMON notation. 
*/ stack_id = sad_pmon_mapping[stack_id]; - ((u8 *)&(type->topology[die].configuration))[stack_id] = dev->bus->number; - type->topology[die].segment = pci_domain_nr(dev->bus); - } - - if (ret) { - kfree(type->topology); - type->topology = NULL; + type->topology[die][stack_id].iio->segment = pci_domain_nr(dev->bus); + type->topology[die][stack_id].pmu_idx = stack_id; + type->topology[die][stack_id].iio->pci_bus_no = dev->bus->number; } return ret; @@ -4526,7 +4596,7 @@ static int snr_iio_set_mapping(struct intel_uncore_type *type) static void snr_iio_cleanup_mapping(struct intel_uncore_type *type) { - pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group); + pmu_cleanup_mapping(type, &snr_iio_mapping_group); } static struct event_constraint snr_uncore_iio_constraints[] = { @@ -5144,7 +5214,7 @@ static int icx_iio_set_mapping(struct intel_uncore_type *type) static void icx_iio_cleanup_mapping(struct intel_uncore_type *type) { - pmu_iio_cleanup_mapping(type, &icx_iio_mapping_group); + pmu_cleanup_mapping(type, &icx_iio_mapping_group); } static struct intel_uncore_type icx_uncore_iio = { -- cgit v1.2.3 From cee4eebd9158d9eb3f8c1708c297b219e8ea861f Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:24 +0000 Subject: perf/x86/intel/uncore: Introduce UPI topology type This patch introduces new 'uncore_upi_topology' topology type to support UPI topology discovery. Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221117122833.3103580-3-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore.h | 7 +++++++ arch/x86/events/intel/uncore_snbep.c | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 917cff1e7815..ef1414957623 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -183,11 +183,18 @@ struct uncore_iio_topology { int segment; }; +struct uncore_upi_topology { + int die_to; + int pmu_idx_to; + int enabled; +}; + struct intel_uncore_topology { int pmu_idx; union { void *untyped; struct uncore_iio_topology *iio; + struct uncore_upi_topology *upi; }; }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 469ff889ea08..d3323f13c304 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3774,11 +3774,13 @@ static int die_to_cpu(int die) enum { IIO_TOPOLOGY_TYPE, + UPI_TOPOLOGY_TYPE, TOPOLOGY_MAX }; static const size_t topology_size[TOPOLOGY_MAX] = { - sizeof(*((struct intel_uncore_topology *)NULL)->iio) + sizeof(*((struct intel_uncore_topology *)NULL)->iio), + sizeof(*((struct intel_uncore_topology *)NULL)->upi) }; static int pmu_alloc_topology(struct intel_uncore_type *type, int topology_type) -- cgit v1.2.3 From 6532783310e2b2f50dc13f46c49aa6546cb6e7a3 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:25 +0000 Subject: perf/x86/intel/uncore: Clear attr_update properly Current clear_attr_update procedure in pmu_set_mapping() sets attr_update field in NULL that is not correct because intel_uncore_type pmu types can contain several groups in attr_update field. For example, SPR platform already has uncore_alias_group to update and then UPI topology group will be added in next patches. Fix current behavior and clear attr_update group related to mapping only. 
Fixes: bb42b3d39781 ("perf/x86/intel/uncore: Expose an Uncore unit to IIO PMON mapping") Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221117122833.3103580-4-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index d3323f13c304..0d06b56b8a33 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3872,6 +3872,21 @@ static const struct attribute_group *skx_iio_attr_update[] = { NULL, }; +static void pmu_clear_mapping_attr(const struct attribute_group **groups, + struct attribute_group *ag) +{ + int i; + + for (i = 0; groups[i]; i++) { + if (groups[i] == ag) { + for (i++; groups[i]; i++) + groups[i - 1] = groups[i]; + groups[i - 1] = NULL; + break; + } + } +} + static int pmu_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag, ssize_t (*show)(struct device*, struct device_attribute*, char*), @@ -3926,7 +3941,7 @@ clear_attrs: clear_topology: pmu_free_topology(type); clear_attr_update: - type->attr_update = NULL; + pmu_clear_mapping_attr(type->attr_update, ag); return ret; } -- cgit v1.2.3 From efe062705d149b20a15498cb999a9edbb8241e6f Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:26 +0000 Subject: perf/x86/intel/uncore: Disable I/O stacks to PMU mapping on ICX-D Current implementation of I/O stacks to PMU mapping doesn't support ICX-D. Detect ICX-D system to disable mapping. Fixes: 10337e95e04c ("perf/x86/intel/uncore: Enable I/O stacks to IIO PMON mapping on ICX") Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221117122833.3103580-5-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore.h | 1 + arch/x86/events/intel/uncore_snbep.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index ef1414957623..fac3612289f1 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 0d06b56b8a33..e14b96398377 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -5226,6 +5226,11 @@ static int icx_iio_get_topology(struct intel_uncore_type *type) static int icx_iio_set_mapping(struct intel_uncore_type *type) { + /* Detect ICX-D system. This case is not supported */ + if (boot_cpu_data.x86_model == INTEL_FAM6_ICELAKE_D) { + pmu_clear_mapping_attr(type->attr_update, &icx_iio_mapping_group); + return -EPERM; + } return pmu_iio_set_mapping(type, &icx_iio_mapping_group); } -- cgit v1.2.3 From 07813e2a59c93f10716ffa33d608d2c0685cf85b Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:27 +0000 Subject: perf/x86/intel/uncore: Generalize get_topology() for SKX PMUs Factor out a generic code from skx_iio_get_topology() to skx_pmu_get_topology() to avoid code duplication. This code will be used by get_topology() procedure for SKX UPI PMUs in the further patch. 
Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221117122833.3103580-6-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 38 ++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index e14b96398377..254ba0ad650a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3837,14 +3837,14 @@ static void pmu_free_topology(struct intel_uncore_type *type) } } -static int skx_iio_get_topology(struct intel_uncore_type *type) +static int skx_pmu_get_topology(struct intel_uncore_type *type, + int (*topology_cb)(struct intel_uncore_type*, int, int, u64)) { int die, ret = -EPERM; - u64 configuration; - int idx; + u64 cpu_bus_msr; for (die = 0; die < uncore_max_dies(); die++) { - ret = skx_msr_cpu_bus_read(die_to_cpu(die), &configuration); + ret = skx_msr_cpu_bus_read(die_to_cpu(die), &cpu_bus_msr); if (ret) break; @@ -3852,17 +3852,35 @@ static int skx_iio_get_topology(struct intel_uncore_type *type) if (ret < 0) break; - for (idx = 0; idx < type->num_boxes; idx++) { - type->topology[die][idx].pmu_idx = idx; - type->topology[die][idx].iio->segment = ret; - type->topology[die][idx].iio->pci_bus_no = - (configuration >> (idx * BUS_NUM_STRIDE)) & 0xff; - } + ret = topology_cb(type, ret, die, cpu_bus_msr); + if (ret) + break; } return ret; } +static int skx_iio_topology_cb(struct intel_uncore_type *type, int segment, + int die, u64 cpu_bus_msr) +{ + int idx; + struct intel_uncore_topology *t; + + for (idx = 0; idx < type->num_boxes; idx++) { + t = &type->topology[die][idx]; + t->pmu_idx = idx; + t->iio->segment = segment; + t->iio->pci_bus_no = (cpu_bus_msr >> (idx * BUS_NUM_STRIDE)) & 0xff; + } + + return 0; +} + +static int skx_iio_get_topology(struct intel_uncore_type *type) +{ + return skx_pmu_get_topology(type, skx_iio_topology_cb); +} + static struct attribute_group skx_iio_mapping_group = { .is_visible = skx_iio_mapping_visible, }; -- cgit v1.2.3 From 4cfce57fa42d277497cd2c425021312eae2f223c Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:28 +0000 Subject: perf/x86/intel/uncore: Enable UPI topology discovery for Skylake Server UPI topology discovery relies on data from KTILP0 (offset 0x94) and KTIPCSTS (offset 0x120) registers which reside under IIO bus(3) on SKX/CLX. This patch enable UPI topology discovery on Skylake Server. Topology is exposed through attributes /sys/devices/uncore_upi_/dieX, where dieX is file which holds "upi_:die_" connected to this UPI link. 
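As a consumption example (not part of the patch), a small userspace
sketch that reads one of these attributes back. The instance name
"uncore_upi_0" and the die number are hypothetical; the
"upi_%d,die_%d" payload format follows skx_upi_mapping_show() in the
diff below.

    #include <stdio.h>

    int main(void)
    {
            char buf[64];
            /* Hypothetical instance: first UPI PMON block, die 0. */
            FILE *f = fopen("/sys/devices/uncore_upi_0/die0", "r");

            if (!f) {
                    perror("uncore_upi_0/die0");
                    return 1;
            }
            if (fgets(buf, sizeof(buf), f))
                    /* Prints e.g. "upi_1,die_1": wired to UPI port 1 on die 1. */
                    printf("%s", buf);
            fclose(f);
            return 0;
    }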
Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221117122833.3103580-7-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 130 +++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 254ba0ad650a..f682a9a0f562 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -4253,6 +4253,132 @@ static struct intel_uncore_ops skx_upi_uncore_pci_ops = { .read_counter = snbep_uncore_pci_read_counter, }; +static umode_t +skx_upi_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj)); + + return pmu->type->topology[die][pmu->pmu_idx].upi->enabled ? attr->mode : 0; +} + +static ssize_t skx_upi_mapping_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev); + struct dev_ext_attribute *ea = to_dev_ext_attribute(attr); + long die = (long)ea->var; + struct uncore_upi_topology *upi = pmu->type->topology[die][pmu->pmu_idx].upi; + + return sysfs_emit(buf, "upi_%d,die_%d\n", upi->pmu_idx_to, upi->die_to); +} + +#define SKX_UPI_REG_DID 0x2058 +#define SKX_UPI_REGS_ADDR_DEVICE_LINK0 0x0e +#define SKX_UPI_REGS_ADDR_FUNCTION 0x00 + +/* + * UPI Link Parameter 0 + * | Bit | Default | Description + * | 19:16 | 0h | base_nodeid - The NodeID of the sending socket. + * | 12:8 | 00h | sending_port - The processor die port number of the sending port. + */ +#define SKX_KTILP0_OFFSET 0x94 + +/* + * UPI Pcode Status. This register is used by PCode to store the link training status. + * | Bit | Default | Description + * | 4 | 0h | ll_status_valid — Bit indicates the valid training status + * logged from PCode to the BIOS. 
+ */ +#define SKX_KTIPCSTS_OFFSET 0x120 + +static int upi_fill_topology(struct pci_dev *dev, struct intel_uncore_topology *tp, + int pmu_idx) +{ + int ret; + u32 upi_conf; + struct uncore_upi_topology *upi = tp->upi; + + tp->pmu_idx = pmu_idx; + ret = pci_read_config_dword(dev, SKX_KTIPCSTS_OFFSET, &upi_conf); + if (ret) { + ret = pcibios_err_to_errno(ret); + goto err; + } + upi->enabled = (upi_conf >> 4) & 1; + if (upi->enabled) { + ret = pci_read_config_dword(dev, SKX_KTILP0_OFFSET, + &upi_conf); + if (ret) { + ret = pcibios_err_to_errno(ret); + goto err; + } + upi->die_to = (upi_conf >> 16) & 0xf; + upi->pmu_idx_to = (upi_conf >> 8) & 0x1f; + } +err: + return ret; +} + +static int skx_upi_topology_cb(struct intel_uncore_type *type, int segment, + int die, u64 cpu_bus_msr) +{ + int idx, ret; + struct intel_uncore_topology *upi; + unsigned int devfn; + struct pci_dev *dev = NULL; + u8 bus = cpu_bus_msr >> (3 * BUS_NUM_STRIDE); + + for (idx = 0; idx < type->num_boxes; idx++) { + upi = &type->topology[die][idx]; + devfn = PCI_DEVFN(SKX_UPI_REGS_ADDR_DEVICE_LINK0 + idx, + SKX_UPI_REGS_ADDR_FUNCTION); + dev = pci_get_domain_bus_and_slot(segment, bus, devfn); + if (dev) { + ret = upi_fill_topology(dev, upi, idx); + if (ret) + break; + } + } + + pci_dev_put(dev); + return ret; +} + +static int skx_upi_get_topology(struct intel_uncore_type *type) +{ + /* CPX case is not supported */ + if (boot_cpu_data.x86_stepping == 11) + return -EPERM; + + return skx_pmu_get_topology(type, skx_upi_topology_cb); +} + +static struct attribute_group skx_upi_mapping_group = { + .is_visible = skx_upi_mapping_visible, +}; + +static const struct attribute_group *skx_upi_attr_update[] = { + &skx_upi_mapping_group, + NULL +}; + +static int +pmu_upi_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) +{ + return pmu_set_mapping(type, ag, skx_upi_mapping_show, UPI_TOPOLOGY_TYPE); +} + +static int skx_upi_set_mapping(struct intel_uncore_type *type) +{ + return pmu_upi_set_mapping(type, &skx_upi_mapping_group); +} + +static void skx_upi_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_cleanup_mapping(type, &skx_upi_mapping_group); +} + static struct intel_uncore_type skx_uncore_upi = { .name = "upi", .num_counters = 4, @@ -4265,6 +4391,10 @@ static struct intel_uncore_type skx_uncore_upi = { .box_ctl = SKX_UPI_PCI_PMON_BOX_CTL, .ops = &skx_upi_uncore_pci_ops, .format_group = &skx_upi_uncore_format_group, + .attr_update = skx_upi_attr_update, + .get_topology = skx_upi_get_topology, + .set_mapping = skx_upi_set_mapping, + .cleanup_mapping = skx_upi_cleanup_mapping, }; static void skx_m2m_uncore_pci_init_box(struct intel_uncore_box *box) -- cgit v1.2.3 From c4aebdb3b5f50fd0d83bf0fc2d49ac299f8b61df Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:29 +0000 Subject: perf/x86/intel/uncore: Get UPI NodeID and GroupID The GIDNIDMAP register of UBOX device is used to get the topology information in the snbep_pci2phy_map_init(). The same approach will be used to discover UPI topology for ICX and SPR platforms. Move common code that will be reused in next patches. 
Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221117122833.3103580-8-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index f682a9a0f562..6da5f692afea 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1372,6 +1372,28 @@ static struct pci_driver snbep_uncore_pci_driver = { #define NODE_ID_MASK 0x7 +/* Each three bits from 0 to 23 of GIDNIDMAP register correspond Node ID. */ +#define GIDNIDMAP(config, id) (((config) >> (3 * (id))) & 0x7) + +static int upi_nodeid_groupid(struct pci_dev *ubox_dev, int nodeid_loc, int idmap_loc, + int *nodeid, int *groupid) +{ + int ret; + + /* get the Node ID of the local register */ + ret = pci_read_config_dword(ubox_dev, nodeid_loc, nodeid); + if (ret) + goto err; + + *nodeid = *nodeid & NODE_ID_MASK; + /* get the Node ID mapping */ + ret = pci_read_config_dword(ubox_dev, idmap_loc, groupid); + if (ret) + goto err; +err: + return ret; +} + /* * build pci bus to socket mapping */ @@ -1397,13 +1419,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool * the topology. */ if (nr_node_ids <= 8) { - /* get the Node ID of the local register */ - err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); - if (err) - break; - nodeid = config & NODE_ID_MASK; - /* get the Node ID mapping */ - err = pci_read_config_dword(ubox_dev, idmap_loc, &config); + err = upi_nodeid_groupid(ubox_dev, nodeid_loc, idmap_loc, + &nodeid, &config); if (err) break; @@ -1421,7 +1438,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool * to a particular node. */ for (i = 0; i < 8; i++) { - if (nodeid == ((config >> (3 * i)) & 0x7)) { + if (nodeid == GIDNIDMAP(config, i)) { if (topology_max_die_per_package() > 1) die_id = i; else -- cgit v1.2.3 From f680b6e6062ef3c944ffc966d685f067958fca33 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:30 +0000 Subject: perf/x86/intel/uncore: Enable UPI topology discovery for Icelake Server UPI topology discovery relies on data from KTILP0 (offset 0x94) and KTIPCSTS (offset 0x120) as well as on SKX but on Icelake Server these registers reside under UBOX (Device ID 0x3450) bus. This patch enables /sys/devices/uncore_upi_*/die* attributes on Icelake Server. 
Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221117122833.3103580-9-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 75 ++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 6da5f692afea..d45f5843444d 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -445,6 +445,7 @@ #define ICX_UPI_PCI_PMON_CTR0 0x320 #define ICX_UPI_PCI_PMON_BOX_CTL 0x318 #define ICX_UPI_CTL_UMASK_EXT 0xffffff +#define ICX_UBOX_DID 0x3450 /* ICX M3UPI*/ #define ICX_M3UPI_PCI_PMON_CTL0 0xd8 @@ -5594,6 +5595,76 @@ static const struct attribute_group icx_upi_uncore_format_group = { .attrs = icx_upi_uncore_formats_attr, }; +#define ICX_UPI_REGS_ADDR_DEVICE_LINK0 0x02 +#define ICX_UPI_REGS_ADDR_FUNCTION 0x01 + +static int discover_upi_topology(struct intel_uncore_type *type, int ubox_did, int dev_link0) +{ + struct pci_dev *ubox = NULL; + struct pci_dev *dev = NULL; + u32 nid, gid; + int i, idx, ret = -EPERM; + struct intel_uncore_topology *upi; + unsigned int devfn; + + /* GIDNIDMAP method supports machines which have less than 8 sockets. */ + if (uncore_max_dies() > 8) + goto err; + + while ((ubox = pci_get_device(PCI_VENDOR_ID_INTEL, ubox_did, ubox))) { + ret = upi_nodeid_groupid(ubox, SKX_CPUNODEID, SKX_GIDNIDMAP, &nid, &gid); + if (ret) { + ret = pcibios_err_to_errno(ret); + break; + } + + for (i = 0; i < 8; i++) { + if (nid != GIDNIDMAP(gid, i)) + continue; + for (idx = 0; idx < type->num_boxes; idx++) { + upi = &type->topology[nid][idx]; + devfn = PCI_DEVFN(dev_link0 + idx, ICX_UPI_REGS_ADDR_FUNCTION); + dev = pci_get_domain_bus_and_slot(pci_domain_nr(ubox->bus), + ubox->bus->number, + devfn); + if (dev) { + ret = upi_fill_topology(dev, upi, idx); + if (ret) + goto err; + } + } + } + } +err: + pci_dev_put(ubox); + pci_dev_put(dev); + return ret; +} + +static int icx_upi_get_topology(struct intel_uncore_type *type) +{ + return discover_upi_topology(type, ICX_UBOX_DID, ICX_UPI_REGS_ADDR_DEVICE_LINK0); +} + +static struct attribute_group icx_upi_mapping_group = { + .is_visible = skx_upi_mapping_visible, +}; + +static const struct attribute_group *icx_upi_attr_update[] = { + &icx_upi_mapping_group, + NULL +}; + +static int icx_upi_set_mapping(struct intel_uncore_type *type) +{ + return pmu_upi_set_mapping(type, &icx_upi_mapping_group); +} + +static void icx_upi_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_cleanup_mapping(type, &icx_upi_mapping_group); +} + static struct intel_uncore_type icx_uncore_upi = { .name = "upi", .num_counters = 4, @@ -5606,6 +5677,10 @@ static struct intel_uncore_type icx_uncore_upi = { .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL, .ops = &skx_upi_uncore_pci_ops, .format_group = &icx_upi_uncore_format_group, + .attr_update = icx_upi_attr_update, + .get_topology = icx_upi_get_topology, + .set_mapping = icx_upi_set_mapping, + .cleanup_mapping = icx_upi_cleanup_mapping, }; static struct event_constraint icx_uncore_m3upi_constraints[] = { -- cgit v1.2.3 From 9a3b675cd393a4430d5e6cbf94c5de43414613cd Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:31 +0000 Subject: perf/x86/intel/uncore: Enable UPI topology discovery for Sapphire Rapids UPI topology discovery on SPR is same as in ICX but UBOX device has different Device ID 0x3250. 
This patch enables the /sys/devices/uncore_upi_*/die* attributes on SPR. Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221117122833.3103580-10-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 43 +++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index d45f5843444d..4c2d5b5ea445 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -458,6 +458,7 @@ /* SPR */ #define SPR_RAW_EVENT_MASK_EXT 0xffffff +#define SPR_UBOX_DID 0x3250 /* SPR CHA */ #define SPR_CHA_PMON_CTL_TID_EN (1 << 16) @@ -6112,9 +6113,43 @@ static struct intel_uncore_type spr_uncore_m2m = { .name = "m2m", }; +static struct attribute_group spr_upi_mapping_group = { + .is_visible = skx_upi_mapping_visible, +}; + +static const struct attribute_group *spr_upi_attr_update[] = { + &uncore_alias_group, + &spr_upi_mapping_group, + NULL +}; + +#define SPR_UPI_REGS_ADDR_DEVICE_LINK0 0x01 + +static int spr_upi_set_mapping(struct intel_uncore_type *type) +{ + return pmu_upi_set_mapping(type, &spr_upi_mapping_group); +} + +static void spr_upi_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_cleanup_mapping(type, &spr_upi_mapping_group); +} + +static int spr_upi_get_topology(struct intel_uncore_type *type) +{ + return discover_upi_topology(type, SPR_UBOX_DID, SPR_UPI_REGS_ADDR_DEVICE_LINK0); +} + static struct intel_uncore_type spr_uncore_upi = { - SPR_UNCORE_PCI_COMMON_FORMAT(), + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, + .format_group = &spr_uncore_raw_format_group, + .ops = &spr_uncore_pci_ops, .name = "upi", + .attr_update = spr_upi_attr_update, + .get_topology = spr_upi_get_topology, + .set_mapping = spr_upi_set_mapping, + .cleanup_mapping = spr_upi_cleanup_mapping, }; static struct intel_uncore_type spr_uncore_m3upi = { @@ -6318,6 +6353,12 @@ static void uncore_type_customized_copy(struct intel_uncore_type *to_type, to_type->format_group = from_type->format_group; if (from_type->attr_update) to_type->attr_update = from_type->attr_update; + if (from_type->set_mapping) + to_type->set_mapping = from_type->set_mapping; + if (from_type->get_topology) + to_type->get_topology = from_type->get_topology; + if (from_type->cleanup_mapping) + to_type->cleanup_mapping = from_type->cleanup_mapping; } static struct intel_uncore_type ** -- cgit v1.2.3 From d5b73506b5b1b4a6e675c54b7977ea08e64cba19 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:33 +0000 Subject: perf/x86/intel/uncore: Make set_mapping() procedure void The return value of set_mapping() no longer needs to be checked, so make this procedure void.
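A minimal sketch of what the void conversion means for a caller; the structure and function names here are invented stand-ins, not the real uncore.c call site. With set_mapping() returning void, failure handling stays inside the callback (as the diff below shows, pmu_set_mapping() frees the topology and clears the mapping attributes itself), so a caller simply invokes it and has nothing left to check:

#include <stdio.h>

/* Invented stand-in types and callbacks -- a sketch, not the kernel code. */
struct uncore_type_sketch {
	int  (*get_topology)(struct uncore_type_sketch *type);
	void (*set_mapping)(struct uncore_type_sketch *type);	/* was: int (*)(...) */
	void (*cleanup_mapping)(struct uncore_type_sketch *type);
};

static int fake_get_topology(struct uncore_type_sketch *type)
{
	(void)type;
	return 0;			/* pretend topology discovery succeeded */
}

static void fake_set_mapping(struct uncore_type_sketch *type)
{
	(void)type;
	/* On failure the real callback clears its attributes internally. */
	printf("mapping attributes installed\n");
}

int main(void)
{
	struct uncore_type_sketch upi = {
		.get_topology = fake_get_topology,
		.set_mapping  = fake_set_mapping,
	};

	if (upi.get_topology(&upi))
		return 1;		/* discovery failed; nothing to map */

	upi.set_mapping(&upi);		/* void now: no return value to check */
	return 0;
}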
Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221117122833.3103580-12-alexander.antonov@linux.intel.com --- arch/x86/events/intel/uncore.h | 2 +- arch/x86/events/intel/uncore_snbep.c | 41 +++++++++++++++++------------------- 2 files changed, 20 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index fac3612289f1..e278e2e7c051 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -94,7 +94,7 @@ struct intel_uncore_type { * Optional callbacks for managing mapping of Uncore units to PMONs */ int (*get_topology)(struct intel_uncore_type *type); - int (*set_mapping)(struct intel_uncore_type *type); + void (*set_mapping)(struct intel_uncore_type *type); void (*cleanup_mapping)(struct intel_uncore_type *type); }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 4c2d5b5ea445..b8f9bd150b42 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3924,7 +3924,7 @@ static void pmu_clear_mapping_attr(const struct attribute_group **groups, } } -static int +static void pmu_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag, ssize_t (*show)(struct device*, struct device_attribute*, char*), int topology_type) @@ -3943,8 +3943,6 @@ pmu_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag, if (ret < 0) goto clear_topology; - ret = -ENOMEM; - /* One more for NULL. */ attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL); if (!attrs) @@ -3968,7 +3966,7 @@ pmu_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag, } ag->attrs = attrs; - return 0; + return; err: for (; die >= 0; die--) kfree(eas[die].attr.attr.name); @@ -3979,7 +3977,6 @@ clear_topology: pmu_free_topology(type); clear_attr_update: pmu_clear_mapping_attr(type->attr_update, ag); - return ret; } static void @@ -3998,15 +3995,15 @@ pmu_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag) pmu_free_topology(type); } -static int +static void pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) { - return pmu_set_mapping(type, ag, skx_iio_mapping_show, IIO_TOPOLOGY_TYPE); + pmu_set_mapping(type, ag, skx_iio_mapping_show, IIO_TOPOLOGY_TYPE); } -static int skx_iio_set_mapping(struct intel_uncore_type *type) +static void skx_iio_set_mapping(struct intel_uncore_type *type) { - return pmu_iio_set_mapping(type, &skx_iio_mapping_group); + pmu_iio_set_mapping(type, &skx_iio_mapping_group); } static void skx_iio_cleanup_mapping(struct intel_uncore_type *type) @@ -4382,15 +4379,15 @@ static const struct attribute_group *skx_upi_attr_update[] = { NULL }; -static int +static void pmu_upi_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) { - return pmu_set_mapping(type, ag, skx_upi_mapping_show, UPI_TOPOLOGY_TYPE); + pmu_set_mapping(type, ag, skx_upi_mapping_show, UPI_TOPOLOGY_TYPE); } -static int skx_upi_set_mapping(struct intel_uncore_type *type) +static void skx_upi_set_mapping(struct intel_uncore_type *type) { - return pmu_upi_set_mapping(type, &skx_upi_mapping_group); + pmu_upi_set_mapping(type, &skx_upi_mapping_group); } static void skx_upi_cleanup_mapping(struct intel_uncore_type *type) @@ -4773,9 +4770,9 @@ static int snr_iio_get_topology(struct intel_uncore_type *type) return sad_cfg_iio_topology(type, snr_sad_pmon_mapping); } -static int 
snr_iio_set_mapping(struct intel_uncore_type *type) +static void snr_iio_set_mapping(struct intel_uncore_type *type) { - return pmu_iio_set_mapping(type, &snr_iio_mapping_group); + pmu_iio_set_mapping(type, &snr_iio_mapping_group); } static void snr_iio_cleanup_mapping(struct intel_uncore_type *type) @@ -5391,14 +5388,14 @@ static int icx_iio_get_topology(struct intel_uncore_type *type) return sad_cfg_iio_topology(type, icx_sad_pmon_mapping); } -static int icx_iio_set_mapping(struct intel_uncore_type *type) +static void icx_iio_set_mapping(struct intel_uncore_type *type) { /* Detect ICX-D system. This case is not supported */ if (boot_cpu_data.x86_model == INTEL_FAM6_ICELAKE_D) { pmu_clear_mapping_attr(type->attr_update, &icx_iio_mapping_group); - return -EPERM; + return; } - return pmu_iio_set_mapping(type, &icx_iio_mapping_group); + pmu_iio_set_mapping(type, &icx_iio_mapping_group); } static void icx_iio_cleanup_mapping(struct intel_uncore_type *type) @@ -5656,9 +5653,9 @@ static const struct attribute_group *icx_upi_attr_update[] = { NULL }; -static int icx_upi_set_mapping(struct intel_uncore_type *type) +static void icx_upi_set_mapping(struct intel_uncore_type *type) { - return pmu_upi_set_mapping(type, &icx_upi_mapping_group); + pmu_upi_set_mapping(type, &icx_upi_mapping_group); } static void icx_upi_cleanup_mapping(struct intel_uncore_type *type) @@ -6125,9 +6122,9 @@ static const struct attribute_group *spr_upi_attr_update[] = { #define SPR_UPI_REGS_ADDR_DEVICE_LINK0 0x01 -static int spr_upi_set_mapping(struct intel_uncore_type *type) +static void spr_upi_set_mapping(struct intel_uncore_type *type) { - return pmu_upi_set_mapping(type, &spr_upi_mapping_group); + pmu_upi_set_mapping(type, &spr_upi_mapping_group); } static void spr_upi_cleanup_mapping(struct intel_uncore_type *type) -- cgit v1.2.3 From c508eb042d9739bf9473526f53303721b70e9100 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 18 Nov 2022 14:31:34 +0800 Subject: perf/x86/intel/uncore: Fix reference count leak in sad_cfg_iio_topology() pci_get_device() will increase the reference count for the returned pci_dev, and also decrease the reference count for the input parameter *from* if it is not NULL. If we break out of the loop in sad_cfg_iio_topology() with 'dev' not NULL, we need to call pci_dev_put() to decrease the reference count. Since pci_dev_put() can handle the NULL input parameter, we can just add one pci_dev_put() right before 'return ret'. Fixes: c1777be3646b ("perf/x86/intel/uncore: Enable I/O stacks to IIO PMON mapping on SNR") Signed-off-by: Xiongfeng Wang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221118063137.121512-2-wangxiongfeng2@huawei.com --- arch/x86/events/intel/uncore_snbep.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b8f9bd150b42..b14d59183516 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -4743,6 +4743,8 @@ static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_map type->topology[die][stack_id].iio->pci_bus_no = dev->bus->number; } + pci_dev_put(dev); + return ret; } -- cgit v1.2.3 From 1ff9dd6e7071a561f803135c1d684b13c7a7d01d Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 18 Nov 2022 14:31:35 +0800 Subject: perf/x86/intel/uncore: Fix reference count leak in hswep_has_limit_sbox() pci_get_device() will increase the reference count for the returned 'dev'.
We need to call pci_dev_put() to decrease the reference count. Since 'dev' is only used in pci_read_config_dword(), let's add pci_dev_put() right after it. Fixes: 9d480158ee86 ("perf/x86/intel/uncore: Remove uncore extra PCI dev HSWEP_PCI_PCU_3") Signed-off-by: Xiongfeng Wang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221118063137.121512-3-wangxiongfeng2@huawei.com --- arch/x86/events/intel/uncore_snbep.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b14d59183516..93b0f1f4d0d1 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2910,6 +2910,7 @@ static bool hswep_has_limit_sbox(unsigned int device) return false; pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4); + pci_dev_put(dev); if (!hswep_get_chop(capid4)) return true; -- cgit v1.2.3 From 8ebd16c11c346751b3944d708e6c181ed4746c39 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 18 Nov 2022 14:31:36 +0800 Subject: perf/x86/intel/uncore: Fix reference count leak in snr_uncore_mmio_map() pci_get_device() will increase the reference count for the returned pci_dev, so snr_uncore_get_mc_dev() will return a pci_dev with its reference count increased. We need to call pci_dev_put() to decrease the reference count. Let's add the missing pci_dev_put(). Fixes: ee49532b38dd ("perf/x86/intel/uncore: Add IMC uncore support for Snow Ridge") Signed-off-by: Xiongfeng Wang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221118063137.121512-4-wangxiongfeng2@huawei.com --- arch/x86/events/intel/uncore_snbep.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 93b0f1f4d0d1..44c2f879f708 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -5111,6 +5111,8 @@ static int snr_uncore_mmio_map(struct intel_uncore_box *box, addr += box_ctl; + pci_dev_put(pdev); + box->io_addr = ioremap(addr, type->mmio_map_size); if (!box->io_addr) { pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); -- cgit v1.2.3 From 17b8d847b92d815d1638f0de154654081d66b281 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 18 Nov 2022 14:31:37 +0800 Subject: perf/x86/intel/uncore: Fix reference count leak in __uncore_imc_init_box() pci_get_device() will increase the reference count for the returned pci_dev, so tgl_uncore_get_mc_dev() will return a pci_dev with its reference count increased. We need to call pci_dev_put() to decrease the reference count before exiting from __uncore_imc_init_box(). Add pci_dev_put() for both normal and error path. Fixes: fdb64822443e ("perf/x86: Add Intel Tiger Lake uncore support") Signed-off-by: Xiongfeng Wang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20221118063137.121512-5-wangxiongfeng2@huawei.com --- arch/x86/events/intel/uncore_snb.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 1ef4f7861e2e..1f4869227efb 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1338,6 +1338,7 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box, /* MCHBAR is disabled */ if (!(mch_bar & BIT(0))) { pr_warn("perf uncore: MCHBAR is disabled. 
Failed to map IMC free-running counters.\n"); + pci_dev_put(pdev); return; } mch_bar &= ~BIT(0); @@ -1352,6 +1353,8 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box, box->io_addr = ioremap(addr, type->mmio_map_size); if (!box->io_addr) pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); + + pci_dev_put(pdev); } static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) -- cgit v1.2.3
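The four reference-count fixes above all apply the same rule: pci_get_device() hands back a pci_dev with its reference count raised (and drops the reference on the 'from' argument), so whatever reference is still held on exit must be released with pci_dev_put(), which accepts NULL. A kernel-style sketch of the pattern; the function name, device ID parameter and register offset are placeholders, not code from the patches above:

#include <linux/pci.h>
#include <linux/errno.h>

/* Placeholder scan routine illustrating the pci_get_device()/pci_dev_put()
 * discipline enforced by the fixes above. */
static int example_scan_ubox(unsigned int devid, int reg_offset, u32 *val)
{
	struct pci_dev *dev = NULL;
	int ret = -ENODEV;
	int err;

	while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev))) {
		err = pci_read_config_dword(dev, reg_offset, val);
		if (err) {
			ret = pcibios_err_to_errno(err);
			break;			/* early exit: 'dev' still holds a reference */
		}
		if (*val & 0x7) {		/* placeholder "found it" condition */
			ret = 0;
			break;			/* early exit: 'dev' still holds a reference */
		}
	}

	/* Covers both early exits and the ran-to-completion case (dev == NULL). */
	pci_dev_put(dev);
	return ret;
}

The same reasoning applies to single-lookup helpers such as hswep_has_limit_sbox(): the reference taken by pci_get_device() must be dropped once the config read is done.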