From f818947a06183dee7c2afc6648c75149586bf288 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 20 Apr 2023 22:27:24 +0200 Subject: perf/arm-cci: Slightly optimize cci_pmu_sync_counters() When the 'mask' bitmap is cleared, it is better to use its full maximum size instead of only the needed size. This lets the compiler optimize it because the size is now known at compile time. HW_CNTRS_MAX is small (i.e. currently 9), so a call to memset() is saved. Also, as 'mask' is local to the function, the non-atomic __set_bit() can safely be used here. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/88d4e20d595f771396e9d558c1587eb4494057db.1682022422.git.christophe.jaillet@wanadoo.fr Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 03b1309875ae..998259f1d973 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -645,7 +645,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; DECLARE_BITMAP(mask, HW_CNTRS_MAX); - bitmap_zero(mask, cci_pmu->num_cntrs); + bitmap_zero(mask, HW_CNTRS_MAX); for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { struct perf_event *event = cci_hw->events[i]; @@ -656,7 +656,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) if (event->hw.state & PERF_HES_STOPPED) continue; if (event->hw.state & PERF_HES_ARCH) { - set_bit(i, mask); + __set_bit(i, mask); event->hw.state &= ~PERF_HES_ARCH; } } -- cgit v1.2.3 From 7bd42f122c7cf1e8101519dced3e07866b81e0d2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 18 May 2023 10:18:08 +0200 Subject: perf: qcom_l2_pmu: Make l2_cache_pmu_probe_cluster() more robust If an error occurs after calling list_add(), the &l2cache_pmu->clusters list will reference some memory that will be freed when the managed resources are released. Move the list_add() to the end of the function, once nothing can fail anymore. This is harmless because if l2_cache_pmu_probe_cluster() fails, then l2_cache_pmu_probe() will fail as well and 'l2cache_pmu' will be released anyway. But it looks cleaner and could silence a static checker warning.
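To make the ordering rule concrete, here is a minimal sketch (hypothetical foo_* names, not the driver's actual code) of publishing a devres-managed node on a shared list only once nothing can fail anymore:

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

struct foo_cluster {
	struct list_head next;
	int irq;
};

static int foo_probe_cluster(struct device *dev, struct list_head *clusters,
			     struct platform_device *sdev)
{
	struct foo_cluster *cluster;
	int irq;

	cluster = devm_kzalloc(dev, sizeof(*cluster), GFP_KERNEL);
	if (!cluster)
		return -ENOMEM;
	INIT_LIST_HEAD(&cluster->next);

	irq = platform_get_irq(sdev, 0);
	if (irq < 0)
		return irq;	/* devres frees 'cluster'; the list never saw it */
	cluster->irq = irq;

	/* Nothing below can fail, so publishing the node is now safe. */
	list_add(&cluster->next, clusters);
	return 0;
}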
Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6a0f5bdb6b7b2ed4ef194fc49693e902ad5b95ea.1684397879.git.christophe.jaillet@wanadoo.fr Signed-off-by: Will Deacon --- drivers/perf/qcom_l2_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/perf') diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index aaca6db7d8f6..3f9a98c17a89 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -857,7 +857,6 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data) return -ENOMEM; INIT_LIST_HEAD(&cluster->next); - list_add(&cluster->next, &l2cache_pmu->clusters); cluster->cluster_id = fw_cluster_id; irq = platform_get_irq(sdev, 0); @@ -883,6 +882,7 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data) spin_lock_init(&cluster->pmu_lock); + list_add(&cluster->next, &l2cache_pmu->clusters); l2cache_pmu->num_pmus++; return 0; -- cgit v1.2.3 From 71746c995cac92fcf6a65661b51211cf2009d7f0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 24 May 2023 17:44:32 +0100 Subject: perf/arm-cmn: Fix DTC reset It turns out that my naive DTC reset logic fails to work as intended, since, after checking with the hardware designers, the PMU actually needs to be fully enabled in order to correctly clear any pending overflows. Therefore, invert the sequence to start with turning on both enables so that we can reliably get the DTCs into a known state, then move to our normal counters-stopped state from there. Since all the DTM counters have already been unpaired during the initial discovery pass, we just need to additionally reset the cycle counters to ensure that no other unexpected overflows occur during this period. Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver") Reported-by: Geoff Blake Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0ea4559261ea394f827c9aee5168c77a60aaee03.1684946389.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 47d359f72957..89a685a09d84 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1899,9 +1899,10 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id if (dtc->irq < 0) return dtc->irq; - writel_relaxed(0, dtc->base + CMN_DT_PMCR); + writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL); + writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); + writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR); writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR); - writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); return 0; } @@ -1961,7 +1962,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) dn->type = CMN_TYPE_CCLA; } - writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL); + arm_cmn_set_state(cmn, CMN_STATE_DISABLED); return 0; } -- cgit v1.2.3 From 8be3593b9efa8903d2ee7bb9cdf57a8e56c66f36 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 28 May 2023 09:02:05 +0100 Subject: drivers/perf: apple_m1: Force 63bit counters for M2 CPUs Sidharth reports that on M2, the PMU never generates any interrupt when using 'perf record', which is annoying as you get no samples. I'm tempted to say "no sample, no problem", but others may have a different opinion.
Upon investigation, it appears that the counters on M2 are significantly different from the ones on M1, as they count on 64 bits instead of 48. Which of course, in the fine M1 tradition, means that we can only use 63 bits, as the top bit is used to signal the interrupt... This results in having to introduce yet another flag to indicate yet another odd counter width. Who knows what the next crazy implementation will do... With this, perf can work out the correct offset, and 'perf record' works as intended. Tested on M2 and M2-Pro CPUs. Cc: Janne Grunau Cc: Hector Martin Cc: Mark Rutland Cc: Will Deacon Fixes: 7d0bfb7c9977 ("drivers/perf: apple_m1: Add Apple M2 support") Reported-by: Sidharth Kshatriya Tested-by: Sidharth Kshatriya Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230528080205.288446-1-maz@kernel.org Signed-off-by: Will Deacon --- drivers/perf/apple_m1_cpu_pmu.c | 30 ++++++++++++++++++++++++------ drivers/perf/arm_pmu.c | 2 ++ include/linux/perf/arm_pmu.h | 2 ++ 3 files changed, 28 insertions(+), 6 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index 8574c6e58c83..cd2de44b61b9 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -493,6 +493,17 @@ static int m1_pmu_map_event(struct perf_event *event) return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT); } +static int m2_pmu_map_event(struct perf_event *event) +{ + /* + * Same deal as the above, except that M2 has 64bit counters. + * Which, as far as we're concerned, actually means 63 bits. + * Yes, this is getting awkward. + */ + event->hw.flags |= ARMPMU_EVT_63BIT; + return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT); +} + static void m1_pmu_reset(void *info) { int i; @@ -525,7 +536,7 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event, return 0; } -static int m1_pmu_init(struct arm_pmu *cpu_pmu) +static int m1_pmu_init(struct arm_pmu *cpu_pmu, u32 flags) { cpu_pmu->handle_irq = m1_pmu_handle_irq; cpu_pmu->enable = m1_pmu_enable_event; @@ -536,7 +547,14 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->clear_event_idx = m1_pmu_clear_event_idx; cpu_pmu->start = m1_pmu_start; cpu_pmu->stop = m1_pmu_stop; - cpu_pmu->map_event = m1_pmu_map_event; + + if (flags & ARMPMU_EVT_47BIT) + cpu_pmu->map_event = m1_pmu_map_event; + else if (flags & ARMPMU_EVT_63BIT) + cpu_pmu->map_event = m2_pmu_map_event; + else + return WARN_ON(-EINVAL); + cpu_pmu->reset = m1_pmu_reset; cpu_pmu->set_event_filter = m1_pmu_set_event_filter; @@ -550,25 +568,25 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu) static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu) { cpu_pmu->name = "apple_icestorm_pmu"; - return m1_pmu_init(cpu_pmu); + return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT); } static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu) { cpu_pmu->name = "apple_firestorm_pmu"; - return m1_pmu_init(cpu_pmu); + return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT); } static int m2_pmu_avalanche_init(struct arm_pmu *cpu_pmu) { cpu_pmu->name = "apple_avalanche_pmu"; - return m1_pmu_init(cpu_pmu); + return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT); } static int m2_pmu_blizzard_init(struct arm_pmu *cpu_pmu) { cpu_pmu->name = "apple_blizzard_pmu"; - return m1_pmu_init(cpu_pmu); + return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT); } static const struct of_device_id m1_pmu_of_device_ids[] = { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 15bd1e34a88e..277e29fbd504 100644 --- 
a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -109,6 +109,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event) { if (event->hw.flags & ARMPMU_EVT_64BIT) return GENMASK_ULL(63, 0); + else if (event->hw.flags & ARMPMU_EVT_63BIT) + return GENMASK_ULL(62, 0); else if (event->hw.flags & ARMPMU_EVT_47BIT) return GENMASK_ULL(46, 0); else diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 525b5d64e394..c0e4baf940dc 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -26,9 +26,11 @@ */ #define ARMPMU_EVT_64BIT 0x00001 /* Event uses a 64bit counter */ #define ARMPMU_EVT_47BIT 0x00002 /* Event uses a 47bit counter */ +#define ARMPMU_EVT_63BIT 0x00004 /* Event uses a 63bit counter */ static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT); static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT); +static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_63BIT) == ARMPMU_EVT_63BIT); #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x -- cgit v1.2.3 From 7a6a9f1c5a0a875a421db798d4b2ee022dc1ee1a Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 8 Jun 2023 19:43:26 +0800 Subject: drivers/perf: hisi: Don't migrate perf to the CPU going to teardown The driver needs to migrate the perf context if the CPU it is currently using is being torn down. By the time the cpuhp::teardown() callback is called, cpu_online_mask() hasn't been updated yet and still includes the CPU being torn down. In the current driver's implementation we may migrate the context to that same CPU, which leads to the calltrace below: ... [ 368.104662][ T932] task:cpuhp/0 state:D stack: 0 pid: 15 ppid: 2 flags:0x00000008 [ 368.113699][ T932] Call trace: [ 368.116834][ T932] __switch_to+0x7c/0xbc [ 368.120924][ T932] __schedule+0x338/0x6f0 [ 368.125098][ T932] schedule+0x50/0xe0 [ 368.128926][ T932] schedule_preempt_disabled+0x18/0x24 [ 368.134229][ T932] __mutex_lock.constprop.0+0x1d4/0x5dc [ 368.139617][ T932] __mutex_lock_slowpath+0x1c/0x30 [ 368.144573][ T932] mutex_lock+0x50/0x60 [ 368.148579][ T932] perf_pmu_migrate_context+0x84/0x2b0 [ 368.153884][ T932] hisi_pcie_pmu_offline_cpu+0x90/0xe0 [hisi_pcie_pmu] [ 368.160579][ T932] cpuhp_invoke_callback+0x2a0/0x650 [ 368.165707][ T932] cpuhp_thread_fun+0xe4/0x190 [ 368.170316][ T932] smpboot_thread_fn+0x15c/0x1a0 [ 368.175099][ T932] kthread+0x108/0x13c [ 368.179012][ T932] ret_from_fork+0x10/0x18 ... Use cpumask_any_but() to find a correct active CPU to fix this issue. Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Reviewed-by: Yicong Yang Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230608114326.27649-1-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/perf') diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 6fee0b6e163b..e10fc7cb9493 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -683,7 +683,7 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) pcie_pmu->on_cpu = -1; /* Choose a new CPU from all online cpus.
*/ - target = cpumask_first(cpu_online_mask); + target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) { pci_err(pcie_pmu->pdev, "There is no CPU to set\n"); return 0; -- cgit v1.2.3 From 225d757012e0afa673d8c862e6fb39ed2f429b4d Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 8 Jun 2023 13:37:42 -0700 Subject: perf: arm_cspmu: Set irq affinity only if overflow interrupt is used Don't try to set irq affinity if the PMU doesn't have an overflow interrupt. Fixes: e37dfd65731d ("perf: arm_cspmu: Add support for ARM CoreSight PMU driver") Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20230608203742.3503486-1-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index a3f1c410b417..d0572162f063 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1232,7 +1232,8 @@ static struct platform_driver arm_cspmu_driver = { static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu) { cpumask_set_cpu(cpu, &cspmu->active_cpu); - WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu)); + if (cspmu->irq) + WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu)); } static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node) -- cgit v1.2.3 From 71e0cb32d5fc61468e83ed962379af71bba8237e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Jun 2023 18:01:31 +0100 Subject: perf/arm_cspmu: Fix event attribute type ARM_CSPMU_EVENT_ATTR() defines a struct perf_pmu_events_attr, so arm_cspmu_sysfs_event_show() should not be interpreting it as struct dev_ext_attribute. Fixes: e37dfd65731d ("perf: arm_cspmu: Add support for ARM CoreSight PMU driver") Reviewed-by: Suzuki K Poulose Reviewed-and-tested-by: Ilkka Koskinen Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/27c0804af64007b2400abbc40278f642ee6a0a29.1685983270.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index d0572162f063..e8bc8fc1fb9c 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -189,10 +189,10 @@ static inline bool use_64b_counter_reg(const struct arm_cspmu *cspmu) ssize_t arm_cspmu_sysfs_event_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dev_ext_attribute *eattr = - container_of(attr, struct dev_ext_attribute, attr); - return sysfs_emit(buf, "event=0x%llx\n", - (unsigned long long)eattr->var); + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, typeof(*pmu_attr), attr); + return sysfs_emit(buf, "event=0x%llx\n", pmu_attr->id); } EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_event_show); -- cgit v1.2.3 From f9bd34e3753ea8f1433a3ba70f03a165a1416f98 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Jun 2023 18:01:33 +0100 Subject: perf/arm_cspmu: Clean up ACPI dependency Build-wise, the ACPI dependency consists of only a couple of things which could probably stand being factored out into ACPI helpers anyway. However, for the immediate concern of working towards Devicetree support here, it's easy enough to make a few tweaks to contain the affected code locally, such that we can relax the Kconfig dependency.
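The containment follows the usual guard-plus-stub shape so that callers need no #ifdefs; a condensed sketch with hypothetical foo_* names (the driver's real version appears in the diff below):

#include <linux/errno.h>

struct foo_pmu;

#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
static int foo_acpi_get_cpus(struct foo_pmu *pmu)
{
	/* the real APMT walk is compiled only when ACPI is available */
	return 0;
}
#else
static int foo_acpi_get_cpus(struct foo_pmu *pmu)
{
	return -ENODEV;
}
#endif

static int foo_get_cpus(struct foo_pmu *pmu)
{
	/* a Devicetree path can later slot in alongside the ACPI one */
	return foo_acpi_get_cpus(pmu);
}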
Reviewed-and-Tested-by: Suzuki K Poulose Reviewed-by: Ilkka Koskinen Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/9d126711c7498b199b3e6f5cf48ca60ffb9df54c.1685983270.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/Kconfig | 3 +-- drivers/perf/arm_cspmu/arm_cspmu.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig index 0b316fe69a45..25d25ded0983 100644 --- a/drivers/perf/arm_cspmu/Kconfig +++ b/drivers/perf/arm_cspmu/Kconfig @@ -4,8 +4,7 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU tristate "ARM Coresight Architecture PMU" - depends on ARM64 && ACPI - depends on ACPI_APMT || COMPILE_TEST + depends on ARM64 || COMPILE_TEST help Provides support for performance monitoring unit (PMU) devices based on ARM CoreSight PMU architecture. Note that this PMU diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index e8bc8fc1fb9c..42abe433b7a4 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -28,7 +28,6 @@ #include #include #include -#include #include "arm_cspmu.h" #include "nvidia_cspmu.h" @@ -1075,6 +1074,9 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu) return 0; } +#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64) +#include + static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid) { u32 acpi_uid; @@ -1099,7 +1101,7 @@ static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid) return -ENODEV; } -static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu) +static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu) { struct device *dev; struct acpi_apmt_node *apmt_node; @@ -1135,6 +1137,17 @@ static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu) return 0; } +#else +static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu) +{ + return -ENODEV; +} +#endif + +static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu) +{ + return arm_cspmu_acpi_get_cpus(cspmu); +} static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu) { -- cgit v1.2.3 From d2e3bb51281875be23cb4726a59b03d0a53eb0d3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Jun 2023 18:01:34 +0100 Subject: perf/arm_cspmu: Decouple APMT dependency The functional paths of the driver need not care about ACPI, so abstract the property of atomic doubleword access as its own flag (repacking the structure for a better fit). We also do not need to go poking directly at the APMT for standard resources which the ACPI layer has already dealt with, so deal with the optional MMIO page and interrupt in the normal firmware-agnostic manner. The few remaining portions of probing that *are* APMT-specific can still easily retrieve the APMT pointer as needed without us having to carry a duplicate copy around everywhere. 
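Both cleanups lean on the same firmware-agnostic idiom, sketched here with a hypothetical helper (not the driver's actual code): treat the page-1 MMIO region and the overflow interrupt as ordinary optional platform resources rather than peeking at APMT flags:

#include <linux/err.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>

static int foo_init_optional_resources(struct platform_device *pdev,
				       void __iomem **base1, int *irq)
{
	/* Map the second MMIO page only if the firmware declared one. */
	if (platform_get_resource(pdev, IORESOURCE_MEM, 1)) {
		*base1 = devm_platform_ioremap_resource(pdev, 1);
		if (IS_ERR(*base1))
			return PTR_ERR(*base1);
	}

	/* The overflow interrupt is optional: -ENXIO just means "none". */
	*irq = platform_get_irq_optional(pdev, 0);
	if (*irq == -ENXIO)
		*irq = 0;	/* no interrupt wired up */
	if (*irq < 0)
		return *irq;

	return 0;
}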
Reviewed-by: Suzuki K Poulose Signed-off-by: Robin Murphy Reviewed-and-tested-by: Ilkka Koskinen Link: https://lore.kernel.org/r/88f97268603e1aa6016d178982a1dc2861f6770d.1685983270.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 54 ++++++++++++-------------------------- drivers/perf/arm_cspmu/arm_cspmu.h | 5 ++-- 2 files changed, 19 insertions(+), 40 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 42abe433b7a4..44a9f7e25fb2 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -100,10 +100,6 @@ #define ARM_CSPMU_ACTIVE_CPU_MASK 0x0 #define ARM_CSPMU_ASSOCIATED_CPU_MASK 0x1 -/* Check if field f in flags is set with value v */ -#define CHECK_APMT_FLAG(flags, f, v) \ - ((flags & (ACPI_APMT_FLAGS_ ## f)) == (ACPI_APMT_FLAGS_ ## f ## _ ## v)) - /* Check and use default if implementer doesn't provide attribute callback */ #define CHECK_DEFAULT_IMPL_OPS(ops, callback) \ do { \ @@ -121,6 +117,11 @@ static unsigned long arm_cspmu_cpuhp_state; +static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev) +{ + return *(struct acpi_apmt_node **)dev_get_platdata(dev); +} + /* * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except * counter register. The counter register can be implemented as 32-bit or 64-bit @@ -155,12 +156,6 @@ static u64 read_reg64_hilohi(const void __iomem *addr, u32 max_poll_count) return val; } -/* Check if PMU supports 64-bit single copy atomic. */ -static inline bool supports_64bit_atomics(const struct arm_cspmu *cspmu) -{ - return CHECK_APMT_FLAG(cspmu->apmt_node->flags, ATOMIC, SUPP); -} - /* Check if cycle counter is supported. */ static inline bool supports_cycle_counter(const struct arm_cspmu *cspmu) { @@ -319,7 +314,7 @@ static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu) static atomic_t pmu_idx[ACPI_APMT_NODE_TYPE_COUNT] = { 0 }; dev = cspmu->dev; - apmt_node = cspmu->apmt_node; + apmt_node = arm_cspmu_apmt_node(dev); pmu_type = apmt_node->type; if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) { @@ -396,8 +391,8 @@ static const struct impl_match impl_match[] = { static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) { int ret; - struct acpi_apmt_node *apmt_node = cspmu->apmt_node; struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; + struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev); const struct impl_match *match = impl_match; /* @@ -719,7 +714,7 @@ static u64 arm_cspmu_read_counter(struct perf_event *event) offset = counter_offset(sizeof(u64), event->hw.idx); counter_addr = cspmu->base1 + offset; - return supports_64bit_atomics(cspmu) ? + return cspmu->has_atomic_dword ? 
readq(counter_addr) : read_reg64_hilohi(counter_addr, HILOHI_MAX_POLL); } @@ -910,24 +905,18 @@ static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev) { struct acpi_apmt_node *apmt_node; struct arm_cspmu *cspmu; - struct device *dev; - - dev = &pdev->dev; - apmt_node = *(struct acpi_apmt_node **)dev_get_platdata(dev); - if (!apmt_node) { - dev_err(dev, "failed to get APMT node\n"); - return NULL; - } + struct device *dev = &pdev->dev; cspmu = devm_kzalloc(dev, sizeof(*cspmu), GFP_KERNEL); if (!cspmu) return NULL; cspmu->dev = dev; - cspmu->apmt_node = apmt_node; - platform_set_drvdata(pdev, cspmu); + apmt_node = arm_cspmu_apmt_node(dev); + cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC; + return cspmu; } @@ -935,11 +924,9 @@ static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu) { struct device *dev; struct platform_device *pdev; - struct acpi_apmt_node *apmt_node; dev = cspmu->dev; pdev = to_platform_device(dev); - apmt_node = cspmu->apmt_node; /* Base address for page 0. */ cspmu->base0 = devm_platform_ioremap_resource(pdev, 0); @@ -950,7 +937,7 @@ static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu) /* Base address for page 1 if supported. Otherwise point to page 0. */ cspmu->base1 = cspmu->base0; - if (CHECK_APMT_FLAG(apmt_node->flags, DUAL_PAGE, SUPP)) { + if (platform_get_resource(pdev, IORESOURCE_MEM, 1)) { cspmu->base1 = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(cspmu->base1)) { dev_err(dev, "ioremap failed for page-1 resource\n"); @@ -1047,19 +1034,14 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu) int irq, ret; struct device *dev; struct platform_device *pdev; - struct acpi_apmt_node *apmt_node; dev = cspmu->dev; pdev = to_platform_device(dev); - apmt_node = cspmu->apmt_node; /* Skip IRQ request if the PMU does not support overflow interrupt. */ - if (apmt_node->ovflw_irq == 0) - return 0; - - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq < 0) - return irq; + return irq == -ENXIO ? 
0 : irq; ret = devm_request_irq(dev, irq, arm_cspmu_handle_irq, IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(dev), @@ -1103,13 +1085,11 @@ static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid) static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu) { - struct device *dev; struct acpi_apmt_node *apmt_node; int affinity_flag; int cpu; - dev = cspmu->pmu.dev; - apmt_node = cspmu->apmt_node; + apmt_node = arm_cspmu_apmt_node(cspmu->dev); affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY; if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) { @@ -1131,7 +1111,7 @@ static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu) } if (cpumask_empty(&cspmu->associated_cpus)) { - dev_dbg(dev, "No cpu associated with the PMU\n"); + dev_dbg(cspmu->dev, "No cpu associated with the PMU\n"); return -ENODEV; } diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 51323b175a4a..83df53d1c132 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -8,7 +8,6 @@ #ifndef __ARM_CSPMU_H__ #define __ARM_CSPMU_H__ -#include #include #include #include @@ -118,16 +117,16 @@ struct arm_cspmu_impl { struct arm_cspmu { struct pmu pmu; struct device *dev; - struct acpi_apmt_node *apmt_node; const char *name; const char *identifier; void __iomem *base0; void __iomem *base1; - int irq; cpumask_t associated_cpus; cpumask_t active_cpu; struct hlist_node cpuhp_node; + int irq; + bool has_atomic_dword; u32 pmcfgr; u32 num_logical_ctrs; u32 num_set_clr_reg; -- cgit v1.2.3 From 55691f99d417084305e575c477f06556f93dfb0b Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Tue, 18 Apr 2023 18:29:08 +0800 Subject: drivers/perf: imx_ddr: Add support for NXP i.MX9 SoC DDRC PMU driver Add ddr performance monitor support for i.MX93. There are 11 counters for ddr performance events. - Counter 0 is a 64-bit counter that counts only clock cycles. - Counters 1-10 are 32-bit counters that can monitor counter-specific events in addition to counting reference events. For example: perf stat -a -e imx9_ddr0/ddrc_pm_1,counter=1/,imx9_ddr0/ddrc_pm_2,counter=2/ ls Besides, this ddr pmu supports AXI filter capability. It's implemented as counter-specific events. It now supports read transaction, write transaction and read beat events, which correspond respectively to counters 2, 3 and 4. axi_mask and axi_id need to be passed as event parameters. For example: perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_rd_trans_filt,counter=2,axi_mask=ID_MASK,axi_id=ID/ perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_wr_trans_filt,counter=3,axi_mask=ID_MASK,axi_id=ID/ perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_rd_beat_filt,counter=4,axi_mask=ID_MASK,axi_id=ID/ Signed-off-by: Xu Yang Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230418102910.2065651-1-xu.yang_2@nxp.com [will: Remove redundant error message on platform_get_irq() failure] Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 8 + drivers/perf/Makefile | 1 + drivers/perf/fsl_imx9_ddr_perf.c | 711 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 720 insertions(+) create mode 100644 drivers/perf/fsl_imx9_ddr_perf.c (limited to 'drivers/perf') diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 711f82400086..4c07d71cab0b 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -127,6 +127,14 @@ config FSL_IMX8_DDR_PMU can give information about memory throughput and other related events.
+config FSL_IMX9_DDR_PMU + tristate "Freescale i.MX9 DDR perf monitor" + depends on ARCH_MXC + help + Provides support for the DDR performance monitor in i.MX9, which + can give information about memory throughput and other related + events. + config QCOM_L2_PMU bool "Qualcomm Technologies L2-cache PMU" depends on ARCH_QCOM && ARM64 && ACPI diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index dabc859540ce..5cfe8954c250 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o +obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c new file mode 100644 index 000000000000..71d5b07e3aff --- /dev/null +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -0,0 +1,711 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright 2023 NXP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Performance monitor configuration */ +#define PMCFG1 0x00 +#define PMCFG1_RD_TRANS_FILT_EN BIT(31) +#define PMCFG1_WR_TRANS_FILT_EN BIT(30) +#define PMCFG1_RD_BT_FILT_EN BIT(29) +#define PMCFG1_ID_MASK GENMASK(17, 0) + +#define PMCFG2 0x04 +#define PMCFG2_ID GENMASK(17, 0) + +/* Global control register affects all counters and takes priority over local control registers */ +#define PMGC0 0x40 +/* Global control register bits */ +#define PMGC0_FAC BIT(31) +#define PMGC0_PMIE BIT(30) +#define PMGC0_FCECE BIT(29) + +/* + * 64bit counter0 exclusively dedicated to counting cycles + * 32bit counters monitor counter-specific events in addition to counting reference events + */ +#define PMLCA(n) (0x40 + 0x10 + (0x10 * n)) +#define PMLCB(n) (0x40 + 0x14 + (0x10 * n)) +#define PMC(n) (0x40 + 0x18 + (0x10 * n)) +/* Local control register bits */ +#define PMLCA_FC BIT(31) +#define PMLCA_CE BIT(26) +#define PMLCA_EVENT GENMASK(22, 16) + +#define NUM_COUNTERS 11 +#define CYCLES_COUNTER 0 + +#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) + +#define DDR_PERF_DEV_NAME "imx9_ddr" +#define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu" + +static DEFINE_IDA(ddr_ida); + +struct imx_ddr_devtype_data { + const char *identifier; /* system PMU identifier for userspace */ +}; + +struct ddr_pmu { + struct pmu pmu; + void __iomem *base; + unsigned int cpu; + struct hlist_node node; + struct device *dev; + struct perf_event *events[NUM_COUNTERS]; + int active_events; + enum cpuhp_state cpuhp_state; + const struct imx_ddr_devtype_data *devtype_data; + int irq; + int id; +}; + +static const struct imx_ddr_devtype_data imx93_devtype_data = { + .identifier = "imx93", +}; + +static const struct of_device_id imx_ddr_pmu_dt_ids[] = { + {.compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data}, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids); + +static ssize_t ddr_perf_identifier_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct ddr_pmu *pmu = dev_get_drvdata(dev); + + return sysfs_emit(page, "%s\n", pmu->devtype_data->identifier); +} + +static struct device_attribute ddr_perf_identifier_attr = + __ATTR(identifier, 0444, ddr_perf_identifier_show, NULL); + +static struct attribute *ddr_perf_identifier_attrs[] = { + 
&ddr_perf_identifier_attr.attr, + NULL, +}; + +static struct attribute_group ddr_perf_identifier_attr_group = { + .attrs = ddr_perf_identifier_attrs, +}; + +static ssize_t ddr_perf_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ddr_pmu *pmu = dev_get_drvdata(dev); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu)); +} + +static struct device_attribute ddr_perf_cpumask_attr = + __ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL); + +static struct attribute *ddr_perf_cpumask_attrs[] = { + &ddr_perf_cpumask_attr.attr, + NULL, +}; + +static const struct attribute_group ddr_perf_cpumask_attr_group = { + .attrs = ddr_perf_cpumask_attrs, +}; + +static ssize_t ddr_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \ + (&((struct perf_pmu_events_attr[]) { \ + { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\ + .id = _id, } \ + })[0].attr.attr) + +static struct attribute *ddr_perf_events_attrs[] = { + /* counter0 cycles event */ + IMX9_DDR_PMU_EVENT_ATTR(cycles, 0), + + /* reference events for all normal counters, need assert DEBUG19[21] bit */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ddrc1_rmw_for_ecc, 12), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_rreorder, 13), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_wreorder, 14), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_0, 15), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_1, 16), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_2, 17), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_3, 18), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_4, 19), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_5, 22), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_6, 23), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_7, 24), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_8, 25), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_9, 26), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_10, 27), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_11, 28), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_12, 31), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_13, 59), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_15, 61), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_29, 63), + + /* counter1 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, 70), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, 71), + + /* counter2 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, 70), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, 71), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, 72), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, 73), + + /* counter3 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, 70), + 
IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, 71), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, 72), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, 73), + + /* counter4 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, 70), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, 71), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, 72), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, 73), + + /* counter5 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, 70), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, 71), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, 72), + + /* counter6 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, 72), + + /* counter7 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, 64), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, 65), + + /* counter8 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, 64), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, 65), + + /* counter9 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, 65), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, 66), + + /* counter10 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, 65), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, 66), + NULL, +}; + +static const struct attribute_group ddr_perf_events_attr_group = { + .name = "events", + .attrs = ddr_perf_events_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); +PMU_FORMAT_ATTR(counter, "config:8-15"); +PMU_FORMAT_ATTR(axi_id, "config1:0-17"); +PMU_FORMAT_ATTR(axi_mask, "config2:0-17"); + +static struct attribute *ddr_perf_format_attrs[] = { + &format_attr_event.attr, + &format_attr_counter.attr, + &format_attr_axi_id.attr, + &format_attr_axi_mask.attr, + NULL, +}; + +static const struct attribute_group ddr_perf_format_attr_group = { + .name = "format", + .attrs = ddr_perf_format_attrs, +}; + +static const struct attribute_group *attr_groups[] = { + &ddr_perf_identifier_attr_group, + &ddr_perf_cpumask_attr_group, + &ddr_perf_events_attr_group, + &ddr_perf_format_attr_group, + NULL, +}; + +static void ddr_perf_clear_counter(struct ddr_pmu *pmu, int counter) +{ + if (counter == CYCLES_COUNTER) { + writel(0, pmu->base + PMC(counter) + 0x4); + writel(0, pmu->base + PMC(counter)); + } else { + writel(0, pmu->base + PMC(counter)); + } +} + +static u64 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter) +{ + u32 val_lower, val_upper; + u64 val; + + if (counter != CYCLES_COUNTER) { + val = readl_relaxed(pmu->base + PMC(counter)); + goto out; + } + + /* special handling for reading 64bit cycle counter */ + do { + val_upper = readl_relaxed(pmu->base + PMC(counter) + 0x4); + val_lower = readl_relaxed(pmu->base + PMC(counter)); + } while (val_upper != readl_relaxed(pmu->base + PMC(counter) + 0x4)); + + val = val_upper; + val = (val << 32); + val |= val_lower; +out: + return 
val; +} + +static void ddr_perf_counter_global_config(struct ddr_pmu *pmu, bool enable) +{ + u32 ctrl; + + ctrl = readl_relaxed(pmu->base + PMGC0); + + if (enable) { + /* + * The performance monitor must be reset before event counting + * sequences. The performance monitor can be reset by first freezing + * one or more counters and then clearing the freeze condition to + * allow the counters to count according to the settings in the + * performance monitor registers. Counters can be frozen individually + * by setting PMLCAn[FC] bits, or simultaneously by setting PMGC0[FAC]. + * Simply clearing these freeze bits will then allow the performance + * monitor to begin counting based on the register settings. + */ + ctrl |= PMGC0_FAC; + writel(ctrl, pmu->base + PMGC0); + + /* + * Freeze all counters disabled, interrupt enabled, and freeze + * counters on condition enabled. + */ + ctrl &= ~PMGC0_FAC; + ctrl |= PMGC0_PMIE | PMGC0_FCECE; + writel(ctrl, pmu->base + PMGC0); + } else { + ctrl |= PMGC0_FAC; + ctrl &= ~(PMGC0_PMIE | PMGC0_FCECE); + writel(ctrl, pmu->base + PMGC0); + } +} + +static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config, + int counter, bool enable) +{ + u32 ctrl_a; + + ctrl_a = readl_relaxed(pmu->base + PMLCA(counter)); + + if (enable) { + ctrl_a |= PMLCA_FC; + writel(ctrl_a, pmu->base + PMLCA(counter)); + + ddr_perf_clear_counter(pmu, counter); + + /* Freeze counter disabled, condition enabled, and program event.*/ + ctrl_a &= ~PMLCA_FC; + ctrl_a |= PMLCA_CE; + ctrl_a &= ~FIELD_PREP(PMLCA_EVENT, 0x7F); + ctrl_a |= FIELD_PREP(PMLCA_EVENT, (config & 0x000000FF)); + writel(ctrl_a, pmu->base + PMLCA(counter)); + } else { + /* Freeze counter. */ + ctrl_a |= PMLCA_FC; + writel(ctrl_a, pmu->base + PMLCA(counter)); + } +} + +static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int cfg2) +{ + u32 pmcfg1, pmcfg2; + int event, counter; + + event = cfg & 0x000000FF; + counter = (cfg & 0x0000FF00) >> 8; + + pmcfg1 = readl_relaxed(pmu->base + PMCFG1); + + if (counter == 2 && event == 73) + pmcfg1 |= PMCFG1_RD_TRANS_FILT_EN; + else if (counter == 2 && event != 73) + pmcfg1 &= ~PMCFG1_RD_TRANS_FILT_EN; + + if (counter == 3 && event == 73) + pmcfg1 |= PMCFG1_WR_TRANS_FILT_EN; + else if (counter == 3 && event != 73) + pmcfg1 &= ~PMCFG1_WR_TRANS_FILT_EN; + + if (counter == 4 && event == 73) + pmcfg1 |= PMCFG1_RD_BT_FILT_EN; + else if (counter == 4 && event != 73) + pmcfg1 &= ~PMCFG1_RD_BT_FILT_EN; + + pmcfg1 &= ~FIELD_PREP(PMCFG1_ID_MASK, 0x3FFFF); + pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, cfg2); + writel(pmcfg1, pmu->base + PMCFG1); + + pmcfg2 = readl_relaxed(pmu->base + PMCFG2); + pmcfg2 &= ~FIELD_PREP(PMCFG2_ID, 0x3FFFF); + pmcfg2 |= FIELD_PREP(PMCFG2_ID, cfg1); + writel(pmcfg2, pmu->base + PMCFG2); +} + +static void ddr_perf_event_update(struct perf_event *event) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + u64 new_raw_count; + + new_raw_count = ddr_perf_read_counter(pmu, counter); + local64_add(new_raw_count, &event->count); + + /* clear counter's value every time */ + ddr_perf_clear_counter(pmu, counter); +} + +static int ddr_perf_event_init(struct perf_event *event) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct perf_event *sibling; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + if (event->cpu < 0) { + 
dev_warn(pmu->dev, "Can't provide per-task data!\n"); + return -EOPNOTSUPP; + } + + /* + * We must NOT create groups containing mixed PMUs, although software + * events are acceptable (for example to create a CCN group + * periodically read when a hrtimer aka cpu-clock leader triggers). + */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) + return -EINVAL; + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu != event->pmu && + !is_software_event(sibling)) + return -EINVAL; + } + + event->cpu = pmu->cpu; + hwc->idx = -1; + + return 0; +} + +static void ddr_perf_event_start(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + local64_set(&hwc->prev_count, 0); + + ddr_perf_counter_local_config(pmu, event->attr.config, counter, true); + hwc->state = 0; +} + +static int ddr_perf_event_add(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int cfg = event->attr.config; + int cfg1 = event->attr.config1; + int cfg2 = event->attr.config2; + int counter; + + counter = (cfg & 0x0000FF00) >> 8; + + pmu->events[counter] = event; + pmu->active_events++; + hwc->idx = counter; + hwc->state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + ddr_perf_event_start(event, flags); + + /* read trans, write trans, read beat */ + ddr_perf_monitor_config(pmu, cfg, cfg1, cfg2); + + return 0; +} + +static void ddr_perf_event_stop(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + ddr_perf_counter_local_config(pmu, event->attr.config, counter, false); + ddr_perf_event_update(event); + + hwc->state |= PERF_HES_STOPPED; +} + +static void ddr_perf_event_del(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + ddr_perf_event_stop(event, PERF_EF_UPDATE); + + pmu->active_events--; + hwc->idx = -1; +} + +static void ddr_perf_pmu_enable(struct pmu *pmu) +{ + struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); + + ddr_perf_counter_global_config(ddr_pmu, true); +} + +static void ddr_perf_pmu_disable(struct pmu *pmu) +{ + struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); + + ddr_perf_counter_global_config(ddr_pmu, false); +} + +static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, + struct device *dev) +{ + *pmu = (struct ddr_pmu) { + .pmu = (struct pmu) { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = attr_groups, + .event_init = ddr_perf_event_init, + .add = ddr_perf_event_add, + .del = ddr_perf_event_del, + .start = ddr_perf_event_start, + .stop = ddr_perf_event_stop, + .read = ddr_perf_event_update, + .pmu_enable = ddr_perf_pmu_enable, + .pmu_disable = ddr_perf_pmu_disable, + }, + .base = base, + .dev = dev, + }; +} + +static irqreturn_t ddr_perf_irq_handler(int irq, void *p) +{ + struct ddr_pmu *pmu = (struct ddr_pmu *)p; + struct perf_event *event; + int i; + + /* + * Counters can generate an interrupt on an overflow when msb of a + * counter changes from 0 to 1. For the interrupt to be signalled, + * the conditions below must be satisfied: + * PMGC0[PMIE] = 1, PMGC0[FCECE] = 1, PMLCAn[CE] = 1 + * When an interrupt is signalled, PMGC0[FAC] is set by hardware and + * all of the registers are frozen.
+ * Software can clear the interrupt condition by resetting the performance + * monitor and clearing the most significant bit of the counter that + * generated the overflow. + */ + for (i = 0; i < NUM_COUNTERS; i++) { + if (!pmu->events[i]) + continue; + + event = pmu->events[i]; + + ddr_perf_event_update(event); + } + + ddr_perf_counter_global_config(pmu, true); + + return IRQ_HANDLED; +} + +static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node); + int target; + + if (cpu != pmu->cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&pmu->pmu, cpu, target); + pmu->cpu = target; + + WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu))); + + return 0; +} + +static int ddr_perf_probe(struct platform_device *pdev) +{ + struct ddr_pmu *pmu; + void __iomem *base; + int ret, irq; + char *name; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL); + if (!pmu) + return -ENOMEM; + + ddr_perf_init(pmu, base, &pdev->dev); + + pmu->devtype_data = of_device_get_match_data(&pdev->dev); + + platform_set_drvdata(pdev, pmu); + + pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", pmu->id); + if (!name) { + ret = -ENOMEM; + goto format_string_err; + } + + pmu->cpu = raw_smp_processor_id(); + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DDR_CPUHP_CB_NAME, + NULL, ddr_perf_offline_cpu); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to add callbacks for multi state\n"); + goto cpuhp_state_err; + } + pmu->cpuhp_state = ret; + + /* Register the pmu instance for cpu hotplug */ + ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); + goto cpuhp_instance_err; + } + + /* Request irq */ + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto ddr_perf_err; + } + + ret = devm_request_irq(&pdev->dev, irq, ddr_perf_irq_handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, + DDR_CPUHP_CB_NAME, pmu); + if (ret < 0) { + dev_err(&pdev->dev, "Request irq failed: %d", ret); + goto ddr_perf_err; + } + + pmu->irq = irq; + ret = irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)); + if (ret) { + dev_err(pmu->dev, "Failed to set interrupt affinity\n"); + goto ddr_perf_err; + } + + ret = perf_pmu_register(&pmu->pmu, name, -1); + if (ret) + goto ddr_perf_err; + + return 0; + +ddr_perf_err: + cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); +cpuhp_instance_err: + cpuhp_remove_multi_state(pmu->cpuhp_state); +cpuhp_state_err: +format_string_err: + ida_simple_remove(&ddr_ida, pmu->id); + dev_warn(&pdev->dev, "i.MX9 DDR Perf PMU failed (%d), disabled\n", ret); + return ret; +} + +static int ddr_perf_remove(struct platform_device *pdev) +{ + struct ddr_pmu *pmu = platform_get_drvdata(pdev); + + cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); + cpuhp_remove_multi_state(pmu->cpuhp_state); + + perf_pmu_unregister(&pmu->pmu); + + ida_simple_remove(&ddr_ida, pmu->id); + + return 0; +} + +static struct platform_driver imx_ddr_pmu_driver = { + .driver = { + .name = "imx9-ddr-pmu", + .of_match_table = imx_ddr_pmu_dt_ids, + .suppress_bind_attrs = true, + }, + .probe = ddr_perf_probe, + .remove = ddr_perf_remove, +};
+module_platform_driver(imx_ddr_pmu_driver); + +MODULE_AUTHOR("Xu Yang "); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("DDRC PerfMon for i.MX9 SoCs"); -- cgit v1.2.3 From 95f5819738a7500b91e99151a03eb05be478ee6a Mon Sep 17 00:00:00 2001 From: Xin Yang Date: Tue, 13 Jun 2023 01:34:23 +0000 Subject: perf/arm_dmc620: Add cpumask Add a cpumask for the DMC620 PMU. As it is an uncore PMU, perf userspace tool only needs to open a single counter on the CPU specified by the CPU mask for each event on a given DMC620 device. Signed-off-by: Xin Yang Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20230613013423.2078397-1-xin.yang@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_dmc620_pmu.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 5de06f9a4dd3..9d0f01c4455a 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -227,9 +227,31 @@ static const struct attribute_group dmc620_pmu_format_attr_group = { .attrs = dmc620_pmu_formats_attrs, }; +static ssize_t dmc620_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, + cpumask_of(dmc620_pmu->irq->cpu)); +} + +static struct device_attribute dmc620_pmu_cpumask_attr = + __ATTR(cpumask, 0444, dmc620_pmu_cpumask_show, NULL); + +static struct attribute *dmc620_pmu_cpumask_attrs[] = { + &dmc620_pmu_cpumask_attr.attr, + NULL, +}; + +static const struct attribute_group dmc620_pmu_cpumask_attr_group = { + .attrs = dmc620_pmu_cpumask_attrs, +}; + static const struct attribute_group *dmc620_pmu_attr_groups[] = { &dmc620_pmu_events_attr_group, &dmc620_pmu_format_attr_group, + &dmc620_pmu_cpumask_attr_group, NULL, }; -- cgit v1.2.3 From 7819e05a0dceac20c5ff78ec9b252faf3b76b824 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jun 2023 18:16:32 +0100 Subject: perf/arm-cmn: Revamp model detection CMN implements a set of CoreSight-format peripheral ID registers which in principle we should be able to use to identify the hardware. However so far we have avoided trying to use the part number field since the TRMs have all described it as "configuration dependent". It turns out, though, that this is a quirk of the documentation generation process, and in fact the part number should always be a stable well-defined field which we can trust. To that end, revamp our model detection to rely less on ACPI/DT, and pave the way towards further using the hardware information as an identifier for userspace jevent metrics. This includes renaming the revision constants to maximise readability. 
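Concretely, the part number is assembled from the PIDR0/PIDR1 fields; a condensed sketch of the decode, using local macro names that mirror the constants added below:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/io.h>
#include <linux/types.h>

#define PERIPH_ID_01	0x0008	/* PIDR0/PIDR1 pair, read as one 64-bit word */
#define PID0_PART_0	GENMASK_ULL(7, 0)
#define PID1_PART_1	GENMASK_ULL(35, 32)

static u16 coresight_part_number(void __iomem *cfg_region)
{
	u64 reg = readq_relaxed(cfg_region + PERIPH_ID_01);

	/* e.g. 0x434 for CMN-600, 0x43c for CMN-700 */
	return FIELD_GET(PID0_PART_0, reg) |
	       FIELD_GET(PID1_PART_1, reg) << 8;
}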
Signed-off-by: Robin Murphy Reviewed-and-tested-by: Ilkka Koskinen Link: https://lore.kernel.org/r/3c791eaae814b0126f9adbd5419bfb4a600dade7.1686588640.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 145 +++++++++++++++++++++++++++++++------------------ 1 file changed, 93 insertions(+), 52 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 89a685a09d84..4a1dc4b527f6 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -44,8 +44,11 @@ #define CMN_MAX_DTMS (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4) /* The CFG node has various info besides the discovery tree */ -#define CMN_CFGM_PERIPH_ID_2 0x0010 -#define CMN_CFGM_PID2_REVISION GENMASK(7, 4) +#define CMN_CFGM_PERIPH_ID_01 0x0008 +#define CMN_CFGM_PID0_PART_0 GENMASK_ULL(7, 0) +#define CMN_CFGM_PID1_PART_1 GENMASK_ULL(35, 32) +#define CMN_CFGM_PERIPH_ID_23 0x0010 +#define CMN_CFGM_PID2_REVISION GENMASK_ULL(7, 4) #define CMN_CFGM_INFO_GLOBAL 0x900 #define CMN_INFO_MULTIPLE_DTM_EN BIT_ULL(63) @@ -186,6 +189,7 @@ #define CMN_WP_DOWN 2 +/* Internal values for encoding event support */ enum cmn_model { CMN600 = 1, CMN650 = 2, @@ -197,26 +201,34 @@ enum cmn_model { CMN_650ON = CMN650 | CMN700, }; +/* Actual part numbers and revision IDs defined by the hardware */ +enum cmn_part { + PART_CMN600 = 0x434, + PART_CMN650 = 0x436, + PART_CMN700 = 0x43c, + PART_CI700 = 0x43a, +}; + /* CMN-600 r0px shouldn't exist in silicon, thankfully */ enum cmn_revision { - CMN600_R1P0, - CMN600_R1P1, - CMN600_R1P2, - CMN600_R1P3, - CMN600_R2P0, - CMN600_R3P0, - CMN600_R3P1, - CMN650_R0P0 = 0, - CMN650_R1P0, - CMN650_R1P1, - CMN650_R2P0, - CMN650_R1P2, - CMN700_R0P0 = 0, - CMN700_R1P0, - CMN700_R2P0, - CI700_R0P0 = 0, - CI700_R1P0, - CI700_R2P0, + REV_CMN600_R1P0, + REV_CMN600_R1P1, + REV_CMN600_R1P2, + REV_CMN600_R1P3, + REV_CMN600_R2P0, + REV_CMN600_R3P0, + REV_CMN600_R3P1, + REV_CMN650_R0P0 = 0, + REV_CMN650_R1P0, + REV_CMN650_R1P1, + REV_CMN650_R2P0, + REV_CMN650_R1P2, + REV_CMN700_R0P0 = 0, + REV_CMN700_R1P0, + REV_CMN700_R2P0, + REV_CI700_R0P0 = 0, + REV_CI700_R1P0, + REV_CI700_R2P0, }; enum cmn_node_type { @@ -306,7 +318,7 @@ struct arm_cmn { unsigned int state; enum cmn_revision rev; - enum cmn_model model; + enum cmn_part part; u8 mesh_x; u8 mesh_y; u16 num_xps; @@ -394,19 +406,35 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, return NULL; } +static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn) +{ + switch (cmn->part) { + case PART_CMN600: + return CMN600; + case PART_CMN650: + return CMN650; + case PART_CMN700: + return CMN700; + case PART_CI700: + return CI700; + default: + return 0; + }; +} + static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn, const struct arm_cmn_node *xp, int port) { int offset = CMN_MXP__CONNECT_INFO(port); if (port >= 2) { - if (cmn->model & (CMN600 | CMN650)) + if (cmn->part == PART_CMN600 || cmn->part == PART_CMN650) return 0; /* * CI-700 may have extra ports, but still has the * mesh_port_connect_info registers in the way. 
*/ - if (cmn->model == CI700) + if (cmn->part == PART_CI700) offset += CI700_CONNECT_INFO_P2_5_OFFSET; } @@ -640,7 +668,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, eattr = container_of(attr, typeof(*eattr), attr.attr); - if (!(eattr->model & cmn->model)) + if (!(eattr->model & arm_cmn_model(cmn))) return 0; type = eattr->type; @@ -658,7 +686,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3))) return 0; - if (chan == 4 && cmn->model == CMN600) + if (chan == 4 && cmn->part == PART_CMN600) return 0; if ((chan == 5 && cmn->rsp_vc_num < 2) || @@ -669,19 +697,19 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, } /* Revision-specific differences */ - if (cmn->model == CMN600) { - if (cmn->rev < CMN600_R1P3) { + if (cmn->part == PART_CMN600) { + if (cmn->rev < REV_CMN600_R1P3) { if (type == CMN_TYPE_CXRA && eventid > 0x10) return 0; } - if (cmn->rev < CMN600_R1P2) { + if (cmn->rev < REV_CMN600_R1P2) { if (type == CMN_TYPE_HNF && eventid == 0x1b) return 0; if (type == CMN_TYPE_CXRA || type == CMN_TYPE_CXHA) return 0; } - } else if (cmn->model == CMN650) { - if (cmn->rev < CMN650_R2P0 || cmn->rev == CMN650_R1P2) { + } else if (cmn->part == PART_CMN650) { + if (cmn->rev < REV_CMN650_R2P0 || cmn->rev == REV_CMN650_R1P2) { if (type == CMN_TYPE_HNF && eventid > 0x22) return 0; if (type == CMN_TYPE_SBSX && eventid == 0x17) @@ -689,8 +717,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if (type == CMN_TYPE_RNI && eventid > 0x10) return 0; } - } else if (cmn->model == CMN700) { - if (cmn->rev < CMN700_R2P0) { + } else if (cmn->part == PART_CMN700) { + if (cmn->rev < REV_CMN700_R2P0) { if (type == CMN_TYPE_HNF && eventid > 0x2c) return 0; if (type == CMN_TYPE_CCHA && eventid > 0x74) @@ -698,7 +726,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if (type == CMN_TYPE_CCLA && eventid > 0x27) return 0; } - if (cmn->rev < CMN700_R1P0) { + if (cmn->rev < REV_CMN700_R1P0) { if (type == CMN_TYPE_HNF && eventid > 0x2b) return 0; } @@ -1200,7 +1228,7 @@ static u32 arm_cmn_wp_config(struct perf_event *event) u32 grp = CMN_EVENT_WP_GRP(event); u32 exc = CMN_EVENT_WP_EXCLUSIVE(event); u32 combine = CMN_EVENT_WP_COMBINE(event); - bool is_cmn600 = to_cmn(event->pmu)->model == CMN600; + bool is_cmn600 = to_cmn(event->pmu)->part == PART_CMN600; config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | @@ -1520,14 +1548,14 @@ done: return ret; } -static enum cmn_filter_select arm_cmn_filter_sel(enum cmn_model model, +static enum cmn_filter_select arm_cmn_filter_sel(const struct arm_cmn *cmn, enum cmn_node_type type, unsigned int eventid) { struct arm_cmn_event_attr *e; - int i; + enum cmn_model model = arm_cmn_model(cmn); - for (i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) { + for (int i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) { e = container_of(arm_cmn_event_attrs[i], typeof(*e), attr.attr); if (e->model & model && e->type == type && e->eventid == eventid) return e->fsel; @@ -1570,12 +1598,12 @@ static int arm_cmn_event_init(struct perf_event *event) /* ...but the DTM may depend on which port we're watching */ if (cmn->multi_dtm) hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2; - } else if (type == CMN_TYPE_XP && cmn->model == CMN700) { + } else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) { hw->wide_sel = true; } /* This is sufficiently annoying to recalculate, so cache it */ - 
hw->filter_sel = arm_cmn_filter_sel(cmn->model, type, eventid); + hw->filter_sel = arm_cmn_filter_sel(cmn, type, eventid); bynodeid = CMN_EVENT_BYNODEID(event); nodeid = CMN_EVENT_NODEID(event); @@ -2007,6 +2035,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) void __iomem *cfg_region; struct arm_cmn_node cfg, *dn; struct arm_cmn_dtm *dtm; + enum cmn_part part; u16 child_count, child_poff; u32 xp_offset[CMN_MAX_XPS]; u64 reg; @@ -2018,7 +2047,19 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) return -ENODEV; cfg_region = cmn->base + rgn_offset; - reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2); + + reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01); + part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg); + part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8; + if (cmn->part && cmn->part != part) + dev_warn(cmn->dev, + "Firmware binding mismatch: expected part number 0x%x, found 0x%x\n", + cmn->part, part); + cmn->part = part; + if (!arm_cmn_model(cmn)) + dev_warn(cmn->dev, "Unknown part number: 0x%x\n", part); + + reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23); cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg); reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL); @@ -2082,7 +2123,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (xp->id == (1 << 3)) cmn->mesh_x = xp->logid; - if (cmn->model == CMN600) + if (cmn->part == PART_CMN600) xp->dtc = 0xf; else xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO); @@ -2202,7 +2243,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (cmn->num_xps == 1) dev_warn(cmn->dev, "1x1 config not fully supported, translate XP events manually\n"); - dev_dbg(cmn->dev, "model %d, periph_id_2 revision %d\n", cmn->model, cmn->rev); + dev_dbg(cmn->dev, "periph_id part 0x%03x revision %d\n", cmn->part, cmn->rev); reg = cmn->ports_used; dev_dbg(cmn->dev, "mesh %dx%d, ID width %d, ports %6pbl%s\n", cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn), ®, @@ -2257,17 +2298,17 @@ static int arm_cmn_probe(struct platform_device *pdev) return -ENOMEM; cmn->dev = &pdev->dev; - cmn->model = (unsigned long)device_get_match_data(cmn->dev); + cmn->part = (unsigned long)device_get_match_data(cmn->dev); platform_set_drvdata(pdev, cmn); - if (cmn->model == CMN600 && has_acpi_companion(cmn->dev)) { + if (cmn->part == PART_CMN600 && has_acpi_companion(cmn->dev)) { rootnode = arm_cmn600_acpi_probe(pdev, cmn); } else { rootnode = 0; cmn->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cmn->base)) return PTR_ERR(cmn->base); - if (cmn->model == CMN600) + if (cmn->part == PART_CMN600) rootnode = arm_cmn600_of_probe(pdev->dev.of_node); } if (rootnode < 0) @@ -2336,10 +2377,10 @@ static int arm_cmn_remove(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id arm_cmn_of_match[] = { - { .compatible = "arm,cmn-600", .data = (void *)CMN600 }, - { .compatible = "arm,cmn-650", .data = (void *)CMN650 }, - { .compatible = "arm,cmn-700", .data = (void *)CMN700 }, - { .compatible = "arm,ci-700", .data = (void *)CI700 }, + { .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 }, + { .compatible = "arm,cmn-650" }, + { .compatible = "arm,cmn-700" }, + { .compatible = "arm,ci-700" }, {} }; MODULE_DEVICE_TABLE(of, arm_cmn_of_match); @@ -2347,9 +2388,9 @@ MODULE_DEVICE_TABLE(of, arm_cmn_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id arm_cmn_acpi_match[] = { - { "ARMHC600", CMN600 }, - { "ARMHC650", CMN650 }, - { 
"ARMHC700", CMN700 }, + { "ARMHC600", PART_CMN600 }, + { "ARMHC650" }, + { "ARMHC700" }, {} }; MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match); -- cgit v1.2.3 From a1c45d3ebd30cf8b9b1131c1335d603f3c46999c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jun 2023 18:16:33 +0100 Subject: perf/arm-cmn: Add sysfs identifier Expose a sysfs identifier encapsulating the CMN part number and revision so that jevents can narrow down a fundamental set of possible events for calculating metrics. Configuration-dependent aspects - such as whether a given node type is present, and/or a given node ID is valid - are still not covered, and in general it's hard to see how userspace could handle them, so we won't be removing any data or validation logic from the driver any time soon, but at least it's a step in a useful direction. Signed-off-by: Robin Murphy Reviewed-and-tested-by: Ilkka Koskinen Tested-by: Jing Zhang Link: https://lore.kernel.org/r/b8a14c14fcdf028939ebf57849863e8ae01743de.1686588640.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 4a1dc4b527f6..b8c15878bc86 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1199,19 +1199,31 @@ static ssize_t arm_cmn_cpumask_show(struct device *dev, static struct device_attribute arm_cmn_cpumask_attr = __ATTR(cpumask, 0444, arm_cmn_cpumask_show, NULL); -static struct attribute *arm_cmn_cpumask_attrs[] = { +static ssize_t arm_cmn_identifier_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%03x%02x\n", cmn->part, cmn->rev); +} + +static struct device_attribute arm_cmn_identifier_attr = + __ATTR(identifier, 0444, arm_cmn_identifier_show, NULL); + +static struct attribute *arm_cmn_other_attrs[] = { &arm_cmn_cpumask_attr.attr, + &arm_cmn_identifier_attr.attr, NULL, }; -static const struct attribute_group arm_cmn_cpumask_attr_group = { - .attrs = arm_cmn_cpumask_attrs, +static const struct attribute_group arm_cmn_other_attrs_group = { + .attrs = arm_cmn_other_attrs, }; static const struct attribute_group *arm_cmn_attr_groups[] = { &arm_cmn_event_attrs_group, &arm_cmn_format_attrs_group, - &arm_cmn_cpumask_attr_group, + &arm_cmn_other_attrs_group, NULL }; -- cgit v1.2.3 From 7e51d05e43f19f394b3b0df01a22225143f9c5b5 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 15 Jun 2023 16:26:30 -0700 Subject: perf: arm_cspmu: Add missing MODULE_DEVICE_TABLE Add missing MODULE_DEVICE_TABLE definition to generate modalias, which enables module autoloading. 
Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20230615232630.304870-1-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 44a9f7e25fb2..e2b7827c4563 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1213,6 +1213,12 @@ static int arm_cspmu_device_remove(struct platform_device *pdev) return 0; } +static const struct platform_device_id arm_cspmu_id[] = { + {DRVNAME, 0}, + { }, +}; +MODULE_DEVICE_TABLE(platform, arm_cspmu_id); + static struct platform_driver arm_cspmu_driver = { .driver = { .name = DRVNAME, @@ -1220,6 +1226,7 @@ static struct platform_driver arm_cspmu_driver = { }, .probe = arm_cspmu_device_probe, .remove = arm_cspmu_device_remove, + .id_table = arm_cspmu_id, }; static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu) -- cgit v1.2.3 From 1a51688474c0d395b864e98236335fba712e29bf Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 15 Jun 2023 20:59:24 +0800 Subject: drivers/perf: hisi: Add support for HiSilicon H60PA and PAv3 PMU driver Compared to the original PA device, the H60PA offers higher bandwidth. The H60PA is a new device, and we use its ACPI HID to differentiate it. The events supported by PAv3 and PAv2 also differ: the PAv3 PMU removes some events that the PAv2 PMU supports, so the older PA PMU driver would probe v3 hardware as v2 and the PA events displayed by "perf list" could not work properly. We add the HISI0275 HID for the PAv3 PMU to distinguish the two. Except for the overflow interrupt registers, each H60PA PMU functions the same as the original PA PMU module. It has 8 programmable counters, each free-running, and an interrupt is raised to handle 64-bit counter overflow.
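The variant selection below follows a standard idiom: each ACPI HID carries a pointer to a per-device description in its driver_data, and probe fetches it with device_get_match_data(). A condensed sketch with illustrative names (the register offsets mirror the patch; everything else is hypothetical):

	#include <linux/acpi.h>
	#include <linux/mod_devicetable.h>
	#include <linux/platform_device.h>
	#include <linux/property.h>

	/* Per-variant interrupt register layout */
	struct example_int_regs {
		u32 mask_offset;
		u32 clear_offset;
		u32 status_offset;
	};

	static const struct example_int_regs example_v2_regs = {
		.mask_offset   = 0x1c70,
		.clear_offset  = 0x1c7c,
		.status_offset = 0x1c78,
	};

	static const struct example_int_regs example_h60_regs = {
		/* The status register is clear-on-write, so it serves both roles */
		.mask_offset   = 0x1c74,
		.clear_offset  = 0x1c70,
		.status_offset = 0x1c70,
	};

	static int example_probe(struct platform_device *pdev)
	{
		/* Returns the driver_data of whichever table entry matched */
		const struct example_int_regs *regs =
			device_get_match_data(&pdev->dev);

		if (!regs)
			return -ENODEV;
		return 0;
	}

	static const struct acpi_device_id example_acpi_match[] = {
		{ "EXMP0001", (kernel_ulong_t)&example_v2_regs },
		{ "EXMP0002", (kernel_ulong_t)&example_h60_regs },
		{ }
	};

This keeps the shared counter and event paths untouched while confining the per-variant differences to data rather than code.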
Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Reviewed-by: Yicong Yang Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230615125926.29832-2-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 127 ++++++++++++++++++++++++---- drivers/perf/hisilicon/hisi_uncore_pmu.h | 8 ++ 2 files changed, 120 insertions(+), 15 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 71b6687d6696..d941e746b424 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -22,9 +22,15 @@ #define PA_TT_CTRL 0x1c08 #define PA_TGTID_CTRL 0x1c14 #define PA_SRCID_CTRL 0x1c18 + +/* H32 PA interrupt registers */ #define PA_INT_MASK 0x1c70 #define PA_INT_STATUS 0x1c78 #define PA_INT_CLEAR 0x1c7c + +#define H60PA_INT_STATUS 0x1c70 +#define H60PA_INT_MASK 0x1c74 + #define PA_EVENT_TYPE0 0x1c80 #define PA_PMU_VERSION 0x1cf0 #define PA_EVENT_CNT0_L 0x1d00 @@ -46,6 +52,12 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); +struct hisi_pa_pmu_int_regs { + u32 mask_offset; + u32 clear_offset; + u32 status_offset; +}; + static void hisi_pa_pmu_enable_tracetag(struct perf_event *event) { struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); @@ -219,40 +231,40 @@ static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu, static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu, struct hw_perf_event *hwc) { + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; u32 val; /* Write 0 to enable interrupt */ - val = readl(pa_pmu->base + PA_INT_MASK); + val = readl(pa_pmu->base + regs->mask_offset); val &= ~(1 << hwc->idx); - writel(val, pa_pmu->base + PA_INT_MASK); + writel(val, pa_pmu->base + regs->mask_offset); } static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu, struct hw_perf_event *hwc) { + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; u32 val; /* Write 1 to mask interrupt */ - val = readl(pa_pmu->base + PA_INT_MASK); + val = readl(pa_pmu->base + regs->mask_offset); val |= 1 << hwc->idx; - writel(val, pa_pmu->base + PA_INT_MASK); + writel(val, pa_pmu->base + regs->mask_offset); } static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu) { - return readl(pa_pmu->base + PA_INT_STATUS); + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; + + return readl(pa_pmu->base + regs->status_offset); } static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx) { - writel(1 << idx, pa_pmu->base + PA_INT_CLEAR); -} + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; -static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = { - { "HISI0273", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match); + writel(1 << idx, pa_pmu->base + regs->clear_offset); +} static int hisi_pa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *pa_pmu) @@ -276,6 +288,10 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev, pa_pmu->ccl_id = -1; pa_pmu->sccl_id = -1; + pa_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!pa_pmu->dev_info) + return -ENODEV; + pa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pa_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for pa_pmu resource.\n"); @@ -314,6 +330,32 @@ static const struct attribute_group hisi_pa_pmu_v2_events_group = { 
.attrs = hisi_pa_pmu_v2_events_attr, }; +static struct attribute *hisi_pa_pmu_v3_events_attr[] = { + HISI_PMU_EVENT_ATTR(tx_req, 0x0), + HISI_PMU_EVENT_ATTR(tx_dat, 0x1), + HISI_PMU_EVENT_ATTR(tx_snp, 0x2), + HISI_PMU_EVENT_ATTR(rx_req, 0x7), + HISI_PMU_EVENT_ATTR(rx_dat, 0x8), + HISI_PMU_EVENT_ATTR(rx_snp, 0x9), + NULL +}; + +static const struct attribute_group hisi_pa_pmu_v3_events_group = { + .name = "events", + .attrs = hisi_pa_pmu_v3_events_attr, +}; + +static struct attribute *hisi_h60pa_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(rx_flit, 0x50), + HISI_PMU_EVENT_ATTR(tx_flit, 0x65), + NULL +}; + +static const struct attribute_group hisi_h60pa_pmu_events_group = { + .name = "events", + .attrs = hisi_h60pa_pmu_events_attr, +}; + static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { @@ -337,6 +379,12 @@ static const struct attribute_group hisi_pa_pmu_identifier_group = { .attrs = hisi_pa_pmu_identifier_attrs, }; +static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { + .mask_offset = PA_INT_MASK, + .clear_offset = PA_INT_CLEAR, + .status_offset = PA_INT_STATUS, +}; + static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v2_events_group, @@ -345,6 +393,46 @@ static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { NULL }; +static const struct hisi_pmu_dev_info hisi_h32pa_v2 = { + .name = "pa", + .attr_groups = hisi_pa_pmu_v2_attr_groups, + .private = &hisi_pa_pmu_regs, +}; + +static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_pa_pmu_v3_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_pmu_dev_info hisi_h32pa_v3 = { + .name = "pa", + .attr_groups = hisi_pa_pmu_v3_attr_groups, + .private = &hisi_pa_pmu_regs, +}; + +static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = { + .mask_offset = H60PA_INT_MASK, + .clear_offset = H60PA_INT_STATUS, /* Clear on write */ + .status_offset = H60PA_INT_STATUS, +}; + +static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_h60pa_pmu_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_pmu_dev_info hisi_h60pa = { + .name = "h60pa", + .attr_groups = hisi_h60pa_pmu_attr_groups, + .private = &hisi_h60pa_pmu_regs, +}; + static const struct hisi_uncore_ops hisi_uncore_pa_ops = { .write_evtype = hisi_pa_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -375,7 +463,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - pa_pmu->pmu_events.attr_groups = hisi_pa_pmu_v2_attr_groups; + pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups; pa_pmu->num_counters = PA_NR_COUNTERS; pa_pmu->ops = &hisi_uncore_pa_ops; pa_pmu->check_event = 0xB0; @@ -400,8 +488,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u", - pa_pmu->sicl_id, pa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u", + pa_pmu->sicl_id, pa_pmu->dev_info->name, + pa_pmu->index_id); if (!name) return -ENOMEM; @@ -435,6 +524,14 @@ static int hisi_pa_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = { + { "HISI0273", (kernel_ulong_t)&hisi_h32pa_v2 }, + { 
"HISI0275", (kernel_ulong_t)&hisi_h32pa_v3 }, + { "HISI0274", (kernel_ulong_t)&hisi_h60pa }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match); + static struct platform_driver hisi_pa_pmu_driver = { .driver = { .name = "hisi_pa_pmu", diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 07890a8e96ca..772857b99dc5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -62,6 +62,13 @@ struct hisi_uncore_ops { void (*disable_filter)(struct perf_event *event); }; +/* Describes the HISI PMU chip features information */ +struct hisi_pmu_dev_info { + const char *name; + const struct attribute_group **attr_groups; + void *private; +}; + struct hisi_pmu_hwevents { struct perf_event *hw_events[HISI_MAX_COUNTERS]; DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS); @@ -72,6 +79,7 @@ struct hisi_pmu_hwevents { struct hisi_pmu { struct pmu pmu; const struct hisi_uncore_ops *ops; + const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; /* associated_cpus: All CPUs associated with the PMU */ cpumask_t associated_cpus; -- cgit v1.2.3 From 312eca95e28d40694aee7c78a18ac0ecfd6d8159 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 15 Jun 2023 20:59:25 +0800 Subject: drivers/perf: hisi: Add support for HiSilicon UC PMU driver On HiSilicon Hip09 platform, there are 4 UC (unified cache) modules on each chip CCL (CPU Cluster). UC is a cache that provides coherence between NUMA and UMA domains. It is located between L2 and Memory System. Many PMU events are supported. Let's support the UC PMU driver using the HiSilicon uncore PMU framework. * rd_req_en : rd_req_en is the abbreviation of read request tracetag enable and allows user to count only read operations. Details are listed in the hisi-pmu document at Documentation/admin-guide/perf/hisi-pmu.rst * srcid_en & srcid: Allows users to filter statistical information based on specific CPU/ICL by srcid. srcid_en depends on rd_req_en being enabled. * uring_channel: Allows users to filter statistical information based on the specified tx request uring channel. uring_channel only supported events: [0x47 ~ 0x59]. 
Signed-off-by: Junhao He Reviewed-by: Yicong Yang Reviewed-by: Jonathan Cameron Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230615125926.29832-3-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/Makefile | 2 +- drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 +- drivers/perf/hisilicon/hisi_uncore_pmu.h | 6 + drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 578 ++++++++++++++++++++++++++++ 4 files changed, 588 insertions(+), 2 deletions(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_uc_pmu.c (limited to 'drivers/perf') diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 4d2c9abe3372..48dcc8381ea7 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 2823f381930d..04031450d5fe 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -20,7 +20,6 @@ #include "hisi_uncore_pmu.h" -#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) #define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0)) /* @@ -226,6 +225,9 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) hwc->idx = -1; hwc->config_base = event->attr.config; + if (hisi_pmu->ops->check_filter && hisi_pmu->ops->check_filter(event)) + return -EINVAL; + /* Enforce to use the same CPU for all events in this PMU */ event->cpu = hisi_pmu->on_cpu; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 772857b99dc5..92402aa69d70 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -43,9 +43,15 @@ return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \ } +#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) + +#define HISI_PMU_EVTYPE_BITS 8 +#define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS) + struct hisi_pmu; struct hisi_uncore_ops { + int (*check_filter)(struct perf_event *event); void (*write_evtype)(struct hisi_pmu *, int, u32); int (*get_event_idx)(struct perf_event *); u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c new file mode 100644 index 000000000000..63da05e5831c --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC UC (unified cache) uncore Hardware event counters support + * + * Copyright (C) 2023 HiSilicon Limited + * + * This code is based on the uncore PMUs like hisi_uncore_l3c_pmu. 
+ */ +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* Dynamic CPU hotplug state used by UC PMU */ +static enum cpuhp_state hisi_uc_pmu_online; + +/* UC register definition */ +#define HISI_UC_INT_MASK_REG 0x0800 +#define HISI_UC_INT_STS_REG 0x0808 +#define HISI_UC_INT_CLEAR_REG 0x080c +#define HISI_UC_TRACETAG_CTRL_REG 0x1b2c +#define HISI_UC_TRACETAG_REQ_MSK GENMASK(9, 7) +#define HISI_UC_TRACETAG_MARK_EN BIT(0) +#define HISI_UC_TRACETAG_REQ_EN (HISI_UC_TRACETAG_MARK_EN | BIT(2)) +#define HISI_UC_TRACETAG_SRCID_EN BIT(3) +#define HISI_UC_SRCID_CTRL_REG 0x1b40 +#define HISI_UC_SRCID_MSK GENMASK(14, 1) +#define HISI_UC_EVENT_CTRL_REG 0x1c00 +#define HISI_UC_EVENT_TRACETAG_EN BIT(29) +#define HISI_UC_EVENT_URING_MSK GENMASK(28, 27) +#define HISI_UC_EVENT_GLB_EN BIT(26) +#define HISI_UC_VERSION_REG 0x1cf0 +#define HISI_UC_EVTYPE_REGn(n) (0x1d00 + (n) * 4) +#define HISI_UC_EVTYPE_MASK GENMASK(7, 0) +#define HISI_UC_CNTR_REGn(n) (0x1e00 + (n) * 8) + +#define HISI_UC_NR_COUNTERS 0x8 +#define HISI_UC_V2_NR_EVENTS 0xFF +#define HISI_UC_CNTR_REG_BITS 64 + +#define HISI_UC_RD_REQ_TRACETAG 0x4 +#define HISI_UC_URING_EVENT_MIN 0x47 +#define HISI_UC_URING_EVENT_MAX 0x59 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(rd_req_en, config1, 0, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(uring_channel, config1, 5, 4); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid, config1, 19, 6); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_en, config1, 20, 20); + +static int hisi_uc_pmu_check_filter(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + + if (hisi_get_srcid_en(event) && !hisi_get_rd_req_en(event)) { + dev_err(uc_pmu->dev, + "rcid_en depends on rd_req_en being enabled!\n"); + return -EINVAL; + } + + if (!hisi_get_uring_channel(event)) + return 0; + + if ((HISI_GET_EVENTID(event) < HISI_UC_URING_EVENT_MIN) || + (HISI_GET_EVENTID(event) > HISI_UC_URING_EVENT_MAX)) + dev_warn(uc_pmu->dev, + "Only events: [%#x ~ %#x] support channel filtering!", + HISI_UC_URING_EVENT_MIN, HISI_UC_URING_EVENT_MAX); + + return 0; +} + +static void hisi_uc_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_rd_req_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* The request-type has been configured */ + if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == HISI_UC_RD_REQ_TRACETAG) + return; + + /* Set request-type for tracetag, only read request is supported! 
*/ + val &= ~HISI_UC_TRACETAG_REQ_MSK; + val |= FIELD_PREP(HISI_UC_TRACETAG_REQ_MSK, HISI_UC_RD_REQ_TRACETAG); + val |= HISI_UC_TRACETAG_REQ_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); +} + +static void hisi_uc_pmu_clear_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_rd_req_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the request-type tracetag has been cleaned up */ + if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == 0) + return; + + /* Clear request-type */ + val &= ~HISI_UC_TRACETAG_REQ_MSK; + val &= ~HISI_UC_TRACETAG_REQ_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); +} + +static void hisi_uc_pmu_config_srcid_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_srcid_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the source id has been configured */ + if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val)) + return; + + /* Enable source id tracetag */ + val |= HISI_UC_TRACETAG_SRCID_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + val &= ~HISI_UC_SRCID_MSK; + val |= FIELD_PREP(HISI_UC_SRCID_MSK, hisi_get_srcid(event)); + writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + + /* Depend on request-type tracetag enabled */ + hisi_uc_pmu_config_req_tracetag(event); +} + +static void hisi_uc_pmu_clear_srcid_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_srcid_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the source id has been cleaned up */ + if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val) == 0) + return; + + hisi_uc_pmu_clear_req_tracetag(event); + + /* Disable source id tracetag */ + val &= ~HISI_UC_TRACETAG_SRCID_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + val &= ~HISI_UC_SRCID_MSK; + writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG); +} + +static void hisi_uc_pmu_config_uring_channel(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 uring_channel = hisi_get_uring_channel(event); + u32 val; + + /* Do nothing if not being set or is set explicitly to zero (default) */ + if (uring_channel == 0) + return; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + + /* Do nothing, the uring_channel has been configured */ + if (uring_channel == FIELD_GET(HISI_UC_EVENT_URING_MSK, val)) + return; + + val &= ~HISI_UC_EVENT_URING_MSK; + val |= FIELD_PREP(HISI_UC_EVENT_URING_MSK, uring_channel); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_clear_uring_channel(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + /* Do nothing if not being set or is set explicitly to zero (default) */ + if (hisi_get_uring_channel(event) == 0) + return; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + + /* Do nothing, the uring_channel has been cleaned up */ + if (FIELD_GET(HISI_UC_EVENT_URING_MSK, val) == 0) + return; + + val &= ~HISI_UC_EVENT_URING_MSK; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 == 0) + return; + + 
hisi_uc_pmu_config_uring_channel(event); + hisi_uc_pmu_config_req_tracetag(event); + hisi_uc_pmu_config_srcid_tracetag(event); +} + +static void hisi_uc_pmu_disable_filter(struct perf_event *event) +{ + if (event->attr.config1 == 0) + return; + + hisi_uc_pmu_clear_srcid_tracetag(event); + hisi_uc_pmu_clear_req_tracetag(event); + hisi_uc_pmu_clear_uring_channel(event); +} + +static void hisi_uc_pmu_write_evtype(struct hisi_pmu *uc_pmu, int idx, u32 type) +{ + u32 val; + + /* + * Select the appropriate event select register. + * There are 2 32-bit event select registers for the + * 8 hardware counters, each event code is 8-bit wide. + */ + val = readl(uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4)); + val &= ~(HISI_UC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); + writel(val, uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4)); +} + +static void hisi_uc_pmu_start_counters(struct hisi_pmu *uc_pmu) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val |= HISI_UC_EVENT_GLB_EN; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_stop_counters(struct hisi_pmu *uc_pmu) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val &= ~HISI_UC_EVENT_GLB_EN; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_enable_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Enable counter index */ + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val |= (1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_disable_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index */ + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val &= ~(1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static u64 hisi_uc_pmu_read_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + return readq(uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); +} + +static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); +} + +static void hisi_uc_pmu_enable_counter_int(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG); + val &= ~(1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG); +} + +static void hisi_uc_pmu_disable_counter_int(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG); + val |= (1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG); +} + +static u32 hisi_uc_pmu_get_int_status(struct hisi_pmu *uc_pmu) +{ + return readl(uc_pmu->base + HISI_UC_INT_STS_REG); +} + +static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx) +{ + writel(1 << idx, uc_pmu->base + HISI_UC_INT_CLEAR_REG); +} + +static int hisi_uc_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *uc_pmu) +{ + /* + * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to + * identify the topology information of UC PMU devices in the chip. + * They have some CCLs per SCCL and then 4 UC PMU per CCL. 
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &uc_pmu->sccl_id)) { + dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", + &uc_pmu->ccl_id)) { + dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", + &uc_pmu->sub_id)) { + dev_err(&pdev->dev, "Can not read sub-id!\n"); + return -EINVAL; + } + + uc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(uc_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for uc_pmu resource\n"); + return PTR_ERR(uc_pmu->base); + } + + uc_pmu->identifier = readl(uc_pmu->base + HISI_UC_VERSION_REG); + + return 0; +} + +static struct attribute *hisi_uc_pmu_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(rd_req_en, "config1:0-0"), + HISI_PMU_FORMAT_ATTR(uring_channel, "config1:4-5"), + HISI_PMU_FORMAT_ATTR(srcid, "config1:6-19"), + HISI_PMU_FORMAT_ATTR(srcid_en, "config1:20-20"), + NULL +}; + +static const struct attribute_group hisi_uc_pmu_format_group = { + .name = "format", + .attrs = hisi_uc_pmu_format_attr, +}; + +static struct attribute *hisi_uc_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(sq_time, 0x00), + HISI_PMU_EVENT_ATTR(pq_time, 0x01), + HISI_PMU_EVENT_ATTR(hbm_time, 0x02), + HISI_PMU_EVENT_ATTR(iq_comp_time_cring, 0x03), + HISI_PMU_EVENT_ATTR(iq_comp_time_uring, 0x05), + HISI_PMU_EVENT_ATTR(cpu_rd, 0x10), + HISI_PMU_EVENT_ATTR(cpu_rd64, 0x17), + HISI_PMU_EVENT_ATTR(cpu_rs64, 0x19), + HISI_PMU_EVENT_ATTR(cpu_mru, 0x1a), + HISI_PMU_EVENT_ATTR(cycles, 0x9c), + HISI_PMU_EVENT_ATTR(spipe_hit, 0xb3), + HISI_PMU_EVENT_ATTR(hpipe_hit, 0xdb), + HISI_PMU_EVENT_ATTR(cring_rxdat_cnt, 0xfa), + HISI_PMU_EVENT_ATTR(cring_txdat_cnt, 0xfb), + HISI_PMU_EVENT_ATTR(uring_rxdat_cnt, 0xfc), + HISI_PMU_EVENT_ATTR(uring_txdat_cnt, 0xfd), + NULL +}; + +static const struct attribute_group hisi_uc_pmu_events_group = { + .name = "events", + .attrs = hisi_uc_pmu_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_uc_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = { + .attrs = hisi_uc_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_uc_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_uc_pmu_identifier_attrs[] = { + &hisi_uc_pmu_identifier_attr.attr, + NULL +}; + +static const struct attribute_group hisi_uc_pmu_identifier_group = { + .attrs = hisi_uc_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_uc_pmu_attr_groups[] = { + &hisi_uc_pmu_format_group, + &hisi_uc_pmu_events_group, + &hisi_uc_pmu_cpumask_attr_group, + &hisi_uc_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_uc_pmu_ops = { + .check_filter = hisi_uc_pmu_check_filter, + .write_evtype = hisi_uc_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_uc_pmu_start_counters, + .stop_counters = hisi_uc_pmu_stop_counters, + .enable_counter = hisi_uc_pmu_enable_counter, + .disable_counter = hisi_uc_pmu_disable_counter, + .enable_counter_int = hisi_uc_pmu_enable_counter_int, + .disable_counter_int = hisi_uc_pmu_disable_counter_int, + .write_counter = hisi_uc_pmu_write_counter, + .read_counter = hisi_uc_pmu_read_counter, + .get_int_status = 
hisi_uc_pmu_get_int_status, + .clear_int_status = hisi_uc_pmu_clear_int_status, + .enable_filter = hisi_uc_pmu_enable_filter, + .disable_filter = hisi_uc_pmu_disable_filter, +}; + +static int hisi_uc_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *uc_pmu) +{ + int ret; + + ret = hisi_uc_pmu_init_data(pdev, uc_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(uc_pmu, pdev); + if (ret) + return ret; + + uc_pmu->pmu_events.attr_groups = hisi_uc_pmu_attr_groups; + uc_pmu->check_event = HISI_UC_EVTYPE_MASK; + uc_pmu->ops = &hisi_uncore_uc_pmu_ops; + uc_pmu->counter_bits = HISI_UC_CNTR_REG_BITS; + uc_pmu->num_counters = HISI_UC_NR_COUNTERS; + uc_pmu->dev = &pdev->dev; + uc_pmu->on_cpu = -1; + + return 0; +} + +static void hisi_uc_pmu_remove_cpuhp_instance(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_uc_pmu_online, hotplug_node); +} + +static void hisi_uc_pmu_unregister_pmu(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_uc_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *uc_pmu; + char *name; + int ret; + + uc_pmu = devm_kzalloc(&pdev->dev, sizeof(*uc_pmu), GFP_KERNEL); + if (!uc_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, uc_pmu); + + ret = hisi_uc_pmu_dev_probe(pdev, uc_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u", + uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(hisi_uc_pmu_online, &uc_pmu->node); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Error registering hotplug\n"); + + ret = devm_add_action_or_reset(&pdev->dev, + hisi_uc_pmu_remove_cpuhp_instance, + &uc_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(uc_pmu, THIS_MODULE); + + ret = perf_pmu_register(&uc_pmu->pmu, name, -1); + if (ret) + return ret; + + return devm_add_action_or_reset(&pdev->dev, + hisi_uc_pmu_unregister_pmu, + &uc_pmu->pmu); +} + +static const struct acpi_device_id hisi_uc_pmu_acpi_match[] = { + { "HISI0291", }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_uc_pmu_acpi_match); + +static struct platform_driver hisi_uc_pmu_driver = { + .driver = { + .name = "hisi_uc_pmu", + .acpi_match_table = hisi_uc_pmu_acpi_match, + /* + * We have not worked out a safe bind/unbind process, + * Forcefully unbinding during sampling will lead to a + * kernel panic, so this is not supported yet. + */ + .suppress_bind_attrs = true, + }, + .probe = hisi_uc_pmu_probe, +}; + +static int __init hisi_uc_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/hisi/uc:online", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret < 0) { + pr_err("UC PMU: Error setup hotplug, ret = %d\n", ret); + return ret; + } + hisi_uc_pmu_online = ret; + + ret = platform_driver_register(&hisi_uc_pmu_driver); + if (ret) + cpuhp_remove_multi_state(hisi_uc_pmu_online); + + return ret; +} +module_init(hisi_uc_pmu_module_init); + +static void __exit hisi_uc_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_uc_pmu_driver); + cpuhp_remove_multi_state(hisi_uc_pmu_online); +} +module_exit(hisi_uc_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Junhao He "); -- cgit v1.2.3
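One subtlety in the probe path above: devm actions run in reverse order of registration, so the PMU-unregister action added last runs before the hotplug-instance removal added first, guaranteeing the PMU is unregistered while its hotplug instance is still valid. A self-contained sketch of that ordering, with hypothetical names:

	#include <linux/device.h>
	#include <linux/module.h>
	#include <linux/platform_device.h>

	static void example_undo(void *what)
	{
		pr_info("undo: %s\n", (const char *)what);
	}

	static int example_probe(struct platform_device *pdev)
	{
		int ret;

		/* Registered first, therefore released LAST on failure or removal */
		ret = devm_add_action_or_reset(&pdev->dev, example_undo, "early step");
		if (ret)
			return ret;

		/* Registered last, therefore released FIRST on failure or removal */
		return devm_add_action_or_reset(&pdev->dev, example_undo, "late step");
	}

On device removal this prints "undo: late step" before "undo: early step", which is exactly the property hisi_uc_pmu_probe() relies on.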