From f66ae597411cb0d11ed3c281ef2ae809c2e25cb0 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Thu, 28 Mar 2024 16:57:54 +0100 Subject: drivers: perf: Remove the now superfluous sentinel elements from ctl_table array This commit comes at the tail end of a greater effort to remove the empty elements at the end of the ctl_table arrays (sentinels) which will reduce the overall build time size of the kernel and run time memory bloat by ~64 bytes per sentinel (further information Link : https://lore.kernel.org/all/ZO5Yx5JFogGi%2FcBo@bombadil.infradead.org/) Remove sentinel from sbi_pmu_sysctl_table Signed-off-by: Joel Granados Link: https://lore.kernel.org/r/20240328-jag-sysctl_remset_misc-v1-7-47c1463b3af2@samsung.com Signed-off-by: Will Deacon --- drivers/perf/riscv_pmu_sbi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 8cbe6e5f9c39..5aef5a8737b2 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -1043,7 +1043,6 @@ static struct ctl_table sbi_pmu_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, - { } }; static int pmu_sbi_device_probe(struct platform_device *pdev) -- cgit v1.2.3 From 105350fe07862c7f919828250f306ec674240b66 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 4 Apr 2024 19:59:23 +0300 Subject: drivers/perf: thunderx2_pmu: Replace open coded acpi_match_acpi_device() Replace open coded acpi_match_acpi_device() in get_tx2_pmu_type(). Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240404170016.2466898-1-andriy.shevchenko@linux.intel.com Signed-off-by: Will Deacon --- drivers/perf/thunderx2_pmu.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index e16d10c763de..f03aa85072ec 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -504,24 +504,19 @@ static void tx2_uncore_event_update(struct perf_event *event) static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev) { - int i = 0; - struct acpi_tx2_pmu_device { - __u8 id[ACPI_ID_LEN]; - enum tx2_uncore_type type; - } devices[] = { + struct acpi_device_id devices[] = { {"CAV901D", PMU_TYPE_L3C}, {"CAV901F", PMU_TYPE_DMC}, {"CAV901E", PMU_TYPE_CCPI2}, - {"", PMU_TYPE_INVALID} + {} }; + const struct acpi_device_id *id; - while (devices[i].type != PMU_TYPE_INVALID) { - if (!strcmp(acpi_device_hid(adev), devices[i].id)) - break; - i++; - } + id = acpi_match_acpi_device(devices, adev); + if (!id) + return PMU_TYPE_INVALID; - return devices[i].type; + return (enum tx2_uncore_type)id->driver_data; } static bool tx2_uncore_validate_event(struct pmu *pmu, -- cgit v1.2.3 From 897fa2c38c076c801bd1f1238af0af927e339c8f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 Apr 2024 23:59:41 +0800 Subject: cpumask: add cpumask_any_and_but() In some cases, it's useful to be able to select a random cpu from the intersection of two masks, excluding a particular CPU. For example, in some systems an uncore PMU is shared by a subset of CPUs, and management of this PMU is assigned to some arbitrary CPU in this set. Whenever the management CPU is hotplugged out, we wish to migrate responsibility to another arbitrary CPU which is both in this set and online. Today we can use cpumask_any_and() to select an arbitrary CPU in the intersection of two masks. We can also use cpumask_any_but() to select any arbitrary cpu in a mask excluding, a particular CPU. To do both, we either need to use a temporary cpumask, which is wasteful, or use some lower-level cpumask helpers, which can be unclear. This patch adds a new cpumask_any_and_but() to cater for these cases. Signed-off-by: Mark Rutland Cc: Thomas Gleixner Cc: Andrew Morton Cc: Peter Zijlstra Cc: Rusty Russell Cc: linux-kernel@vger.kernel.org Signed-off-by: Dawei Li Acked-by: Yury Norov Link: https://lore.kernel.org/r/20240403155950.2068109-2-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- include/linux/cpumask.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 1c29947db848..121f3ac757ff 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -388,6 +388,29 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) return i; } +/** + * cpumask_any_and_but - pick a "random" cpu from *mask1 & *mask2, but not this one. + * @mask1: the first input cpumask + * @mask2: the second input cpumask + * @cpu: the cpu to ignore + * + * Returns >= nr_cpu_ids if no cpus set. + */ +static inline +unsigned int cpumask_any_and_but(const struct cpumask *mask1, + const struct cpumask *mask2, + unsigned int cpu) +{ + unsigned int i; + + cpumask_check(cpu); + i = cpumask_first_and(mask1, mask2); + if (i != cpu) + return i; + + return cpumask_next_and(cpu, mask1, mask2); +} + /** * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer -- cgit v1.2.3 From 2f6589df124ee8cbe1772353e533c3fcc4319a24 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 3 Apr 2024 23:59:42 +0800 Subject: perf/alibaba_uncore_drw: Avoid placing cpumask on the stack In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_and_but() to avoid the need for a temporary cpumask on the stack. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Dawei Li Reviewed-by: Shuai Xue Link: https://lore.kernel.org/r/20240403155950.2068109-3-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- drivers/perf/alibaba_uncore_drw_pmu.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index a9277dcf90ce..d4d14b65c4a5 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -746,18 +746,14 @@ static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) struct ali_drw_pmu_irq *irq; struct ali_drw_pmu *drw_pmu; unsigned int target; - int ret; - cpumask_t node_online_cpus; irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node); if (cpu != irq->cpu) return 0; - ret = cpumask_and(&node_online_cpus, - cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask); - if (ret) - target = cpumask_any_but(&node_online_cpus, cpu); - else + target = cpumask_any_and_but(cpumask_of_node(cpu_to_node(cpu)), + cpu_online_mask, cpu); + if (target >= nr_cpu_ids) target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) -- cgit v1.2.3 From 60c73240f304a654b66811f7f56a3325201f46de Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 3 Apr 2024 23:59:43 +0800 Subject: perf/arm-cmn: Avoid placing cpumask on the stack In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_and_but() to avoid the need for a temporary cpumask on the stack. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Dawei Li Link: https://lore.kernel.org/r/20240403155950.2068109-4-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 7ef9c7e4836b..6bfb0c4a1287 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1950,20 +1950,20 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_no struct arm_cmn *cmn; unsigned int target; int node; - cpumask_t mask; cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node); if (cpu != cmn->cpu) return 0; node = dev_to_node(cmn->dev); - if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) && - cpumask_andnot(&mask, &mask, cpumask_of(cpu))) - target = cpumask_any(&mask); - else + + target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu); + if (target >= nr_cpu_ids) target = cpumask_any_but(cpu_online_mask, cpu); + if (target < nr_cpu_ids) arm_cmn_migrate(cmn, target); + return 0; } -- cgit v1.2.3 From b5310fa1fe8e29e82dd88ef23e2f04ac533548e1 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 3 Apr 2024 23:59:44 +0800 Subject: perf/arm_cspmu: Avoid placing cpumask on the stack In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_and_but() to avoid the need for a temporary cpumask on the stack. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Dawei Li Link: https://lore.kernel.org/r/20240403155950.2068109-5-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index b9a252272f1e..fd1004251665 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1322,8 +1322,7 @@ static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node) static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node) { - int dst; - struct cpumask online_supported; + unsigned int dst; struct arm_cspmu *cspmu = hlist_entry_safe(node, struct arm_cspmu, cpuhp_node); @@ -1333,9 +1332,8 @@ static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node) return 0; /* Choose a new CPU to migrate ownership of the PMU to */ - cpumask_and(&online_supported, &cspmu->associated_cpus, - cpu_online_mask); - dst = cpumask_any_but(&online_supported, cpu); + dst = cpumask_any_and_but(&cspmu->associated_cpus, + cpu_online_mask, cpu); if (dst >= nr_cpu_ids) return 0; -- cgit v1.2.3 From bea2a13b207ef48732daf329564101a07df14e3a Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 3 Apr 2024 23:59:45 +0800 Subject: perf/arm_dsu: Avoid placing cpumask on the stack In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_and_but() to avoid the need for a temporary cpumask on the stack. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Dawei Li Link: https://lore.kernel.org/r/20240403155950.2068109-6-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- drivers/perf/arm_dsu_pmu.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index bae3ca37f846..adc0bbb5fafe 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -230,15 +230,6 @@ static const struct attribute_group *dsu_pmu_attr_groups[] = { NULL, }; -static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu) -{ - struct cpumask online_supported; - - cpumask_and(&online_supported, - &dsu_pmu->associated_cpus, cpu_online_mask); - return cpumask_any_but(&online_supported, cpu); -} - static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx) { return (idx < dsu_pmu->num_counters) || @@ -827,14 +818,16 @@ static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node) { - int dst; - struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, - cpuhp_node); + struct dsu_pmu *dsu_pmu; + unsigned int dst; + + dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, cpuhp_node); if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu)) return 0; - dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu); + dst = cpumask_any_and_but(&dsu_pmu->associated_cpus, + cpu_online_mask, cpu); /* If there are no active CPUs in the DSU, leave IRQ disabled */ if (dst >= nr_cpu_ids) return 0; -- cgit v1.2.3 From cf276ee46bc44aa188d6a9ea36f83118f48bac67 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 3 Apr 2024 23:59:46 +0800 Subject: perf/dwc_pcie: Avoid placing cpumask on the stack In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_and_but() to avoid the need for a temporary cpumask on the stack. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Dawei Li Reviewed-by: Shuai Xue Link: https://lore.kernel.org/r/20240403155950.2068109-7-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- drivers/perf/dwc_pcie_pmu.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c index 957058ad0099..c5e328f23841 100644 --- a/drivers/perf/dwc_pcie_pmu.c +++ b/drivers/perf/dwc_pcie_pmu.c @@ -690,9 +690,8 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n { struct dwc_pcie_pmu *pcie_pmu; struct pci_dev *pdev; - int node; - cpumask_t mask; unsigned int target; + int node; pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node); /* Nothing to do if this CPU doesn't own the PMU */ @@ -702,10 +701,9 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n pcie_pmu->on_cpu = -1; pdev = pcie_pmu->pdev; node = dev_to_node(&pdev->dev); - if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) && - cpumask_andnot(&mask, &mask, cpumask_of(cpu))) - target = cpumask_any(&mask); - else + + target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu); + if (target >= nr_cpu_ids) target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) { -- cgit v1.2.3 From d7df79e6af29f99d149b8995e68813d77d381e63 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 3 Apr 2024 23:59:47 +0800 Subject: perf/hisi_pcie: Avoid placing cpumask on the stack In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_and_but() to avoid the need for a temporary cpumask on the stack. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Dawei Li Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240403155950.2068109-8-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 5d1f0e9fdb08..06b192cc31d5 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -673,7 +673,6 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node); unsigned int target; - cpumask_t mask; int numa_node; /* Nothing to do if this CPU doesn't own the PMU */ @@ -684,10 +683,10 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) /* Choose a local CPU from all online cpus. */ numa_node = dev_to_node(&pcie_pmu->pdev->dev); - if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) && - cpumask_andnot(&mask, &mask, cpumask_of(cpu))) - target = cpumask_any(&mask); - else + + target = cpumask_any_and_but(cpumask_of_node(numa_node), + cpu_online_mask, cpu); + if (target >= nr_cpu_ids) target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) { -- cgit v1.2.3 From b78d0fa25462405f0f123eb827a9e1bc0e595d52 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 3 Apr 2024 23:59:48 +0800 Subject: perf/hisi_uncore: Avoid placing cpumask on the stack In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_and_but() to avoid the need for a temporary cpumask on the stack. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Dawei Li Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240403155950.2068109-9-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 04031450d5fe..ccc9191ad1b6 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -504,7 +504,6 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu, node); - cpumask_t pmu_online_cpus; unsigned int target; if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus)) @@ -518,9 +517,8 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) hisi_pmu->on_cpu = -1; /* Choose a new CPU to migrate ownership of the PMU to */ - cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus, - cpu_online_mask); - target = cpumask_any_but(&pmu_online_cpus, cpu); + target = cpumask_any_and_but(&hisi_pmu->associated_cpus, + cpu_online_mask, cpu); if (target >= nr_cpu_ids) return 0; -- cgit v1.2.3 From fc85cee97029dee3acb4dcefe4af01b8f8022699 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 3 Apr 2024 23:59:49 +0800 Subject: perf/qcom_l2: Avoid placing cpumask on the stack In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_and_but() to avoid the need for a temporary cpumask on the stack. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Dawei Li Link: https://lore.kernel.org/r/20240403155950.2068109-10-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- drivers/perf/qcom_l2_pmu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 148df5ae8ef8..b5a44dc1dc3a 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -801,9 +801,8 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { - struct cluster_pmu *cluster; struct l2cache_pmu *l2cache_pmu; - cpumask_t cluster_online_cpus; + struct cluster_pmu *cluster; unsigned int target; l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node); @@ -820,9 +819,8 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) cluster->on_cpu = -1; /* Any other CPU for this cluster which is still online */ - cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus, - cpu_online_mask); - target = cpumask_any_but(&cluster_online_cpus, cpu); + target = cpumask_any_and_but(&cluster->cluster_cpus, + cpu_online_mask, cpu); if (target >= nr_cpu_ids) { disable_irq(cluster->irq); return 0; -- cgit v1.2.3 From 595275ca498485667039e8c190453a9a684687cb Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 3 Apr 2024 23:59:50 +0800 Subject: perf/thunderx2: Avoid placing cpumask on the stack In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_and_but() to avoid the need for a temporary cpumask on the stack. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Dawei Li Link: https://lore.kernel.org/r/20240403155950.2068109-11-dawei.li@shingroup.cn Signed-off-by: Will Deacon --- drivers/perf/thunderx2_pmu.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index f03aa85072ec..33e8ff3e5265 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -927,9 +927,8 @@ static int tx2_uncore_pmu_online_cpu(unsigned int cpu, static int tx2_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *hpnode) { - int new_cpu; struct tx2_uncore_pmu *tx2_pmu; - struct cpumask cpu_online_mask_temp; + unsigned int new_cpu; tx2_pmu = hlist_entry_safe(hpnode, struct tx2_uncore_pmu, hpnode); @@ -940,11 +939,8 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu, if (tx2_pmu->hrtimer_callback) hrtimer_cancel(&tx2_pmu->hrtimer); - cpumask_copy(&cpu_online_mask_temp, cpu_online_mask); - cpumask_clear_cpu(cpu, &cpu_online_mask_temp); - new_cpu = cpumask_any_and( - cpumask_of_node(tx2_pmu->node), - &cpu_online_mask_temp); + new_cpu = cpumask_any_and_but(cpumask_of_node(tx2_pmu->node), + cpu_online_mask, cpu); tx2_pmu->cpu = new_cpu; if (new_cpu >= nr_cpu_ids) -- cgit v1.2.3 From 8f9f5041c64600b01b71f29fb8e2121e45bfb719 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 9 Apr 2024 18:15:17 +0100 Subject: perf/arm-cmn: Set PMU device parent Now that perf supports giving the PMU device a parent, we can use our platform device to make the relationship between CMN instances and PMU IDs trivially discoverable, from either nominal direction: root@crazy-taxi:~# ls /sys/devices/platform/ARMHC600:00 | grep cmn arm_cmn_0 root@crazy-taxi:~# realpath /sys/bus/event_source/devices/arm_cmn_0/.. /sys/devices/platform/ARMHC600:00 Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/25d4428df1ddad966c74a3ed60171cd3ca6c8b66.1712682917.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 6bfb0c4a1287..e26ad1d3ed0b 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2482,6 +2482,7 @@ static int arm_cmn_probe(struct platform_device *pdev) cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); cmn->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = cmn->dev, .attr_groups = arm_cmn_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, .task_ctx_nr = perf_invalid_context, -- cgit v1.2.3 From b782e8d07baac95a5ce3f8773cc61f4ed7d0ccbc Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 11 Apr 2024 20:30:30 +0800 Subject: arm64: arm_pmuv3: Correctly extract and check the PMUVer Currently we're using "sbfx" to extract the PMUVer from ID_AA64DFR0_EL1 and skip the init/reset if no PMU present when the extracted PMUVer is negative or is zero. However for PMUv3p8 the PMUVer will be 0b1000 and PMUVer extracted by "sbfx" will always be negative and we'll skip the init/reset in __init_el2_debug/reset_pmuserenr_el0 unexpectedly. So this patch use "ubfx" instead of "sbfx" to extract the PMUVer. If the PMUVer is implementation defined (0b1111) or not implemented(0b0000) then skip the reset/init. Previously we'll also skip the init/reset if the PMUVer is higher than the version we known (currently PMUv3p9), with this patch we'll only skip if the PMU is not implemented or implementation defined. This keeps consistence with how we probe the PMU in the driver with pmuv3_implemented(). Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240411123030.7201-1-yangyicong@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 7 ++++--- arch/arm64/include/asm/el2_setup.h | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ab8b396428da..9ecd076ba08f 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -480,9 +480,10 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f + ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq 9000f // Skip if no PMU present or IMP_DEF msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b7afaa026842..e4546b29dd0c 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -59,13 +59,14 @@ .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp x0, #1 - b.lt .Lskip_pmu_\@ // Skip if no PMU present + ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to .Lskip_pmu_\@: - csel x2, xzr, x0, lt // all PMU counters from EL1 + csel x2, xzr, x0, eq // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 -- cgit v1.2.3 From 1fb8950417a4c73c33dd827b816ee41c8cf0c26a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:28 +0100 Subject: perf/hisi-pcie: Assign parent for event_source device Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the PCI device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Reviewed-by: Yicong Yang Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-2-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 06b192cc31d5..b1e4739fbdb0 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -806,6 +806,7 @@ static int hisi_pcie_alloc_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_ pcie_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, + .parent = &pdev->dev, .event_init = hisi_pcie_pmu_event_init, .pmu_enable = hisi_pcie_pmu_enable, .pmu_disable = hisi_pcie_pmu_disable, -- cgit v1.2.3 From d0412b6ecb4e422dcb1754106c52e40946a95b61 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:29 +0100 Subject: Documentation: hisi-pmu: Drop reference to /sys/devices path Having assigned a parent to the device, the suggested path is no longer valid. As /sys/bus/event_sources based path is also provided, simply drop mention of alternative. Reviewed-by: Yicong Yang Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-3-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/hisi-pmu.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst index e0174d20809a..5cc248d18c63 100644 --- a/Documentation/admin-guide/perf/hisi-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -20,7 +20,6 @@ interrupt, and the PMU driver shall register perf PMU drivers like L3C, HHA and DDRC etc. The available events and configuration options shall be described in the sysfs, see: -/sys/devices/hisi_sccl{X}_/, or /sys/bus/event_source/devices/hisi_sccl{X}_. The "perf list" command shall list the available events from sysfs. -- cgit v1.2.3 From 16d417f6c45b1854e29fc5d6de722de0bdc6ccdf Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:30 +0100 Subject: perf/hisi-uncore: Assign parents for event_source devices Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Reviewed-by: Greg Kroah-Hartman Reviewed-by: Yicong Yang Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-4-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index ccc9191ad1b6..a60e4c966098 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -536,6 +536,7 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) struct pmu *pmu = &hisi_pmu->pmu; pmu->module = module; + pmu->parent = hisi_pmu->dev; pmu->task_ctx_nr = perf_invalid_context; pmu->event_init = hisi_uncore_pmu_event_init; pmu->pmu_enable = hisi_uncore_pmu_enable; -- cgit v1.2.3 From eff6af531335341d7a988ea4cacd4e5938a2321a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:31 +0100 Subject: Documentation: hns-pmu: Use /sys/bus/event_source/devices paths To allow setting an appropriate parent for the struct pmu device remove existing references to /sys/devices/ path. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-5-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/hns3-pmu.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/perf/hns3-pmu.rst b/Documentation/admin-guide/perf/hns3-pmu.rst index 75a40846d47f..1195e570f2d6 100644 --- a/Documentation/admin-guide/perf/hns3-pmu.rst +++ b/Documentation/admin-guide/perf/hns3-pmu.rst @@ -16,7 +16,7 @@ HNS3 PMU driver The HNS3 PMU driver registers a perf PMU with the name of its sicl id.:: - /sys/devices/hns3_pmu_sicl_ + /sys/bus/event_source/devices/hns3_pmu_sicl_ PMU driver provides description of available events, filter modes, format, identifier and cpumask in sysfs. @@ -40,9 +40,9 @@ device. Example usage of checking event code and subevent code:: - $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time + $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time config=0x00204 - $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num + $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num config=0x10204 Each performance statistic has a pair of events to get two values to @@ -60,7 +60,7 @@ computation to calculate real performance data is::: Example usage of checking supported filter mode:: - $# cat /sys/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num + $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num filter mode supported: global/port/port-tc/func/func-queue/ Example usage of perf:: -- cgit v1.2.3 From 3d957de12c65a8757a8007735ed2a303e0b365a9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:32 +0100 Subject: perf/hisi-hns3: Assign parents for event_source device Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the PCI device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-6-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hns3_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c index 16869bf5bf4c..5236acdcc2e1 100644 --- a/drivers/perf/hisilicon/hns3_pmu.c +++ b/drivers/perf/hisilicon/hns3_pmu.c @@ -1419,6 +1419,7 @@ static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) hns3_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, + .parent = &pdev->dev, .event_init = hns3_pmu_event_init, .pmu_enable = hns3_pmu_enable, .pmu_disable = hns3_pmu_disable, -- cgit v1.2.3 From 1b7718fcc3f20f89931a4cf6a4fde2546cf143bd Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:33 +0100 Subject: perf/amlogic: Assign parents for event_source devices Currently all these devices appear directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parents to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Reviewed-by: Jiucheng Xu Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-7-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/amlogic/meson_ddr_pmu_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index bbc7285fd934..07446d784a1a 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -492,6 +492,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev) *pmu = (struct ddr_pmu) { .pmu = { .module = THIS_MODULE, + .parent = &pdev->dev, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, .task_ctx_nr = perf_invalid_context, .attr_groups = attr_groups, -- cgit v1.2.3 From 3a1bb75ebc1b8965c7f7dbb86584143feda8e83b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:34 +0100 Subject: perf/arm_cspmu: Assign parents for event_source devices Currently all these devices appear directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parents to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Reviewed-by: Suzuki K Poulose Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-8-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index fd1004251665..ba0cf2f466ef 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1206,6 +1206,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu) cspmu->pmu = (struct pmu){ .task_ctx_nr = perf_invalid_context, .module = cspmu->impl.module, + .parent = cspmu->dev, .pmu_enable = arm_cspmu_enable, .pmu_disable = arm_cspmu_disable, .event_init = arm_cspmu_event_init, -- cgit v1.2.3 From 867ba6d204f1c35c5d615c9b9c62f51a699220fa Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:35 +0100 Subject: Documentation: xgene-pmu: Use /sys/bus/event_source/devices paths To allow setting an appropriate parent for the struct pmu device remove existing references to /sys/devices/ path. Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-9-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/xgene-pmu.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/perf/xgene-pmu.rst b/Documentation/admin-guide/perf/xgene-pmu.rst index 644f8ed89152..98ccb8e777c4 100644 --- a/Documentation/admin-guide/perf/xgene-pmu.rst +++ b/Documentation/admin-guide/perf/xgene-pmu.rst @@ -13,7 +13,7 @@ PMU (perf) driver The xgene-pmu driver registers several perf PMU drivers. Each of the perf driver provides description of its available events and configuration options -in sysfs, see /sys/devices//. +in sysfs, see /sys/bus/event_source/devices//. The "format" directory describes format of the config (event ID), config1 (agent ID) fields of the perf_event_attr structure. The "events" -- cgit v1.2.3 From 89e34f8bee6cdd4137966321a5b5573412079116 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:36 +0100 Subject: perf/xgene: Assign parents for event_source devices Currently all these devices appear directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parents to be the hardware related struct device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Cc: Khuong Dinh Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-10-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 0d49343d704b..8823b4c6b556 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1102,6 +1102,7 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name) /* Perf driver registration */ pmu_dev->pmu = (struct pmu) { + .parent = pmu_dev->parent->dev, .attr_groups = pmu_dev->attr_groups, .task_ctx_nr = perf_invalid_context, .pmu_enable = xgene_perf_pmu_enable, -- cgit v1.2.3 From 90b4a1a927ee03972b70c69efbf7642654c79902 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:37 +0100 Subject: Documentation: thunderx2-pmu: Use /sys/bus/event_source/devices paths To allow setting an appropriate parent for the struct pmu device remove existing references to /sys/devices/ path. Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-11-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/thunderx2-pmu.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst index 01f158238ae1..9255f7bf9452 100644 --- a/Documentation/admin-guide/perf/thunderx2-pmu.rst +++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst @@ -22,7 +22,7 @@ The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and L3C devices. Each PMU can be used to count up to 4 (DMC/L3C) or up to 8 (CCPI2) events simultaneously. The PMUs provide a description of their available events and configuration options under sysfs, see -/sys/devices/uncore_; S is the socket id. +/sys/bus/event_source/devices/uncore_; S is the socket id. The driver does not support sampling, therefore "perf record" will not work. Per-task perf sessions are also not supported. -- cgit v1.2.3 From ecb79c21c18943487b4e16ae2e5fe60e8f59e08a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:38 +0100 Subject: perf/thunderx2: Assign parents for event_source devices Currently all these devices appear directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parents to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Cc: Robert Richter Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-12-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/thunderx2_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index 33e8ff3e5265..faf763d2c95c 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -724,6 +724,7 @@ static int tx2_uncore_pmu_register( /* Perf event registration */ tx2_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = tx2_pmu->dev, .attr_groups = tx2_pmu->attr_groups, .task_ctx_nr = perf_invalid_context, .event_init = tx2_uncore_event_init, -- cgit v1.2.3 From 50650e5f3186dcd3cc291b515022eab281256688 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:39 +0100 Subject: perf/riscv: Assign parents for event_source devices Currently all these devices appear directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parents to be the appropriate platform devices. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Cc: Atish Patra CC: Anup Patel Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-13-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/riscv_pmu_legacy.c | 1 + drivers/perf/riscv_pmu_sbi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c index fa0bccf4edf2..04487ad7fba0 100644 --- a/drivers/perf/riscv_pmu_legacy.c +++ b/drivers/perf/riscv_pmu_legacy.c @@ -136,6 +136,7 @@ static int pmu_legacy_device_probe(struct platform_device *pdev) pmu = riscv_pmu_alloc(); if (!pmu) return -ENOMEM; + pmu->pmu.parent = &pdev->dev; pmu_legacy_init(pmu); return 0; diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 5aef5a8737b2..82636273d726 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -1080,6 +1080,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) } pmu->pmu.attr_groups = riscv_pmu_attr_groups; + pmu->pmu.parent = &pdev->dev; pmu->cmask = cmask; pmu->ctr_start = pmu_sbi_ctr_start; pmu->ctr_stop = pmu_sbi_ctr_stop; -- cgit v1.2.3 From 556da13434521abd912bcb810ef871cf0f3555e8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:40 +0100 Subject: Documentation: qcom-pmu: Use /sys/bus/event_source/devices paths To allow setting an appropriate parent for the struct pmu device remove existing references to /sys/devices/ path. Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-14-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/qcom_l2_pmu.rst | 2 +- Documentation/admin-guide/perf/qcom_l3_pmu.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/perf/qcom_l2_pmu.rst b/Documentation/admin-guide/perf/qcom_l2_pmu.rst index c130178a4a55..c37c6be9b8d8 100644 --- a/Documentation/admin-guide/perf/qcom_l2_pmu.rst +++ b/Documentation/admin-guide/perf/qcom_l2_pmu.rst @@ -10,7 +10,7 @@ There is one logical L2 PMU exposed, which aggregates the results from the physical PMUs. The driver provides a description of its available events and configuration -options in sysfs, see /sys/devices/l2cache_0. +options in sysfs, see /sys/bus/event_source/devices/l2cache_0. The "format" directory describes the format of the events. diff --git a/Documentation/admin-guide/perf/qcom_l3_pmu.rst b/Documentation/admin-guide/perf/qcom_l3_pmu.rst index a3d014a46bfd..a66556b7e985 100644 --- a/Documentation/admin-guide/perf/qcom_l3_pmu.rst +++ b/Documentation/admin-guide/perf/qcom_l3_pmu.rst @@ -9,7 +9,7 @@ PMU with device name l3cache__. User space is responsible for aggregating across slices. The driver provides a description of its available events and configuration -options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs +options in sysfs, see /sys/bus/event_source/devices/l3cache*. Given that these are uncore PMUs the driver also exposes a "cpumask" sysfs attribute which contains a mask consisting of one CPU per socket which will be used to handle all the PMU events on that socket. -- cgit v1.2.3 From 6148865dd57cce4acae81909892d5a9fe16ecce7 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:41 +0100 Subject: perf/qcom: Assign parents for event_source devices Currently all these devices appear directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parents to be the platform devices. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Cc: Andy Gross Cc: Bjorn Andersson Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-15-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/qcom_l2_pmu.c | 1 + drivers/perf/qcom_l3_pmu.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index b5a44dc1dc3a..980e3051edd7 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -902,6 +902,7 @@ static int l2_cache_pmu_probe(struct platform_device *pdev) l2cache_pmu->pmu = (struct pmu) { /* suffix is instance id for future use with multiple sockets */ .name = "l2cache_0", + .parent = &pdev->dev, .task_ctx_nr = perf_invalid_context, .pmu_enable = l2_cache_pmu_enable, .pmu_disable = l2_cache_pmu_disable, diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index f16783d03db7..37786e88514e 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -748,6 +748,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev) return -ENOMEM; l3pmu->pmu = (struct pmu) { + .parent = &pdev->dev, .task_ctx_nr = perf_invalid_context, .pmu_enable = qcom_l3_cache__pmu_enable, -- cgit v1.2.3 From 1d194ab8571beb7363519d5c5b050a0cbd59b664 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:42 +0100 Subject: perf/imx_ddr: Assign parents for event_source devices Currently all this device appear directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Cc: Frank Li Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-16-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 4e8fa5a48fcf..1bbdb29743c4 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -651,6 +651,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, *pmu = (struct ddr_pmu) { .pmu = (struct pmu) { .module = THIS_MODULE, + .parent = dev, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, .task_ctx_nr = perf_invalid_context, .attr_groups = attr_groups, -- cgit v1.2.3 From 7bf75431a9ba1b3c2ededbcf08dca023786337fc Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:43 +0100 Subject: perf/arm_pmu: Assign parents for event_source devices Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Signed-off-by: Jonathan Cameron Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240412161057.14099-17-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu_platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 3596db36cbff..4b1a9a92ea11 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -196,6 +196,7 @@ int arm_pmu_device_probe(struct platform_device *pdev, if (!pmu) return -ENOMEM; + pmu->pmu.parent = &pdev->dev; pmu->plat_device = pdev; ret = pmu_parse_irqs(pmu); -- cgit v1.2.3 From 1919bd8e0be0bdd0382ee672a40e75bf19c0068c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:44 +0100 Subject: perf/alibaba_uncore: Assign parents for event_source device Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Reviewed-by: Shuai Xue Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-18-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/alibaba_uncore_drw_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index d4d14b65c4a5..89dd38343f93 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -709,6 +709,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev) drw_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &pdev->dev, .task_ctx_nr = perf_invalid_context, .event_init = ali_drw_pmu_event_init, .add = ali_drw_pmu_add, -- cgit v1.2.3 From e7ec4791f903d65548519a9ceeaec4f44a591655 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:45 +0100 Subject: perf/arm-cci: Assign parents for event_source device Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Reviewed-by: Suzuki K Poulose Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-19-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 6be03f81ae5d..a7fd80677919 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1409,6 +1409,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) cci_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &pdev->dev, .name = cci_pmu->model->name, .task_ctx_nr = perf_invalid_context, .pmu_enable = cci_pmu_enable, -- cgit v1.2.3 From f4144be05a606371d7258b618e383f1276e3c207 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:46 +0100 Subject: perf/arm-ccn: Assign parents for event_source device Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Acked-by: Suzuki K Poulose Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-20-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 641471bd5eff..f4495ff6525f 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1265,6 +1265,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) /* Perf driver registration */ ccn->dt.pmu = (struct pmu) { .module = THIS_MODULE, + .parent = ccn->dev, .attr_groups = arm_ccn_pmu_attr_groups, .task_ctx_nr = perf_invalid_context, .event_init = arm_ccn_pmu_event_init, -- cgit v1.2.3 From 46bed4c740d5de9e2ac62b4cfc20461da463f71b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:47 +0100 Subject: perf/arm-dmc620: Assign parents for event_source device Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-21-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_dmc620_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 8a81be2dd5ec..2ec96e204c40 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -673,6 +673,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev) dmc620_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &pdev->dev, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, .task_ctx_nr = perf_invalid_context, .event_init = dmc620_pmu_event_init, -- cgit v1.2.3 From bc81ae2efbb3f9fd12322787f475b77f51ca2a15 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:48 +0100 Subject: perf/arm-dsu: Assign parents for event_source device Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Reviewed-by: Suzuki K Poulose Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-22-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_dsu_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index adc0bbb5fafe..92248a24a1aa 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -742,6 +742,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev) dsu_pmu->pmu = (struct pmu) { .task_ctx_nr = perf_invalid_context, + .parent = &pdev->dev, .module = THIS_MODULE, .pmu_enable = dsu_pmu_enable, .pmu_disable = dsu_pmu_disable, -- cgit v1.2.3 From a8889fbf16bc954b8d11f16410963feee6cd9980 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:49 +0100 Subject: perf/arm-smmuv3: Assign parents for event_source device Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-23-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 719aa953a1c4..d5fa92ba8373 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -860,6 +860,7 @@ static int smmu_pmu_probe(struct platform_device *pdev) smmu_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &pdev->dev, .task_ctx_nr = perf_invalid_context, .pmu_enable = smmu_pmu_enable, .pmu_disable = smmu_pmu_disable, -- cgit v1.2.3 From 4052ce07d5d7e8158dfd5c01c514b930cb689f68 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 12 Apr 2024 17:10:50 +0100 Subject: perf/arm-spe: Assign parents for event_source device Currently the PMU device appears directly under /sys/devices/ Only root busses should appear there, so instead assign the pmu->dev parent to be the platform device. Link: https://lore.kernel.org/linux-cxl/ZCLI9A40PJsyqAmq@kroah.com/ Acked-by: Suzuki K Poulose Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240412161057.14099-24-Jonathan.Cameron@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 35f0de03416f..9100d82bfabc 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -932,6 +932,7 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu) spe_pmu->pmu = (struct pmu) { .module = THIS_MODULE, + .parent = &spe_pmu->pdev->dev, .capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE, .attr_groups = arm_spe_pmu_attr_groups, /* -- cgit v1.2.3 From 77fce82678ea5fd51442e62febec2004f79e041b Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 25 Apr 2024 20:46:25 +0800 Subject: drivers/perf: hisi_pcie: Fix out-of-bound access when valid event group The perf tool allows users to create event groups through following cmd [1], but the driver does not check whether the array index is out of bounds when writing data to the event_group array. If the number of events in an event_group is greater than HISI_PCIE_MAX_COUNTERS, the memory write overflow of event_group array occurs. Add array index check to fix the possible array out of bounds violation, and return directly when write new events are written to array bounds. There are 9 different events in an event_group. [1] perf stat -e '{pmu/event1/, ... ,pmu/event9/}' Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Junhao He Reviewed-by: Jijie Shao Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240425124627.13764-2-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index b1e4739fbdb0..03c506aa3853 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -350,15 +350,27 @@ static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event) return false; for (num = 0; num < counters; num++) { + /* + * If we find a related event, then it's a valid group + * since we don't need to allocate a new counter for it. + */ if (hisi_pcie_pmu_cmp_event(event_group[num], sibling)) break; } + /* + * Otherwise it's a new event but if there's no available counter, + * fail the check since we cannot schedule all the events in + * the group simultaneously. + */ + if (num == HISI_PCIE_MAX_COUNTERS) + return false; + if (num == counters) event_group[counters++] = sibling; } - return counters <= HISI_PCIE_MAX_COUNTERS; + return true; } static int hisi_pcie_pmu_event_init(struct perf_event *event) -- cgit v1.2.3 From 81bdd60a3d1d3b05e6cc6674845afb1694dd3a0e Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 25 Apr 2024 20:46:26 +0800 Subject: drivers/perf: hisi: hns3: Fix out-of-bound access when valid event group The perf tool allows users to create event groups through following cmd [1], but the driver does not check whether the array index is out of bounds when writing data to the event_group array. If the number of events in an event_group is greater than HNS3_PMU_MAX_HW_EVENTS, the memory write overflow of event_group array occurs. Add array index check to fix the possible array out of bounds violation, and return directly when write new events are written to array bounds. There are 9 different events in an event_group. [1] perf stat -e '{pmu/event1/, ... ,pmu/event9/} Fixes: 66637ab137b4 ("drivers/perf: hisi: add driver for HNS3 PMU") Signed-off-by: Junhao He Signed-off-by: Hao Chen Acked-by: Jonathan Cameron Reviewed-by: Jijie Shao Link: https://lore.kernel.org/r/20240425124627.13764-3-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hns3_pmu.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c index 5236acdcc2e1..0417bf23fa3e 100644 --- a/drivers/perf/hisilicon/hns3_pmu.c +++ b/drivers/perf/hisilicon/hns3_pmu.c @@ -1085,15 +1085,27 @@ static bool hns3_pmu_validate_event_group(struct perf_event *event) return false; for (num = 0; num < counters; num++) { + /* + * If we find a related event, then it's a valid group + * since we don't need to allocate a new counter for it. + */ if (hns3_pmu_cmp_event(event_group[num], sibling)) break; } + /* + * Otherwise it's a new event but if there's no available counter, + * fail the check since we cannot schedule all the events in + * the group simultaneously. + */ + if (num == HNS3_PMU_MAX_HW_EVENTS) + return false; + if (num == counters) event_group[counters++] = sibling; } - return counters <= HNS3_PMU_MAX_HW_EVENTS; + return true; } static u32 hns3_pmu_get_filter_condition(struct perf_event *event) -- cgit v1.2.3 From 582c1aeee0a9e73010cf1c4cef338709860deeb0 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 25 Apr 2024 20:46:27 +0800 Subject: drivers/perf: hisi: hns3: Actually use devm_add_action_or_reset() pci_alloc_irq_vectors() allocates an irq vector. When devm_add_action() fails, the irq vector is not freed, which leads to a memory leak. Replace the devm_add_action with devm_add_action_or_reset to ensure the irq vector can be destroyed when it fails. Fixes: 66637ab137b4 ("drivers/perf: hisi: add driver for HNS3 PMU") Signed-off-by: Hao Chen Signed-off-by: Junhao He Reviewed-by: Jijie Shao Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240425124627.13764-4-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hns3_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c index 0417bf23fa3e..e900f8e00b18 100644 --- a/drivers/perf/hisilicon/hns3_pmu.c +++ b/drivers/perf/hisilicon/hns3_pmu.c @@ -1528,7 +1528,7 @@ static int hns3_pmu_irq_register(struct pci_dev *pdev, return ret; } - ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev); + ret = devm_add_action_or_reset(&pdev->dev, hns3_pmu_free_irq, pdev); if (ret) { pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret); return ret; -- cgit v1.2.3 From 410e471f87465f04d7ae7f8ed16ef8e7a3b5517c Mon Sep 17 00:00:00 2001 From: chenqiwu Date: Tue, 19 Dec 2023 10:22:29 +0800 Subject: arm64: Add USER_STACKTRACE support Currently, userstacktrace is unsupported for ftrace and uprobe tracers on arm64. This patch uses the perf_callchain_user() code as blueprint to implement the arch_stack_walk_user() which add userstacktrace support on arm64. Meanwhile, we can use arch_stack_walk_user() to simplify the implementation of perf_callchain_user(). This patch is tested pass with ftrace, uprobe and perf tracers profiling userstacktrace cases. Tested-by: chenqiwu Signed-off-by: chenqiwu Link: https://lore.kernel.org/r/20231219022229.10230-1-qiwu.chen@transsion.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/perf_callchain.c | 118 ++---------------------------------- arch/arm64/kernel/stacktrace.c | 120 +++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 114 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7b11c98b3e84..7f7bbee7d257 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -258,6 +258,7 @@ config ARM64 select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_NMI_SUPPORT select HAVE_SOFTIRQ_ON_OWN_STACK + select USER_STACKTRACE_SUPPORT help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 6d157f32187b..e8ed5673f481 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -10,94 +10,12 @@ #include -struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; -} __attribute__((packed)); - -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. - */ -static struct frame_tail __user * -user_backtrace(struct frame_tail __user *tail, - struct perf_callchain_entry_ctx *entry) -{ - struct frame_tail buftail; - unsigned long err; - unsigned long lr; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - lr = ptrauth_strip_user_insn_pac(buftail.lr); - - perf_callchain_store(entry, lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). - */ - if (tail >= buftail.fp) - return NULL; - - return buftail.fp; -} - -#ifdef CONFIG_COMPAT -/* - * The registers we're interested in are at the end of the variable - * length saved register structure. The fp points at the end of this - * structure so the address of this struct is: - * (struct compat_frame_tail *)(xxx->fp)-1 - * - * This code has been adapted from the ARM OProfile support. - */ -struct compat_frame_tail { - compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ - u32 sp; - u32 lr; -} __attribute__((packed)); - -static struct compat_frame_tail __user * -compat_user_backtrace(struct compat_frame_tail __user *tail, - struct perf_callchain_entry_ctx *entry) +static bool callchain_trace(void *data, unsigned long pc) { - struct compat_frame_tail buftail; - unsigned long err; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - perf_callchain_store(entry, buftail.lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). - */ - if (tail + 1 >= (struct compat_frame_tail __user *) - compat_ptr(buftail.fp)) - return NULL; + struct perf_callchain_entry_ctx *entry = data; - return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; + return perf_callchain_store(entry, pc) == 0; } -#endif /* CONFIG_COMPAT */ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) @@ -107,35 +25,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, return; } - perf_callchain_store(entry, regs->pc); - - if (!compat_user_mode(regs)) { - /* AARCH64 mode */ - struct frame_tail __user *tail; - - tail = (struct frame_tail __user *)regs->regs[29]; - - while (entry->nr < entry->max_stack && - tail && !((unsigned long)tail & 0x7)) - tail = user_backtrace(tail, entry); - } else { -#ifdef CONFIG_COMPAT - /* AARCH32 compat mode */ - struct compat_frame_tail __user *tail; - - tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; - - while ((entry->nr < entry->max_stack) && - tail && !((unsigned long)tail & 0x3)) - tail = compat_user_backtrace(tail, entry); -#endif - } -} - -static bool callchain_trace(void *data, unsigned long pc) -{ - struct perf_callchain_entry_ctx *entry = data; - return perf_callchain_store(entry, pc) == 0; + arch_stack_walk_user(callchain_trace, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 684c26511696..6b3258860377 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -324,3 +324,123 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) dump_backtrace(NULL, tsk, loglvl); barrier(); } + +/* + * The struct defined for userspace stack frame in AARCH64 mode. + */ +struct frame_tail { + struct frame_tail __user *fp; + unsigned long lr; +} __attribute__((packed)); + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static struct frame_tail __user * +unwind_user_frame(struct frame_tail __user *tail, void *cookie, + stack_trace_consume_fn consume_entry) +{ + struct frame_tail buftail; + unsigned long err; + unsigned long lr; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + lr = ptrauth_strip_user_insn_pac(buftail.lr); + + if (!consume_entry(cookie, lr)) + return NULL; + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail >= buftail.fp) + return NULL; + + return buftail.fp; +} + +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +unwind_compat_user_frame(struct compat_frame_tail __user *tail, void *cookie, + stack_trace_consume_fn consume_entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + if (!consume_entry(cookie, buftail.lr)) + return NULL; + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + if (!consume_entry(cookie, regs->pc)) + return; + + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + while (tail && !((unsigned long)tail & 0x7)) + tail = unwind_user_frame(tail, cookie, consume_entry); + } else { +#ifdef CONFIG_COMPAT + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + while (tail && !((unsigned long)tail & 0x3)) + tail = unwind_compat_user_frame(tail, cookie, consume_entry); +#endif + } +} -- cgit v1.2.3