From 7f95da9d2dc4c20bb374c281ceb8fa40b6208f4b Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 19 Jan 2023 18:03:05 +0800 Subject: drivers/perf: hisi: Advertise the PERF_PMU_CAP_NO_EXCLUDE capability The initialization of pmu::capabilities was missed when the initialization code of hisi_pmu->pmu was extracted into a function. The counters of the HiSilicon uncore PMU drivers do not support context exclusion, so we have to advertise the PERF_PMU_CAP_NO_EXCLUDE capability. This ensures that perf will prevent us from handling events where any exclusion flags are set. Signed-off-by: Junhao He Link: https://lore.kernel.org/r/20230119100307.3660-2-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index fbc8a93d5eac..2a466477920b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -546,6 +546,7 @@ void hisi_pmu_init(struct pmu *pmu, const char *name, pmu->stop = hisi_uncore_pmu_stop; pmu->read = hisi_uncore_pmu_read; pmu->attr_groups = attr_groups; + pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } EXPORT_SYMBOL_GPL(hisi_pmu_init); -- cgit v1.2.3 From 053b5579dacfc5763dda0c073ee14147421d32d7 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 19 Jan 2023 18:03:06 +0800 Subject: drivers/perf: hisi: Simplify the parameters of hisi_pmu_init() Use "hisi_pmu" to simplify the parameter list for the hisi_pmu_init() function. 
Signed-off-by: Junhao He Link: https://lore.kernel.org/r/20230119100307.3660-3-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pmu.c | 8 +++++--- drivers/perf/hisilicon/hisi_uncore_pmu.h | 4 ++-- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +- 7 files changed, 12 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 50d0c0a2f1fe..8c3ffcbfd4c0 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -516,7 +516,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); - hisi_pmu_init(&ddrc_pmu->pmu, name, ddrc_pmu->pmu_events.attr_groups, THIS_MODULE); + hisi_pmu_init(ddrc_pmu, name, THIS_MODULE); ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 13017b3412a5..806698b9eabf 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -519,7 +519,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", hha_pmu->sccl_id, hha_pmu->index_id); - hisi_pmu_init(&hha_pmu->pmu, name, hha_pmu->pmu_events.attr_groups, THIS_MODULE); + hisi_pmu_init(hha_pmu, name, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 2995f3630d49..5b2c35f1658a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ 
b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -557,7 +557,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) */ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", l3c_pmu->sccl_id, l3c_pmu->ccl_id); - hisi_pmu_init(&l3c_pmu->pmu, name, l3c_pmu->pmu_events.attr_groups, THIS_MODULE); + hisi_pmu_init(l3c_pmu, name, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 47d3cc9b6eec..afe3419f3f6d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -412,7 +412,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(&pa_pmu->pmu, name, pa_pmu->pmu_events.attr_groups, THIS_MODULE); + hisi_pmu_init(pa_pmu, name, THIS_MODULE); ret = perf_pmu_register(&pa_pmu->pmu, name, -1); if (ret) { dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 2a466477920b..f1b0f5e1a28f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -531,9 +531,11 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); -void hisi_pmu_init(struct pmu *pmu, const char *name, - const struct attribute_group **attr_groups, struct module *module) +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, + struct module *module) { + struct pmu *pmu = &hisi_pmu->pmu; + pmu->name = name; pmu->module = module; pmu->task_ctx_nr = perf_invalid_context; @@ -545,7 +547,7 @@ void hisi_pmu_init(struct pmu *pmu, const char *name, pmu->start = hisi_uncore_pmu_start; pmu->stop = hisi_uncore_pmu_stop; pmu->read = hisi_uncore_pmu_read; - pmu->attr_groups = attr_groups; + pmu->attr_groups = hisi_pmu->pmu_events.attr_groups; 
pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } EXPORT_SYMBOL_GPL(hisi_pmu_init); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index b59de33cd059..f8e3cc6903d7 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -121,6 +121,6 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); -void hisi_pmu_init(struct pmu *pmu, const char *name, - const struct attribute_group **attr_groups, struct module *module); +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, + struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index b9c79f17230c..1e354433776a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -445,7 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(&sllc_pmu->pmu, name, sllc_pmu->pmu_events.attr_groups, THIS_MODULE); + hisi_pmu_init(sllc_pmu, name, THIS_MODULE); ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); if (ret) { -- cgit v1.2.3 From e126f6f42f89baee09e088ab6bc48f83ac3a0eae Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 19 Jan 2023 18:03:07 +0800 Subject: drivers/perf: hisi: Extract initialization of "cpa_pmu->pmu" Use hisi_pmu_init() function to simplify initialization of "cpa_pmu->pmu". 
Signed-off-by: Junhao He Link: https://lore.kernel.org/r/20230119100307.3660-4-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index a9bb73f76be4..4c67d57217a7 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -316,21 +316,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) if (!name) return -ENOMEM; - cpa_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = cpa_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(cpa_pmu, name, THIS_MODULE); /* Power Management should be disabled before using CPA PMU. */ hisi_cpa_pmu_disable_pm(cpa_pmu); -- cgit v1.2.3 From bb21ef19a3d8f586a99310116d40622fb5b79942 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Dec 2022 19:46:13 +0000 Subject: perf/arm-cmn: Reset DTM_PMU_CONFIG at probe Although we treat the DTM counters as free-running such that we're not too concerned about the initial DTM state, it's possible for a previous user to have left DTM counters enabled and paired with DTC counters. Thus if the first events are scheduled using some, but not all, DTMs, the as-yet-unused ones could end up adding spurious increments to the event counts at the DTC. Make sure we sync our initial DTM_PMU_CONFIG state to all the DTMs at probe time to avoid that possibility. 
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/ba5f38b3dc733cd06bfb5e659b697e76d18c2183.1670269572.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index b80a9b74662b..e220714954b0 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1865,6 +1865,7 @@ static void arm_cmn_init_dtm(struct arm_cmn_dtm *dtm, struct arm_cmn_node *xp, i dtm->base = xp->pmu_base + CMN_DTM_OFFSET(idx); dtm->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN; + writeq_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG); for (i = 0; i < 4; i++) { dtm->wp_event[i] = -1; writeq_relaxed(0, dtm->base + CMN_DTM_WPn_MASK(i)); -- cgit v1.2.3 From e85930f06f0e938bfeb6e081526da86a784cb907 Mon Sep 17 00:00:00 2001 From: Gowthami Thiagarajan Date: Fri, 9 Dec 2022 11:06:07 +0530 Subject: perf/marvell: Add ACPI support to DDR uncore driver Add support for ACPI based device registration so that the driver can be also enabled through ACPI table. 
Signed-off-by: Gowthami Thiagarajan Link: https://lore.kernel.org/r/20221209053607.3929964-1-gthiagarajan@marvell.com Signed-off-by: Will Deacon --- drivers/perf/marvell_cn10k_ddr_pmu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers') diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c index 665b382a0ee3..b94a5f6cc22b 100644 --- a/drivers/perf/marvell_cn10k_ddr_pmu.c +++ b/drivers/perf/marvell_cn10k_ddr_pmu.c @@ -12,6 +12,7 @@ #include #include #include +#include /* Performance Counters Operating Mode Control Registers */ #define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020 @@ -717,10 +718,19 @@ static const struct of_device_id cn10k_ddr_pmu_of_match[] = { MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match); #endif +#ifdef CONFIG_ACPI +static const struct acpi_device_id cn10k_ddr_pmu_acpi_match[] = { + {"MRVL000A", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, cn10k_ddr_pmu_acpi_match); +#endif + static struct platform_driver cn10k_ddr_pmu_driver = { .driver = { .name = "cn10k-ddr-pmu", .of_match_table = of_match_ptr(cn10k_ddr_pmu_of_match), + .acpi_match_table = ACPI_PTR(cn10k_ddr_pmu_acpi_match), .suppress_bind_attrs = true, }, .probe = cn10k_ddr_perf_probe, -- cgit v1.2.3 From 093cf1f62fe8504d3cbd721c8753dbda931dd387 Mon Sep 17 00:00:00 2001 From: Gowthami Thiagarajan Date: Fri, 9 Dec 2022 11:07:15 +0530 Subject: perf/marvell: Add ACPI support to TAD uncore driver Add support for ACPI-based device registration so that the driver can also be enabled through the ACPI table. While at it, change the DT-specific APIs to the device_* APIs so that both DT-based and ACPI-based probing work. 
Signed-off-by: Gowthami Thiagarajan Link: https://lore.kernel.org/r/20221209053715.3930071-1-gthiagarajan@marvell.com Signed-off-by: Will Deacon --- drivers/perf/marvell_cn10k_tad_pmu.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index a1166afb3702..3972197e2210 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -13,6 +13,7 @@ #include #include #include +#include #define TAD_PFC_OFFSET 0x800 #define TAD_PFC(counter) (TAD_PFC_OFFSET | (counter << 3)) @@ -254,7 +255,7 @@ static const struct attribute_group *tad_pmu_attr_groups[] = { static int tad_pmu_probe(struct platform_device *pdev) { - struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; struct tad_region *regions; struct tad_pmu *tad_pmu; struct resource *res; @@ -276,21 +277,21 @@ static int tad_pmu_probe(struct platform_device *pdev) return -ENODEV; } - ret = of_property_read_u32(node, "marvell,tad-page-size", - &tad_page_size); + ret = device_property_read_u32(dev, "marvell,tad-page-size", + &tad_page_size); if (ret) { dev_err(&pdev->dev, "Can't find tad-page-size property\n"); return ret; } - ret = of_property_read_u32(node, "marvell,tad-pmu-page-size", - &tad_pmu_page_size); + ret = device_property_read_u32(dev, "marvell,tad-pmu-page-size", + &tad_pmu_page_size); if (ret) { dev_err(&pdev->dev, "Can't find tad-pmu-page-size property\n"); return ret; } - ret = of_property_read_u32(node, "marvell,tad-cnt", &tad_cnt); + ret = device_property_read_u32(dev, "marvell,tad-cnt", &tad_cnt); if (ret) { dev_err(&pdev->dev, "Can't find tad-cnt property\n"); return ret; @@ -369,10 +370,19 @@ static const struct of_device_id tad_pmu_of_match[] = { }; #endif +#ifdef CONFIG_ACPI +static const struct acpi_device_id tad_pmu_acpi_match[] = { + {"MRVL000B", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, 
tad_pmu_acpi_match); +#endif + static struct platform_driver tad_pmu_driver = { .driver = { .name = "cn10k_tad_pmu", .of_match_table = of_match_ptr(tad_pmu_of_match), + .acpi_match_table = ACPI_PTR(tad_pmu_acpi_match), .suppress_bind_attrs = true, }, .probe = tad_pmu_probe, -- cgit v1.2.3 From e080477a050cce0471d3a84347f350ad7514a18b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Jan 2023 13:26:17 -0600 Subject: perf: arm_spe: Use feature numbering for PMSEVFR_EL1 defines Similar to commit 121a8fc088f1 ("arm64/sysreg: Use feature numbering for PMU and SPE revisions") use feature numbering instead of architecture versions for the PMSEVFR_EL1 Res0 defines. Tested-by: James Clark Signed-off-by: Rob Herring Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220825-arm-spe-v8-7-v4-1-327f860daf28@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 6 +++--- drivers/perf/arm_spe_pmu.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1312fb48f18b..c4ce16333750 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -273,11 +273,11 @@ #define SYS_PMSFCR_EL1_ST_SHIFT 18 #define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5) -#define SYS_PMSEVFR_EL1_RES0_8_2 \ +#define PMSEVFR_EL1_RES0_IMP \ (GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\ BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0)) -#define SYS_PMSEVFR_EL1_RES0_8_3 \ - (SYS_PMSEVFR_EL1_RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11))) +#define PMSEVFR_EL1_RES0_V1P1 \ + (PMSEVFR_EL1_RES0_IMP & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11))) #define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6) #define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0 diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 00e3a637f7b6..65cf93dcc8ee 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -677,11 +677,11 
@@ static u64 arm_spe_pmsevfr_res0(u16 pmsver) { switch (pmsver) { case ID_AA64DFR0_EL1_PMSVer_IMP: - return SYS_PMSEVFR_EL1_RES0_8_2; + return PMSEVFR_EL1_RES0_IMP; case ID_AA64DFR0_EL1_PMSVer_V1P1: /* Return the highest version we support in default */ default: - return SYS_PMSEVFR_EL1_RES0_8_3; + return PMSEVFR_EL1_RES0_V1P1; } } -- cgit v1.2.3 From c759ec850df89f7235b08e468abb3190b6998d4e Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Jan 2023 13:26:18 -0600 Subject: arm64: Drop SYS_ from SPE register defines We currently have a non-standard SYS_ prefix in the constants generated for the SPE register bitfields. Drop this in preparation for automatic register definition generation. The SPE mask defines were unshifted, and the SPE register field enumerations were shifted. The autogenerated defines are the opposite, so make the necessary adjustments. No functional changes. Tested-by: James Clark Signed-off-by: Rob Herring Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220825-arm-spe-v8-7-v4-2-327f860daf28@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/el2_setup.h | 6 +- arch/arm64/include/asm/sysreg.h | 112 ++++++++++++++++++------------------- arch/arm64/kvm/debug.c | 2 +- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 2 +- drivers/perf/arm_spe_pmu.c | 85 ++++++++++++++-------------- 5 files changed, 103 insertions(+), 104 deletions(-) (limited to 'drivers') diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 668569adf4d3..f9da43e53cdb 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -53,10 +53,10 @@ cbz x0, .Lskip_spe_\@ // Skip if SPE not present mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2, - and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT) + and x0, x0, #(1 << PMBIDR_EL1_P_SHIFT) cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical - mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ - 1 << SYS_PMSCR_EL2_PA_SHIFT) + mov x0, #(1 << 
PMSCR_EL2_PCT_SHIFT | \ + 1 << PMSCR_EL2_PA_SHIFT) msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter .Lskip_spe_el2_\@: mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c4ce16333750..dbb0e8e22cf4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -218,59 +218,59 @@ /*** Statistical Profiling Extension ***/ /* ID registers */ #define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7) -#define SYS_PMSIDR_EL1_FE_SHIFT 0 -#define SYS_PMSIDR_EL1_FT_SHIFT 1 -#define SYS_PMSIDR_EL1_FL_SHIFT 2 -#define SYS_PMSIDR_EL1_ARCHINST_SHIFT 3 -#define SYS_PMSIDR_EL1_LDS_SHIFT 4 -#define SYS_PMSIDR_EL1_ERND_SHIFT 5 -#define SYS_PMSIDR_EL1_INTERVAL_SHIFT 8 -#define SYS_PMSIDR_EL1_INTERVAL_MASK 0xfUL -#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT 12 -#define SYS_PMSIDR_EL1_MAXSIZE_MASK 0xfUL -#define SYS_PMSIDR_EL1_COUNTSIZE_SHIFT 16 -#define SYS_PMSIDR_EL1_COUNTSIZE_MASK 0xfUL +#define PMSIDR_EL1_FE_SHIFT 0 +#define PMSIDR_EL1_FT_SHIFT 1 +#define PMSIDR_EL1_FL_SHIFT 2 +#define PMSIDR_EL1_ARCHINST_SHIFT 3 +#define PMSIDR_EL1_LDS_SHIFT 4 +#define PMSIDR_EL1_ERND_SHIFT 5 +#define PMSIDR_EL1_INTERVAL_SHIFT 8 +#define PMSIDR_EL1_INTERVAL_MASK GENMASK_ULL(11, 8) +#define PMSIDR_EL1_MAXSIZE_SHIFT 12 +#define PMSIDR_EL1_MAXSIZE_MASK GENMASK_ULL(15, 12) +#define PMSIDR_EL1_COUNTSIZE_SHIFT 16 +#define PMSIDR_EL1_COUNTSIZE_MASK GENMASK_ULL(19, 16) #define SYS_PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7) -#define SYS_PMBIDR_EL1_ALIGN_SHIFT 0 -#define SYS_PMBIDR_EL1_ALIGN_MASK 0xfU -#define SYS_PMBIDR_EL1_P_SHIFT 4 -#define SYS_PMBIDR_EL1_F_SHIFT 5 +#define PMBIDR_EL1_ALIGN_SHIFT 0 +#define PMBIDR_EL1_ALIGN_MASK 0xfU +#define PMBIDR_EL1_P_SHIFT 4 +#define PMBIDR_EL1_F_SHIFT 5 /* Sampling controls */ #define SYS_PMSCR_EL1 sys_reg(3, 0, 9, 9, 0) -#define SYS_PMSCR_EL1_E0SPE_SHIFT 0 -#define SYS_PMSCR_EL1_E1SPE_SHIFT 1 -#define SYS_PMSCR_EL1_CX_SHIFT 3 -#define SYS_PMSCR_EL1_PA_SHIFT 4 -#define 
SYS_PMSCR_EL1_TS_SHIFT 5 -#define SYS_PMSCR_EL1_PCT_SHIFT 6 +#define PMSCR_EL1_E0SPE_SHIFT 0 +#define PMSCR_EL1_E1SPE_SHIFT 1 +#define PMSCR_EL1_CX_SHIFT 3 +#define PMSCR_EL1_PA_SHIFT 4 +#define PMSCR_EL1_TS_SHIFT 5 +#define PMSCR_EL1_PCT_SHIFT 6 #define SYS_PMSCR_EL2 sys_reg(3, 4, 9, 9, 0) -#define SYS_PMSCR_EL2_E0HSPE_SHIFT 0 -#define SYS_PMSCR_EL2_E2SPE_SHIFT 1 -#define SYS_PMSCR_EL2_CX_SHIFT 3 -#define SYS_PMSCR_EL2_PA_SHIFT 4 -#define SYS_PMSCR_EL2_TS_SHIFT 5 -#define SYS_PMSCR_EL2_PCT_SHIFT 6 +#define PMSCR_EL2_E0HSPE_SHIFT 0 +#define PMSCR_EL2_E2SPE_SHIFT 1 +#define PMSCR_EL2_CX_SHIFT 3 +#define PMSCR_EL2_PA_SHIFT 4 +#define PMSCR_EL2_TS_SHIFT 5 +#define PMSCR_EL2_PCT_SHIFT 6 #define SYS_PMSICR_EL1 sys_reg(3, 0, 9, 9, 2) #define SYS_PMSIRR_EL1 sys_reg(3, 0, 9, 9, 3) -#define SYS_PMSIRR_EL1_RND_SHIFT 0 -#define SYS_PMSIRR_EL1_INTERVAL_SHIFT 8 -#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL +#define PMSIRR_EL1_RND_SHIFT 0 +#define PMSIRR_EL1_INTERVAL_SHIFT 8 +#define PMSIRR_EL1_INTERVAL_MASK GENMASK_ULL(31, 8) /* Filtering controls */ #define SYS_PMSNEVFR_EL1 sys_reg(3, 0, 9, 9, 1) #define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4) -#define SYS_PMSFCR_EL1_FE_SHIFT 0 -#define SYS_PMSFCR_EL1_FT_SHIFT 1 -#define SYS_PMSFCR_EL1_FL_SHIFT 2 -#define SYS_PMSFCR_EL1_B_SHIFT 16 -#define SYS_PMSFCR_EL1_LD_SHIFT 17 -#define SYS_PMSFCR_EL1_ST_SHIFT 18 +#define PMSFCR_EL1_FE_SHIFT 0 +#define PMSFCR_EL1_FT_SHIFT 1 +#define PMSFCR_EL1_FL_SHIFT 2 +#define PMSFCR_EL1_B_SHIFT 16 +#define PMSFCR_EL1_LD_SHIFT 17 +#define PMSFCR_EL1_ST_SHIFT 18 #define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5) #define PMSEVFR_EL1_RES0_IMP \ @@ -280,37 +280,37 @@ (PMSEVFR_EL1_RES0_IMP & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11))) #define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6) -#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0 +#define PMSLATFR_EL1_MINLAT_SHIFT 0 /* Buffer controls */ #define SYS_PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0) -#define SYS_PMBLIMITR_EL1_E_SHIFT 0 -#define SYS_PMBLIMITR_EL1_FM_SHIFT 1 
-#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL -#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT) +#define PMBLIMITR_EL1_E_SHIFT 0 +#define PMBLIMITR_EL1_FM_SHIFT 1 +#define PMBLIMITR_EL1_FM_MASK GENMASK_ULL(2, 1) +#define PMBLIMITR_EL1_FM_STOP_IRQ 0 #define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1) /* Buffer error reporting */ #define SYS_PMBSR_EL1 sys_reg(3, 0, 9, 10, 3) -#define SYS_PMBSR_EL1_COLL_SHIFT 16 -#define SYS_PMBSR_EL1_S_SHIFT 17 -#define SYS_PMBSR_EL1_EA_SHIFT 18 -#define SYS_PMBSR_EL1_DL_SHIFT 19 -#define SYS_PMBSR_EL1_EC_SHIFT 26 -#define SYS_PMBSR_EL1_EC_MASK 0x3fUL +#define PMBSR_EL1_COLL_SHIFT 16 +#define PMBSR_EL1_S_SHIFT 17 +#define PMBSR_EL1_EA_SHIFT 18 +#define PMBSR_EL1_DL_SHIFT 19 +#define PMBSR_EL1_EC_SHIFT 26 +#define PMBSR_EL1_EC_MASK GENMASK_ULL(31, 26) -#define SYS_PMBSR_EL1_EC_BUF (0x0UL << SYS_PMBSR_EL1_EC_SHIFT) -#define SYS_PMBSR_EL1_EC_FAULT_S1 (0x24UL << SYS_PMBSR_EL1_EC_SHIFT) -#define SYS_PMBSR_EL1_EC_FAULT_S2 (0x25UL << SYS_PMBSR_EL1_EC_SHIFT) +#define PMBSR_EL1_EC_BUF 0x0UL +#define PMBSR_EL1_EC_FAULT_S1 0x24UL +#define PMBSR_EL1_EC_FAULT_S2 0x25UL -#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT 0 -#define SYS_PMBSR_EL1_FAULT_FSC_MASK 0x3fUL +#define PMBSR_EL1_FAULT_FSC_SHIFT 0 +#define PMBSR_EL1_FAULT_FSC_MASK 0x3fUL -#define SYS_PMBSR_EL1_BUF_BSC_SHIFT 0 -#define SYS_PMBSR_EL1_BUF_BSC_MASK 0x3fUL +#define PMBSR_EL1_BUF_BSC_SHIFT 0 +#define PMBSR_EL1_BUF_BSC_MASK 0x3fUL -#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT) +#define PMBSR_EL1_BUF_BSC_FULL 0x1UL /*** End of Statistical Profiling Extension ***/ diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index fccf9ec01813..55f80fb93925 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -328,7 +328,7 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) * we may need to check if the host state needs to be saved. 
*/ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && - !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT))) + !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(PMBIDR_EL1_P_SHIFT))) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE); /* Check if we have TRBE implemented and available at the host */ diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index e17455773b98..2673bde62fad 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -27,7 +27,7 @@ static void __debug_save_spe(u64 *pmscr_el1) * Check if the host is actually using it ? */ reg = read_sysreg_s(SYS_PMBLIMITR_EL1); - if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) + if (!(reg & BIT(PMBLIMITR_EL1_E_SHIFT))) return; /* Yes; save the control register and disable data generation */ diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 65cf93dcc8ee..814ed18346b6 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -12,6 +12,7 @@ #define DRVNAME PMUNAME "_pmu" #define pr_fmt(fmt) DRVNAME ": " fmt +#include #include #include #include @@ -282,18 +283,18 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event) struct perf_event_attr *attr = &event->attr; u64 reg = 0; - reg |= ATTR_CFG_GET_FLD(attr, ts_enable) << SYS_PMSCR_EL1_TS_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, pa_enable) << SYS_PMSCR_EL1_PA_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, pct_enable) << SYS_PMSCR_EL1_PCT_SHIFT; + reg |= ATTR_CFG_GET_FLD(attr, ts_enable) << PMSCR_EL1_TS_SHIFT; + reg |= ATTR_CFG_GET_FLD(attr, pa_enable) << PMSCR_EL1_PA_SHIFT; + reg |= ATTR_CFG_GET_FLD(attr, pct_enable) << PMSCR_EL1_PCT_SHIFT; if (!attr->exclude_user) - reg |= BIT(SYS_PMSCR_EL1_E0SPE_SHIFT); + reg |= BIT(PMSCR_EL1_E0SPE_SHIFT); if (!attr->exclude_kernel) - reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT); + reg |= BIT(PMSCR_EL1_E1SPE_SHIFT); if (get_spe_event_has_cx(event)) - reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT); + reg |= 
BIT(PMSCR_EL1_CX_SHIFT); return reg; } @@ -302,8 +303,7 @@ static void arm_spe_event_sanitise_period(struct perf_event *event) { struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu); u64 period = event->hw.sample_period; - u64 max_period = SYS_PMSIRR_EL1_INTERVAL_MASK - << SYS_PMSIRR_EL1_INTERVAL_SHIFT; + u64 max_period = PMSIRR_EL1_INTERVAL_MASK; if (period < spe_pmu->min_period) period = spe_pmu->min_period; @@ -322,7 +322,7 @@ static u64 arm_spe_event_to_pmsirr(struct perf_event *event) arm_spe_event_sanitise_period(event); - reg |= ATTR_CFG_GET_FLD(attr, jitter) << SYS_PMSIRR_EL1_RND_SHIFT; + reg |= ATTR_CFG_GET_FLD(attr, jitter) << PMSIRR_EL1_RND_SHIFT; reg |= event->hw.sample_period; return reg; @@ -333,18 +333,18 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event) struct perf_event_attr *attr = &event->attr; u64 reg = 0; - reg |= ATTR_CFG_GET_FLD(attr, load_filter) << SYS_PMSFCR_EL1_LD_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, store_filter) << SYS_PMSFCR_EL1_ST_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, branch_filter) << SYS_PMSFCR_EL1_B_SHIFT; + reg |= ATTR_CFG_GET_FLD(attr, load_filter) << PMSFCR_EL1_LD_SHIFT; + reg |= ATTR_CFG_GET_FLD(attr, store_filter) << PMSFCR_EL1_ST_SHIFT; + reg |= ATTR_CFG_GET_FLD(attr, branch_filter) << PMSFCR_EL1_B_SHIFT; if (reg) - reg |= BIT(SYS_PMSFCR_EL1_FT_SHIFT); + reg |= BIT(PMSFCR_EL1_FT_SHIFT); if (ATTR_CFG_GET_FLD(attr, event_filter)) - reg |= BIT(SYS_PMSFCR_EL1_FE_SHIFT); + reg |= BIT(PMSFCR_EL1_FE_SHIFT); if (ATTR_CFG_GET_FLD(attr, min_latency)) - reg |= BIT(SYS_PMSFCR_EL1_FL_SHIFT); + reg |= BIT(PMSFCR_EL1_FL_SHIFT); return reg; } @@ -359,7 +359,7 @@ static u64 arm_spe_event_to_pmslatfr(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; return ATTR_CFG_GET_FLD(attr, min_latency) - << SYS_PMSLATFR_EL1_MINLAT_SHIFT; + << PMSLATFR_EL1_MINLAT_SHIFT; } static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len) @@ -511,7 +511,7 @@ static void arm_spe_perf_aux_output_begin(struct 
perf_output_handle *handle, limit = buf->snapshot ? arm_spe_pmu_next_snapshot_off(handle) : arm_spe_pmu_next_off(handle); if (limit) - limit |= BIT(SYS_PMBLIMITR_EL1_E_SHIFT); + limit |= BIT(PMBLIMITR_EL1_E_SHIFT); limit += (u64)buf->base; base = (u64)buf->base + PERF_IDX2OFF(handle->head, buf); @@ -570,28 +570,28 @@ arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle) /* Service required? */ pmbsr = read_sysreg_s(SYS_PMBSR_EL1); - if (!(pmbsr & BIT(SYS_PMBSR_EL1_S_SHIFT))) + if (!(pmbsr & BIT(PMBSR_EL1_S_SHIFT))) return SPE_PMU_BUF_FAULT_ACT_SPURIOUS; /* * If we've lost data, disable profiling and also set the PARTIAL * flag to indicate that the last record is corrupted. */ - if (pmbsr & BIT(SYS_PMBSR_EL1_DL_SHIFT)) + if (pmbsr & BIT(PMBSR_EL1_DL_SHIFT)) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED | PERF_AUX_FLAG_PARTIAL); /* Report collisions to userspace so that it can up the period */ - if (pmbsr & BIT(SYS_PMBSR_EL1_COLL_SHIFT)) + if (pmbsr & BIT(PMBSR_EL1_COLL_SHIFT)) perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION); /* We only expect buffer management events */ - switch (pmbsr & (SYS_PMBSR_EL1_EC_MASK << SYS_PMBSR_EL1_EC_SHIFT)) { - case SYS_PMBSR_EL1_EC_BUF: + switch (FIELD_GET(PMBSR_EL1_EC_MASK, pmbsr)) { + case PMBSR_EL1_EC_BUF: /* Handled below */ break; - case SYS_PMBSR_EL1_EC_FAULT_S1: - case SYS_PMBSR_EL1_EC_FAULT_S2: + case PMBSR_EL1_EC_FAULT_S1: + case PMBSR_EL1_EC_FAULT_S2: err_str = "Unexpected buffer fault"; goto out_err; default: @@ -600,9 +600,8 @@ arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle) } /* Buffer management event */ - switch (pmbsr & - (SYS_PMBSR_EL1_BUF_BSC_MASK << SYS_PMBSR_EL1_BUF_BSC_SHIFT)) { - case SYS_PMBSR_EL1_BUF_BSC_FULL: + switch (FIELD_GET(PMBSR_EL1_BUF_BSC_MASK, pmbsr)) { + case PMBSR_EL1_BUF_BSC_FULL: ret = SPE_PMU_BUF_FAULT_ACT_OK; goto out_stop; default: @@ -717,23 +716,23 @@ static int arm_spe_pmu_event_init(struct perf_event *event) return -EINVAL; reg = 
arm_spe_event_to_pmsfcr(event); - if ((reg & BIT(SYS_PMSFCR_EL1_FE_SHIFT)) && + if ((reg & BIT(PMSFCR_EL1_FE_SHIFT)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT)) return -EOPNOTSUPP; - if ((reg & BIT(SYS_PMSFCR_EL1_FT_SHIFT)) && + if ((reg & BIT(PMSFCR_EL1_FT_SHIFT)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP)) return -EOPNOTSUPP; - if ((reg & BIT(SYS_PMSFCR_EL1_FL_SHIFT)) && + if ((reg & BIT(PMSFCR_EL1_FL_SHIFT)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT)) return -EOPNOTSUPP; set_spe_event_has_cx(event); reg = arm_spe_event_to_pmscr(event); if (!perfmon_capable() && - (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) | - BIT(SYS_PMSCR_EL1_PCT_SHIFT)))) + (reg & (BIT(PMSCR_EL1_PA_SHIFT) | + BIT(PMSCR_EL1_PCT_SHIFT)))) return -EACCES; return 0; @@ -971,14 +970,14 @@ static void __arm_spe_pmu_dev_probe(void *info) /* Read PMBIDR first to determine whether or not we have access */ reg = read_sysreg_s(SYS_PMBIDR_EL1); - if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) { + if (reg & BIT(PMBIDR_EL1_P_SHIFT)) { dev_err(dev, "profiling buffer owned by higher exception level\n"); return; } /* Minimum alignment. 
If it's out-of-range, then fail the probe */ - fld = reg >> SYS_PMBIDR_EL1_ALIGN_SHIFT & SYS_PMBIDR_EL1_ALIGN_MASK; + fld = (reg & PMBIDR_EL1_ALIGN_MASK) >> PMBIDR_EL1_ALIGN_SHIFT; spe_pmu->align = 1 << fld; if (spe_pmu->align > SZ_2K) { dev_err(dev, "unsupported PMBIDR.Align [%d] on CPU %d\n", @@ -988,26 +987,26 @@ static void __arm_spe_pmu_dev_probe(void *info) /* It's now safe to read PMSIDR and figure out what we've got */ reg = read_sysreg_s(SYS_PMSIDR_EL1); - if (reg & BIT(SYS_PMSIDR_EL1_FE_SHIFT)) + if (reg & BIT(PMSIDR_EL1_FE_SHIFT)) spe_pmu->features |= SPE_PMU_FEAT_FILT_EVT; - if (reg & BIT(SYS_PMSIDR_EL1_FT_SHIFT)) + if (reg & BIT(PMSIDR_EL1_FT_SHIFT)) spe_pmu->features |= SPE_PMU_FEAT_FILT_TYP; - if (reg & BIT(SYS_PMSIDR_EL1_FL_SHIFT)) + if (reg & BIT(PMSIDR_EL1_FL_SHIFT)) spe_pmu->features |= SPE_PMU_FEAT_FILT_LAT; - if (reg & BIT(SYS_PMSIDR_EL1_ARCHINST_SHIFT)) + if (reg & BIT(PMSIDR_EL1_ARCHINST_SHIFT)) spe_pmu->features |= SPE_PMU_FEAT_ARCH_INST; - if (reg & BIT(SYS_PMSIDR_EL1_LDS_SHIFT)) + if (reg & BIT(PMSIDR_EL1_LDS_SHIFT)) spe_pmu->features |= SPE_PMU_FEAT_LDS; - if (reg & BIT(SYS_PMSIDR_EL1_ERND_SHIFT)) + if (reg & BIT(PMSIDR_EL1_ERND_SHIFT)) spe_pmu->features |= SPE_PMU_FEAT_ERND; /* This field has a spaced out encoding, so just use a look-up */ - fld = reg >> SYS_PMSIDR_EL1_INTERVAL_SHIFT & SYS_PMSIDR_EL1_INTERVAL_MASK; + fld = (reg & PMSIDR_EL1_INTERVAL_MASK) >> PMSIDR_EL1_INTERVAL_SHIFT; switch (fld) { case 0: spe_pmu->min_period = 256; @@ -1039,7 +1038,7 @@ static void __arm_spe_pmu_dev_probe(void *info) } /* Maximum record size. 
If it's out-of-range, then fail the probe */ - fld = reg >> SYS_PMSIDR_EL1_MAXSIZE_SHIFT & SYS_PMSIDR_EL1_MAXSIZE_MASK; + fld = (reg & PMSIDR_EL1_MAXSIZE_MASK) >> PMSIDR_EL1_MAXSIZE_SHIFT; spe_pmu->max_record_sz = 1 << fld; if (spe_pmu->max_record_sz > SZ_2K || spe_pmu->max_record_sz < 16) { dev_err(dev, "unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n", @@ -1047,7 +1046,7 @@ static void __arm_spe_pmu_dev_probe(void *info) return; } - fld = reg >> SYS_PMSIDR_EL1_COUNTSIZE_SHIFT & SYS_PMSIDR_EL1_COUNTSIZE_MASK; + fld = (reg & PMSIDR_EL1_COUNTSIZE_MASK) >> PMSIDR_EL1_COUNTSIZE_SHIFT; switch (fld) { default: dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n", -- cgit v1.2.3 From 2d347ac23362f6cfc5e04a4b998f51e1e7e909a8 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Jan 2023 13:26:20 -0600 Subject: perf: arm_spe: Drop BIT() and use FIELD_GET/PREP accessors Now that generated sysregs are in place, update the register field accesses. The use of BIT() is no longer needed with the new defines. Use FIELD_GET and FIELD_PREP instead of open coding masking and shifting. No functional change. 
Tested-by: James Clark Signed-off-by: Rob Herring Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220825-arm-spe-v8-7-v4-4-327f860daf28@kernel.org Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 70 ++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 814ed18346b6..9b4bd72087ea 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -283,18 +283,18 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event) struct perf_event_attr *attr = &event->attr; u64 reg = 0; - reg |= ATTR_CFG_GET_FLD(attr, ts_enable) << PMSCR_EL1_TS_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, pa_enable) << PMSCR_EL1_PA_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, pct_enable) << PMSCR_EL1_PCT_SHIFT; + reg |= FIELD_PREP(PMSCR_EL1_TS, ATTR_CFG_GET_FLD(attr, ts_enable)); + reg |= FIELD_PREP(PMSCR_EL1_PA, ATTR_CFG_GET_FLD(attr, pa_enable)); + reg |= FIELD_PREP(PMSCR_EL1_PCT, ATTR_CFG_GET_FLD(attr, pct_enable)); if (!attr->exclude_user) - reg |= BIT(PMSCR_EL1_E0SPE_SHIFT); + reg |= PMSCR_EL1_E0SPE; if (!attr->exclude_kernel) - reg |= BIT(PMSCR_EL1_E1SPE_SHIFT); + reg |= PMSCR_EL1_E1SPE; if (get_spe_event_has_cx(event)) - reg |= BIT(PMSCR_EL1_CX_SHIFT); + reg |= PMSCR_EL1_CX; return reg; } @@ -322,7 +322,7 @@ static u64 arm_spe_event_to_pmsirr(struct perf_event *event) arm_spe_event_sanitise_period(event); - reg |= ATTR_CFG_GET_FLD(attr, jitter) << PMSIRR_EL1_RND_SHIFT; + reg |= FIELD_PREP(PMSIRR_EL1_RND, ATTR_CFG_GET_FLD(attr, jitter)); reg |= event->hw.sample_period; return reg; @@ -333,18 +333,18 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event) struct perf_event_attr *attr = &event->attr; u64 reg = 0; - reg |= ATTR_CFG_GET_FLD(attr, load_filter) << PMSFCR_EL1_LD_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, store_filter) << PMSFCR_EL1_ST_SHIFT; - reg |= ATTR_CFG_GET_FLD(attr, branch_filter) << 
PMSFCR_EL1_B_SHIFT; + reg |= FIELD_PREP(PMSFCR_EL1_LD, ATTR_CFG_GET_FLD(attr, load_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_ST, ATTR_CFG_GET_FLD(attr, store_filter)); + reg |= FIELD_PREP(PMSFCR_EL1_B, ATTR_CFG_GET_FLD(attr, branch_filter)); if (reg) - reg |= BIT(PMSFCR_EL1_FT_SHIFT); + reg |= PMSFCR_EL1_FT; if (ATTR_CFG_GET_FLD(attr, event_filter)) - reg |= BIT(PMSFCR_EL1_FE_SHIFT); + reg |= PMSFCR_EL1_FE; if (ATTR_CFG_GET_FLD(attr, min_latency)) - reg |= BIT(PMSFCR_EL1_FL_SHIFT); + reg |= PMSFCR_EL1_FL; return reg; } @@ -358,8 +358,7 @@ static u64 arm_spe_event_to_pmsevfr(struct perf_event *event) static u64 arm_spe_event_to_pmslatfr(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - return ATTR_CFG_GET_FLD(attr, min_latency) - << PMSLATFR_EL1_MINLAT_SHIFT; + return FIELD_PREP(PMSLATFR_EL1_MINLAT, ATTR_CFG_GET_FLD(attr, min_latency)); } static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len) @@ -511,7 +510,7 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle, limit = buf->snapshot ? arm_spe_pmu_next_snapshot_off(handle) : arm_spe_pmu_next_off(handle); if (limit) - limit |= BIT(PMBLIMITR_EL1_E_SHIFT); + limit |= PMBLIMITR_EL1_E; limit += (u64)buf->base; base = (u64)buf->base + PERF_IDX2OFF(handle->head, buf); @@ -570,23 +569,23 @@ arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle) /* Service required? */ pmbsr = read_sysreg_s(SYS_PMBSR_EL1); - if (!(pmbsr & BIT(PMBSR_EL1_S_SHIFT))) + if (!FIELD_GET(PMBSR_EL1_S, pmbsr)) return SPE_PMU_BUF_FAULT_ACT_SPURIOUS; /* * If we've lost data, disable profiling and also set the PARTIAL * flag to indicate that the last record is corrupted. 
*/ - if (pmbsr & BIT(PMBSR_EL1_DL_SHIFT)) + if (FIELD_GET(PMBSR_EL1_DL, pmbsr)) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED | PERF_AUX_FLAG_PARTIAL); /* Report collisions to userspace so that it can up the period */ - if (pmbsr & BIT(PMBSR_EL1_COLL_SHIFT)) + if (FIELD_GET(PMBSR_EL1_COLL, pmbsr)) perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION); /* We only expect buffer management events */ - switch (FIELD_GET(PMBSR_EL1_EC_MASK, pmbsr)) { + switch (FIELD_GET(PMBSR_EL1_EC, pmbsr)) { case PMBSR_EL1_EC_BUF: /* Handled below */ break; @@ -716,23 +715,22 @@ static int arm_spe_pmu_event_init(struct perf_event *event) return -EINVAL; reg = arm_spe_event_to_pmsfcr(event); - if ((reg & BIT(PMSFCR_EL1_FE_SHIFT)) && + if ((FIELD_GET(PMSFCR_EL1_FE, reg)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT)) return -EOPNOTSUPP; - if ((reg & BIT(PMSFCR_EL1_FT_SHIFT)) && + if ((FIELD_GET(PMSFCR_EL1_FT, reg)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP)) return -EOPNOTSUPP; - if ((reg & BIT(PMSFCR_EL1_FL_SHIFT)) && + if ((FIELD_GET(PMSFCR_EL1_FL, reg)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT)) return -EOPNOTSUPP; set_spe_event_has_cx(event); reg = arm_spe_event_to_pmscr(event); if (!perfmon_capable() && - (reg & (BIT(PMSCR_EL1_PA_SHIFT) | - BIT(PMSCR_EL1_PCT_SHIFT)))) + (reg & (PMSCR_EL1_PA | PMSCR_EL1_PCT))) return -EACCES; return 0; @@ -970,14 +968,14 @@ static void __arm_spe_pmu_dev_probe(void *info) /* Read PMBIDR first to determine whether or not we have access */ reg = read_sysreg_s(SYS_PMBIDR_EL1); - if (reg & BIT(PMBIDR_EL1_P_SHIFT)) { + if (FIELD_GET(PMBIDR_EL1_P, reg)) { dev_err(dev, "profiling buffer owned by higher exception level\n"); return; } /* Minimum alignment. 
If it's out-of-range, then fail the probe */ - fld = (reg & PMBIDR_EL1_ALIGN_MASK) >> PMBIDR_EL1_ALIGN_SHIFT; + fld = FIELD_GET(PMBIDR_EL1_ALIGN, reg); spe_pmu->align = 1 << fld; if (spe_pmu->align > SZ_2K) { dev_err(dev, "unsupported PMBIDR.Align [%d] on CPU %d\n", @@ -987,26 +985,26 @@ static void __arm_spe_pmu_dev_probe(void *info) /* It's now safe to read PMSIDR and figure out what we've got */ reg = read_sysreg_s(SYS_PMSIDR_EL1); - if (reg & BIT(PMSIDR_EL1_FE_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_FE, reg)) spe_pmu->features |= SPE_PMU_FEAT_FILT_EVT; - if (reg & BIT(PMSIDR_EL1_FT_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_FT, reg)) spe_pmu->features |= SPE_PMU_FEAT_FILT_TYP; - if (reg & BIT(PMSIDR_EL1_FL_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_FL, reg)) spe_pmu->features |= SPE_PMU_FEAT_FILT_LAT; - if (reg & BIT(PMSIDR_EL1_ARCHINST_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_ARCHINST, reg)) spe_pmu->features |= SPE_PMU_FEAT_ARCH_INST; - if (reg & BIT(PMSIDR_EL1_LDS_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_LDS, reg)) spe_pmu->features |= SPE_PMU_FEAT_LDS; - if (reg & BIT(PMSIDR_EL1_ERND_SHIFT)) + if (FIELD_GET(PMSIDR_EL1_ERND, reg)) spe_pmu->features |= SPE_PMU_FEAT_ERND; /* This field has a spaced out encoding, so just use a look-up */ - fld = (reg & PMSIDR_EL1_INTERVAL_MASK) >> PMSIDR_EL1_INTERVAL_SHIFT; + fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg); switch (fld) { case 0: spe_pmu->min_period = 256; @@ -1038,7 +1036,7 @@ static void __arm_spe_pmu_dev_probe(void *info) } /* Maximum record size. 
If it's out-of-range, then fail the probe */ - fld = (reg & PMSIDR_EL1_MAXSIZE_MASK) >> PMSIDR_EL1_MAXSIZE_SHIFT; + fld = FIELD_GET(PMSIDR_EL1_MAXSIZE, reg); spe_pmu->max_record_sz = 1 << fld; if (spe_pmu->max_record_sz > SZ_2K || spe_pmu->max_record_sz < 16) { dev_err(dev, "unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n", @@ -1046,7 +1044,7 @@ static void __arm_spe_pmu_dev_probe(void *info) return; } - fld = (reg & PMSIDR_EL1_COUNTSIZE_MASK) >> PMSIDR_EL1_COUNTSIZE_SHIFT; + fld = FIELD_GET(PMSIDR_EL1_COUNTSIZE, reg); switch (fld) { default: dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n", -- cgit v1.2.3 From 05e4c88e2b5c9f77409577702d6d516682e1ce14 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Jan 2023 13:26:21 -0600 Subject: perf: arm_spe: Use new PMSIDR_EL1 register enums Now that the SPE register definitions include enums for some PMSIDR_EL1 fields, use them in the driver in place of magic values. Signed-off-by: Rob Herring Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220825-arm-spe-v8-7-v4-5-327f860daf28@kernel.org Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 9b4bd72087ea..af6d3867c3e7 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -1006,32 +1006,32 @@ static void __arm_spe_pmu_dev_probe(void *info) /* This field has a spaced out encoding, so just use a look-up */ fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg); switch (fld) { - case 0: + case PMSIDR_EL1_INTERVAL_256: spe_pmu->min_period = 256; break; - case 2: + case PMSIDR_EL1_INTERVAL_512: spe_pmu->min_period = 512; break; - case 3: + case PMSIDR_EL1_INTERVAL_768: spe_pmu->min_period = 768; break; - case 4: + case PMSIDR_EL1_INTERVAL_1024: spe_pmu->min_period = 1024; break; - case 5: + case PMSIDR_EL1_INTERVAL_1536: spe_pmu->min_period = 1536; break; - 
case 6: + case PMSIDR_EL1_INTERVAL_2048: spe_pmu->min_period = 2048; break; - case 7: + case PMSIDR_EL1_INTERVAL_3072: spe_pmu->min_period = 3072; break; default: dev_warn(dev, "unknown PMSIDR_EL1.Interval [%d]; assuming 8\n", fld); fallthrough; - case 8: + case PMSIDR_EL1_INTERVAL_4096: spe_pmu->min_period = 4096; } @@ -1050,10 +1050,10 @@ static void __arm_spe_pmu_dev_probe(void *info) dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n", fld); fallthrough; - case 2: + case PMSIDR_EL1_COUNTSIZE_12_BIT_SAT: spe_pmu->counter_sz = 12; break; - case 3: + case PMSIDR_EL1_COUNTSIZE_16_BIT_SAT: spe_pmu->counter_sz = 16; } -- cgit v1.2.3 From 4998897b1e96d624bf094e5785b27023e17ba570 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Jan 2023 13:26:22 -0600 Subject: perf: arm_spe: Support new SPEv1.2/v8.7 'not taken' event Arm SPEv1.2 (Armv8.7/v9.2) adds a new event, 'not taken', in bit 6 of the PMSEVFR_EL1 register. Update arm_spe_pmsevfr_res0() to support the additional event. 
Tested-by: James Clark Signed-off-by: Rob Herring Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220825-arm-spe-v8-7-v4-6-327f860daf28@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 2 ++ drivers/perf/arm_spe_pmu.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index db269eda7c1c..fc8787727792 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -221,6 +221,8 @@ BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0)) #define PMSEVFR_EL1_RES0_V1P1 \ (PMSEVFR_EL1_RES0_IMP & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11))) +#define PMSEVFR_EL1_RES0_V1P2 \ + (PMSEVFR_EL1_RES0_V1P1 & ~BIT_ULL(6)) /* Buffer error reporting */ #define PMBSR_EL1_FAULT_FSC_SHIFT PMBSR_EL1_MSS_SHIFT diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index af6d3867c3e7..82f67e941bc4 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -677,9 +677,11 @@ static u64 arm_spe_pmsevfr_res0(u16 pmsver) case ID_AA64DFR0_EL1_PMSVer_IMP: return PMSEVFR_EL1_RES0_IMP; case ID_AA64DFR0_EL1_PMSVer_V1P1: + return PMSEVFR_EL1_RES0_V1P1; + case ID_AA64DFR0_EL1_PMSVer_V1P2: /* Return the highest version we support in default */ default: - return PMSEVFR_EL1_RES0_V1P1; + return PMSEVFR_EL1_RES0_V1P2; } } -- cgit v1.2.3 From 8f9e0a52810dd83406c768972d022c37e7a18f1f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Jan 2023 13:45:58 +0000 Subject: ACPI: Don't build ACPICA with '-Os' The ACPICA code has been built with '-Os' since the beginning of git history, though there's no explanatory comment as to why. 
This is unfortunate as GCC drops the alignment specified by '-falign-functions=N' when '-Os' is used, as reported in GCC bug 88345: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345 This prevents CONFIG_FUNCTION_ALIGNMENT and CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B from having their expected effect on the ACPICA code. This is doubly unfortunate as in subsequent patches arm64 will depend upon CONFIG_FUNCTION_ALIGNMENT for its ftrace implementation. Drop the '-Os' flag when building the ACPICA code. With this removed, the code builds cleanly and works correctly in testing so far. I've tested this by selecting CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B=y, building and booting a kernel using ACPI, and looking for misaligned text symbols: * arm64: Before, v6.2-rc3: # uname -rm 6.2.0-rc3 aarch64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 5009 Before, v6.2-rc3 + fixed __cold: # uname -rm 6.2.0-rc3-00001-g2a2bedf8bfa9 aarch64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 919 After: # uname -rm 6.2.0-rc3-00002-g267bddc38572 aarch64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 323 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | grep acpi | wc -l 0 * x86_64: Before, v6.2-rc3: # uname -rm 6.2.0-rc3 x86_64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 11537 Before, v6.2-rc3 + fixed __cold: # uname -rm 6.2.0-rc3-00001-g2a2bedf8bfa9 x86_64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 2805 After: # uname -rm 6.2.0-rc3-00002-g267bddc38572 x86_64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 1357 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | grep acpi | wc -l 0 With the patch applied, the remaining unaligned text labels are a combination of static call trampolines and labels in assembly, which can be dealt with in subsequent patches. Signed-off-by: Mark Rutland Acked-by: Rafael J.
Wysocki Cc: Florent Revest Cc: Len Brown Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Robert Moore Cc: Steven Rostedt Cc: Will Deacon Cc: linux-acpi@vger.kernel.org Link: https://lore.kernel.org/r/20230123134603.1064407-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- drivers/acpi/acpica/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile index 9e0d95d76fff..30f3fc13c29d 100644 --- a/drivers/acpi/acpica/Makefile +++ b/drivers/acpi/acpica/Makefile @@ -3,7 +3,7 @@ # Makefile for ACPICA Core interpreter # -ccflags-y := -Os -D_LINUX -DBUILDING_ACPICA +ccflags-y := -D_LINUX -DBUILDING_ACPICA ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT # use acpi.o to put all files here into acpi.o modparam namespace -- cgit v1.2.3 From 61786170383093908e9f5f8fd8c5c3ff0c3bbe03 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jan 2023 11:22:36 +0100 Subject: efi: arm64: enter with MMU and caches enabled Instead of cleaning the entire loaded kernel image to the PoC and disabling the MMU and caches before branching to the kernel's bare metal entry point, we can leave the MMU and caches enabled, and rely on EFI's cacheable 1:1 mapping of all of system RAM (which is mandated by the spec) to populate the initial page tables. This removes the need for managing coherency in software, which is tedious and error prone. Note that we still need to clean the executable region of the image to the PoU if this is required for I/D coherency, but only if we actually decided to move the image in memory, as otherwise, this will have been taken care of by the loader. This change affects both the builtin EFI stub as well as the zboot decompressor, which now carries the entire EFI stub along with the decompression code and the compressed image. 
Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20230111102236.1430401-7-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 2 + arch/arm64/kernel/image-vars.h | 5 ++- arch/arm64/mm/cache.S | 1 + drivers/firmware/efi/libstub/Makefile | 4 +- drivers/firmware/efi/libstub/arm64-entry.S | 67 ------------------------------ drivers/firmware/efi/libstub/arm64-stub.c | 26 ++++++++---- drivers/firmware/efi/libstub/arm64.c | 41 +++++++++++++++--- 7 files changed, 61 insertions(+), 85 deletions(-) delete mode 100644 drivers/firmware/efi/libstub/arm64-entry.S (limited to 'drivers') diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 31d13a6001df..0f0e729b40ef 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -105,6 +105,8 @@ static inline unsigned long efi_get_kimg_min_align(void) #define EFI_ALLOC_ALIGN SZ_64K #define EFI_ALLOC_LIMIT ((1UL << 48) - 1) +extern unsigned long primary_entry_offset(void); + /* * On ARM systems, virtually remapped UEFI runtime services are set up in two * distinct stages: diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index d0e9bb5c91fc..73388b21d07d 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -10,7 +10,7 @@ #error This file should only be included in vmlinux.lds.S #endif -PROVIDE(__efistub_primary_entry_offset = primary_entry - _text); +PROVIDE(__efistub_primary_entry = primary_entry); /* * The EFI stub has its own symbol namespace prefixed by __efistub_, to @@ -21,10 +21,11 @@ PROVIDE(__efistub_primary_entry_offset = primary_entry - _text); * linked at. 
The routines below are all implemented in assembler in a * position independent manner */ -PROVIDE(__efistub_dcache_clean_poc = __pi_dcache_clean_poc); +PROVIDE(__efistub_caches_clean_inval_pou = __pi_caches_clean_inval_pou); PROVIDE(__efistub__text = _text); PROVIDE(__efistub__end = _end); +PROVIDE(__efistub___inittext_end = __inittext_end); PROVIDE(__efistub__edata = _edata); PROVIDE(__efistub_screen_info = screen_info); PROVIDE(__efistub__ctype = _ctype); diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 081058d4e436..503567c864fd 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -56,6 +56,7 @@ SYM_FUNC_START(caches_clean_inval_pou) caches_clean_inval_pou_macro ret SYM_FUNC_END(caches_clean_inval_pou) +SYM_FUNC_ALIAS(__pi_caches_clean_inval_pou, caches_clean_inval_pou) /* * caches_clean_inval_user_pou(start,end) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index be8b8c6e8b40..80d85a5169fb 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -87,7 +87,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o \ screen_info.o efi-stub-entry.o lib-$(CONFIG_ARM) += arm32-stub.o -lib-$(CONFIG_ARM64) += arm64.o arm64-stub.o arm64-entry.o smbios.o +lib-$(CONFIG_ARM64) += arm64.o arm64-stub.o smbios.o lib-$(CONFIG_X86) += x86-stub.o lib-$(CONFIG_RISCV) += riscv.o riscv-stub.o lib-$(CONFIG_LOONGARCH) += loongarch.o loongarch-stub.o @@ -141,7 +141,7 @@ STUBCOPY_RELOC-$(CONFIG_ARM) := R_ARM_ABS # STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \ --prefix-symbols=__efistub_ -STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS64 +STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS # For RISC-V, we don't need anything special other than arm64. 
Keep all the # symbols in .init section and make sure that no absolute symbols references diff --git a/drivers/firmware/efi/libstub/arm64-entry.S b/drivers/firmware/efi/libstub/arm64-entry.S deleted file mode 100644 index b5c17e89a4fc..000000000000 --- a/drivers/firmware/efi/libstub/arm64-entry.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * EFI entry point. - * - * Copyright (C) 2013, 2014 Red Hat, Inc. - * Author: Mark Salter - */ -#include -#include - - /* - * The entrypoint of a arm64 bare metal image is at offset #0 of the - * image, so this is a reasonable default for primary_entry_offset. - * Only when the EFI stub is integrated into the core kernel, it is not - * guaranteed that the PE/COFF header has been copied to memory too, so - * in this case, primary_entry_offset should be overridden by the - * linker and point to primary_entry() directly. - */ - .weak primary_entry_offset - -SYM_CODE_START(efi_enter_kernel) - /* - * efi_pe_entry() will have copied the kernel image if necessary and we - * end up here with device tree address in x1 and the kernel entry - * point stored in x0. Save those values in registers which are - * callee preserved. - */ - ldr w2, =primary_entry_offset - add x19, x0, x2 // relocated Image entrypoint - - mov x0, x1 // DTB address - mov x1, xzr - mov x2, xzr - mov x3, xzr - - /* - * Clean the remainder of this routine to the PoC - * so that we can safely disable the MMU and caches. 
- */ - adr x4, 1f - dc civac, x4 - dsb sy - - /* Turn off Dcache and MMU */ - mrs x4, CurrentEL - cmp x4, #CurrentEL_EL2 - mrs x4, sctlr_el1 - b.ne 0f - mrs x4, sctlr_el2 -0: bic x4, x4, #SCTLR_ELx_M - bic x4, x4, #SCTLR_ELx_C - b.eq 1f - b 2f - - .balign 32 -1: pre_disable_mmu_workaround - msr sctlr_el2, x4 - isb - br x19 // jump to kernel entrypoint - -2: pre_disable_mmu_workaround - msr sctlr_el1, x4 - isb - br x19 // jump to kernel entrypoint - - .org 1b + 32 -SYM_CODE_END(efi_enter_kernel) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 7327b98d8e3f..d4a6b12a8741 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -58,7 +58,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, efi_handle_t image_handle) { efi_status_t status; - unsigned long kernel_size, kernel_memsize = 0; + unsigned long kernel_size, kernel_codesize, kernel_memsize; u32 phys_seed = 0; u64 min_kimg_align = efi_get_kimg_min_align(); @@ -93,6 +93,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, SEGMENT_ALIGN >> 10); kernel_size = _edata - _text; + kernel_codesize = __inittext_end - _text; kernel_memsize = kernel_size + (_end - _edata); *reserve_size = kernel_memsize; @@ -121,7 +122,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, */ *image_addr = (u64)_text; *reserve_size = 0; - goto clean_image_to_poc; + return EFI_SUCCESS; } status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, @@ -137,14 +138,21 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, *image_addr = *reserve_addr; memcpy((void *)*image_addr, _text, kernel_size); + caches_clean_inval_pou(*image_addr, *image_addr + kernel_codesize); -clean_image_to_poc: + return EFI_SUCCESS; +} + +asmlinkage void primary_entry(void); + +unsigned long primary_entry_offset(void) +{ /* - * Clean the copied Image to the PoC, and ensure it is not shadowed by - * stale icache 
entries from before relocation. + * When built as part of the kernel, the EFI stub cannot branch to the + * kernel proper via the image header, as the PE/COFF header is + * strictly not part of the in-memory presentation of the image, only + * of the file representation. So instead, we need to jump to the + * actual entrypoint in the .text region of the image. */ - dcache_clean_poc(*image_addr, *image_addr + kernel_size); - asm("ic ialluis"); - - return EFI_SUCCESS; + return (char *)primary_entry - _text; } diff --git a/drivers/firmware/efi/libstub/arm64.c b/drivers/firmware/efi/libstub/arm64.c index ff2d18c42ee7..f5da4fbccd86 100644 --- a/drivers/firmware/efi/libstub/arm64.c +++ b/drivers/firmware/efi/libstub/arm64.c @@ -56,6 +56,12 @@ efi_status_t check_platform_features(void) return EFI_SUCCESS; } +#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE +#define DCTYPE "civac" +#else +#define DCTYPE "cvau" +#endif + void efi_cache_sync_image(unsigned long image_base, unsigned long alloc_size, unsigned long code_size) @@ -64,13 +70,38 @@ void efi_cache_sync_image(unsigned long image_base, u64 lsize = 4 << cpuid_feature_extract_unsigned_field(ctr, CTR_EL0_DminLine_SHIFT); - do { - asm("dc civac, %0" :: "r"(image_base)); - image_base += lsize; - alloc_size -= lsize; - } while (alloc_size >= lsize); + /* only perform the cache maintenance if needed for I/D coherency */ + if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) { + do { + asm("dc " DCTYPE ", %0" :: "r"(image_base)); + image_base += lsize; + code_size -= lsize; + } while (code_size >= lsize); + } asm("ic ialluis"); dsb(ish); isb(); } + +unsigned long __weak primary_entry_offset(void) +{ + /* + * By default, we can invoke the kernel via the branch instruction in + * the image header, so offset #0. This will be overridden by the EFI + * stub build that is linked into the core kernel, as in that case, the + * image header may not have been loaded into memory, or may be mapped + * with non-executable permissions. 
+ */ + return 0; +} + +void __noreturn efi_enter_kernel(unsigned long entrypoint, + unsigned long fdt_addr, + unsigned long fdt_size) +{ + void (* __noreturn enter_kernel)(u64, u64, u64, u64); + + enter_kernel = (void *)entrypoint + primary_entry_offset(); + enter_kernel(fdt_addr, 0, 0, 0); +} -- cgit v1.2.3 From 0e62ccb9598dae492588bef0453a059bb2bbbabe Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Jan 2023 14:54:25 +0000 Subject: arm64: rename ARM64_HAS_SYSREG_GIC_CPUIF to ARM64_HAS_GIC_CPUIF_SYSREGS Subsequent patches will add more GIC-related cpucaps. When we do so, it would be nice to give them a consistent HAS_GIC_* prefix. In preparation for doing so, this patch renames the existing ARM64_HAS_SYSREG_GIC_CPUIF cap to ARM64_HAS_GIC_CPUIF_SYSREGS. The 'CPUIF_SYSREGS' suffix is chosen so that this will be ordered ahead of other ARM64_HAS_GIC_* definitions in subsequent patches. The cpucaps file was hand-modified; all other changes were scripted with: find . -type f -name '*.[chS]' -print0 | \ xargs -0 sed -i 's/ARM64_HAS_SYSREG_GIC_CPUIF/ARM64_HAS_GIC_CPUIF_SYSREGS/' There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Reviewed-by: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20230130145429.903791-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/tools/cpucaps | 2 +- drivers/irqchip/irq-gic.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a77315b338e6..ad2a1f5503f3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2142,7 +2142,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { }, { .desc = "GIC system register CPU interface", - .capability = ARM64_HAS_SYSREG_GIC_CPUIF, + .capability = ARM64_HAS_GIC_CPUIF_SYSREGS, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index a86ee376920a..373eb148498e 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -28,6 +28,7 @@ HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH_QARMA3 HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF +HAS_GIC_CPUIF_SYSREGS HAS_IRQ_PRIO_MASKING HAS_LDAPR HAS_LSE_ATOMICS @@ -38,7 +39,6 @@ HAS_RAS_EXTN HAS_RNG HAS_SB HAS_STAGE2_FWB -HAS_SYSREG_GIC_CPUIF HAS_TIDCP1 HAS_TLB_RANGE HAS_VIRT_HOST_EXTN diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 210bc2f4d555..6ae697a3800d 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -54,7 +54,7 @@ static void gic_check_cpu_features(void) { - WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_SYSREG_GIC_CPUIF), + WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_GIC_CPUIF_SYSREGS), TAINT_CPU_OUT_OF_SPEC, "GICv3 system registers enabled, broken firmware!\n"); } -- cgit v1.2.3 From 8bf0a8048b155eebc05aa8896f4c378a4c538214 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Jan 2023 14:54:28 +0000 Subject: arm64: add 
ARM64_HAS_GIC_PRIO_RELAXED_SYNC cpucap When Priority Mask Hint Enable (PMHE) == 0b1, the GIC may use the PMR value to determine whether to signal an IRQ to a PE, and consequently after a change to the PMR value, a DSB SY may be required to ensure that interrupts are signalled to a CPU in finite time. When PMHE == 0b0, interrupts are always signalled to the relevant PE, and all masking occurs locally, without requiring a DSB SY. Since commit: f226650494c6aa87 ("arm64: Relax ICC_PMR_EL1 accesses when ICC_CTLR_EL1.PMHE is clear") ... we handle this dynamically: in most cases a static key is used to determine whether to issue a DSB SY, but the entry code must read from ICC_CTLR_EL1 as static keys aren't accessible from plain assembly. It would be much nicer to use an alternative instruction sequence for the DSB, as this would avoid the need to read from ICC_CTLR_EL1 in the entry code, and for most other code this will result in simpler code generation with fewer instructions and fewer branches. This patch adds a new ARM64_HAS_GIC_PRIO_RELAXED_SYNC cpucap which is only set when ICC_CTLR_EL1.PMHE == 0b0 (and GIC priority masking is in use). This allows us to replace the existing users of the `gic_pmr_sync` static key with alternative sequences which default to a DSB SY and are relaxed to a NOP when PMHE is not in use. The entry assembly management of the PMR is slightly restructured to use a branch (rather than multiple NOPs) when priority masking is not in use. This is more in keeping with other alternatives in the entry assembly, and permits the use of a separate alternative for the PMHE-dependent DSB SY (and removal of the conditional branch this currently requires). For consistency I've adjusted both the save and restore paths.
According to bloat-o-meter, when building defconfig + CONFIG_ARM64_PSEUDO_NMI=y this shrinks the kernel text by ~4KiB: | add/remove: 4/2 grow/shrink: 42/310 up/down: 332/-5032 (-4700) The resulting vmlinux is ~66KiB smaller, though the resulting Image size is unchanged due to padding and alignment: | [mark@lakrids:~/src/linux]% ls -al vmlinux-* | -rwxr-xr-x 1 mark mark 137508344 Jan 17 14:11 vmlinux-after | -rwxr-xr-x 1 mark mark 137575440 Jan 17 13:49 vmlinux-before | [mark@lakrids:~/src/linux]% ls -al Image-* | -rw-r--r-- 1 mark mark 38777344 Jan 17 14:11 Image-after | -rw-r--r-- 1 mark mark 38777344 Jan 17 13:49 Image-before Prior to this patch we did not verify the state of ICC_CTLR_EL1.PMHE on secondary CPUs. As of this patch this is verified by the cpufeature code when using GIC priority masking (i.e. when using pseudo-NMIs). Note that since commit: 7e3a57fa6ca831fa ("arm64: Document ICC_CTLR_EL3.PMHE setting requirements") ... Documentation/arm64/booting.rst specifies: | - ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across | all CPUs the kernel is executing on, and must stay constant | for the lifetime of the kernel. ... so that should not adversely affect any compliant systems, and as we'll only check for the absence of PMHE when using pseudo-NMIs, this will only fire when such mismatch will adversely affect the system.
Signed-off-by: Mark Rutland Reviewed-by: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20230130145429.903791-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm/include/asm/arch_gicv3.h | 5 +++++ arch/arm64/include/asm/arch_gicv3.h | 5 +++++ arch/arm64/include/asm/barrier.h | 11 +++++++---- arch/arm64/kernel/cpufeature.c | 36 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 25 ++++++++++++++++--------- arch/arm64/kernel/image-vars.h | 2 -- arch/arm64/tools/cpucaps | 1 + drivers/irqchip/irq-gic-v3.c | 19 +------------------ 8 files changed, 71 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index f82a819eb0db..311e83038bdb 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -252,5 +252,10 @@ static inline void gic_arch_enable_irqs(void) WARN_ON_ONCE(true); } +static inline bool gic_has_relaxed_pmr_sync(void) +{ + return false; +} + #endif /* !__ASSEMBLY__ */ #endif /* !__ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 48d4473e8eee..01281a5336cf 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -190,5 +190,10 @@ static inline void gic_arch_enable_irqs(void) asm volatile ("msr daifclr, #3" : : : "memory"); } +static inline bool gic_has_relaxed_pmr_sync(void) +{ + return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC); +} + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 2cfc4245d2e2..3dd8982a9ce3 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -11,6 +11,8 @@ #include +#include + #define __nops(n) ".rept " #n "\nnop\n.endr\n" #define nops(n) asm volatile(__nops(n)) @@ -41,10 +43,11 @@ #ifdef CONFIG_ARM64_PSEUDO_NMI #define pmr_sync() \ do { \ - 
extern struct static_key_false gic_pmr_sync; \ - \ - if (static_branch_unlikely(&gic_pmr_sync)) \ - dsb(sy); \ + asm volatile( \ + ALTERNATIVE_CB("dsb sy", \ + ARM64_HAS_GIC_PRIO_RELAXED_SYNC, \ + alt_cb_patch_nops) \ + ); \ } while(0) #else #define pmr_sync() do {} while (0) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 515975f42d03..bdabfc98226a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2056,6 +2056,34 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, return enable_pseudo_nmi; } + +static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry, + int scope) +{ + /* + * If we're not using priority masking then we won't be poking PMR_EL1, + * and there's no need to relax synchronization of writes to it, and + * ICC_CTLR_EL1 might not be accessible and we must avoid reads from + * that. + * + * ARM64_HAS_GIC_PRIO_MASKING has a lower index, and is a boot CPU + * feature, so will be detected earlier. + */ + BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_RELAXED_SYNC <= ARM64_HAS_GIC_PRIO_MASKING); + if (!cpus_have_cap(ARM64_HAS_GIC_PRIO_MASKING)) + return false; + + /* + * When Priority Mask Hint Enable (PMHE) == 0b0, PMR is not used as a + * hint for interrupt distribution, a DSB is not necessary when + * unmasking IRQs via PMR, and we can relax the barrier to a NOP. + * + * Linux itself doesn't use 1:N distribution, so has no need to + * set PMHE. The only reason to have it set is if EL3 requires it + * (and we can't change it). 
+ */ + return (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) == 0; +} #endif #ifdef CONFIG_ARM64_BTI @@ -2546,6 +2574,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = can_use_gic_priorities, }, + { + /* + * Depends on ARM64_HAS_GIC_PRIO_MASKING + */ + .capability = ARM64_HAS_GIC_PRIO_RELAXED_SYNC, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = has_gic_prio_relaxed_sync, + }, #endif #ifdef CONFIG_ARM64_E0PD { diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e2d1d3d5de1d..8427cdc0cfcb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -311,13 +311,16 @@ alternative_else_nop_endif .endif #ifdef CONFIG_ARM64_PSEUDO_NMI - /* Save pmr */ -alternative_if ARM64_HAS_GIC_PRIO_MASKING +alternative_if_not ARM64_HAS_GIC_PRIO_MASKING + b .Lskip_pmr_save\@ +alternative_else_nop_endif + mrs_s x20, SYS_ICC_PMR_EL1 str x20, [sp, #S_PMR_SAVE] mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET msr_s SYS_ICC_PMR_EL1, x20 -alternative_else_nop_endif + +.Lskip_pmr_save\@: #endif /* @@ -336,15 +339,19 @@ alternative_else_nop_endif .endif #ifdef CONFIG_ARM64_PSEUDO_NMI - /* Restore pmr */ -alternative_if ARM64_HAS_GIC_PRIO_MASKING +alternative_if_not ARM64_HAS_GIC_PRIO_MASKING + b .Lskip_pmr_restore\@ +alternative_else_nop_endif + ldr x20, [sp, #S_PMR_SAVE] msr_s SYS_ICC_PMR_EL1, x20 - mrs_s x21, SYS_ICC_CTLR_EL1 - tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE - dsb sy // Ensure priority change is seen by redistributor -.L__skip_pmr_sync\@: + + /* Ensure priority change is seen by redistributor */ +alternative_if_not ARM64_HAS_GIC_PRIO_RELAXED_SYNC + dsb sy alternative_else_nop_endif + +.Lskip_pmr_restore\@: #endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index d0e9bb5c91fc..97e750a35f70 100644 --- a/arch/arm64/kernel/image-vars.h +++ 
b/arch/arm64/kernel/image-vars.h @@ -67,9 +67,7 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); -/* Static key checked in pmr_sync(). */ #ifdef CONFIG_ARM64_PSEUDO_NMI -KVM_NVHE_ALIAS(gic_pmr_sync); /* Static key checked in GIC_PRIO_IRQOFF. */ KVM_NVHE_ALIAS(gic_nonsecure_priorities); #endif diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index c993d43624b3..10ce8f88f86b 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -30,6 +30,7 @@ HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF HAS_GIC_CPUIF_SYSREGS HAS_GIC_PRIO_MASKING +HAS_GIC_PRIO_RELAXED_SYNC HAS_LDAPR HAS_LSE_ATOMICS HAS_NO_FPSIMD diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 997104d4338e..3779836737c8 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -89,15 +89,6 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); */ static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); -/* - * Global static key controlling whether an update to PMR allowing more - * interrupts requires to be propagated to the redistributor (DSB SY). - * And this needs to be exported for modules to be able to enable - * interrupts... - */ -DEFINE_STATIC_KEY_FALSE(gic_pmr_sync); -EXPORT_SYMBOL(gic_pmr_sync); - DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities); EXPORT_SYMBOL(gic_nonsecure_priorities); @@ -1768,16 +1759,8 @@ static void gic_enable_nmi_support(void) for (i = 0; i < gic_data.ppi_nr; i++) refcount_set(&ppi_nmi_refs[i], 0); - /* - * Linux itself doesn't use 1:N distribution, so has no need to - * set PMHE. The only reason to have it set is if EL3 requires it - * (and we can't change it). - */ - if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) - static_branch_enable(&gic_pmr_sync); - pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", - static_branch_unlikely(&gic_pmr_sync) ? "forced" : "relaxed"); + gic_has_relaxed_pmr_sync() ? 
"relaxed" : "forced"); /* * How priority values are used by the GIC depends on two things: -- cgit v1.2.3 From 7f49b037397631dc5ec8f6eed67d218edf094fa2 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 3 Feb 2023 13:15:09 +0100 Subject: drivers/perf: fsl_imx8_ddr_perf: Remove set-but-not-used variable active_events is set but not used, remove it. Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20230203121509.3580245-1-s.hauer@pengutronix.de Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 8e058e08fe81..5222ba1e79d0 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -97,7 +97,6 @@ struct ddr_pmu { struct hlist_node node; struct device *dev; struct perf_event *events[NUM_COUNTERS]; - int active_events; enum cpuhp_state cpuhp_state; const struct fsl_ddr_devtype_data *devtype_data; int irq; @@ -530,7 +529,6 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) } pmu->events[counter] = event; - pmu->active_events++; hwc->idx = counter; hwc->state |= PERF_HES_STOPPED; @@ -562,7 +560,6 @@ static void ddr_perf_event_del(struct perf_event *event, int flags) ddr_perf_event_stop(event, PERF_EF_UPDATE); ddr_perf_free_counter(pmu, counter); - pmu->active_events--; hwc->idx = -1; } -- cgit v1.2.3 From 8d9190f00a9753ff51f18319d928dedd9a272057 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Jan 2023 13:26:24 -0600 Subject: perf: arm_spe: Add support for SPEv1.2 inverted event filtering Arm SPEv1.2 (Arm v8.7/v9.2) adds a new feature called Inverted Event Filter which excludes samples matching the event filter. The feature mirrors the existing event filter in PMSEVFR_EL1 adding a new register, PMSNEVFR_EL1, which has the same event bit assignments. 
Tested-by: James Clark Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220825-arm-spe-v8-7-v4-8-327f860daf28@kernel.org Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'drivers') diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 82f67e941bc4..573db4211acd 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -85,6 +85,7 @@ struct arm_spe_pmu { #define SPE_PMU_FEAT_ARCH_INST (1UL << 3) #define SPE_PMU_FEAT_LDS (1UL << 4) #define SPE_PMU_FEAT_ERND (1UL << 5) +#define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6) #define SPE_PMU_FEAT_DEV_PROBED (1UL << 63) u64 features; @@ -202,6 +203,10 @@ static const struct attribute_group arm_spe_pmu_cap_group = { #define ATTR_CFG_FLD_min_latency_LO 0 #define ATTR_CFG_FLD_min_latency_HI 11 +#define ATTR_CFG_FLD_inv_event_filter_CFG config3 /* PMSNEVFR_EL1 */ +#define ATTR_CFG_FLD_inv_event_filter_LO 0 +#define ATTR_CFG_FLD_inv_event_filter_HI 63 + /* Why does everything I do descend into this? */ #define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ (lo) == (hi) ? 
#cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi @@ -232,6 +237,7 @@ GEN_PMU_FORMAT_ATTR(branch_filter); GEN_PMU_FORMAT_ATTR(load_filter); GEN_PMU_FORMAT_ATTR(store_filter); GEN_PMU_FORMAT_ATTR(event_filter); +GEN_PMU_FORMAT_ATTR(inv_event_filter); GEN_PMU_FORMAT_ATTR(min_latency); static struct attribute *arm_spe_pmu_formats_attr[] = { @@ -243,12 +249,27 @@ static struct attribute *arm_spe_pmu_formats_attr[] = { &format_attr_load_filter.attr, &format_attr_store_filter.attr, &format_attr_event_filter.attr, + &format_attr_inv_event_filter.attr, &format_attr_min_latency.attr, NULL, }; +static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj, + struct attribute *attr, + int unused) + { + struct device *dev = kobj_to_dev(kobj); + struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); + + if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT)) + return 0; + + return attr->mode; +} + static const struct attribute_group arm_spe_pmu_format_group = { .name = "format", + .is_visible = arm_spe_pmu_format_attr_is_visible, .attrs = arm_spe_pmu_formats_attr, }; @@ -343,6 +364,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event) if (ATTR_CFG_GET_FLD(attr, event_filter)) reg |= PMSFCR_EL1_FE; + if (ATTR_CFG_GET_FLD(attr, inv_event_filter)) + reg |= PMSFCR_EL1_FnE; + if (ATTR_CFG_GET_FLD(attr, min_latency)) reg |= PMSFCR_EL1_FL; @@ -355,6 +379,12 @@ static u64 arm_spe_event_to_pmsevfr(struct perf_event *event) return ATTR_CFG_GET_FLD(attr, event_filter); } +static u64 arm_spe_event_to_pmsnevfr(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + return ATTR_CFG_GET_FLD(attr, inv_event_filter); +} + static u64 arm_spe_event_to_pmslatfr(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; @@ -703,6 +733,9 @@ static int arm_spe_pmu_event_init(struct perf_event *event) if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver)) return -EOPNOTSUPP; + if 
(arm_spe_event_to_pmsnevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver)) + return -EOPNOTSUPP; + if (attr->exclude_idle) return -EOPNOTSUPP; @@ -721,6 +754,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event) !(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT)) return -EOPNOTSUPP; + if ((FIELD_GET(PMSFCR_EL1_FnE, reg)) && + !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT)) + return -EOPNOTSUPP; + if ((FIELD_GET(PMSFCR_EL1_FT, reg)) && !(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP)) return -EOPNOTSUPP; @@ -756,6 +793,11 @@ static void arm_spe_pmu_start(struct perf_event *event, int flags) reg = arm_spe_event_to_pmsevfr(event); write_sysreg_s(reg, SYS_PMSEVFR_EL1); + if (spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT) { + reg = arm_spe_event_to_pmsnevfr(event); + write_sysreg_s(reg, SYS_PMSNEVFR_EL1); + } + reg = arm_spe_event_to_pmslatfr(event); write_sysreg_s(reg, SYS_PMSLATFR_EL1); @@ -990,6 +1032,9 @@ static void __arm_spe_pmu_dev_probe(void *info) if (FIELD_GET(PMSIDR_EL1_FE, reg)) spe_pmu->features |= SPE_PMU_FEAT_FILT_EVT; + if (FIELD_GET(PMSIDR_EL1_FnE, reg)) + spe_pmu->features |= SPE_PMU_FEAT_INV_FILT_EVT; + if (FIELD_GET(PMSIDR_EL1_FT, reg)) spe_pmu->features |= SPE_PMU_FEAT_FILT_TYP; -- cgit v1.2.3 From e8a709dc2a9156f223ec953ae70a919e87ad7e9a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 6 Feb 2023 14:47:46 -0600 Subject: perf: arm_spe: Print the version of SPE detected There's up to 4 versions of SPE now. Let's add the version that's been detected to the driver's informational print out. 
Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230206204746.1452942-1-robh@kernel.org Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 573db4211acd..b9ba4c4fe5a2 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -1105,8 +1105,8 @@ static void __arm_spe_pmu_dev_probe(void *info) } dev_info(dev, - "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n", - cpumask_pr_args(&spe_pmu->supported_cpus), + "probed SPEv1.%d for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n", + spe_pmu->pmsver - 1, cpumask_pr_args(&spe_pmu->supported_cpus), spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features); spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED; -- cgit v1.2.3