summaryrefslogtreecommitdiff
path: root/drivers/perf
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/perf')
-rw-r--r--drivers/perf/Kconfig2
-rw-r--r--drivers/perf/alibaba_uncore_drw_pmu.c27
-rw-r--r--drivers/perf/amlogic/meson_ddr_pmu_core.c2
-rw-r--r--drivers/perf/arm-cci.c5
-rw-r--r--drivers/perf/arm-cmn.c161
-rw-r--r--drivers/perf/arm_dmc620_pmu.c19
-rw-r--r--drivers/perf/arm_dsu_pmu.c2
-rw-r--r--drivers/perf/arm_pmu.c10
-rw-r--r--drivers/perf/arm_pmu_acpi.c137
-rw-r--r--drivers/perf/arm_pmu_platform.c1
-rw-r--r--drivers/perf/arm_pmuv3.c33
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c47
-rw-r--r--drivers/perf/arm_spe_pmu.c3
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c48
-rw-r--r--drivers/perf/fsl_imx9_ddr_perf.c4
-rw-r--r--drivers/perf/hisilicon/hisi_pcie_pmu.c17
-rw-r--r--drivers/perf/marvell_cn10k_ddr_pmu.c3
-rw-r--r--drivers/perf/marvell_cn10k_tad_pmu.c3
-rw-r--r--drivers/perf/riscv_pmu.c116
-rw-r--r--drivers/perf/riscv_pmu_legacy.c28
-rw-r--r--drivers/perf/riscv_pmu_sbi.c196
-rw-r--r--drivers/perf/xgene_pmu.c4
22 files changed, 717 insertions, 151 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index f4572a5cca72..273d67ecf6d2 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -92,7 +92,7 @@ config ARM_PMU_ACPI
config ARM_SMMU_V3_PMU
tristate "ARM SMMUv3 Performance Monitors Extension"
- depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT)
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
depends on GENERIC_MSI_IRQ
help
Provides support for the ARM SMMUv3 Performance Monitor Counter
diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
index 5c5be9fc1b15..19d459a36be5 100644
--- a/drivers/perf/alibaba_uncore_drw_pmu.c
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -236,10 +236,37 @@ static const struct attribute_group ali_drw_pmu_cpumask_attr_group = {
.attrs = ali_drw_pmu_cpumask_attrs,
};
+static ssize_t ali_drw_pmu_identifier_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%s\n", "ali_drw_pmu");
+}
+
+static umode_t ali_drw_pmu_identifier_attr_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ return attr->mode;
+}
+
+static struct device_attribute ali_drw_pmu_identifier_attr =
+ __ATTR(identifier, 0444, ali_drw_pmu_identifier_show, NULL);
+
+static struct attribute *ali_drw_pmu_identifier_attrs[] = {
+ &ali_drw_pmu_identifier_attr.attr,
+ NULL
+};
+
+static const struct attribute_group ali_drw_pmu_identifier_attr_group = {
+ .attrs = ali_drw_pmu_identifier_attrs,
+ .is_visible = ali_drw_pmu_identifier_attr_visible
+};
+
static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
&ali_drw_pmu_events_attr_group,
&ali_drw_pmu_cpumask_attr_group,
&ali_drw_pmu_format_group,
+ &ali_drw_pmu_identifier_attr_group,
NULL,
};
diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
index 0b24dee1ed3c..bbc7285fd934 100644
--- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -9,8 +9,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 998259f1d973..61de861eaf91 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -7,10 +7,7 @@
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index b8c15878bc86..913dc04b3a40 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -72,6 +72,8 @@
/* For most nodes, this is all there is */
#define CMN_PMU_EVENT_SEL 0x000
#define CMN__PMU_CBUSY_SNTHROTTLE_SEL GENMASK_ULL(44, 42)
+#define CMN__PMU_SN_HOME_SEL GENMASK_ULL(40, 39)
+#define CMN__PMU_HBT_LBT_SEL GENMASK_ULL(38, 37)
#define CMN__PMU_CLASS_OCCUP_ID GENMASK_ULL(36, 35)
/* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */
#define CMN__PMU_OCCUP1_ID GENMASK_ULL(34, 32)
@@ -226,6 +228,7 @@ enum cmn_revision {
REV_CMN700_R0P0 = 0,
REV_CMN700_R1P0,
REV_CMN700_R2P0,
+ REV_CMN700_R3P0,
REV_CI700_R0P0 = 0,
REV_CI700_R1P0,
REV_CI700_R2P0,
@@ -254,6 +257,9 @@ enum cmn_node_type {
CMN_TYPE_CCHA,
CMN_TYPE_CCLA,
CMN_TYPE_CCLA_RNI,
+ CMN_TYPE_HNS = 0x200,
+ CMN_TYPE_HNS_MPAM_S,
+ CMN_TYPE_HNS_MPAM_NS,
/* Not a real node type */
CMN_TYPE_WP = 0x7770
};
@@ -263,6 +269,8 @@ enum cmn_filter_select {
SEL_OCCUP1ID,
SEL_CLASS_OCCUP_ID,
SEL_CBUSY_SNTHROTTLE_SEL,
+ SEL_HBT_LBT_SEL,
+ SEL_SN_HOME_SEL,
SEL_MAX
};
@@ -742,8 +750,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
_CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup, _fsel)
#define CMN_EVENT_DTC(_name) \
CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0)
-#define _CMN_EVENT_HNF(_model, _name, _event, _occup, _fsel) \
- _CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup, _fsel)
+#define CMN_EVENT_HNF(_model, _name, _event) \
+ CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event)
#define CMN_EVENT_HNI(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event)
#define CMN_EVENT_HNP(_name, _event) \
@@ -768,6 +776,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
#define CMN_EVENT_CCLA_RNI(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event)
+#define CMN_EVENT_HNS(_name, _event) \
+ CMN_EVENT_ATTR(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
#define CMN_EVENT_DVM(_model, _name, _event) \
_CMN_EVENT_DVM(_model, _name, _event, 0, SEL_NONE)
@@ -775,32 +785,68 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
_CMN_EVENT_DVM(_model, _name##_all, _event, 0, SEL_OCCUP1ID), \
_CMN_EVENT_DVM(_model, _name##_dvmop, _event, 1, SEL_OCCUP1ID), \
_CMN_EVENT_DVM(_model, _name##_dvmsync, _event, 2, SEL_OCCUP1ID)
-#define CMN_EVENT_HNF(_model, _name, _event) \
- _CMN_EVENT_HNF(_model, _name, _event, 0, SEL_NONE)
+
+#define CMN_EVENT_HN_OCC(_model, _name, _type, _event) \
+ _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 1, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 2, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(_model, _name##_atomic, _type, _event, 3, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(_model, _name##_stash, _type, _event, 4, SEL_OCCUP1ID)
+#define CMN_EVENT_HN_CLS(_model, _name, _type, _event) \
+ _CMN_EVENT_ATTR(_model, _name##_class0, _type, _event, 0, SEL_CLASS_OCCUP_ID), \
+ _CMN_EVENT_ATTR(_model, _name##_class1, _type, _event, 1, SEL_CLASS_OCCUP_ID), \
+ _CMN_EVENT_ATTR(_model, _name##_class2, _type, _event, 2, SEL_CLASS_OCCUP_ID), \
+ _CMN_EVENT_ATTR(_model, _name##_class3, _type, _event, 3, SEL_CLASS_OCCUP_ID)
+#define CMN_EVENT_HN_SNT(_model, _name, _type, _event) \
+ _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_group0_read, _type, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_group0_write, _type, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_group1_read, _type, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_group1_write, _type, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
+ _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
+
+#define CMN_EVENT_HNF_OCC(_model, _name, _event) \
+ CMN_EVENT_HN_OCC(_model, hnf_##_name, CMN_TYPE_HNF, _event)
#define CMN_EVENT_HNF_CLS(_model, _name, _event) \
- _CMN_EVENT_HNF(_model, _name##_class0, _event, 0, SEL_CLASS_OCCUP_ID), \
- _CMN_EVENT_HNF(_model, _name##_class1, _event, 1, SEL_CLASS_OCCUP_ID), \
- _CMN_EVENT_HNF(_model, _name##_class2, _event, 2, SEL_CLASS_OCCUP_ID), \
- _CMN_EVENT_HNF(_model, _name##_class3, _event, 3, SEL_CLASS_OCCUP_ID)
+ CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNS, _event)
#define CMN_EVENT_HNF_SNT(_model, _name, _event) \
- _CMN_EVENT_HNF(_model, _name##_all, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_group0_read, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_group0_write, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_group1_read, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_group1_write, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
- _CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
-
-#define _CMN_EVENT_XP(_name, _event) \
+ CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event)
+
+#define CMN_EVENT_HNS_OCC(_name, _event) \
+ CMN_EVENT_HN_OCC(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_rxsnp, CMN_TYPE_HNS, _event, 5, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 6, SEL_OCCUP1ID), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 7, SEL_OCCUP1ID)
+#define CMN_EVENT_HNS_CLS( _name, _event) \
+ CMN_EVENT_HN_CLS(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_SNT(_name, _event) \
+ CMN_EVENT_HN_SNT(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_HBT(_name, _event) \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_HBT_LBT_SEL), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 1, SEL_HBT_LBT_SEL), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 2, SEL_HBT_LBT_SEL)
+#define CMN_EVENT_HNS_SNH(_name, _event) \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_SN_HOME_SEL), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_sn, CMN_TYPE_HNS, _event, 1, SEL_SN_HOME_SEL), \
+ _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_home, CMN_TYPE_HNS, _event, 2, SEL_SN_HOME_SEL)
+
+#define _CMN_EVENT_XP_MESH(_name, _event) \
__CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \
__CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)), \
__CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)), \
- __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)), \
+ __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2))
+
+#define _CMN_EVENT_XP_PORT(_name, _event) \
__CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)), \
__CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)), \
__CMN_EVENT_XP(p2_##_name, (_event) | (6 << 2)), \
__CMN_EVENT_XP(p3_##_name, (_event) | (7 << 2))
+#define _CMN_EVENT_XP(_name, _event) \
+ _CMN_EVENT_XP_MESH(_name, _event), \
+ _CMN_EVENT_XP_PORT(_name, _event)
+
/* Good thing there are only 3 fundamental XP events... */
#define CMN_EVENT_XP(_name, _event) \
_CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)), \
@@ -813,6 +859,10 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
_CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)), \
_CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5))
+#define CMN_EVENT_XP_DAT(_name, _event) \
+ _CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)), \
+ _CMN_EVENT_XP_PORT(dat2_##_name, (_event) | (6 << 5))
+
static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_DTC(cycles),
@@ -862,11 +912,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_HNF(CMN_ANY, mc_retries, 0x0c),
CMN_EVENT_HNF(CMN_ANY, mc_reqs, 0x0d),
CMN_EVENT_HNF(CMN_ANY, qos_hh_retry, 0x0e),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all, 0x0f, 0, SEL_OCCUP1ID),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1, SEL_OCCUP1ID),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2, SEL_OCCUP1ID),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3, SEL_OCCUP1ID),
- _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4, SEL_OCCUP1ID),
+ CMN_EVENT_HNF_OCC(CMN_ANY, qos_pocq_occupancy, 0x0f),
CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz, 0x10),
CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz, 0x11),
CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full, 0x12),
@@ -943,7 +989,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_XP(txflit_valid, 0x01),
CMN_EVENT_XP(txflit_stall, 0x02),
- CMN_EVENT_XP(partial_dat_flit, 0x03),
+ CMN_EVENT_XP_DAT(partial_dat_flit, 0x03),
/* We treat watchpoints as a special made-up class of XP events */
CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP),
CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN),
@@ -1132,6 +1178,66 @@ static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_CCLA(pfwd_sndr_stalls_static_crd, 0x2a),
CMN_EVENT_CCLA(pfwd_sndr_stalls_dynmaic_crd, 0x2b),
+ CMN_EVENT_HNS_HBT(cache_miss, 0x01),
+ CMN_EVENT_HNS_HBT(slc_sf_cache_access, 0x02),
+ CMN_EVENT_HNS_HBT(cache_fill, 0x03),
+ CMN_EVENT_HNS_HBT(pocq_retry, 0x04),
+ CMN_EVENT_HNS_HBT(pocq_reqs_recvd, 0x05),
+ CMN_EVENT_HNS_HBT(sf_hit, 0x06),
+ CMN_EVENT_HNS_HBT(sf_evictions, 0x07),
+ CMN_EVENT_HNS(dir_snoops_sent, 0x08),
+ CMN_EVENT_HNS(brd_snoops_sent, 0x09),
+ CMN_EVENT_HNS_HBT(slc_eviction, 0x0a),
+ CMN_EVENT_HNS_HBT(slc_fill_invalid_way, 0x0b),
+ CMN_EVENT_HNS(mc_retries_local, 0x0c),
+ CMN_EVENT_HNS_SNH(mc_reqs_local, 0x0d),
+ CMN_EVENT_HNS(qos_hh_retry, 0x0e),
+ CMN_EVENT_HNS_OCC(qos_pocq_occupancy, 0x0f),
+ CMN_EVENT_HNS(pocq_addrhaz, 0x10),
+ CMN_EVENT_HNS(pocq_atomic_addrhaz, 0x11),
+ CMN_EVENT_HNS(ld_st_swp_adq_full, 0x12),
+ CMN_EVENT_HNS(cmp_adq_full, 0x13),
+ CMN_EVENT_HNS(txdat_stall, 0x14),
+ CMN_EVENT_HNS(txrsp_stall, 0x15),
+ CMN_EVENT_HNS(seq_full, 0x16),
+ CMN_EVENT_HNS(seq_hit, 0x17),
+ CMN_EVENT_HNS(snp_sent, 0x18),
+ CMN_EVENT_HNS(sfbi_dir_snp_sent, 0x19),
+ CMN_EVENT_HNS(sfbi_brd_snp_sent, 0x1a),
+ CMN_EVENT_HNS(intv_dirty, 0x1c),
+ CMN_EVENT_HNS(stash_snp_sent, 0x1d),
+ CMN_EVENT_HNS(stash_data_pull, 0x1e),
+ CMN_EVENT_HNS(snp_fwded, 0x1f),
+ CMN_EVENT_HNS(atomic_fwd, 0x20),
+ CMN_EVENT_HNS(mpam_hardlim, 0x21),
+ CMN_EVENT_HNS(mpam_softlim, 0x22),
+ CMN_EVENT_HNS(snp_sent_cluster, 0x23),
+ CMN_EVENT_HNS(sf_imprecise_evict, 0x24),
+ CMN_EVENT_HNS(sf_evict_shared_line, 0x25),
+ CMN_EVENT_HNS_CLS(pocq_class_occup, 0x26),
+ CMN_EVENT_HNS_CLS(pocq_class_retry, 0x27),
+ CMN_EVENT_HNS_CLS(class_mc_reqs_local, 0x28),
+ CMN_EVENT_HNS_CLS(class_cgnt_cmin, 0x29),
+ CMN_EVENT_HNS_SNT(sn_throttle, 0x2a),
+ CMN_EVENT_HNS_SNT(sn_throttle_min, 0x2b),
+ CMN_EVENT_HNS(sf_precise_to_imprecise, 0x2c),
+ CMN_EVENT_HNS(snp_intv_cln, 0x2d),
+ CMN_EVENT_HNS(nc_excl, 0x2e),
+ CMN_EVENT_HNS(excl_mon_ovfl, 0x2f),
+ CMN_EVENT_HNS(snp_req_recvd, 0x30),
+ CMN_EVENT_HNS(snp_req_byp_pocq, 0x31),
+ CMN_EVENT_HNS(dir_ccgha_snp_sent, 0x32),
+ CMN_EVENT_HNS(brd_ccgha_snp_sent, 0x33),
+ CMN_EVENT_HNS(ccgha_snp_stall, 0x34),
+ CMN_EVENT_HNS(lbt_req_hardlim, 0x35),
+ CMN_EVENT_HNS(hbt_req_hardlim, 0x36),
+ CMN_EVENT_HNS(sf_reupdate, 0x37),
+ CMN_EVENT_HNS(excl_sf_imprecise, 0x38),
+ CMN_EVENT_HNS(snp_pocq_addrhaz, 0x39),
+ CMN_EVENT_HNS(mc_retries_remote, 0x3a),
+ CMN_EVENT_HNS_SNH(mc_reqs_remote, 0x3b),
+ CMN_EVENT_HNS_CLS(class_mc_reqs_remote, 0x3c),
+
NULL
};
@@ -1373,6 +1479,10 @@ static int arm_cmn_set_event_sel_hi(struct arm_cmn_node *dn,
dn->occupid[fsel].val = occupid;
reg = FIELD_PREP(CMN__PMU_CBUSY_SNTHROTTLE_SEL,
dn->occupid[SEL_CBUSY_SNTHROTTLE_SEL].val) |
+ FIELD_PREP(CMN__PMU_SN_HOME_SEL,
+ dn->occupid[SEL_SN_HOME_SEL].val) |
+ FIELD_PREP(CMN__PMU_HBT_LBT_SEL,
+ dn->occupid[SEL_HBT_LBT_SEL].val) |
FIELD_PREP(CMN__PMU_CLASS_OCCUP_ID,
dn->occupid[SEL_CLASS_OCCUP_ID].val) |
FIELD_PREP(CMN__PMU_OCCUP1_ID,
@@ -2200,6 +2310,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_CCRA:
case CMN_TYPE_CCHA:
case CMN_TYPE_CCLA:
+ case CMN_TYPE_HNS:
dn++;
break;
/* Nothing to see here */
@@ -2207,6 +2318,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_MPAM_NS:
case CMN_TYPE_RNSAM:
case CMN_TYPE_CXLA:
+ case CMN_TYPE_HNS_MPAM_S:
+ case CMN_TYPE_HNS_MPAM_NS:
break;
/*
* Split "optimised" combination nodes into separate
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 9d0f01c4455a..30cea6859574 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -66,8 +66,13 @@
#define DMC620_PMU_COUNTERn_OFFSET(n) \
(DMC620_PMU_COUNTERS_BASE + 0x28 * (n))
-static LIST_HEAD(dmc620_pmu_irqs);
+/*
+ * dmc620_pmu_irqs_lock: protects dmc620_pmu_irqs list
+ * dmc620_pmu_node_lock: protects pmus_node lists in all dmc620_pmu instances
+ */
static DEFINE_MUTEX(dmc620_pmu_irqs_lock);
+static DEFINE_MUTEX(dmc620_pmu_node_lock);
+static LIST_HEAD(dmc620_pmu_irqs);
struct dmc620_pmu_irq {
struct hlist_node node;
@@ -475,9 +480,9 @@ static int dmc620_pmu_get_irq(struct dmc620_pmu *dmc620_pmu, int irq_num)
return PTR_ERR(irq);
dmc620_pmu->irq = irq;
- mutex_lock(&dmc620_pmu_irqs_lock);
+ mutex_lock(&dmc620_pmu_node_lock);
list_add_rcu(&dmc620_pmu->pmus_node, &irq->pmus_node);
- mutex_unlock(&dmc620_pmu_irqs_lock);
+ mutex_unlock(&dmc620_pmu_node_lock);
return 0;
}
@@ -486,9 +491,11 @@ static void dmc620_pmu_put_irq(struct dmc620_pmu *dmc620_pmu)
{
struct dmc620_pmu_irq *irq = dmc620_pmu->irq;
- mutex_lock(&dmc620_pmu_irqs_lock);
+ mutex_lock(&dmc620_pmu_node_lock);
list_del_rcu(&dmc620_pmu->pmus_node);
+ mutex_unlock(&dmc620_pmu_node_lock);
+ mutex_lock(&dmc620_pmu_irqs_lock);
if (!refcount_dec_and_test(&irq->refcount)) {
mutex_unlock(&dmc620_pmu_irqs_lock);
return;
@@ -638,10 +645,10 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
return 0;
/* We're only reading, but this isn't the place to be involving RCU */
- mutex_lock(&dmc620_pmu_irqs_lock);
+ mutex_lock(&dmc620_pmu_node_lock);
list_for_each_entry(dmc620_pmu, &irq->pmus_node, pmus_node)
perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
- mutex_unlock(&dmc620_pmu_irqs_lock);
+ mutex_unlock(&dmc620_pmu_node_lock);
WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
irq->cpu = target;
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index fe2abb412c00..8223c49bd082 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -20,7 +20,7 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index f6ccb2cd4dfc..d712a19e47ac 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -877,11 +877,13 @@ struct arm_pmu *armpmu_alloc(void)
.attr_groups = pmu->attr_groups,
/*
* This is a CPU PMU potentially in a heterogeneous
- * configuration (e.g. big.LITTLE). This is not an uncore PMU,
- * and we have taken ctx sharing into account (e.g. with our
- * pmu::filter callback and pmu::event_init group validation).
+ * configuration (e.g. big.LITTLE) so
+ * PERF_PMU_CAP_EXTENDED_HW_TYPE is required to open
+ * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a
+ * specific PMU.
*/
- .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS,
+ .capabilities = PERF_PMU_CAP_EXTENDED_REGS |
+ PERF_PMU_CAP_EXTENDED_HW_TYPE,
};
pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 90815ad762eb..05dda19c5359 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -69,6 +69,62 @@ static void arm_pmu_acpi_unregister_irq(int cpu)
acpi_unregister_gsi(gsi);
}
+static int __maybe_unused
+arm_acpi_register_pmu_device(struct platform_device *pdev, u8 len,
+ u16 (*parse_gsi)(struct acpi_madt_generic_interrupt *))
+{
+ int cpu, this_hetid, hetid, irq, ret;
+ u16 this_gsi = 0, gsi = 0;
+
+ /*
+ * Ensure that platform device must have IORESOURCE_IRQ
+ * resource to hold gsi interrupt.
+ */
+ if (pdev->num_resources != 1)
+ return -ENXIO;
+
+ if (pdev->resource[0].flags != IORESOURCE_IRQ)
+ return -ENXIO;
+
+ /*
+ * Sanity check all the GICC tables for the same interrupt
+ * number. For now, only support homogeneous ACPI machines.
+ */
+ for_each_possible_cpu(cpu) {
+ struct acpi_madt_generic_interrupt *gicc;
+
+ gicc = acpi_cpu_get_madt_gicc(cpu);
+ if (gicc->header.length < len)
+ return gsi ? -ENXIO : 0;
+
+ this_gsi = parse_gsi(gicc);
+ this_hetid = find_acpi_cpu_topology_hetero_id(cpu);
+ if (!gsi) {
+ hetid = this_hetid;
+ gsi = this_gsi;
+ } else if (hetid != this_hetid || gsi != this_gsi) {
+ pr_warn("ACPI: %s: must be homogeneous\n", pdev->name);
+ return -ENXIO;
+ }
+ }
+
+ if (!this_gsi)
+ return 0;
+
+ irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH);
+ if (irq < 0) {
+ pr_warn("ACPI: %s Unable to register interrupt: %d\n", pdev->name, gsi);
+ return -ENXIO;
+ }
+
+ pdev->resource[0].start = irq;
+ ret = platform_device_register(pdev);
+ if (ret)
+ acpi_unregister_gsi(gsi);
+
+ return ret;
+}
+
#if IS_ENABLED(CONFIG_ARM_SPE_PMU)
static struct resource spe_resources[] = {
{
@@ -84,6 +140,11 @@ static struct platform_device spe_dev = {
.num_resources = ARRAY_SIZE(spe_resources)
};
+static u16 arm_spe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+ return gicc->spe_interrupt;
+}
+
/*
* For lack of a better place, hook the normal PMU MADT walk
* and create a SPE device if we detect a recent MADT with
@@ -91,53 +152,50 @@ static struct platform_device spe_dev = {
*/
static void arm_spe_acpi_register_device(void)
{
- int cpu, hetid, irq, ret;
- bool first = true;
- u16 gsi = 0;
-
- /*
- * Sanity check all the GICC tables for the same interrupt number.
- * For now, we only support homogeneous ACPI/SPE machines.
- */
- for_each_possible_cpu(cpu) {
- struct acpi_madt_generic_interrupt *gicc;
+ int ret = arm_acpi_register_pmu_device(&spe_dev, ACPI_MADT_GICC_SPE,
+ arm_spe_parse_gsi);
+ if (ret)
+ pr_warn("ACPI: SPE: Unable to register device\n");
+}
+#else
+static inline void arm_spe_acpi_register_device(void)
+{
+}
+#endif /* CONFIG_ARM_SPE_PMU */
- gicc = acpi_cpu_get_madt_gicc(cpu);
- if (gicc->header.length < ACPI_MADT_GICC_SPE)
- return;
-
- if (first) {
- gsi = gicc->spe_interrupt;
- if (!gsi)
- return;
- hetid = find_acpi_cpu_topology_hetero_id(cpu);
- first = false;
- } else if ((gsi != gicc->spe_interrupt) ||
- (hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
- pr_warn("ACPI: SPE must be homogeneous\n");
- return;
- }
+#if IS_ENABLED(CONFIG_CORESIGHT_TRBE)
+static struct resource trbe_resources[] = {
+ {
+ /* irq */
+ .flags = IORESOURCE_IRQ,
}
+};
- irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
- ACPI_ACTIVE_HIGH);
- if (irq < 0) {
- pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
- return;
- }
+static struct platform_device trbe_dev = {
+ .name = ARMV8_TRBE_PDEV_NAME,
+ .id = -1,
+ .resource = trbe_resources,
+ .num_resources = ARRAY_SIZE(trbe_resources)
+};
- spe_resources[0].start = irq;
- ret = platform_device_register(&spe_dev);
- if (ret < 0) {
- pr_warn("ACPI: SPE: Unable to register device\n");
- acpi_unregister_gsi(gsi);
- }
+static u16 arm_trbe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+ return gicc->trbe_interrupt;
+}
+
+static void arm_trbe_acpi_register_device(void)
+{
+ int ret = arm_acpi_register_pmu_device(&trbe_dev, ACPI_MADT_GICC_TRBE,
+ arm_trbe_parse_gsi);
+ if (ret)
+ pr_warn("ACPI: TRBE: Unable to register device\n");
}
#else
-static inline void arm_spe_acpi_register_device(void)
+static inline void arm_trbe_acpi_register_device(void)
{
+
}
-#endif /* CONFIG_ARM_SPE_PMU */
+#endif /* CONFIG_CORESIGHT_TRBE */
static int arm_pmu_acpi_parse_irqs(void)
{
@@ -374,6 +432,7 @@ static int arm_pmu_acpi_init(void)
return 0;
arm_spe_acpi_register_device();
+ arm_trbe_acpi_register_device();
return 0;
}
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 933b96e243b8..3596db36cbff 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -16,7 +16,6 @@
#include <linux/irqdesc.h>
#include <linux/kconfig.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/percpu.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 08b3a1bf0ef6..e5a2ac4155f6 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -721,38 +721,15 @@ static void armv8pmu_enable_event(struct perf_event *event)
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
-
- /*
- * Disable counter
- */
armv8pmu_disable_event_counter(event);
-
- /*
- * Set event.
- */
armv8pmu_write_event_type(event);
-
- /*
- * Enable interrupt for this counter
- */
armv8pmu_enable_event_irq(event);
-
- /*
- * Enable counter
- */
armv8pmu_enable_event_counter(event);
}
static void armv8pmu_disable_event(struct perf_event *event)
{
- /*
- * Disable counter
- */
armv8pmu_disable_event_counter(event);
-
- /*
- * Disable interrupt for this counter
- */
armv8pmu_disable_event_irq(event);
}
@@ -1266,9 +1243,14 @@ PMUV3_INIT_SIMPLE(armv8_cortex_a76)
PMUV3_INIT_SIMPLE(armv8_cortex_a77)
PMUV3_INIT_SIMPLE(armv8_cortex_a78)
PMUV3_INIT_SIMPLE(armv9_cortex_a510)
+PMUV3_INIT_SIMPLE(armv9_cortex_a520)
PMUV3_INIT_SIMPLE(armv9_cortex_a710)
+PMUV3_INIT_SIMPLE(armv9_cortex_a715)
+PMUV3_INIT_SIMPLE(armv9_cortex_a720)
PMUV3_INIT_SIMPLE(armv8_cortex_x1)
PMUV3_INIT_SIMPLE(armv9_cortex_x2)
+PMUV3_INIT_SIMPLE(armv9_cortex_x3)
+PMUV3_INIT_SIMPLE(armv9_cortex_x4)
PMUV3_INIT_SIMPLE(armv8_neoverse_e1)
PMUV3_INIT_SIMPLE(armv8_neoverse_n1)
PMUV3_INIT_SIMPLE(armv9_neoverse_n2)
@@ -1334,9 +1316,14 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init},
{.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init},
{.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init},
+ {.compatible = "arm,cortex-a520-pmu", .data = armv9_cortex_a520_pmu_init},
{.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init},
+ {.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init},
+ {.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init},
{.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init},
{.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init},
+ {.compatible = "arm,cortex-x3-pmu", .data = armv9_cortex_x3_pmu_init},
+ {.compatible = "arm,cortex-x4-pmu", .data = armv9_cortex_x4_pmu_init},
{.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init},
{.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init},
{.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init},
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 25a269d431e4..6303b82566f9 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -115,6 +115,7 @@
#define SMMU_PMCG_PA_SHIFT 12
#define SMMU_PMCG_EVCNTR_RDONLY BIT(0)
+#define SMMU_PMCG_HARDEN_DISABLE BIT(1)
static int cpuhp_state_num;
@@ -159,6 +160,20 @@ static inline void smmu_pmu_enable(struct pmu *pmu)
writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR);
}
+static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
+ struct perf_event *event, int idx);
+
+static inline void smmu_pmu_enable_quirk_hip08_09(struct pmu *pmu)
+{
+ struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+ unsigned int idx;
+
+ for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+ smmu_pmu_apply_event_filter(smmu_pmu, smmu_pmu->events[idx], idx);
+
+ smmu_pmu_enable(pmu);
+}
+
static inline void smmu_pmu_disable(struct pmu *pmu)
{
struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
@@ -167,6 +182,22 @@ static inline void smmu_pmu_disable(struct pmu *pmu)
writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
}
+static inline void smmu_pmu_disable_quirk_hip08_09(struct pmu *pmu)
+{
+ struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+ unsigned int idx;
+
+ /*
+ * The global disable of PMU sometimes fail to stop the counting.
+ * Harden this by writing an invalid event type to each used counter
+ * to forcibly stop counting.
+ */
+ for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+ writel(0xffff, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
+
+ smmu_pmu_disable(pmu);
+}
+
static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu,
u32 idx, u64 value)
{
@@ -765,7 +796,10 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
switch (model) {
case IORT_SMMU_V3_PMCG_HISI_HIP08:
/* HiSilicon Erratum 162001800 */
- smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY;
+ smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY | SMMU_PMCG_HARDEN_DISABLE;
+ break;
+ case IORT_SMMU_V3_PMCG_HISI_HIP09:
+ smmu_pmu->options |= SMMU_PMCG_HARDEN_DISABLE;
break;
}
@@ -890,6 +924,16 @@ static int smmu_pmu_probe(struct platform_device *pdev)
if (!dev->of_node)
smmu_pmu_get_acpi_options(smmu_pmu);
+ /*
+ * For platforms suffer this quirk, the PMU disable sometimes fails to
+ * stop the counters. This will leads to inaccurate or error counting.
+ * Forcibly disable the counters with these quirk handler.
+ */
+ if (smmu_pmu->options & SMMU_PMCG_HARDEN_DISABLE) {
+ smmu_pmu->pmu.pmu_enable = smmu_pmu_enable_quirk_hip08_09;
+ smmu_pmu->pmu.pmu_disable = smmu_pmu_disable_quirk_hip08_09;
+ }
+
/* Pick one CPU to be the preferred one to use */
smmu_pmu->on_cpu = raw_smp_processor_id();
WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
@@ -984,6 +1028,7 @@ static void __exit arm_smmu_pmu_exit(void)
module_exit(arm_smmu_pmu_exit);
+MODULE_ALIAS("platform:arm-smmu-v3-pmcg");
MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension");
MODULE_AUTHOR("Neil Leeder <nleeder@codeaurora.org>");
MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index b9ba4c4fe5a2..d2b0cbf0e0c4 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -25,8 +25,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 5222ba1e79d0..92611c98120f 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -10,10 +10,9 @@
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/perf_event.h>
+#include <linux/platform_device.h>
#include <linux/slab.h>
#define COUNTER_CNTL 0x0
@@ -28,6 +27,8 @@
#define CNTL_CLEAR_MASK 0xFFFFFFFD
#define CNTL_OVER_MASK 0xFFFFFFFE
+#define CNTL_CP_SHIFT 16
+#define CNTL_CP_MASK (0xFF << CNTL_CP_SHIFT)
#define CNTL_CSV_SHIFT 24
#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT)
@@ -35,6 +36,8 @@
#define EVENT_CYCLES_COUNTER 0
#define NUM_COUNTERS 4
+/* For removing bias if cycle counter CNTL.CP is set to 0xf0 */
+#define CYCLES_COUNTER_MASK 0x0FFFFFFF
#define AXI_MASKING_REVERT 0xffff0000 /* AXI_MASKING(MSB 16bits) + AXI_ID(LSB 16bits) */
#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
@@ -101,6 +104,7 @@ struct ddr_pmu {
const struct fsl_ddr_devtype_data *devtype_data;
int irq;
int id;
+ int active_counter;
};
static ssize_t ddr_perf_identifier_show(struct device *dev,
@@ -427,6 +431,17 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
writel(0, pmu->base + reg);
val = CNTL_EN | CNTL_CLEAR;
val |= FIELD_PREP(CNTL_CSV_MASK, config);
+
+ /*
+ * On i.MX8MP we need to bias the cycle counter to overflow more often.
+ * We do this by initializing bits [23:16] of the counter value via the
+ * COUNTER_CTRL Counter Parameter (CP) field.
+ */
+ if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+ if (counter == EVENT_CYCLES_COUNTER)
+ val |= FIELD_PREP(CNTL_CP_MASK, 0xf0);
+ }
+
writel(val, pmu->base + reg);
} else {
/* Disable counter */
@@ -466,6 +481,12 @@ static void ddr_perf_event_update(struct perf_event *event)
int ret;
new_raw_count = ddr_perf_read_counter(pmu, counter);
+ /* Remove the bias applied in ddr_perf_counter_enable(). */
+ if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+ if (counter == EVENT_CYCLES_COUNTER)
+ new_raw_count &= CYCLES_COUNTER_MASK;
+ }
+
local64_add(new_raw_count, &event->count);
/*
@@ -495,6 +516,10 @@ static void ddr_perf_event_start(struct perf_event *event, int flags)
ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
+ if (!pmu->active_counter++)
+ ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER, true);
+
hwc->state = 0;
}
@@ -548,6 +573,10 @@ static void ddr_perf_event_stop(struct perf_event *event, int flags)
ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
ddr_perf_event_update(event);
+ if (!--pmu->active_counter)
+ ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER, false);
+
hwc->state |= PERF_HES_STOPPED;
}
@@ -565,25 +594,10 @@ static void ddr_perf_event_del(struct perf_event *event, int flags)
static void ddr_perf_pmu_enable(struct pmu *pmu)
{
- struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
-
- /* enable cycle counter if cycle is not active event list */
- if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
- ddr_perf_counter_enable(ddr_pmu,
- EVENT_CYCLES_ID,
- EVENT_CYCLES_COUNTER,
- true);
}
static void ddr_perf_pmu_disable(struct pmu *pmu)
{
- struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
-
- if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
- ddr_perf_counter_enable(ddr_pmu,
- EVENT_CYCLES_ID,
- EVENT_CYCLES_COUNTER,
- false);
}
static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
index 71d5b07e3aff..5cf770a1bc31 100644
--- a/drivers/perf/fsl_imx9_ddr_perf.c
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -7,9 +7,7 @@
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <linux/perf_event.h>
/* Performance monitor configuration */
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index e10fc7cb9493..5a00adb2de8c 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -665,8 +665,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
if (pcie_pmu->on_cpu == -1) {
- pcie_pmu->on_cpu = cpu;
- WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
+ pcie_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(&pcie_pmu->pdev->dev));
+ WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(pcie_pmu->on_cpu)));
}
return 0;
@@ -676,14 +676,23 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
unsigned int target;
+ cpumask_t mask;
+ int numa_node;
/* Nothing to do if this CPU doesn't own the PMU */
if (pcie_pmu->on_cpu != cpu)
return 0;
pcie_pmu->on_cpu = -1;
- /* Choose a new CPU from all online cpus. */
- target = cpumask_any_but(cpu_online_mask, cpu);
+
+ /* Choose a local CPU from all online cpus. */
+ numa_node = dev_to_node(&pcie_pmu->pdev->dev);
+ if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) &&
+ cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
+ target = cpumask_any(&mask);
+ else
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
if (target >= nr_cpu_ids) {
pci_err(pcie_pmu->pdev, "There is no CPU to set\n");
return 0;
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
index b94a5f6cc22b..524ba82bfce2 100644
--- a/drivers/perf/marvell_cn10k_ddr_pmu.c
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -8,11 +8,10 @@
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/perf_event.h>
#include <linux/hrtimer.h>
#include <linux/acpi.h>
+#include <linux/platform_device.h>
/* Performance Counters Operating Mode Control Registers */
#define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 3972197e2210..fec8e82edb95 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -6,10 +6,9 @@
#define pr_fmt(fmt) "tad_pmu: " fmt
+#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/cpuhotplug.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index ebca5eab9c9b..1f9a35f724f5 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -14,9 +14,81 @@
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>
+#include <linux/sched_clock.h>
#include <asm/sbi.h>
+static bool riscv_perf_user_access(struct perf_event *event)
+{
+ return ((event->attr.type == PERF_TYPE_HARDWARE) ||
+ (event->attr.type == PERF_TYPE_HW_CACHE) ||
+ (event->attr.type == PERF_TYPE_RAW)) &&
+ !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
+}
+
+void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg, u64 now)
+{
+ struct clock_read_data *rd;
+ unsigned int seq;
+ u64 ns;
+
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_time_short = 0;
+ userpg->cap_user_rdpmc = riscv_perf_user_access(event);
+
+#ifdef CONFIG_RISCV_PMU
+ /*
+ * The counters are 64-bit but the priv spec doesn't mandate all the
+ * bits to be implemented: that's why, counter width can vary based on
+ * the cpu vendor.
+ */
+ if (userpg->cap_user_rdpmc)
+ userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;
+#endif
+
+ do {
+ rd = sched_clock_read_begin(&seq);
+
+ userpg->time_mult = rd->mult;
+ userpg->time_shift = rd->shift;
+ userpg->time_zero = rd->epoch_ns;
+ userpg->time_cycles = rd->epoch_cyc;
+ userpg->time_mask = rd->sched_clock_mask;
+
+ /*
+ * Subtract the cycle base, such that software that
+ * doesn't know about cap_user_time_short still 'works'
+ * assuming no wraps.
+ */
+ ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+ userpg->time_zero -= ns;
+
+ } while (sched_clock_read_retry(seq));
+
+ userpg->time_offset = userpg->time_zero - now;
+
+ /*
+ * time_shift is not expected to be greater than 31 due to
+ * the original published conversion algorithm shifting a
+ * 32-bit value (now specifies a 64-bit value) - refer
+ * perf_event_mmap_page documentation in perf_event.h.
+ */
+ if (userpg->time_shift == 32) {
+ userpg->time_shift = 31;
+ userpg->time_mult >>= 1;
+ }
+
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always computed with the sched_clock.
+ */
+ userpg->cap_user_time = 1;
+ userpg->cap_user_time_zero = 1;
+ userpg->cap_user_time_short = 1;
+}
+
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val) {\
@@ -171,6 +243,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
local64_set(&hwc->prev_count, (u64)-left);
+ perf_event_update_userpage(event);
+
return overflow;
}
@@ -181,9 +255,6 @@ void riscv_pmu_start(struct perf_event *event, int flags)
uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
u64 init_val;
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
if (flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
@@ -267,6 +338,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
hwc->idx = -1;
hwc->event_base = mapped_event;
+ if (rvpmu->event_init)
+ rvpmu->event_init(event);
+
if (!is_sampling_event(event)) {
/*
* For non-sampling runs, limit the sample_period to half
@@ -283,6 +357,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
return 0;
}
+static int riscv_pmu_event_idx(struct perf_event *event)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ return 0;
+
+ if (rvpmu->csr_index)
+ return rvpmu->csr_index(event) + 1;
+
+ return 0;
+}
+
+static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (rvpmu->event_mapped) {
+ rvpmu->event_mapped(event, mm);
+ perf_event_update_userpage(event);
+ }
+}
+
+static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (rvpmu->event_unmapped) {
+ rvpmu->event_unmapped(event, mm);
+ perf_event_update_userpage(event);
+ }
+}
+
struct riscv_pmu *riscv_pmu_alloc(void)
{
struct riscv_pmu *pmu;
@@ -307,6 +414,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
}
pmu->pmu = (struct pmu) {
.event_init = riscv_pmu_event_init,
+ .event_mapped = riscv_pmu_event_mapped,
+ .event_unmapped = riscv_pmu_event_unmapped,
+ .event_idx = riscv_pmu_event_idx,
.add = riscv_pmu_add,
.del = riscv_pmu_del,
.start = riscv_pmu_start,
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c
index ca9e20bfc7ac..79fdd667922e 100644
--- a/drivers/perf/riscv_pmu_legacy.c
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -13,7 +13,7 @@
#include <linux/platform_device.h>
#define RISCV_PMU_LEGACY_CYCLE 0
-#define RISCV_PMU_LEGACY_INSTRET 1
+#define RISCV_PMU_LEGACY_INSTRET 2
static bool pmu_init_done;
@@ -71,6 +71,29 @@ static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival)
local64_set(&hwc->prev_count, initial_val);
}
+static uint8_t pmu_legacy_csr_index(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
+static void pmu_legacy_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+ return;
+
+ event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
+static void pmu_legacy_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+ return;
+
+ event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
/*
* This is just a simple implementation to allow legacy implementations
* compatible with new RISC-V PMU driver framework.
@@ -91,6 +114,9 @@ static void pmu_legacy_init(struct riscv_pmu *pmu)
pmu->ctr_get_width = NULL;
pmu->ctr_clear_idx = NULL;
pmu->ctr_read = pmu_legacy_read_ctr;
+ pmu->event_mapped = pmu_legacy_event_mapped;
+ pmu->event_unmapped = pmu_legacy_event_unmapped;
+ pmu->csr_index = pmu_legacy_csr_index;
perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
}
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 4163ff517471..9a51053b1f99 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -24,6 +24,14 @@
#include <asm/sbi.h>
#include <asm/hwcap.h>
+#define SYSCTL_NO_USER_ACCESS 0
+#define SYSCTL_USER_ACCESS 1
+#define SYSCTL_LEGACY 2
+
+#define PERF_EVENT_FLAG_NO_USER_ACCESS BIT(SYSCTL_NO_USER_ACCESS)
+#define PERF_EVENT_FLAG_USER_ACCESS BIT(SYSCTL_USER_ACCESS)
+#define PERF_EVENT_FLAG_LEGACY BIT(SYSCTL_LEGACY)
+
PMU_FORMAT_ATTR(event, "config:0-47");
PMU_FORMAT_ATTR(firmware, "config:63");
@@ -43,6 +51,9 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = {
NULL,
};
+/* Allow user mode access by default */
+static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;
+
/*
* RISC-V doesn't have heterogeneous harts yet. This need to be part of
* per_cpu in case of harts with different pmu counters
@@ -301,6 +312,11 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
}
EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);
+static uint8_t pmu_sbi_csr_index(struct perf_event *event)
+{
+ return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
+}
+
static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
{
unsigned long cflags = 0;
@@ -329,18 +345,34 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
struct sbiret ret;
int idx;
- uint64_t cbase = 0;
+ uint64_t cbase = 0, cmask = rvpmu->cmask;
unsigned long cflags = 0;
cflags = pmu_sbi_get_filter_flags(event);
+
+ /*
+ * In legacy mode, we have to force the fixed counters for those events
+ * but not in the user access mode as we want to use the other counters
+ * that support sampling/filtering.
+ */
+ if (hwc->flags & PERF_EVENT_FLAG_LEGACY) {
+ if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+ cmask = 1;
+ } else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+ cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
+ }
+ }
+
/* retrieve the available counter index */
#if defined(CONFIG_32BIT)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
- rvpmu->cmask, cflags, hwc->event_base, hwc->config,
+ cmask, cflags, hwc->event_base, hwc->config,
hwc->config >> 32);
#else
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
- rvpmu->cmask, cflags, hwc->event_base, hwc->config, 0);
+ cmask, cflags, hwc->event_base, hwc->config, 0);
#endif
if (ret.error) {
pr_debug("Not able to find a counter for event %lx config %llx\n",
@@ -474,6 +506,22 @@ static u64 pmu_sbi_ctr_read(struct perf_event *event)
return val;
}
+static void pmu_sbi_set_scounteren(void *arg)
+{
+ struct perf_event *event = (struct perf_event *)arg;
+
+ csr_write(CSR_SCOUNTEREN,
+ csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event)));
+}
+
+static void pmu_sbi_reset_scounteren(void *arg)
+{
+ struct perf_event *event = (struct perf_event *)arg;
+
+ csr_write(CSR_SCOUNTEREN,
+ csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event)));
+}
+
static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
{
struct sbiret ret;
@@ -490,6 +538,10 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
pr_err("Starting counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
+
+ if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+ (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ pmu_sbi_set_scounteren((void *)event);
}
static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
@@ -497,6 +549,10 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
+ if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+ (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ pmu_sbi_reset_scounteren((void *)event);
+
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
flag != SBI_PMU_STOP_FLAG_RESET)
@@ -704,10 +760,13 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
/*
- * Enable the access for CYCLE, TIME, and INSTRET CSRs from userspace,
- * as is necessary to maintain uABI compatibility.
+ * We keep enabling userspace access to CYCLE, TIME and INSTRET via the
+ * legacy option but that will be removed in the future.
*/
- csr_write(CSR_SCOUNTEREN, 0x7);
+ if (sysctl_perf_user_access == SYSCTL_LEGACY)
+ csr_write(CSR_SCOUNTEREN, 0x7);
+ else
+ csr_write(CSR_SCOUNTEREN, 0x2);
/* Stop all the counters so that they can be enabled from perf */
pmu_sbi_stop_all(pmu);
@@ -838,6 +897,121 @@ static void riscv_pmu_destroy(struct riscv_pmu *pmu)
cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}
+static void pmu_sbi_event_init(struct perf_event *event)
+{
+ /*
+ * The permissions are set at event_init so that we do not depend
+ * on the sysctl value that can change.
+ */
+ if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS)
+ event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS;
+ else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS)
+ event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS;
+ else
+ event->hw.flags |= PERF_EVENT_FLAG_LEGACY;
+}
+
+static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+ return;
+
+ if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+ return;
+ }
+ }
+
+ /*
+ * The user mmapped the event to directly access it: this is where
+ * we determine based on sysctl_perf_user_access if we grant userspace
+ * the direct access to this event. That means that within the same
+ * task, some events may be directly accessible and some other may not,
+ * if the user changes the value of sysctl_perf_user_accesss in the
+ * meantime.
+ */
+
+ event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+
+ /*
+ * We must enable userspace access *before* advertising in the user page
+ * that it is possible to do so to avoid any race.
+ * And we must notify all cpus here because threads that currently run
+ * on other cpus will try to directly access the counter too without
+ * calling pmu_sbi_ctr_start.
+ */
+ if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+ on_each_cpu_mask(mm_cpumask(mm),
+ pmu_sbi_set_scounteren, (void *)event, 1);
+}
+
+static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+ return;
+
+ if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+ return;
+ }
+ }
+
+ /*
+ * Here we can directly remove user access since the user does not have
+ * access to the user page anymore so we avoid the racy window where the
+ * user could have read cap_user_rdpmc to true right before we disable
+ * it.
+ */
+ event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+
+ if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+ on_each_cpu_mask(mm_cpumask(mm),
+ pmu_sbi_reset_scounteren, (void *)event, 1);
+}
+
+static void riscv_pmu_update_counter_access(void *info)
+{
+ if (sysctl_perf_user_access == SYSCTL_LEGACY)
+ csr_write(CSR_SCOUNTEREN, 0x7);
+ else
+ csr_write(CSR_SCOUNTEREN, 0x2);
+}
+
+static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
+ int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int prev = sysctl_perf_user_access;
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ /*
+ * Test against the previous value since we clear SCOUNTEREN when
+ * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should
+ * not do that if that was already the case.
+ */
+ if (ret || !write || prev == sysctl_perf_user_access)
+ return ret;
+
+ on_each_cpu(riscv_pmu_update_counter_access, NULL, 1);
+
+ return 0;
+}
+
+static struct ctl_table sbi_pmu_sysctl_table[] = {
+ {
+ .procname = "perf_user_access",
+ .data = &sysctl_perf_user_access,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = riscv_pmu_proc_user_access_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ { }
+};
+
static int pmu_sbi_device_probe(struct platform_device *pdev)
{
struct riscv_pmu *pmu = NULL;
@@ -881,6 +1055,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
pmu->ctr_get_width = pmu_sbi_ctr_get_width;
pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
pmu->ctr_read = pmu_sbi_ctr_read;
+ pmu->event_init = pmu_sbi_event_init;
+ pmu->event_mapped = pmu_sbi_event_mapped;
+ pmu->event_unmapped = pmu_sbi_event_unmapped;
+ pmu->csr_index = pmu_sbi_csr_index;
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
if (ret)
@@ -894,6 +1072,8 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;
+ register_sysctl("kernel", sbi_pmu_sysctl_table);
+
return 0;
out_unregister:
@@ -907,7 +1087,7 @@ out_free:
static struct platform_driver pmu_sbi_driver = {
.probe = pmu_sbi_device_probe,
.driver = {
- .name = RISCV_PMU_PDEV_NAME,
+ .name = RISCV_PMU_SBI_PDEV_NAME,
},
};
@@ -934,7 +1114,7 @@ static int __init pmu_sbi_devinit(void)
if (ret)
return ret;
- pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0);
+ pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0);
if (IS_ERR(pdev)) {
platform_driver_unregister(&pmu_sbi_driver);
return PTR_ERR(pdev);
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 0c32dffc7ede..9972bfc11a5c 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1833,7 +1833,6 @@ static int xgene_pmu_probe(struct platform_device *pdev)
const struct xgene_pmu_data *dev_data;
const struct of_device_id *of_id;
struct xgene_pmu *xgene_pmu;
- struct resource *res;
int irq, rc;
int version;
@@ -1883,8 +1882,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
xgene_pmu->version = version;
dev_info(&pdev->dev, "X-Gene PMU version %d\n", xgene_pmu->version);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res);
+ xgene_pmu->pcppmu_csr = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(xgene_pmu->pcppmu_csr)) {
dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n");
return PTR_ERR(xgene_pmu->pcppmu_csr);