summaryrefslogtreecommitdiff
path: root/drivers/powercap
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-13 21:40:06 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-13 21:40:06 +0300
commit07abb19a9b201c11e4367e8a428be7235b6dbd0d (patch)
tree8e0a08bce75f5e91a5b40c9f74ecd7f722295160 /drivers/powercap
parenta070a08d006d142e2ae0bf73843dc90c2b42b02f (diff)
parent866b554c2d3e067751cc2cbad9ed281db2d47143 (diff)
downloadlinux-07abb19a9b201c11e4367e8a428be7235b6dbd0d.tar.xz
Merge tag 'pm-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "From the functional perspective, the most significant change here is the addition of support for Energy Models that can be updated dynamically at run time. There is also the addition of LZ4 compression support for hibernation, the new preferred core support in amd-pstate, new platforms support in the Intel RAPL driver, new model-specific EPP handling in intel_pstate and more. Apart from that, the cpufreq default transition delay is reduced from 10 ms to 2 ms (along with some related adjustments), the system suspend statistics code undergoes a significant rework and there is a usual bunch of fixes and code cleanups all over. Specifics: - Allow the Energy Model to be updated dynamically (Lukasz Luba) - Add support for LZ4 compression algorithm to the hibernation image creation and loading code (Nikhil V) - Fix and clean up system suspend statistics collection (Rafael Wysocki) - Simplify device suspend and resume handling in the power management core code (Rafael Wysocki) - Fix PCI hibernation support description (Yiwei Lin) - Make hibernation take set_memory_ro() return values into account as appropriate (Christophe Leroy) - Set mem_sleep_current during kernel command line setup to avoid an ordering issue with handling it (Maulik Shah) - Fix wake IRQs handling when pm_runtime_force_suspend() is used as a driver's system suspend callback (Qingliang Li) - Simplify pm_runtime_get_if_active() usage and add a replacement for pm_runtime_put_autosuspend() (Sakari Ailus) - Add a tracepoint for runtime_status changes tracking (Vilas Bhat) - Fix section title markdown in the runtime PM documentation (Yiwei Lin) - Enable preferred core support in the amd-pstate cpufreq driver (Meng Li) - Fix min_perf assignment in amd_pstate_adjust_perf() and make the min/max limit perf values in amd-pstate always stay within the (highest perf, lowest perf) range (Tor Vic, Meng Li) - Allow intel_pstate to assign model-specific values to strings used in the EPP sysfs interface and make it do so on Meteor Lake (Srinivas Pandruvada) - Drop long-unused cpudata::prev_cummulative_iowait from the intel_pstate cpufreq driver (Jiri Slaby) - Prevent scaling_cur_freq from exceeding scaling_max_freq when the latter is an inefficient frequency (Shivnandan Kumar) - Change default transition delay in cpufreq to 2ms (Qais Yousef) - Remove references to 10ms minimum sampling rate from comments in the cpufreq code (Pierre Gondois) - Honour transition_latency over transition_delay_us in cpufreq (Qais Yousef) - Stop unregistering cpufreq cooling on CPU hot-remove (Viresh Kumar) - General enhancements / cleanups to ARM cpufreq drivers (tianyu2, NĂ­colas F. R. A. Prado, Erick Archer, Arnd Bergmann, Anastasia Belova) - Update cpufreq-dt-platdev to block/approve devices (Richard Acayan) - Make the SCMI cpufreq driver get a transition delay value from firmware (Pierre Gondois) - Prevent the haltpoll cpuidle governor from shrinking guest poll_limit_ns below grow_start (Parshuram Sangle) - Avoid potential overflow in integer multiplication when computing cpuidle state parameters (C Cheng) - Adjust MWAIT hint target C-state computation in the ACPI cpuidle driver and in intel_idle to return a correct value for C0 (He Rongguang) - Address multiple issues in the TPMI RAPL driver and add support for new platforms (Lunar Lake-M, Arrow Lake) to Intel RAPL (Zhang Rui) - Fix freq_qos_add_request() return value check in dtpm_cpu (Daniel Lezcano) - Fix kernel-doc for dtpm_create_hierarchy() (Yang Li) - Fix file leak in get_pkg_num() in x86_energy_perf_policy (Samasth Norway Ananda) - Fix cpupower-frequency-info.1 man page typo (Jan Kratochvil) - Fix a couple of warnings in the OPP core code related to W=1 builds (Viresh Kumar) - Move dev_pm_opp_{init|free}_cpufreq_table() to pm_opp.h (Viresh Kumar) - Extend dev_pm_opp_data with turbo support (Sibi Sankar) - dt-bindings: drop maxItems from inner items (David Heidelberg)" * tag 'pm-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (95 commits) dt-bindings: opp: drop maxItems from inner items OPP: debugfs: Fix warning around icc_get_name() OPP: debugfs: Fix warning with W=1 builds cpufreq: Move dev_pm_opp_{init|free}_cpufreq_table() to pm_opp.h OPP: Extend dev_pm_opp_data with turbo support Fix cpupower-frequency-info.1 man page typo cpufreq: scmi: Set transition_delay_us firmware: arm_scmi: Populate fast channel rate_limit firmware: arm_scmi: Populate perf commands rate_limit cpuidle: ACPI/intel: fix MWAIT hint target C-state computation PM: sleep: wakeirq: fix wake irq warning in system suspend powercap: dtpm: Fix kernel-doc for dtpm_create_hierarchy() function cpufreq: Don't unregister cpufreq cooling on CPU hotplug PM: suspend: Set mem_sleep_current during kernel command line setup cpufreq: Honour transition_latency over transition_delay_us cpufreq: Limit resolving a frequency to policy min/max Documentation: PM: Fix runtime_pm.rst markdown syntax cpufreq: amd-pstate: adjust min/max limit perf cpufreq: Remove references to 10ms min sampling rate cpufreq: intel_pstate: Update default EPPs for Meteor Lake ...
Diffstat (limited to 'drivers/powercap')
-rw-r--r--drivers/powercap/dtpm.c2
-rw-r--r--drivers/powercap/dtpm_cpu.c43
-rw-r--r--drivers/powercap/dtpm_devfreq.c34
-rw-r--r--drivers/powercap/intel_rapl_common.c36
-rw-r--r--drivers/powercap/intel_rapl_msr.c8
-rw-r--r--drivers/powercap/intel_rapl_tpmi.c15
6 files changed, 107 insertions, 31 deletions
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index ce920f17f45f..f390665743c4 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -522,7 +522,7 @@ static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
/**
* dtpm_create_hierarchy - Create the dtpm hierarchy
- * @hierarchy: An array of struct dtpm_node describing the hierarchy
+ * @dtpm_match_table: Pointer to the array of device ID structures
*
* The function is called by the platform specific code with the
* description of the different node in the hierarchy. It creates the
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 9193c3b8edeb..bc90126f1b5f 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -42,6 +42,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
{
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
+ struct em_perf_state *table;
struct cpumask cpus;
unsigned long freq;
u64 power;
@@ -50,20 +51,22 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
nr_cpus = cpumask_weight(&cpus);
+ rcu_read_lock();
+ table = em_perf_state_from_pd(pd);
for (i = 0; i < pd->nr_perf_states; i++) {
- power = pd->table[i].power * nr_cpus;
+ power = table[i].power * nr_cpus;
if (power > power_limit)
break;
}
- freq = pd->table[i - 1].frequency;
+ freq = table[i - 1].frequency;
+ power_limit = table[i - 1].power * nr_cpus;
+ rcu_read_unlock();
freq_qos_update_request(&dtpm_cpu->qos_req, freq);
- power_limit = pd->table[i - 1].power * nr_cpus;
-
return power_limit;
}
@@ -87,9 +90,11 @@ static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power)
static u64 get_pd_power_uw(struct dtpm *dtpm)
{
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+ struct em_perf_state *table;
struct em_perf_domain *pd;
struct cpumask *pd_mask;
unsigned long freq;
+ u64 power = 0;
int i;
pd = em_cpu_get(dtpm_cpu->cpu);
@@ -98,33 +103,43 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
freq = cpufreq_quick_get(dtpm_cpu->cpu);
+ rcu_read_lock();
+ table = em_perf_state_from_pd(pd);
for (i = 0; i < pd->nr_perf_states; i++) {
- if (pd->table[i].frequency < freq)
+ if (table[i].frequency < freq)
continue;
- return scale_pd_power_uw(pd_mask, pd->table[i].power);
+ power = scale_pd_power_uw(pd_mask, table[i].power);
+ break;
}
+ rcu_read_unlock();
- return 0;
+ return power;
}
static int update_pd_power_uw(struct dtpm *dtpm)
{
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
+ struct em_perf_state *table;
struct cpumask cpus;
int nr_cpus;
cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
nr_cpus = cpumask_weight(&cpus);
- dtpm->power_min = em->table[0].power;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(em);
+
+ dtpm->power_min = table[0].power;
dtpm->power_min *= nr_cpus;
- dtpm->power_max = em->table[em->nr_perf_states - 1].power;
+ dtpm->power_max = table[em->nr_perf_states - 1].power;
dtpm->power_max *= nr_cpus;
+ rcu_read_unlock();
+
return 0;
}
@@ -143,7 +158,7 @@ static void pd_release(struct dtpm *dtpm)
cpufreq_cpu_put(policy);
}
-
+
kfree(dtpm_cpu);
}
@@ -180,6 +195,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
{
struct dtpm_cpu *dtpm_cpu;
struct cpufreq_policy *policy;
+ struct em_perf_state *table;
struct em_perf_domain *pd;
char name[CPUFREQ_NAME_LEN];
int ret = -ENOMEM;
@@ -216,10 +232,13 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
if (ret)
goto out_kfree_dtpm_cpu;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(pd);
ret = freq_qos_add_request(&policy->constraints,
&dtpm_cpu->qos_req, FREQ_QOS_MAX,
- pd->table[pd->nr_perf_states - 1].frequency);
- if (ret)
+ table[pd->nr_perf_states - 1].frequency);
+ rcu_read_unlock();
+ if (ret < 0)
goto out_dtpm_unregister;
cpufreq_cpu_put(policy);
diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c
index 612c3b59dd5b..f40bce8176df 100644
--- a/drivers/powercap/dtpm_devfreq.c
+++ b/drivers/powercap/dtpm_devfreq.c
@@ -37,11 +37,16 @@ static int update_pd_power_uw(struct dtpm *dtpm)
struct devfreq *devfreq = dtpm_devfreq->devfreq;
struct device *dev = devfreq->dev.parent;
struct em_perf_domain *pd = em_pd_get(dev);
+ struct em_perf_state *table;
- dtpm->power_min = pd->table[0].power;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(pd);
- dtpm->power_max = pd->table[pd->nr_perf_states - 1].power;
+ dtpm->power_min = table[0].power;
+ dtpm->power_max = table[pd->nr_perf_states - 1].power;
+
+ rcu_read_unlock();
return 0;
}
@@ -51,20 +56,23 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
struct devfreq *devfreq = dtpm_devfreq->devfreq;
struct device *dev = devfreq->dev.parent;
struct em_perf_domain *pd = em_pd_get(dev);
+ struct em_perf_state *table;
unsigned long freq;
int i;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(pd);
for (i = 0; i < pd->nr_perf_states; i++) {
- if (pd->table[i].power > power_limit)
+ if (table[i].power > power_limit)
break;
}
- freq = pd->table[i - 1].frequency;
+ freq = table[i - 1].frequency;
+ power_limit = table[i - 1].power;
+ rcu_read_unlock();
dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq);
- power_limit = pd->table[i - 1].power;
-
return power_limit;
}
@@ -89,8 +97,9 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
struct device *dev = devfreq->dev.parent;
struct em_perf_domain *pd = em_pd_get(dev);
struct devfreq_dev_status status;
+ struct em_perf_state *table;
unsigned long freq;
- u64 power;
+ u64 power = 0;
int i;
mutex_lock(&devfreq->lock);
@@ -100,19 +109,22 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
freq = DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ);
_normalize_load(&status);
+ rcu_read_lock();
+ table = em_perf_state_from_pd(pd);
for (i = 0; i < pd->nr_perf_states; i++) {
- if (pd->table[i].frequency < freq)
+ if (table[i].frequency < freq)
continue;
- power = pd->table[i].power;
+ power = table[i].power;
power *= status.busy_time;
power >>= 10;
- return power;
+ break;
}
+ rcu_read_unlock();
- return 0;
+ return power;
}
static void pd_release(struct dtpm *dtpm)
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 00c861899a47..a28d54fd5222 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -5,6 +5,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cleanup.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
@@ -759,6 +760,11 @@ static int rapl_config(struct rapl_package *rp)
default:
return -EINVAL;
}
+
+ /* defaults_msr can be NULL on unsupported platforms */
+ if (!rp->priv->defaults || !rp->priv->rpi)
+ return -ENODEV;
+
return 0;
}
@@ -1256,6 +1262,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server),
+ X86_MATCH_INTEL_FAM6_MODEL(LUNARLAKE_M, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt),
@@ -1499,7 +1507,7 @@ static int rapl_detect_domains(struct rapl_package *rp)
}
/* called from CPU hotplug notifier, hotplug lock held */
-void rapl_remove_package(struct rapl_package *rp)
+void rapl_remove_package_cpuslocked(struct rapl_package *rp)
{
struct rapl_domain *rd, *rd_package = NULL;
@@ -1528,10 +1536,18 @@ void rapl_remove_package(struct rapl_package *rp)
list_del(&rp->plist);
kfree(rp);
}
+EXPORT_SYMBOL_GPL(rapl_remove_package_cpuslocked);
+
+void rapl_remove_package(struct rapl_package *rp)
+{
+ guard(cpus_read_lock)();
+ rapl_remove_package_cpuslocked(rp);
+}
EXPORT_SYMBOL_GPL(rapl_remove_package);
/* caller to ensure CPU hotplug lock is held */
-struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
+struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv,
+ bool id_is_cpu)
{
struct rapl_package *rp;
int uid;
@@ -1549,10 +1565,17 @@ struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv,
return NULL;
}
+EXPORT_SYMBOL_GPL(rapl_find_package_domain_cpuslocked);
+
+struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
+{
+ guard(cpus_read_lock)();
+ return rapl_find_package_domain_cpuslocked(id, priv, id_is_cpu);
+}
EXPORT_SYMBOL_GPL(rapl_find_package_domain);
/* called from CPU hotplug notifier, hotplug lock held */
-struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
+struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
struct rapl_package *rp;
int ret;
@@ -1598,6 +1621,13 @@ err_free_package:
kfree(rp);
return ERR_PTR(ret);
}
+EXPORT_SYMBOL_GPL(rapl_add_package_cpuslocked);
+
+struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
+{
+ guard(cpus_read_lock)();
+ return rapl_add_package_cpuslocked(id, priv, id_is_cpu);
+}
EXPORT_SYMBOL_GPL(rapl_add_package);
static void power_limit_state_save(void)
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 250bd41a588c..b4b6930cacb0 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -73,9 +73,9 @@ static int rapl_cpu_online(unsigned int cpu)
{
struct rapl_package *rp;
- rp = rapl_find_package_domain(cpu, rapl_msr_priv, true);
+ rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true);
if (!rp) {
- rp = rapl_add_package(cpu, rapl_msr_priv, true);
+ rp = rapl_add_package_cpuslocked(cpu, rapl_msr_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
@@ -88,14 +88,14 @@ static int rapl_cpu_down_prep(unsigned int cpu)
struct rapl_package *rp;
int lead_cpu;
- rp = rapl_find_package_domain(cpu, rapl_msr_priv, true);
+ rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true);
if (!rp)
return 0;
cpumask_clear_cpu(cpu, &rp->cpumask);
lead_cpu = cpumask_first(&rp->cpumask);
if (lead_cpu >= nr_cpu_ids)
- rapl_remove_package(rp);
+ rapl_remove_package_cpuslocked(rp);
else if (rp->lead_cpu == cpu)
rp->lead_cpu = lead_cpu;
return 0;
diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c
index 891c90fefd8b..f6b7f085977c 100644
--- a/drivers/powercap/intel_rapl_tpmi.c
+++ b/drivers/powercap/intel_rapl_tpmi.c
@@ -40,6 +40,7 @@ enum tpmi_rapl_register {
TPMI_RAPL_REG_ENERGY_STATUS,
TPMI_RAPL_REG_PERF_STATUS,
TPMI_RAPL_REG_POWER_INFO,
+ TPMI_RAPL_REG_DOMAIN_INFO,
TPMI_RAPL_REG_INTERRUPT,
TPMI_RAPL_REG_MAX = 15,
};
@@ -130,6 +131,12 @@ static void trp_release(struct tpmi_rapl_package *trp)
mutex_unlock(&tpmi_rapl_lock);
}
+/*
+ * Bit 0 of TPMI_RAPL_REG_DOMAIN_INFO indicates if the current package is a domain
+ * root or not. Only domain root packages can enumerate System (Psys) Domain.
+ */
+#define TPMI_RAPL_DOMAIN_ROOT BIT(0)
+
static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
{
u8 tpmi_domain_version;
@@ -139,6 +146,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
enum rapl_domain_reg_id reg_id;
int tpmi_domain_size, tpmi_domain_flags;
u64 tpmi_domain_header = readq(trp->base + offset);
+ u64 tpmi_domain_info;
/* Domain Parent bits are ignored for now */
tpmi_domain_version = tpmi_domain_header & 0xff;
@@ -169,6 +177,13 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
domain_type = RAPL_DOMAIN_PACKAGE;
break;
case TPMI_RAPL_DOMAIN_SYSTEM:
+ if (!(tpmi_domain_flags & BIT(TPMI_RAPL_REG_DOMAIN_INFO))) {
+ pr_warn(FW_BUG "System domain must support Domain Info register\n");
+ return -ENODEV;
+ }
+ tpmi_domain_info = readq(trp->base + offset + TPMI_RAPL_REG_DOMAIN_INFO);
+ if (!(tpmi_domain_info & TPMI_RAPL_DOMAIN_ROOT))
+ return 0;
domain_type = RAPL_DOMAIN_PLATFORM;
break;
case TPMI_RAPL_DOMAIN_MEMORY: