summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-05-14 03:18:51 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-05-14 03:18:51 +0300
commit6e5a0c30b616bfff6926ecca5d88e3d06e6bf79a (patch)
treec8b459ab41f9265828116d04faa23e5224be6e5e /drivers
parent17ca7fc22f4bbc795e4d136449521b2fecb88e06 (diff)
parent97450eb909658573dcacc1063b06d3d08642c0c1 (diff)
downloadlinux-6e5a0c30b616bfff6926ecca5d88e3d06e6bf79a.tar.xz
Merge tag 'sched-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: - Add cpufreq pressure feedback for the scheduler - Rework misfit load-balancing wrt affinity restrictions - Clean up and simplify the code around ::overutilized and ::overload access. - Simplify sched_balance_newidle() - Bump SCHEDSTAT_VERSION to 16 due to a cleanup of CPU_MAX_IDLE_TYPES handling that changed the output. - Rework & clean up <asm/vtime.h> interactions wrt arch_vtime_task_switch() - Reorganize, clean up and unify most of the higher level scheduler balancing function names around the sched_balance_*() prefix - Simplify the balancing flag code (sched_balance_running) - Miscellaneous cleanups & fixes * tag 'sched-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (50 commits) sched/pelt: Remove shift of thermal clock sched/cpufreq: Rename arch_update_thermal_pressure() => arch_update_hw_pressure() thermal/cpufreq: Remove arch_update_thermal_pressure() sched/cpufreq: Take cpufreq feedback into account cpufreq: Add a cpufreq pressure feedback for the scheduler sched/fair: Fix update of rd->sg_overutilized sched/vtime: Do not include <asm/vtime.h> header s390/irq,nmi: Include <asm/vtime.h> header directly s390/vtime: Remove unused __ARCH_HAS_VTIME_TASK_SWITCH leftover sched/vtime: Get rid of generic vtime_task_switch() implementation sched/vtime: Remove confusing arch_vtime_task_switch() declaration sched/balancing: Simplify the sg_status bitmask and use separate ->overloaded and ->overutilized flags sched/fair: Rename set_rd_overutilized_status() to set_rd_overutilized() sched/fair: Rename SG_OVERLOAD to SG_OVERLOADED sched/fair: Rename {set|get}_rd_overload() to {set|get}_rd_overloaded() sched/fair: Rename root_domain::overload to ::overloaded sched/fair: Use helper functions to access root_domain::overload sched/fair: Check root_domain::overload value before update sched/fair: Combine EAS check with root_domain::overutilized access sched/fair: Simplify the continue_balancing logic in sched_balance_newidle() ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/base/arch_topology.c26
-rw-r--r--drivers/cpufreq/cpufreq.c36
-rw-r--r--drivers/cpufreq/qcom-cpufreq-hw.c4
-rw-r--r--drivers/thermal/cpufreq_cooling.c3
4 files changed, 51 insertions, 18 deletions
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 024b78a0cfc1..0248912ff687 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -22,7 +22,7 @@
#include <linux/units.h>
#define CREATE_TRACE_POINTS
-#include <trace/events/thermal_pressure.h>
+#include <trace/events/hw_pressure.h>
static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
static struct cpumask scale_freq_counters_mask;
@@ -160,26 +160,26 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
per_cpu(cpu_scale, cpu) = capacity;
}
-DEFINE_PER_CPU(unsigned long, thermal_pressure);
+DEFINE_PER_CPU(unsigned long, hw_pressure);
/**
- * topology_update_thermal_pressure() - Update thermal pressure for CPUs
+ * topology_update_hw_pressure() - Update HW pressure for CPUs
* @cpus : The related CPUs for which capacity has been reduced
* @capped_freq : The maximum allowed frequency that CPUs can run at
*
- * Update the value of thermal pressure for all @cpus in the mask. The
+ * Update the value of HW pressure for all @cpus in the mask. The
* cpumask should include all (online+offline) affected CPUs, to avoid
* operating on stale data when hot-plug is used for some CPUs. The
* @capped_freq reflects the currently allowed max CPUs frequency due to
- * thermal capping. It might be also a boost frequency value, which is bigger
+ * HW capping. It might be also a boost frequency value, which is bigger
* than the internal 'capacity_freq_ref' max frequency. In such case the
* pressure value should simply be removed, since this is an indication that
- * there is no thermal throttling. The @capped_freq must be provided in kHz.
+ * there is no HW throttling. The @capped_freq must be provided in kHz.
*/
-void topology_update_thermal_pressure(const struct cpumask *cpus,
+void topology_update_hw_pressure(const struct cpumask *cpus,
unsigned long capped_freq)
{
- unsigned long max_capacity, capacity, th_pressure;
+ unsigned long max_capacity, capacity, hw_pressure;
u32 max_freq;
int cpu;
@@ -189,21 +189,21 @@ void topology_update_thermal_pressure(const struct cpumask *cpus,
/*
* Handle properly the boost frequencies, which should simply clean
- * the thermal pressure value.
+ * the HW pressure value.
*/
if (max_freq <= capped_freq)
capacity = max_capacity;
else
capacity = mult_frac(max_capacity, capped_freq, max_freq);
- th_pressure = max_capacity - capacity;
+ hw_pressure = max_capacity - capacity;
- trace_thermal_pressure_update(cpu, th_pressure);
+ trace_hw_pressure_update(cpu, hw_pressure);
for_each_cpu(cpu, cpus)
- WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
+ WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure);
}
-EXPORT_SYMBOL_GPL(topology_update_thermal_pressure);
+EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
static ssize_t cpu_capacity_show(struct device *dev,
struct device_attribute *attr,
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 66e10a19d76a..1de8bd105934 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2582,6 +2582,40 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
}
EXPORT_SYMBOL(cpufreq_get_policy);
+DEFINE_PER_CPU(unsigned long, cpufreq_pressure);
+
+/**
+ * cpufreq_update_pressure() - Update cpufreq pressure for CPUs
+ * @policy: cpufreq policy of the CPUs.
+ *
+ * Update the value of cpufreq pressure for all @cpus in the policy.
+ */
+static void cpufreq_update_pressure(struct cpufreq_policy *policy)
+{
+ unsigned long max_capacity, capped_freq, pressure;
+ u32 max_freq;
+ int cpu;
+
+ cpu = cpumask_first(policy->related_cpus);
+ max_freq = arch_scale_freq_ref(cpu);
+ capped_freq = policy->max;
+
+ /*
+ * Handle properly the boost frequencies, which should simply clean
+ * the cpufreq pressure value.
+ */
+ if (max_freq <= capped_freq) {
+ pressure = 0;
+ } else {
+ max_capacity = arch_scale_cpu_capacity(cpu);
+ pressure = max_capacity -
+ mult_frac(max_capacity, capped_freq, max_freq);
+ }
+
+ for_each_cpu(cpu, policy->related_cpus)
+ WRITE_ONCE(per_cpu(cpufreq_pressure, cpu), pressure);
+}
+
/**
* cpufreq_set_policy - Modify cpufreq policy parameters.
* @policy: Policy object to modify.
@@ -2637,6 +2671,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H);
trace_cpu_frequency_limits(policy);
+ cpufreq_update_pressure(policy);
+
policy->cached_target_freq = UINT_MAX;
pr_debug("new min and max freqs are %u - %u kHz\n",
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index 70b0f21968a0..ec8df5496a0c 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -347,8 +347,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
throttled_freq = freq_hz / HZ_PER_KHZ;
- /* Update thermal pressure (the boost frequencies are accepted) */
- arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
+ /* Update HW pressure (the boost frequencies are accepted) */
+ arch_update_hw_pressure(policy->related_cpus, throttled_freq);
/*
* In the unlikely case policy is unregistered do not enable
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index 9d1b1459700d..280071be30b1 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -477,7 +477,6 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
unsigned long state)
{
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
- struct cpumask *cpus;
unsigned int frequency;
int ret;
@@ -494,8 +493,6 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
ret = freq_qos_update_request(&cpufreq_cdev->qos_req, frequency);
if (ret >= 0) {
cpufreq_cdev->cpufreq_state = state;
- cpus = cpufreq_cdev->policy->related_cpus;
- arch_update_thermal_pressure(cpus, frequency);
ret = 0;
}