From 97148d0ae5303bcc18fcd1c9b968a9485292f32a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 13 Oct 2020 10:42:47 +0530 Subject: cpufreq: Improve code around unlisted freq check The cpufreq core checks if the frequency programmed by the bootloaders is not listed in the freq table and programs one from the table in such a case. This is done only if the driver has set the CPUFREQ_NEED_INITIAL_FREQ_CHECK flag. Currently we print two separate messages, with almost the same content, and do this with a pr_warn() which may be a bit too much as the driver only asked us to check this as it expected this to be the case. Lower down the severity of the print message by switching to pr_info() instead and print a single message only. Reported-by: Sumit Gupta Signed-off-by: Viresh Kumar Reviewed-by: Sumit Gupta Tested-by: Sumit Gupta Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1877f5e2e5b0..f4b60663efe6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1454,14 +1454,13 @@ static int cpufreq_online(unsigned int cpu) */ if ((cpufreq_driver->flags & CPUFREQ_NEED_INITIAL_FREQ_CHECK) && has_target()) { + unsigned int old_freq = policy->cur; + /* Are we running at unknown frequency ? */ - ret = cpufreq_frequency_table_get_index(policy, policy->cur); + ret = cpufreq_frequency_table_get_index(policy, old_freq); if (ret == -EINVAL) { - /* Warn user and fix it */ - pr_warn("%s: CPU%d: Running at unlisted freq: %u KHz\n", - __func__, policy->cpu, policy->cur); - ret = __cpufreq_driver_target(policy, policy->cur - 1, - CPUFREQ_RELATION_L); + ret = __cpufreq_driver_target(policy, old_freq - 1, + CPUFREQ_RELATION_L); /* * Reaching here after boot in a few seconds may not @@ -1469,8 +1468,8 @@ static int cpufreq_online(unsigned int cpu) * frequency for longer duration. Hence, a BUG_ON(). */ BUG_ON(ret); - pr_warn("%s: CPU%d: Unlisted initial frequency changed to: %u KHz\n", - __func__, policy->cpu, policy->cur); + pr_info("%s: CPU%d: Running at unlisted initial frequency: %u KHz, changing to: %u KHz\n", + __func__, policy->cpu, old_freq, policy->cur); } } -- cgit v1.2.3 From cdc1719cd885ef490e30c14c01a6e7fee42bf2e2 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 9 Oct 2020 11:30:38 +0800 Subject: cpufreq: intel_pstate: Delete intel_pstate sysfs if failed to register the driver There is a corner case that if the intel_pstate driver fails to be registered (might be due to invalid MSR access) and acpi_cpufreq takse over, the intel_pstate sysfs interface is still populated which may confuse user space (turbostat for example): grep . /sys/devices/system/cpu/cpu0/cpufreq/scaling_driver acpi-cpufreq grep . /sys/devices/system/cpu/intel_pstate/* /sys/devices/system/cpu/intel_pstate/max_perf_pct:0 /sys/devices/system/cpu/intel_pstate/min_perf_pct:0 grep: /sys/devices/system/cpu/intel_pstate/no_turbo: Resource temporarily unavailable grep: /sys/devices/system/cpu/intel_pstate/num_pstates: Resource temporarily unavailable /sys/devices/system/cpu/intel_pstate/status:off grep: /sys/devices/system/cpu/intel_pstate/turbo_pct: Resource temporarily unavailable The mere presence of the intel_pstate sysfs interface does not mean that intel_pstate is in use (for example, echo "off" to "status"), but it should not be created in the failing case. Fix this issue by deleting the intel_pstate sysfs if the driver registration fails. Reported-by: Wendy Wang Suggested-by: Zhang Rui Signed-off-by: Chen Yu Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9a515c460a00..3c1455518738 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1420,6 +1420,24 @@ static void __init intel_pstate_sysfs_expose_params(void) } } +static void __init intel_pstate_sysfs_remove(void) +{ + if (!intel_pstate_kobject) + return; + + sysfs_remove_group(intel_pstate_kobject, &intel_pstate_attr_group); + + if (!per_cpu_limits) { + sysfs_remove_file(intel_pstate_kobject, &max_perf_pct.attr); + sysfs_remove_file(intel_pstate_kobject, &min_perf_pct.attr); + + if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids)) + sysfs_remove_file(intel_pstate_kobject, &energy_efficiency.attr); + } + + kobject_put(intel_pstate_kobject); +} + static void intel_pstate_sysfs_expose_hwp_dynamic_boost(void) { int rc; @@ -3063,8 +3081,10 @@ hwp_cpu_matched: mutex_lock(&intel_pstate_driver_lock); rc = intel_pstate_register_driver(default_driver); mutex_unlock(&intel_pstate_driver_lock); - if (rc) + if (rc) { + intel_pstate_sysfs_remove(); return rc; + } if (hwp_active) { const struct x86_cpu_id *id; -- cgit v1.2.3 From 8bb2e2a887afdf8a39e68fa0dccf82a168aae655 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Mon, 12 Oct 2020 15:50:33 +0300 Subject: intel_idle: mention assumption that WBINVD is not needed Intel SDM does not explicitly say that entering a C-state via MWAIT will implicitly flush CPU caches as appropriate for that C-state. However, documentation for individual Intel CPU generations does mention this behavior. Since intel_idle binds to any Intel CPU with MWAIT, list this assumption of MWAIT behavior. In passing, reword opening comment to make it clear that the driver can load on any old and future Intel CPU with MWAIT. Signed-off-by: Alexander Monakov Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 9a810e4a7946..bf3dd4a19fef 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -8,7 +8,7 @@ */ /* - * intel_idle is a cpuidle driver that loads on specific Intel processors + * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT * in lieu of the legacy ACPI processor_idle driver. The intent is to * make Linux more efficient on these processors, as intel_idle knows * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs. @@ -20,7 +20,11 @@ * All CPUs have same idle states as boot CPU * * Chipset BM_STS (bus master status) bit is a NOP - * for preventing entry into deep C-stats + * for preventing entry into deep C-states + * + * CPU will flush caches as needed when entering a C-state via MWAIT + * (in contrast to entering ACPI C3, in which case the WBINVD + * instruction needs to be executed to flush the caches) */ /* -- cgit v1.2.3 From 75af76d0a34e048651a6af311781d7206b6964c7 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 16 Oct 2020 17:28:32 +0200 Subject: intel_idle: Ignore _CST if control cannot be taken from the platform e6d4f08a6776 ("intel_idle: Use ACPI _CST on server systems") avoids enabling c-states that have been disabled by the platform with the exception of C1E. Unfortunately, BIOS implementations are not always consistent in terms of how capabilities are advertised and control cannot always be handed over. If control cannot be handed over then intel_idle reports that "ACPI _CST not found or not usable" but does not clear acpi_state_table.count meaning the information is still partially used. This patch ignores ACPI information if CST control cannot be requested from the platform. This was only observed on a number of Haswell platforms that had identical CPUs but not identical BIOS versions. While this problem may be rare overall, 24 separate test cases bisected to this specific commit across 4 separate test machines and is worth addressing. If the situation occurs, the kernel behaves as it did before commit e6d4f08a6776 and uses any c-states that are discovered. The affected test cases were all ones that involved a small number of processes -- exec microbenchmark, pipe microbenchmark, git test suite, netperf, tbench with one client and system call microbenchmark. Each case benefits from being able to use turboboost which is prevented if the lower c-states are unavailable. This may mask real regressions specific to older hardware so it is worth addressing. C-state status before and after the patch 5.9.0-vanilla POLL latency:0 disabled:0 default:enabled 5.9.0-vanilla C1 latency:2 disabled:0 default:enabled 5.9.0-vanilla C1E latency:10 disabled:0 default:enabled 5.9.0-vanilla C3 latency:33 disabled:1 default:disabled 5.9.0-vanilla C6 latency:133 disabled:1 default:disabled 5.9.0-ignore-cst-v1r1 POLL latency:0 disabled:0 default:enabled 5.9.0-ignore-cst-v1r1 C1 latency:2 disabled:0 default:enabled 5.9.0-ignore-cst-v1r1 C1E latency:10 disabled:0 default:enabled 5.9.0-ignore-cst-v1r1 C3 latency:33 disabled:0 default:enabled 5.9.0-ignore-cst-v1r1 C6 latency:133 disabled:0 default:enabled Patch enables C3/C6. Netperf UDP_STREAM netperf-udp 5.5.0 5.9.0 vanilla ignore-cst-v1r1 Hmean send-64 193.41 ( 0.00%) 226.54 * 17.13%* Hmean send-128 392.16 ( 0.00%) 450.54 * 14.89%* Hmean send-256 769.94 ( 0.00%) 881.85 * 14.53%* Hmean send-1024 2994.21 ( 0.00%) 3468.95 * 15.85%* Hmean send-2048 5725.60 ( 0.00%) 6628.99 * 15.78%* Hmean send-3312 8468.36 ( 0.00%) 10288.02 * 21.49%* Hmean send-4096 10135.46 ( 0.00%) 12387.57 * 22.22%* Hmean send-8192 17142.07 ( 0.00%) 19748.11 * 15.20%* Hmean send-16384 28539.71 ( 0.00%) 30084.45 * 5.41%* Fixes: e6d4f08a6776 ("intel_idle: Use ACPI _CST on server systems") Signed-off-by: Mel Gorman Cc: 5.6+ # 5.6+ Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index bf3dd4a19fef..56f5b8077cba 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1216,14 +1216,13 @@ static bool __init intel_idle_acpi_cst_extract(void) if (!intel_idle_cst_usable()) continue; - if (!acpi_processor_claim_cst_control()) { - acpi_state_table.count = 0; - return false; - } + if (!acpi_processor_claim_cst_control()) + break; return true; } + acpi_state_table.count = 0; pr_debug("ACPI _CST not found or not usable\n"); return false; } -- cgit v1.2.3 From 5368512abe08a28525d9b24abbfc2a72493e8dba Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Sun, 18 Oct 2020 22:57:41 -0500 Subject: acpi-cpufreq: Honor _PSD table setting on new AMD CPUs acpi-cpufreq has a old quirk that overrides the _PSD table supplied by BIOS on AMD CPUs. However the _PSD table of new AMD CPUs (Family 19h+) now accurately reports the P-state dependency of CPU cores. Hence this quirk needs to be fixed in order to support new CPUs' frequency control. Fixes: acd316248205 ("acpi-cpufreq: Add quirk to disable _PSD usage on all AMD CPUs") Signed-off-by: Wei Huang [ rjw: Subject edit ] Cc: 3.10+ # 3.10+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index e4ff681faaaa..1e4fbb002a31 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -691,7 +691,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) cpumask_copy(policy->cpus, topology_core_cpumask(cpu)); } - if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { + if (check_amd_hwpstate_cpu(cpu) && boot_cpu_data.x86 < 0x19 && + !acpi_pstate_strict) { cpumask_clear(policy->cpus); cpumask_set_cpu(cpu, policy->cpus); cpumask_copy(data->freqdomain_cpus, -- cgit v1.2.3