summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/smpboot.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/smpboot.c')
-rw-r--r--arch/x86/kernel/smpboot.c358
1 files changed, 1 insertions, 357 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 66199f9574ef..5e7f9532a10d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -56,7 +56,6 @@
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/overflow.h>
-#include <linux/syscore_ops.h>
#include <asm/acpi.h>
#include <asm/desc.h>
@@ -188,7 +187,7 @@ static void smp_callin(void)
*/
set_cpu_sibling_map(raw_smp_processor_id());
- init_freq_invariance(true, false);
+ ap_init_aperfmperf();
/*
* Get our bogomips.
@@ -1406,7 +1405,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
smp_prepare_cpus_common();
- init_freq_invariance(false, false);
smp_sanity_check();
switch (apic_intr_mode) {
@@ -1858,357 +1856,3 @@ void native_play_dead(void)
}
#endif
-
-#ifdef CONFIG_X86_64
-/*
- * APERF/MPERF frequency ratio computation.
- *
- * The scheduler wants to do frequency invariant accounting and needs a <1
- * ratio to account for the 'current' frequency, corresponding to
- * freq_curr / freq_max.
- *
- * Since the frequency freq_curr on x86 is controlled by micro-controller and
- * our P-state setting is little more than a request/hint, we need to observe
- * the effective frequency 'BusyMHz', i.e. the average frequency over a time
- * interval after discarding idle time. This is given by:
- *
- * BusyMHz = delta_APERF / delta_MPERF * freq_base
- *
- * where freq_base is the max non-turbo P-state.
- *
- * The freq_max term has to be set to a somewhat arbitrary value, because we
- * can't know which turbo states will be available at a given point in time:
- * it all depends on the thermal headroom of the entire package. We set it to
- * the turbo level with 4 cores active.
- *
- * Benchmarks show that's a good compromise between the 1C turbo ratio
- * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
- * which would ignore the entire turbo range (a conspicuous part, making
- * freq_curr/freq_max always maxed out).
- *
- * An exception to the heuristic above is the Atom uarch, where we choose the
- * highest turbo level for freq_max since Atom's are generally oriented towards
- * power efficiency.
- *
- * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
- * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
- */
-
-DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
-
-static DEFINE_PER_CPU(u64, arch_prev_aperf);
-static DEFINE_PER_CPU(u64, arch_prev_mperf);
-static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
-static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
-
-void arch_set_max_freq_ratio(bool turbo_disabled)
-{
- arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
- arch_turbo_freq_ratio;
-}
-EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
-
-static bool turbo_disabled(void)
-{
- u64 misc_en;
- int err;
-
- err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
- if (err)
- return false;
-
- return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
-}
-
-static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
- int err;
-
- err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
- if (err)
- return false;
-
- err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
- if (err)
- return false;
-
- *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */
- *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */
-
- return true;
-}
-
-#define X86_MATCH(model) \
- X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
- INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
-
-static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
- X86_MATCH(XEON_PHI_KNL),
- X86_MATCH(XEON_PHI_KNM),
- {}
-};
-
-static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
- X86_MATCH(SKYLAKE_X),
- {}
-};
-
-static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
- X86_MATCH(ATOM_GOLDMONT),
- X86_MATCH(ATOM_GOLDMONT_D),
- X86_MATCH(ATOM_GOLDMONT_PLUS),
- {}
-};
-
-static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
- int num_delta_fratio)
-{
- int fratio, delta_fratio, found;
- int err, i;
- u64 msr;
-
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
- if (err)
- return false;
-
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
- if (err)
- return false;
-
- fratio = (msr >> 8) & 0xFF;
- i = 16;
- found = 0;
- do {
- if (found >= num_delta_fratio) {
- *turbo_freq = fratio;
- return true;
- }
-
- delta_fratio = (msr >> (i + 5)) & 0x7;
-
- if (delta_fratio) {
- found += 1;
- fratio -= delta_fratio;
- }
-
- i += 8;
- } while (i < 64);
-
- return true;
-}
-
-static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
-{
- u64 ratios, counts;
- u32 group_size;
- int err, i;
-
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
- if (err)
- return false;
-
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
- if (err)
- return false;
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
- if (err)
- return false;
-
- for (i = 0; i < 64; i += 8) {
- group_size = (counts >> i) & 0xFF;
- if (group_size >= size) {
- *turbo_freq = (ratios >> i) & 0xFF;
- return true;
- }
- }
-
- return false;
-}
-
-static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
- u64 msr;
- int err;
-
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
- if (err)
- return false;
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
- if (err)
- return false;
-
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
- *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */
-
- /* The CPU may have less than 4 cores */
- if (!*turbo_freq)
- *turbo_freq = msr & 0xFF; /* 1C turbo */
-
- return true;
-}
-
-static bool intel_set_max_freq_ratio(void)
-{
- u64 base_freq, turbo_freq;
- u64 turbo_ratio;
-
- if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
- goto out;
-
- if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
- skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
- goto out;
-
- if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
- knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
- goto out;
-
- if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
- skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
- goto out;
-
- if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
- goto out;
-
- return false;
-
-out:
- /*
- * Some hypervisors advertise X86_FEATURE_APERFMPERF
- * but then fill all MSR's with zeroes.
- * Some CPUs have turbo boost but don't declare any turbo ratio
- * in MSR_TURBO_RATIO_LIMIT.
- */
- if (!base_freq || !turbo_freq) {
- pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
- return false;
- }
-
- turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
- if (!turbo_ratio) {
- pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
- return false;
- }
-
- arch_turbo_freq_ratio = turbo_ratio;
- arch_set_max_freq_ratio(turbo_disabled());
-
- return true;
-}
-
-static void init_counter_refs(void)
-{
- u64 aperf, mperf;
-
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
-
- this_cpu_write(arch_prev_aperf, aperf);
- this_cpu_write(arch_prev_mperf, mperf);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static struct syscore_ops freq_invariance_syscore_ops = {
- .resume = init_counter_refs,
-};
-
-static void register_freq_invariance_syscore_ops(void)
-{
- /* Bail out if registered already. */
- if (freq_invariance_syscore_ops.node.prev)
- return;
-
- register_syscore_ops(&freq_invariance_syscore_ops);
-}
-#else
-static inline void register_freq_invariance_syscore_ops(void) {}
-#endif
-
-void init_freq_invariance(bool secondary, bool cppc_ready)
-{
- bool ret = false;
-
- if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
- return;
-
- if (secondary) {
- if (static_branch_likely(&arch_scale_freq_key)) {
- init_counter_refs();
- }
- return;
- }
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- ret = intel_set_max_freq_ratio();
- else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- if (!cppc_ready) {
- return;
- }
- ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
- }
-
- if (ret) {
- init_counter_refs();
- static_branch_enable(&arch_scale_freq_key);
- register_freq_invariance_syscore_ops();
- pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
- } else {
- pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
- }
-}
-
-static void disable_freq_invariance_workfn(struct work_struct *work)
-{
- static_branch_disable(&arch_scale_freq_key);
-}
-
-static DECLARE_WORK(disable_freq_invariance_work,
- disable_freq_invariance_workfn);
-
-DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
-
-void arch_scale_freq_tick(void)
-{
- u64 freq_scale;
- u64 aperf, mperf;
- u64 acnt, mcnt;
-
- if (!arch_scale_freq_invariant())
- return;
-
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
-
- acnt = aperf - this_cpu_read(arch_prev_aperf);
- mcnt = mperf - this_cpu_read(arch_prev_mperf);
-
- this_cpu_write(arch_prev_aperf, aperf);
- this_cpu_write(arch_prev_mperf, mperf);
-
- if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
- goto error;
-
- if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
- goto error;
-
- freq_scale = div64_u64(acnt, mcnt);
- if (!freq_scale)
- goto error;
-
- if (freq_scale > SCHED_CAPACITY_SCALE)
- freq_scale = SCHED_CAPACITY_SCALE;
-
- this_cpu_write(arch_freq_scale, freq_scale);
- return;
-
-error:
- pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
- schedule_work(&disable_freq_invariance_work);
-}
-#endif /* CONFIG_X86_64 */