Diffstat (limited to 'drivers/idle/intel_idle.c')
 drivers/idle/intel_idle.c | 231 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 208 insertions(+), 23 deletions(-)
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index aa2d19db2b1d..34201d7ef33e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -199,6 +199,43 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
return __intel_idle(dev, drv, index);
}
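+/*
+ * raw_safe_halt() executes STI;HLT, so the CPU sleeps with interrupts
+ * enabled and wakes up on the next interrupt; interrupts are disabled
+ * again on return because cpuidle ->enter() callbacks must return with
+ * IRQs off.
+ */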
+static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ raw_safe_halt();
+ raw_local_irq_disable();
+ return index;
+}
+
+/**
+ * intel_idle_hlt - Ask the processor to enter the given idle state using HLT.
+ * @dev: cpuidle device of the target CPU.
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
+ * @index: Target idle state index.
+ *
+ * Use the HLT instruction to notify the processor that the CPU represented by
+ * @dev is idle and it can try to enter the idle state corresponding to @index.
+ *
+ * Must be called under local_irq_disable().
+ */
+static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ return __intel_idle_hlt(dev, drv, index);
+}
+
+static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ int ret;
+
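+ /* As in intel_idle_irq(), run with interrupts enabled around the halt. */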
+ raw_local_irq_enable();
+ ret = __intel_idle_hlt(dev, drv, index);
+ raw_local_irq_disable();
+
+ return ret;
+}
+
/**
* intel_idle_s2idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
@@ -1242,6 +1279,25 @@ static struct cpuidle_state snr_cstates[] __initdata = {
.enter = NULL }
};
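+/*
+ * The latency and residency values below are rough placeholders: short
+ * guest idle periods are dominated by vmexit/vmenter cost rather than by
+ * hardware C-state latencies. For the deep "Long HLT" state they are
+ * adjusted at init time by matchup_vm_state_with_baremetal().
+ */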
+static struct cpuidle_state vmguest_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "HLT",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
+ .exit_latency = 5,
+ .target_residency = 10,
+ .enter = &intel_idle_hlt, },
+ {
+ .name = "C1L",
+ .desc = "Long HLT",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 5,
+ .target_residency = 200,
+ .enter = &intel_idle_hlt, },
+ {
+ .enter = NULL }
+};
+
static const struct idle_cpu idle_cpu_nehalem __initconst = {
.state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
@@ -1839,6 +1895,66 @@ static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
return true;
}
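+/*
+ * Select the enter() callback for @state based on its flags: HLT-based
+ * states keep their callback (unless force_irq_on overrides it), XSTATE
+ * and IBRS states get their dedicated wrappers, and IRQ_ENABLE states
+ * are entered through intel_idle_irq().
+ */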
+static void state_update_enter_method(struct cpuidle_state *state, int cstate)
+{
+ if (state->enter == intel_idle_hlt) {
+ if (force_irq_on) {
+ pr_info("forced intel_idle_hlt_irq_on for state %d\n", cstate);
+ state->enter = intel_idle_hlt_irq_on;
+ }
+ return;
+ }
+ if (state->enter == intel_idle_hlt_irq_on)
+ return; /* no update scenarios */
+
+ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
+ /*
+ * Combining CPUIDLE_FLAG_INIT_XSTATE with the IBRS or IRQ_ENABLE
+ * flags is not currently supported by this driver.
+ */
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ state->enter = intel_idle_xstate;
+ return;
+ }
+
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ state->flags & CPUIDLE_FLAG_IBRS) {
+ /*
+ * IBRS mitigation requires that C-states are entered
+ * with interrupts disabled.
+ */
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ state->enter = intel_idle_ibrs;
+ return;
+ }
+
+ if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
+ state->enter = intel_idle_irq;
+ return;
+ }
+
+ if (force_irq_on) {
+ pr_info("forced intel_idle_irq for state %d\n", cstate);
+ state->enter = intel_idle_irq;
+ }
+}
+
+/*
+ * For MWAIT-based states, verify via the CPUID data that the state is
+ * actually supported by this specific CPU. For non-MWAIT-based (HLT)
+ * states, this check should be skipped.
+ */
+static bool should_verify_mwait(struct cpuidle_state *state)
+{
+ if (state->enter == intel_idle_hlt)
+ return false;
+ if (state->enter == intel_idle_hlt_irq_on)
+ return false;
+
+ return true;
+}
+
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
int cstate;
@@ -1887,35 +2003,15 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
}
mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
- if (!intel_idle_verify_cstate(mwait_hint))
+ if (should_verify_mwait(&cpuidle_state_table[cstate]) &&
+     !intel_idle_verify_cstate(mwait_hint))
continue;
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
state = &drv->states[drv->state_count];
- if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
- /*
- * Combining with XSTATE with IBRS or IRQ_ENABLE flags
- * is not currently supported but this driver.
- */
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
- state->enter = intel_idle_xstate;
- } else if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
- state->flags & CPUIDLE_FLAG_IBRS) {
- /*
- * IBRS mitigation requires that C-states are entered
- * with interrupts disabled.
- */
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
- state->enter = intel_idle_ibrs;
- } else if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
- state->enter = intel_idle_irq;
- } else if (force_irq_on) {
- pr_info("forced intel_idle_irq for state %d\n", cstate);
- state->enter = intel_idle_irq;
- }
+ state_update_enter_method(state, cstate);
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
@@ -2041,6 +2137,93 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}
+/*
+ * Match up the latency and break-even point of the bare metal (CPU-based)
+ * states with the deepest available VM state.
+ *
+ * We only want to do this for the deepest state, the one that has the
+ * CPUIDLE_FLAG_TLB_FLUSHED flag set.
+ *
+ * All our short idle states are dominated by vmexit/vmenter latencies,
+ * not the underlying hardware latencies, so we keep our values for these.
+ */
+static void matchup_vm_state_with_baremetal(void)
+{
+ int cstate;
+
+ for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+ int matching_cstate;
+
+ if (intel_idle_max_cstate_reached(cstate))
+ break;
+
+ if (!cpuidle_state_table[cstate].enter)
+ break;
+
+ if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED))
+ continue;
+
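+ /*
+ * Scan the bare metal table and inherit the largest exit latency
+ * seen, along with its break-even residency.
+ */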
+ for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) {
+ if (!icpu->state_table[matching_cstate].enter)
+ break;
+ if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) {
+ cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency;
+ cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency;
+ }
+ }
+ }
+}
+
+static int __init intel_idle_vminit(const struct x86_cpu_id *id)
+{
+ int retval;
+
+ cpuidle_state_table = vmguest_cstates;
+
+ icpu = (const struct idle_cpu *)id->driver_data;
+
+ pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
+ boot_cpu_data.x86_model);
+
+ intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+ if (!intel_idle_cpuidle_devices)
+ return -ENOMEM;
+
+ /*
+ * We don't know exactly what the host will do when we go idle, but as a
+ * worst-case estimate we can assume that the exit latency of the deepest
+ * host state will be hit for our deep (long duration) guest idle state.
+ * The same logic applies to the break-even point for the long duration
+ * guest idle state. So let's copy these two properties from the table we
+ * found for the host CPU type.
+ */
+ matchup_vm_state_with_baremetal();
+
+ intel_idle_cpuidle_driver_init(&intel_idle_driver);
+
+ retval = cpuidle_register_driver(&intel_idle_driver);
+ if (retval) {
+ struct cpuidle_driver *drv = cpuidle_get_driver();
+ printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
+ drv ? drv->name : "none");
+ goto init_driver_fail;
+ }
+
+ retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
+ intel_idle_cpu_online, NULL);
+ if (retval < 0)
+ goto hp_setup_fail;
+
+ return 0;
+hp_setup_fail:
+ intel_idle_cpuidle_devices_uninit();
+ cpuidle_unregister_driver(&intel_idle_driver);
+init_driver_fail:
+ free_percpu(intel_idle_cpuidle_devices);
+ return retval;
+}
+
static int __init intel_idle_init(void)
{
const struct x86_cpu_id *id;
@@ -2059,6 +2242,8 @@ static int __init intel_idle_init(void)
id = x86_match_cpu(intel_idle_ids);
if (id) {
if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
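+ /* Without MWAIT, a guest can still use the HLT-based states. */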
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return intel_idle_vminit(id);
pr_debug("Please enable MWAIT in BIOS SETUP\n");
return -ENODEV;
}