summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/apic/apic.c68
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c24
-rw-r--r--arch/x86/kernel/apic/io_apic.c8
-rw-r--r--arch/x86/kernel/apic/probe_32.c3
-rw-r--r--arch/x86/kernel/cpu/amd.c66
-rw-r--r--arch/x86/kernel/cpu/bugs.c107
-rw-r--r--arch/x86/kernel/cpu/common.c44
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c1
-rw-r--r--arch/x86/kernel/cpu/umwait.c39
-rw-r--r--arch/x86/kernel/head_64.S8
-rw-r--r--arch/x86/kernel/hpet.c12
-rw-r--r--arch/x86/kernel/kvm.c8
-rw-r--r--arch/x86/kernel/ptrace.c1
-rw-r--r--arch/x86/kernel/stacktrace.c2
-rw-r--r--arch/x86/kernel/sysfb_efi.c46
-rw-r--r--arch/x86/kernel/uprobes.c17
16 files changed, 364 insertions, 90 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f5291362da1a..aa5495d0f478 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
/*
- * Temporary interrupt handler.
+ * Temporary interrupt handler and polled calibration function.
*/
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
@@ -851,7 +851,8 @@ bool __init apic_needs_pit(void)
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
- void (*real_handler)(struct clock_event_device *dev);
+ u64 tsc_perj = 0, tsc_start = 0;
+ unsigned long jif_start;
unsigned long deltaj;
long delta, deltatsc;
int pm_referenced = 0;
@@ -878,28 +879,64 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");
+ /*
+ * There are platforms w/o global clockevent devices. Instead of
+ * making the calibration conditional on that, use a polling based
+ * approach everywhere.
+ */
local_irq_disable();
- /* Replace the global interrupt handler */
- real_handler = global_clock_event->event_handler;
- global_clock_event->event_handler = lapic_cal_handler;
-
/*
* Setup the APIC counter to maximum. There is no way the lapic
* can underflow in the 100ms detection time frame
*/
__setup_APIC_LVTT(0xffffffff, 0, 0);
- /* Let the interrupts run */
+ /*
+ * Methods to terminate the calibration loop:
+ * 1) Global clockevent if available (jiffies)
+ * 2) TSC if available and frequency is known
+ */
+ jif_start = READ_ONCE(jiffies);
+
+ if (tsc_khz) {
+ tsc_start = rdtsc();
+ tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
+ }
+
+ /*
+ * Enable interrupts so the tick can fire, if a global
+ * clockevent device is available
+ */
local_irq_enable();
- while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
- cpu_relax();
+ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
+ /* Wait for a tick to elapse */
+ while (1) {
+ if (tsc_khz) {
+ u64 tsc_now = rdtsc();
+ if ((tsc_now - tsc_start) >= tsc_perj) {
+ tsc_start += tsc_perj;
+ break;
+ }
+ } else {
+ unsigned long jif_now = READ_ONCE(jiffies);
- local_irq_disable();
+ if (time_after(jif_now, jif_start)) {
+ jif_start = jif_now;
+ break;
+ }
+ }
+ cpu_relax();
+ }
- /* Restore the real event handler */
- global_clock_event->event_handler = real_handler;
+ /* Invoke the calibration routine */
+ local_irq_disable();
+ lapic_cal_handler(NULL);
+ local_irq_enable();
+ }
+
+ local_irq_disable();
/* Build delta t1-t2 as apic timer counts down */
delta = lapic_cal_t1 - lapic_cal_t2;
@@ -943,10 +980,11 @@ static int __init calibrate_APIC_clock(void)
levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
/*
- * PM timer calibration failed or not turned on
- * so lets try APIC timer based calibration
+ * PM timer calibration failed or not turned on so lets try APIC
+ * timer based calibration, if a global clockevent device is
+ * available.
*/
- if (!pm_referenced) {
+ if (!pm_referenced && global_clock_event) {
apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
/*
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index afee386ff711..caedd8d60d36 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -38,32 +38,12 @@ static int bigsmp_early_logical_apicid(int cpu)
return early_per_cpu(x86_cpu_to_apicid, cpu);
}
-static inline unsigned long calculate_ldr(int cpu)
-{
- unsigned long val, id;
-
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- id = per_cpu(x86_bios_cpu_apicid, cpu);
- val |= SET_APIC_LOGICAL_ID(id);
-
- return val;
-}
-
/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
+ * bigsmp enables physical destination mode
+ * and doesn't use LDR and DFR
*/
static void bigsmp_init_apic_ldr(void)
{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_FLAT);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
}
static void bigsmp_setup_apic_routing(void)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index c7bb6c69f21c..d6af97fd170a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2438,7 +2438,13 @@ unsigned int arch_dynirq_lower_bound(unsigned int from)
* dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
* gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
- return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
+ if (!ioapic_initialized)
+ return gsi_top;
+ /*
+ * For DT enabled machines ioapic_dynirq_base is irrelevant and not
+ * updated. So simply return @from if ioapic_dynirq_base == 0.
+ */
+ return ioapic_dynirq_base ? : from;
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1492799b8f43..ee2d91e382f1 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -184,7 +184,8 @@ void __init default_setup_apic_routing(void)
def_to_bigsmp = 0;
break;
}
- /* If P4 and above fall through */
+ /* P4 and above */
+ /* fall through */
case X86_VENDOR_HYGON:
case X86_VENDOR_AMD:
def_to_bigsmp = 1;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 8d4e50428b68..68c363c341bf 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,6 +804,64 @@ static void init_amd_ln(struct cpuinfo_x86 *c)
msr_set_bit(MSR_AMD64_DE_CFG, 31);
}
+static bool rdrand_force;
+
+static int __init rdrand_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "force"))
+ rdrand_force = true;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+early_param("rdrand", rdrand_cmdline);
+
+static void clear_rdrand_cpuid_bit(struct cpuinfo_x86 *c)
+{
+ /*
+ * Saving of the MSR used to hide the RDRAND support during
+ * suspend/resume is done by arch/x86/power/cpu.c, which is
+ * dependent on CONFIG_PM_SLEEP.
+ */
+ if (!IS_ENABLED(CONFIG_PM_SLEEP))
+ return;
+
+ /*
+ * The nordrand option can clear X86_FEATURE_RDRAND, so check for
+ * RDRAND support using the CPUID function directly.
+ */
+ if (!(cpuid_ecx(1) & BIT(30)) || rdrand_force)
+ return;
+
+ msr_clear_bit(MSR_AMD64_CPUID_FN_1, 62);
+
+ /*
+ * Verify that the CPUID change has occurred in case the kernel is
+ * running virtualized and the hypervisor doesn't support the MSR.
+ */
+ if (cpuid_ecx(1) & BIT(30)) {
+ pr_info_once("BIOS may not properly restore RDRAND after suspend, but hypervisor does not support hiding RDRAND via CPUID.\n");
+ return;
+ }
+
+ clear_cpu_cap(c, X86_FEATURE_RDRAND);
+ pr_info_once("BIOS may not properly restore RDRAND after suspend, hiding RDRAND via CPUID. Use rdrand=force to reenable.\n");
+}
+
+static void init_amd_jg(struct cpuinfo_x86 *c)
+{
+ /*
+ * Some BIOS implementations do not restore proper RDRAND support
+ * across suspend and resume. Check on whether to hide the RDRAND
+ * instruction support via CPUID.
+ */
+ clear_rdrand_cpuid_bit(c);
+}
+
static void init_amd_bd(struct cpuinfo_x86 *c)
{
u64 value;
@@ -818,6 +876,13 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
wrmsrl_safe(MSR_F15H_IC_CFG, value);
}
}
+
+ /*
+ * Some BIOS implementations do not restore proper RDRAND support
+ * across suspend and resume. Check on whether to hide the RDRAND
+ * instruction support via CPUID.
+ */
+ clear_rdrand_cpuid_bit(c);
}
static void init_amd_zn(struct cpuinfo_x86 *c)
@@ -860,6 +925,7 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x10: init_amd_gh(c); break;
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
+ case 0x16: init_amd_jg(c); break;
case 0x17: init_amd_zn(c); break;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 66ca906aa790..c6fa3ef10b4e 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -34,6 +34,7 @@
#include "cpu.h"
+static void __init spectre_v1_select_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
@@ -98,17 +99,11 @@ void __init check_bugs(void)
if (boot_cpu_has(X86_FEATURE_STIBP))
x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
- /* Select the proper spectre mitigation before patching alternatives */
+ /* Select the proper CPU mitigations before patching alternatives: */
+ spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
-
- /*
- * Select proper mitigation for any exposure to the Speculative Store
- * Bypass vulnerability.
- */
ssb_select_mitigation();
-
l1tf_select_mitigation();
-
mds_select_mitigation();
arch_smt_update();
@@ -274,6 +269,98 @@ static int __init mds_cmdline(char *str)
early_param("mds", mds_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V1 : " fmt
+
+enum spectre_v1_mitigation {
+ SPECTRE_V1_MITIGATION_NONE,
+ SPECTRE_V1_MITIGATION_AUTO,
+};
+
+static enum spectre_v1_mitigation spectre_v1_mitigation __ro_after_init =
+ SPECTRE_V1_MITIGATION_AUTO;
+
+static const char * const spectre_v1_strings[] = {
+ [SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers",
+ [SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization",
+};
+
+/*
+ * Does SMAP provide full mitigation against speculative kernel access to
+ * userspace?
+ */
+static bool smap_works_speculatively(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_SMAP))
+ return false;
+
+ /*
+ * On CPUs which are vulnerable to Meltdown, SMAP does not
+ * prevent speculative access to user data in the L1 cache.
+ * Consider SMAP to be non-functional as a mitigation on these
+ * CPUs.
+ */
+ if (boot_cpu_has(X86_BUG_CPU_MELTDOWN))
+ return false;
+
+ return true;
+}
+
+static void __init spectre_v1_select_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) {
+ spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
+ return;
+ }
+
+ if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) {
+ /*
+ * With Spectre v1, a user can speculatively control either
+ * path of a conditional swapgs with a user-controlled GS
+ * value. The mitigation is to add lfences to both code paths.
+ *
+ * If FSGSBASE is enabled, the user can put a kernel address in
+ * GS, in which case SMAP provides no protection.
+ *
+ * [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the
+ * FSGSBASE enablement patches have been merged. ]
+ *
+ * If FSGSBASE is disabled, the user can only put a user space
+ * address in GS. That makes an attack harder, but still
+ * possible if there's no SMAP protection.
+ */
+ if (!smap_works_speculatively()) {
+ /*
+ * Mitigation can be provided from SWAPGS itself or
+ * PTI as the CR3 write in the Meltdown mitigation
+ * is serializing.
+ *
+ * If neither is there, mitigate with an LFENCE to
+ * stop speculation through swapgs.
+ */
+ if (boot_cpu_has_bug(X86_BUG_SWAPGS) &&
+ !boot_cpu_has(X86_FEATURE_PTI))
+ setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER);
+
+ /*
+ * Enable lfences in the kernel entry (non-swapgs)
+ * paths, to prevent user entry from speculatively
+ * skipping swapgs.
+ */
+ setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_KERNEL);
+ }
+ }
+
+ pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]);
+}
+
+static int __init nospectre_v1_cmdline(char *str)
+{
+ spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
+ return 0;
+}
+early_param("nospectre_v1", nospectre_v1_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
@@ -1226,7 +1313,7 @@ static ssize_t l1tf_show_state(char *buf)
static ssize_t mds_show_state(char *buf)
{
- if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
return sprintf(buf, "%s; SMT Host state unknown\n",
mds_strings[mds_mitigation]);
}
@@ -1290,7 +1377,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
break;
case X86_BUG_SPECTRE_V1:
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
case X86_BUG_SPECTRE_V2:
return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 11472178e17f..f125bf7ecb6f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1022,6 +1022,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_L1TF BIT(3)
#define NO_MDS BIT(4)
#define MSBDS_ONLY BIT(5)
+#define NO_SWAPGS BIT(6)
#define VULNWL(_vendor, _family, _model, _whitelist) \
{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -1048,30 +1049,38 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
- VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY),
+ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
VULNWL_INTEL(CORE_YONAH, NO_SSB),
- VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY),
+ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF),
- VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF),
- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF),
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS),
+
+ /*
+ * Technically, swapgs isn't serializing on AMD (despite it previously
+ * being documented as such in the APM). But according to AMD, %gs is
+ * updated non-speculatively, and the issuing of %gs-relative memory
+ * operands will be blocked until the %gs update completes, which is
+ * good enough for our purposes.
+ */
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
{}
};
@@ -1108,6 +1117,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
}
+ if (!cpu_matches(NO_SWAPGS))
+ setup_force_cpu_bug(X86_BUG_SWAPGS);
+
if (cpu_matches(NO_MELTDOWN))
return;
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 4296c702a3f7..72182809b333 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -98,6 +98,7 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
case 7:
if (size < 0x40)
break;
+ /* Else, fall through */
case 6:
case 5:
case 4:
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
index 6a204e7336c1..32b4dc9030aa 100644
--- a/arch/x86/kernel/cpu/umwait.c
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -18,6 +18,12 @@
static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
/*
+ * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
+ * hardware or BIOS before kernel boot.
+ */
+static u32 orig_umwait_control_cached __ro_after_init;
+
+/*
* Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
* the sysfs write functions.
*/
@@ -53,6 +59,23 @@ static int umwait_cpu_online(unsigned int cpu)
}
/*
+ * The CPU hotplug callback sets the control MSR to the original control
+ * value.
+ */
+static int umwait_cpu_offline(unsigned int cpu)
+{
+ /*
+ * This code is protected by the CPU hotplug already and
+ * orig_umwait_control_cached is never changed after it caches
+ * the original control MSR value in umwait_init(). So there
+ * is no race condition here.
+ */
+ wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0);
+
+ return 0;
+}
+
+/*
* On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
* is the only active CPU at this time. The MSR is set up on the APs via the
* CPU hotplug callback.
@@ -185,8 +208,22 @@ static int __init umwait_init(void)
if (!boot_cpu_has(X86_FEATURE_WAITPKG))
return -ENODEV;
+ /*
+ * Cache the original control MSR value before the control MSR is
+ * changed. This is the only place where orig_umwait_control_cached
+ * is modified.
+ */
+ rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
+
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
- umwait_cpu_online, NULL);
+ umwait_cpu_online, umwait_cpu_offline);
+ if (ret < 0) {
+ /*
+ * On failure, the control MSR on all CPUs has the
+ * original control value.
+ */
+ return ret;
+ }
register_syscore_ops(&umwait_syscore_ops);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a6342c899be5..f3d3e9646a99 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -193,10 +193,10 @@ ENTRY(secondary_startup_64)
/* Set up %gs.
*
- * The base of %gs always points to the bottom of the irqstack
- * union. If the stack protector canary is enabled, it is
- * located at %gs:40. Note that, on SMP, the boot cpu uses
- * init data section till per cpu areas are set up.
+ * The base of %gs always points to fixed_percpu_data. If the
+ * stack protector canary is enabled, it is located at %gs:40.
+ * Note that, on SMP, the boot cpu uses init data section until
+ * the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
movl initial_gs(%rip),%eax
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c43e96a938d0..c6f791bc481e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -827,10 +827,6 @@ int __init hpet_enable(void)
if (!hpet_cfg_working())
goto out_nohpet;
- /* Validate that the counter is counting */
- if (!hpet_counting())
- goto out_nohpet;
-
/*
* Read the period and check for a sane value:
*/
@@ -896,6 +892,14 @@ int __init hpet_enable(void)
}
hpet_print_config();
+ /*
+ * Validate that the counter is counting. This needs to be done
+ * after sanitizing the config registers to properly deal with
+ * force enabled HPETs.
+ */
+ if (!hpet_counting())
+ goto out_nohpet;
+
clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
if (id & HPET_ID_LEGSUP) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b7f34fe2171e..4ab377c9fffe 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -308,9 +308,6 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
static void kvm_guest_cpu_init(void)
{
- if (!kvm_para_available())
- return;
-
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
@@ -625,9 +622,6 @@ static void __init kvm_guest_init(void)
{
int i;
- if (!kvm_para_available())
- return;
-
paravirt_ops_setup();
register_reboot_notifier(&kvm_pv_reboot_nb);
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
@@ -848,8 +842,6 @@ asm(
*/
void __init kvm_spinlock_init(void)
{
- if (!kvm_para_available())
- return;
/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0fdbe89d0754..3c5bbe8e4120 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -201,6 +201,7 @@ static int set_segment_reg(struct task_struct *task,
case offsetof(struct user_regs_struct, ss):
if (unlikely(value == 0))
return -EIO;
+ /* Else, fall through */
default:
*pt_regs_access(task_pt_regs(task), offset) = value;
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4f36d3241faf..2d6898c2cb64 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -100,7 +100,7 @@ copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
{
int ret;
- if (!access_ok(fp, sizeof(*frame)))
+ if (__range_not_ok(fp, sizeof(*frame), TASK_SIZE))
return 0;
ret = 1;
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index 8eb67a670b10..653b7f617b61 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -230,9 +230,55 @@ static const struct dmi_system_id efifb_dmi_system_table[] __initconst = {
{},
};
+/*
+ * Some devices have a portrait LCD but advertise a landscape resolution (and
+ * pitch). We simply swap width and height for these devices so that we can
+ * correctly deal with some of them coming with multiple resolutions.
+ */
+static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
+ {
+ /*
+ * Lenovo MIIX310-10ICR, only some batches have the troublesome
+ * 800x1280 portrait screen. Luckily the portrait version has
+ * its own BIOS version, so we match on that.
+ */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "MIIX 310-10ICR"),
+ DMI_EXACT_MATCH(DMI_BIOS_VERSION, "1HCN44WW"),
+ },
+ },
+ {
+ /* Lenovo MIIX 320-10ICR with 800x1280 portrait screen */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
+ "Lenovo MIIX 320-10ICR"),
+ },
+ },
+ {
+ /* Lenovo D330 with 800x1280 or 1200x1920 portrait screen */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
+ "Lenovo ideapad D330-10IGM"),
+ },
+ },
+ {},
+};
+
__init void sysfb_apply_efi_quirks(void)
{
if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
!(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
dmi_check_system(efifb_dmi_system_table);
+
+ if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI &&
+ dmi_check_system(efifb_dmi_swap_width_height)) {
+ u16 temp = screen_info.lfb_width;
+
+ screen_info.lfb_width = screen_info.lfb_height;
+ screen_info.lfb_height = temp;
+ screen_info.lfb_linelength = 4 * screen_info.lfb_width;
+ }
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index d8359ebeea70..8cd745ef8c7b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -508,9 +508,12 @@ struct uprobe_xol_ops {
void (*abort)(struct arch_uprobe *, struct pt_regs *);
};
-static inline int sizeof_long(void)
+static inline int sizeof_long(struct pt_regs *regs)
{
- return in_ia32_syscall() ? 4 : 8;
+ /*
+ * Check registers for mode as in_xxx_syscall() does not apply here.
+ */
+ return user_64bit_mode(regs) ? 8 : 4;
}
static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
@@ -521,9 +524,9 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
static int emulate_push_stack(struct pt_regs *regs, unsigned long val)
{
- unsigned long new_sp = regs->sp - sizeof_long();
+ unsigned long new_sp = regs->sp - sizeof_long(regs);
- if (copy_to_user((void __user *)new_sp, &val, sizeof_long()))
+ if (copy_to_user((void __user *)new_sp, &val, sizeof_long(regs)))
return -EFAULT;
regs->sp = new_sp;
@@ -556,7 +559,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
long correction = utask->vaddr - utask->xol_vaddr;
regs->ip += correction;
} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
- regs->sp += sizeof_long(); /* Pop incorrect return address */
+ regs->sp += sizeof_long(regs); /* Pop incorrect return address */
if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen))
return -ERESTART;
}
@@ -675,7 +678,7 @@ static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
* "call" insn was executed out-of-line. Just restore ->sp and restart.
* We could also restore ->ip and try to call branch_emulate_op() again.
*/
- regs->sp += sizeof_long();
+ regs->sp += sizeof_long(regs);
return -ERESTART;
}
@@ -1056,7 +1059,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
unsigned long
arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
{
- int rasize = sizeof_long(), nleft;
+ int rasize = sizeof_long(regs), nleft;
unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))