summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/sev.c10
-rw-r--r--arch/x86/entry/vdso/Makefile3
-rw-r--r--arch/x86/events/utils.c5
-rw-r--r--arch/x86/hyperv/hv_init.c20
-rw-r--r--arch/x86/hyperv/hv_vtl.c3
-rw-r--r--arch/x86/include/asm/cpu.h2
-rw-r--r--arch/x86/include/asm/fpu/api.h3
-rw-r--r--arch/x86/include/asm/i8259.h2
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mshyperv.h2
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/nospec-branch.h67
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/svm.h1
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c3
-rw-r--r--arch/x86/kernel/alternative.c13
-rw-r--r--arch/x86/kernel/callthunks.c5
-rw-r--r--arch/x86/kernel/cpu/amd.c8
-rw-r--r--arch/x86/kernel/cpu/bugs.c95
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c68
-rw-r--r--arch/x86/kernel/cpu/mce/core.c32
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c20
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h4
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c10
-rw-r--r--arch/x86/kernel/fpu/core.c5
-rw-r--r--arch/x86/kernel/fpu/xstate.c12
-rw-r--r--arch/x86/kernel/fpu/xstate.h3
-rw-r--r--arch/x86/kernel/i8259.c38
-rw-r--r--arch/x86/kernel/sev-shared.c53
-rw-r--r--arch/x86/kernel/sev.c30
-rw-r--r--arch/x86/kernel/smp.c39
-rw-r--r--arch/x86/kernel/smpboot.c27
-rw-r--r--arch/x86/kernel/topology.c2
-rw-r--r--arch/x86/kernel/tsc_sync.c10
-rw-r--r--arch/x86/kernel/vmlinux.lds.S7
-rw-r--r--arch/x86/kvm/cpuid.c8
-rw-r--r--arch/x86/kvm/lapic.c8
-rw-r--r--arch/x86/kvm/pmu.c27
-rw-r--r--arch/x86/kvm/pmu.h6
-rw-r--r--arch/x86/kvm/svm/avic.c5
-rw-r--r--arch/x86/kvm/svm/nested.c3
-rw-r--r--arch/x86/kvm/svm/pmu.c2
-rw-r--r--arch/x86/kvm/svm/svm.c5
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c4
-rw-r--r--arch/x86/kvm/x86.c40
-rw-r--r--arch/x86/lib/copy_mc.c8
-rw-r--r--arch/x86/lib/retpoline.S186
-rw-r--r--arch/x86/mm/pti.c58
51 files changed, 580 insertions, 399 deletions
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index dc8c876fbd8f..80d76aea1f7b 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -103,6 +103,16 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
return ES_OK;
}
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+{
+ return ES_OK;
+}
+
+static bool fault_in_kernel_space(unsigned long address)
+{
+ return false;
+}
+
#undef __init
#define __init
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 6a1821bd7d5e..83c0afb7c741 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -42,7 +42,8 @@ vdso_img-$(VDSO64-y) += 64
vdso_img-$(VDSOX32-y) += x32
vdso_img-$(VDSO32-y) += 32
-obj-$(VDSO32-y) += vdso32-setup.o
+obj-$(VDSO32-y) += vdso32-setup.o
+OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F)
diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c
index 76b1f8bb0fd5..dab4ed199227 100644
--- a/arch/x86/events/utils.c
+++ b/arch/x86/events/utils.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/insn.h>
+#include <linux/mm.h>
#include "perf_event.h"
@@ -132,9 +133,9 @@ static int get_branch_type(unsigned long from, unsigned long to, int abort,
* The LBR logs any address in the IP, even if the IP just
* faulted. This means userspace can control the from address.
* Ensure we don't blindly read any address by validating it is
- * a known text address.
+ * a known text address and not a vsyscall address.
*/
- if (kernel_text_address(from)) {
+ if (kernel_text_address(from) && !in_gate_area_no_mm(from)) {
addr = (void *)from;
/*
* Assume we can get the maximum possible size
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 783ed339f341..21556ad87f4b 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -7,6 +7,8 @@
* Author : K. Y. Srinivasan <kys@microsoft.com>
*/
+#define pr_fmt(fmt) "Hyper-V: " fmt
+
#include <linux/efi.h>
#include <linux/types.h>
#include <linux/bitfield.h>
@@ -191,7 +193,7 @@ void set_hv_tscchange_cb(void (*cb)(void))
struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
if (!hv_reenlightenment_available()) {
- pr_warn("Hyper-V: reenlightenment support is unavailable\n");
+ pr_warn("reenlightenment support is unavailable\n");
return;
}
@@ -394,6 +396,7 @@ static void __init hv_get_partition_id(void)
local_irq_restore(flags);
}
+#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
static u8 __init get_vtl(void)
{
u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS;
@@ -416,13 +419,16 @@ static u8 __init get_vtl(void)
if (hv_result_success(ret)) {
ret = output->as64.low & HV_X64_VTL_MASK;
} else {
- pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret);
- ret = 0;
+ pr_err("Failed to get VTL(error: %lld) exiting...\n", ret);
+ BUG();
}
local_irq_restore(flags);
return ret;
}
+#else
+static inline u8 get_vtl(void) { return 0; }
+#endif
/*
* This function is to be invoked early in the boot sequence after the
@@ -564,7 +570,7 @@ skip_hypercall_pg_init:
if (cpu_feature_enabled(X86_FEATURE_IBT) &&
*(u32 *)hv_hypercall_pg != gen_endbr()) {
setup_clear_cpu_cap(X86_FEATURE_IBT);
- pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n");
+ pr_warn("Disabling IBT because of Hyper-V bug\n");
}
#endif
@@ -604,8 +610,10 @@ skip_hypercall_pg_init:
hv_query_ext_cap(0);
/* Find the VTL */
- if (!ms_hyperv.paravisor_present && hv_isolation_type_snp())
- ms_hyperv.vtl = get_vtl();
+ ms_hyperv.vtl = get_vtl();
+
+ if (ms_hyperv.vtl > 0) /* non default VTL */
+ hv_vtl_early_init();
return;
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 36a562218010..999f5ac82fe9 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -215,7 +215,7 @@ static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
return hv_vtl_bringup_vcpu(vp_id, start_eip);
}
-static int __init hv_vtl_early_init(void)
+int __init hv_vtl_early_init(void)
{
/*
* `boot_cpu_has` returns the runtime feature support,
@@ -230,4 +230,3 @@ static int __init hv_vtl_early_init(void)
return 0;
}
-early_initcall(hv_vtl_early_init);
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 3a233ebff712..25050d953eee 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -28,8 +28,6 @@ struct x86_cpu {
};
#ifdef CONFIG_HOTPLUG_CPU
-extern int arch_register_cpu(int num);
-extern void arch_unregister_cpu(int);
extern void soft_restart_cpu(void);
#endif
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 31089b851c4f..a2be3aefff9f 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -157,7 +157,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
#endif
-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
+extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+ unsigned int size, u64 xfeatures, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 637fa1df3512..c715097e92fd 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -69,6 +69,8 @@ struct legacy_pic {
void (*make_irq)(unsigned int irq);
};
+void legacy_pic_pcat_compat(void);
+
extern struct legacy_pic *legacy_pic;
extern struct legacy_pic null_legacy_pic;
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 5fcd85fd64fd..197316121f04 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -27,6 +27,7 @@
* _X - regular server parts
* _D - micro server parts
* _N,_P - other mobile parts
+ * _H - premium mobile parts
* _S - other client parts
*
* Historical OPTDIFFs:
@@ -124,6 +125,7 @@
#define INTEL_FAM6_METEORLAKE 0xAC
#define INTEL_FAM6_METEORLAKE_L 0xAA
+#define INTEL_FAM6_ARROWLAKE_H 0xC5
#define INTEL_FAM6_ARROWLAKE 0xC6
#define INTEL_FAM6_LUNARLAKE_M 0xBD
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 17715cb8731d..70d139406bc8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -528,7 +528,6 @@ struct kvm_pmu {
u64 raw_event_mask;
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
- struct irq_work irq_work;
/*
* Overlay the bitmap with a 64-bit atomic so that all bits can be
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 180b1cbfcc4e..6de6e1d95952 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -245,7 +245,7 @@ static inline void cmci_recheck(void) {}
int mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
bool mce_is_correctable(struct mce *m);
-int mce_usable_address(struct mce *m);
+bool mce_usable_address(struct mce *m);
DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 033b53f993c6..896445edc6a8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -340,8 +340,10 @@ static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
#ifdef CONFIG_HYPERV_VTL_MODE
void __init hv_vtl_init_platform(void);
+int __init hv_vtl_early_init(void);
#else
static inline void __init hv_vtl_init_platform(void) {}
+static inline int __init hv_vtl_early_init(void) { return 0; }
#endif
#include <asm-generic/mshyperv.h>
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1d111350197f..b37abb55e948 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -637,12 +637,17 @@
/* AMD Last Branch Record MSRs */
#define MSR_AMD64_LBR_SELECT 0xc000010e
-/* Fam 17h MSRs */
-#define MSR_F17H_IRPERF 0xc00000e9
+/* Zen4 */
+#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+/* Zen 2 */
#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF 0xc00000e9
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c55cc243592e..14cd3cd5f85a 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -271,7 +271,7 @@
.Lskip_rsb_\@:
.endm
-#ifdef CONFIG_CPU_UNRET_ENTRY
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
#define CALL_UNTRAIN_RET "call entry_untrain_ret"
#else
#define CALL_UNTRAIN_RET ""
@@ -288,38 +288,24 @@
* As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
* where we have a stack but before any RET instruction.
*/
-.macro UNTRAIN_RET
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
+.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
+#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY)
VALIDATE_UNRET_END
ALTERNATIVE_3 "", \
CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+ "call entry_ibpb", \ibpb_feature, \
+ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
.endm
-.macro UNTRAIN_RET_VM
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
- VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
- "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \
- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
-#endif
-.endm
+#define UNTRAIN_RET \
+ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH)
-.macro UNTRAIN_RET_FROM_CALL
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
- defined(CONFIG_CALL_DEPTH_TRACKING)
- VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
- __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
-#endif
-.endm
+#define UNTRAIN_RET_VM \
+ __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH)
+
+#define UNTRAIN_RET_FROM_CALL \
+ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL)
.macro CALL_DEPTH_ACCOUNT
@@ -348,13 +334,23 @@ extern void __x86_return_thunk(void);
static inline void __x86_return_thunk(void) {}
#endif
+#ifdef CONFIG_CPU_UNRET_ENTRY
extern void retbleed_return_thunk(void);
+#else
+static inline void retbleed_return_thunk(void) {}
+#endif
+
+#ifdef CONFIG_CPU_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
+#else
+static inline void srso_return_thunk(void) {}
+static inline void srso_alias_return_thunk(void) {}
+#endif
-extern void retbleed_untrain_ret(void);
-extern void srso_untrain_ret(void);
-extern void srso_alias_untrain_ret(void);
+extern void retbleed_return_thunk(void);
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
@@ -362,12 +358,7 @@ extern void entry_ibpb(void);
extern void (*x86_return_thunk)(void);
#ifdef CONFIG_CALL_DEPTH_TRACKING
-extern void __x86_return_skl(void);
-
-static inline void x86_set_skl_return_thunk(void)
-{
- x86_return_thunk = &__x86_return_skl;
-}
+extern void call_depth_return_thunk(void);
#define CALL_DEPTH_ACCOUNT \
ALTERNATIVE("", \
@@ -380,12 +371,12 @@ DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
-#else
-static inline void x86_set_skl_return_thunk(void) {}
+#else /* !CONFIG_CALL_DEPTH_TRACKING */
+static inline void call_depth_return_thunk(void) {}
#define CALL_DEPTH_ACCOUNT ""
-#endif
+#endif /* CONFIG_CALL_DEPTH_TRACKING */
#ifdef CONFIG_RETPOLINE
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ad98dd1d9cfb..c31c633419fe 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -129,7 +129,6 @@ void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
-bool smp_park_other_cpus_in_init(void);
void smp_store_cpu_info(int id);
asmlinkage __visible void smp_reboot_interrupt(void);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 19bf955b67e0..3ac0ffc4f3e2 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -268,6 +268,7 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
AVIC_IPI_FAILURE_INVALID_TARGET,
AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+ AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
};
#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8bae40a66282..5c367c1290c3 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -496,7 +496,7 @@ copy_mc_to_kernel(void *to, const void *from, unsigned len);
#define copy_mc_to_kernel copy_mc_to_kernel
unsigned long __must_check
-copy_mc_to_user(void *to, const void *from, unsigned len);
+copy_mc_to_user(void __user *to, const void *from, unsigned len);
#endif
/*
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 2a0ea38955df..c55c0ef47a18 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -148,6 +148,9 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
pr_debug("Local APIC address 0x%08x\n", madt->address);
}
+ if (madt->flags & ACPI_MADT_PCAT_COMPAT)
+ legacy_pic_pcat_compat();
+
/* ACPI 6.3 and newer support the online capable bit. */
if (acpi_gbl_FADT.header.revision > 6 ||
(acpi_gbl_FADT.header.revision == 6 &&
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 517ee01503be..73be3931e4f0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -403,6 +403,17 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
u8 insn_buff[MAX_PATCH_LEN];
DPRINTK(ALT, "alt table %px, -> %px", start, end);
+
+ /*
+ * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using
+ * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here.
+ * During the process, KASAN becomes confused seeing partial LA57
+ * conversion and triggers a false-positive out-of-bound report.
+ *
+ * Disable KASAN until the patching is complete.
+ */
+ kasan_disable_current();
+
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
@@ -452,6 +463,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
text_poke_early(instr, insn_buff, insn_buff_sz);
}
+
+ kasan_enable_current();
}
static inline bool is_jcc32(struct insn *insn)
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index faa9f2299848..e9ad518a5003 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -48,11 +48,6 @@ EXPORT_SYMBOL_GPL(__x86_call_count);
extern s32 __call_sites[], __call_sites_end[];
-struct thunk_desc {
- void *template;
- unsigned int template_size;
-};
-
struct core_text {
unsigned long base;
unsigned long end;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 03ef962a6992..ece2b5b7b0fe 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -80,6 +80,10 @@ static const int amd_div0[] =
AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+static const int amd_erratum_1485[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
+ AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
+
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
int osvw_id = *erratum++;
@@ -1149,6 +1153,10 @@ static void init_amd(struct cpuinfo_x86 *c)
pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
setup_force_cpu_bug(X86_BUG_DIV0);
}
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ cpu_has_amd_erratum(c, amd_erratum_1485))
+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 10499bcd4e39..bb0ab8466b91 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -63,7 +63,7 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd);
static DEFINE_MUTEX(spec_ctrl_mutex);
-void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
/* Update SPEC_CTRL MSR and its cached copy unconditionally */
static void update_spec_ctrl(u64 val)
@@ -717,7 +717,7 @@ void update_gds_msr(void)
case GDS_MITIGATION_UCODE_NEEDED:
case GDS_MITIGATION_HYPERVISOR:
return;
- };
+ }
wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
@@ -1019,7 +1019,6 @@ static void __init retbleed_select_mitigation(void)
do_cmd_auto:
case RETBLEED_CMD_AUTO:
- default:
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
@@ -1042,8 +1041,7 @@ do_cmd_auto:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_UNRET);
- if (IS_ENABLED(CONFIG_RETHUNK))
- x86_return_thunk = retbleed_return_thunk;
+ x86_return_thunk = retbleed_return_thunk;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
@@ -1061,7 +1059,8 @@ do_cmd_auto:
case RETBLEED_MITIGATION_STUFF:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH);
- x86_set_skl_return_thunk();
+
+ x86_return_thunk = call_depth_return_thunk;
break;
default:
@@ -1290,6 +1289,8 @@ spectre_v2_user_select_mitigation(void)
spectre_v2_user_ibpb = mode;
switch (cmd) {
+ case SPECTRE_V2_USER_CMD_NONE:
+ break;
case SPECTRE_V2_USER_CMD_FORCE:
case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
@@ -1301,8 +1302,6 @@ spectre_v2_user_select_mitigation(void)
case SPECTRE_V2_USER_CMD_SECCOMP:
static_branch_enable(&switch_mm_cond_ibpb);
break;
- default:
- break;
}
pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
@@ -2160,6 +2159,10 @@ static int l1d_flush_prctl_get(struct task_struct *task)
static int ssb_prctl_get(struct task_struct *task)
{
switch (ssb_mode) {
+ case SPEC_STORE_BYPASS_NONE:
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ return PR_SPEC_ENABLE;
+ return PR_SPEC_NOT_AFFECTED;
case SPEC_STORE_BYPASS_DISABLE:
return PR_SPEC_DISABLE;
case SPEC_STORE_BYPASS_SECCOMP:
@@ -2171,11 +2174,8 @@ static int ssb_prctl_get(struct task_struct *task)
if (task_spec_ssb_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- default:
- if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
- return PR_SPEC_ENABLE;
- return PR_SPEC_NOT_AFFECTED;
}
+ BUG();
}
static int ib_prctl_get(struct task_struct *task)
@@ -2353,6 +2353,8 @@ early_param("l1tf", l1tf_cmdline);
enum srso_mitigation {
SRSO_MITIGATION_NONE,
+ SRSO_MITIGATION_UCODE_NEEDED,
+ SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED,
SRSO_MITIGATION_MICROCODE,
SRSO_MITIGATION_SAFE_RET,
SRSO_MITIGATION_IBPB,
@@ -2368,11 +2370,13 @@ enum srso_mitigation_cmd {
};
static const char * const srso_strings[] = {
- [SRSO_MITIGATION_NONE] = "Vulnerable",
- [SRSO_MITIGATION_MICROCODE] = "Mitigation: microcode",
- [SRSO_MITIGATION_SAFE_RET] = "Mitigation: safe RET",
- [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB",
- [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only"
+ [SRSO_MITIGATION_NONE] = "Vulnerable",
+ [SRSO_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
+ [SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED] = "Vulnerable: Safe RET, no microcode",
+ [SRSO_MITIGATION_MICROCODE] = "Vulnerable: Microcode, no safe RET",
+ [SRSO_MITIGATION_SAFE_RET] = "Mitigation: Safe RET",
+ [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB",
+ [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only"
};
static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE;
@@ -2406,34 +2410,44 @@ static void __init srso_select_mitigation(void)
{
bool has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE);
- if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off())
- goto pred_cmd;
+ if (cpu_mitigations_off())
+ return;
- if (!has_microcode) {
- pr_warn("IBPB-extending microcode not applied!\n");
- pr_warn(SRSO_NOTICE);
- } else {
+ if (!boot_cpu_has_bug(X86_BUG_SRSO)) {
+ if (boot_cpu_has(X86_FEATURE_SBPB))
+ x86_pred_cmd = PRED_CMD_SBPB;
+ return;
+ }
+
+ if (has_microcode) {
/*
* Zen1/2 with SMT off aren't vulnerable after the right
* IBPB microcode has been applied.
+ *
+ * Zen1/2 don't have SBPB, no need to try to enable it here.
*/
if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) {
setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
return;
}
- }
- if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
- if (has_microcode) {
- pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n");
+ if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
srso_mitigation = SRSO_MITIGATION_IBPB;
- goto pred_cmd;
+ goto out;
}
+ } else {
+ pr_warn("IBPB-extending microcode not applied!\n");
+ pr_warn(SRSO_NOTICE);
+
+ /* may be overwritten by SRSO_CMD_SAFE_RET below */
+ srso_mitigation = SRSO_MITIGATION_UCODE_NEEDED;
}
switch (srso_cmd) {
case SRSO_CMD_OFF:
- goto pred_cmd;
+ if (boot_cpu_has(X86_FEATURE_SBPB))
+ x86_pred_cmd = PRED_CMD_SBPB;
+ return;
case SRSO_CMD_MICROCODE:
if (has_microcode) {
@@ -2458,10 +2472,12 @@ static void __init srso_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_SRSO);
x86_return_thunk = srso_return_thunk;
}
- srso_mitigation = SRSO_MITIGATION_SAFE_RET;
+ if (has_microcode)
+ srso_mitigation = SRSO_MITIGATION_SAFE_RET;
+ else
+ srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED;
} else {
pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
- goto pred_cmd;
}
break;
@@ -2473,7 +2489,6 @@ static void __init srso_select_mitigation(void)
}
} else {
pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
- goto pred_cmd;
}
break;
@@ -2485,20 +2500,12 @@ static void __init srso_select_mitigation(void)
}
} else {
pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
- goto pred_cmd;
}
break;
-
- default:
- break;
}
- pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode"));
-
-pred_cmd:
- if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) &&
- boot_cpu_has(X86_FEATURE_SBPB))
- x86_pred_cmd = PRED_CMD_SBPB;
+out:
+ pr_info("%s\n", srso_strings[srso_mitigation]);
}
#undef pr_fmt
@@ -2704,9 +2711,7 @@ static ssize_t srso_show_state(char *buf)
if (boot_cpu_has(X86_FEATURE_SRSO_NO))
return sysfs_emit(buf, "Mitigation: SMT disabled\n");
- return sysfs_emit(buf, "%s%s\n",
- srso_strings[srso_mitigation],
- boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) ? "" : ", no microcode");
+ return sysfs_emit(buf, "%s\n", srso_strings[srso_mitigation]);
}
static ssize_t gds_show_state(char *buf)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index c267f43de39e..f3517b8a8e91 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -713,17 +713,75 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
-bool amd_mce_is_memory_error(struct mce *m)
+/*
+ * DRAM ECC errors are reported in the Northbridge (bank 4) with
+ * Extended Error Code 8.
+ */
+static bool legacy_mce_is_memory_error(struct mce *m)
+{
+ return m->bank == 4 && XEC(m->status, 0x1f) == 8;
+}
+
+/*
+ * DRAM ECC errors are reported in Unified Memory Controllers with
+ * Extended Error Code 0.
+ */
+static bool smca_mce_is_memory_error(struct mce *m)
{
enum smca_bank_types bank_type;
- /* ErrCodeExt[20:16] */
- u8 xec = (m->status >> 16) & 0x1f;
+
+ if (XEC(m->status, 0x3f))
+ return false;
bank_type = smca_get_bank_type(m->extcpu, m->bank);
+
+ return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
+}
+
+bool amd_mce_is_memory_error(struct mce *m)
+{
if (mce_flags.smca)
- return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
+ return smca_mce_is_memory_error(m);
+ else
+ return legacy_mce_is_memory_error(m);
+}
+
+/*
+ * AMD systems do not have an explicit indicator that the value in MCA_ADDR is
+ * a system physical address. Therefore, individual cases need to be detected.
+ * Future cases and checks will be added as needed.
+ *
+ * 1) General case
+ * a) Assume address is not usable.
+ * 2) Poison errors
+ * a) Indicated by MCA_STATUS[43]: poison. Defined for all banks except legacy
+ * northbridge (bank 4).
+ * b) Refers to poison consumption in the core. Does not include "no action",
+ * "action optional", or "deferred" error severities.
+ * c) Will include a usable address so that immediate action can be taken.
+ * 3) Northbridge DRAM ECC errors
+ * a) Reported in legacy bank 4 with extended error code (XEC) 8.
+ * b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore,
+ * this bit should not be checked.
+ *
+ * NOTE: SMCA UMC memory errors fall into case #1.
+ */
+bool amd_mce_usable_address(struct mce *m)
+{
+ /* Check special northbridge case 3) first. */
+ if (!mce_flags.smca) {
+ if (legacy_mce_is_memory_error(m))
+ return true;
+ else if (m->bank == 4)
+ return false;
+ }
- return m->bank == 4 && xec == 0x8;
+ /* Check poison bit for all other bank types. */
+ if (m->status & MCI_STATUS_POISON)
+ return true;
+
+ /* Assume address is not usable for all others. */
+ return false;
}
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 6f35f724cc14..0214d4232346 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -453,32 +453,22 @@ static void mce_irq_work_cb(struct irq_work *entry)
mce_schedule_work();
}
-/*
- * Check if the address reported by the CPU is in a format we can parse.
- * It would be possible to add code for most other cases, but all would
- * be somewhat complicated (e.g. segment offset would require an instruction
- * parser). So only support physical addresses up to page granularity for now.
- */
-int mce_usable_address(struct mce *m)
+bool mce_usable_address(struct mce *m)
{
if (!(m->status & MCI_STATUS_ADDRV))
- return 0;
-
- /* Checks after this one are Intel/Zhaoxin-specific: */
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
- boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
- return 1;
-
- if (!(m->status & MCI_STATUS_MISCV))
- return 0;
+ return false;
- if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
- return 0;
+ switch (m->cpuvendor) {
+ case X86_VENDOR_AMD:
+ return amd_mce_usable_address(m);
- if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
- return 0;
+ case X86_VENDOR_INTEL:
+ case X86_VENDOR_ZHAOXIN:
+ return intel_mce_usable_address(m);
- return 1;
+ default:
+ return true;
+ }
}
EXPORT_SYMBOL_GPL(mce_usable_address);
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index f5323551c1a9..52bce533ddcc 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -536,3 +536,23 @@ bool intel_filter_mce(struct mce *m)
return false;
}
+
+/*
+ * Check if the address reported by the CPU is in a format we can parse.
+ * It would be possible to add code for most other cases, but all would
+ * be somewhat complicated (e.g. segment offset would require an instruction
+ * parser). So only support physical addresses up to page granularity for now.
+ */
+bool intel_mce_usable_address(struct mce *m)
+{
+ if (!(m->status & MCI_STATUS_MISCV))
+ return false;
+
+ if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
+ return false;
+
+ if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
+ return false;
+
+ return true;
+}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index bcf1b3c66c9c..e13a26c9c0ac 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -49,6 +49,7 @@ void intel_init_cmci(void);
void intel_init_lmce(void);
void intel_clear_lmce(void);
bool intel_filter_mce(struct mce *m);
+bool intel_mce_usable_address(struct mce *m);
#else
# define cmci_intel_adjust_timer mce_adjust_timer_default
static inline bool mce_intel_cmci_poll(void) { return false; }
@@ -58,6 +59,7 @@ static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { }
static inline void intel_clear_lmce(void) { }
static inline bool intel_filter_mce(struct mce *m) { return false; }
+static inline bool intel_mce_usable_address(struct mce *m) { return false; }
#endif
void mce_timer_kick(unsigned long interval);
@@ -210,6 +212,7 @@ extern bool filter_mce(struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);
+bool amd_mce_usable_address(struct mce *m);
/*
* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
@@ -237,6 +240,7 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
#else
static inline bool amd_filter_mce(struct mce *m) { return false; }
+static inline bool amd_mce_usable_address(struct mce *m) { return false; }
static inline void smca_extract_err_addr(struct mce *m) { }
#endif
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index ded1fc7cb7cb..f136ac046851 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -30,15 +30,15 @@ struct rmid_entry {
struct list_head list;
};
-/**
- * @rmid_free_lru A least recently used list of free RMIDs
+/*
+ * @rmid_free_lru - A least recently used list of free RMIDs
* These RMIDs are guaranteed to have an occupancy less than the
* threshold occupancy
*/
static LIST_HEAD(rmid_free_lru);
-/**
- * @rmid_limbo_count count of currently unused but (potentially)
+/*
+ * @rmid_limbo_count - count of currently unused but (potentially)
* dirty RMIDs.
* This counts RMIDs that no one is currently using but that
* may have a occupancy value > resctrl_rmid_realloc_threshold. User can
@@ -46,7 +46,7 @@ static LIST_HEAD(rmid_free_lru);
*/
static unsigned int rmid_limbo_count;
-/**
+/*
* @rmid_entry - The entry in the limbo and free lists.
*/
static struct rmid_entry *rmid_ptrs;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index a86d37052a64..a21a4d0ecc34 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
- unsigned int size, u32 pkru)
+ unsigned int size, u64 xfeatures, u32 pkru)
{
struct fpstate *kstate = gfpu->fpstate;
union fpregs_state *ustate = buf;
struct membuf mb = { .p = buf, .left = size };
if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
- __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
+ __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
+ XSTATE_COPY_XSAVE);
} else {
memcpy(&ustate->fxsave, &kstate->regs.fxsave,
sizeof(ustate->fxsave));
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index cadf68737e6b..ef6906107c54 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1049,6 +1049,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
* @to: membuf descriptor
* @fpstate: The fpstate buffer from which to copy
+ * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
* @pkru_val: The PKRU value to store in the PKRU component
* @copy_mode: The requested copy mode
*
@@ -1059,7 +1060,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* It supports partial copy but @to.pos always starts from zero.
*/
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
- u32 pkru_val, enum xstate_copy_mode copy_mode)
+ u64 xfeatures, u32 pkru_val,
+ enum xstate_copy_mode copy_mode)
{
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
struct xregs_state *xinit = &init_fpstate.regs.xsave;
@@ -1083,7 +1085,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
break;
case XSTATE_COPY_XSAVE:
- header.xfeatures &= fpstate->user_xfeatures;
+ header.xfeatures &= fpstate->user_xfeatures & xfeatures;
break;
}
@@ -1185,6 +1187,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode copy_mode)
{
__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
+ tsk->thread.fpu.fpstate->user_xfeatures,
tsk->thread.pkru, copy_mode);
}
@@ -1536,10 +1539,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
-
- if (!guest_fpu)
- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
-
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index a4ecb04d8d64..3518fb26d06b 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -43,7 +43,8 @@ enum xstate_copy_mode {
struct membuf;
extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
- u32 pkru_val, enum xstate_copy_mode copy_mode);
+ u64 xfeatures, u32 pkru_val,
+ enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 30a55207c000..c20d1832c481 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -32,6 +32,7 @@
*/
static void init_8259A(int auto_eoi);
+static bool pcat_compat __ro_after_init;
static int i8259A_auto_eoi;
DEFINE_RAW_SPINLOCK(i8259A_lock);
@@ -299,15 +300,32 @@ static void unmask_8259A(void)
static int probe_8259A(void)
{
+ unsigned char new_val, probe_val = ~(1 << PIC_CASCADE_IR);
unsigned long flags;
- unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
- unsigned char new_val;
+
+ /*
+ * If MADT has the PCAT_COMPAT flag set, then do not bother probing
+ * for the PIC. Some BIOSes leave the PIC uninitialized and probing
+ * fails.
+ *
+ * Right now this causes problems as quite some code depends on
+ * nr_legacy_irqs() > 0 or has_legacy_pic() == true. This is silly
+ * when the system has an IO/APIC because then PIC is not required
+ * at all, except for really old machines where the timer interrupt
+ * must be routed through the PIC. So just pretend that the PIC is
+ * there and let legacy_pic->init() initialize it for nothing.
+ *
+ * Alternatively this could just try to initialize the PIC and
+ * repeat the probe, but for cases where there is no PIC that's
+ * just pointless.
+ */
+ if (pcat_compat)
+ return nr_legacy_irqs();
+
/*
- * Check to see if we have a PIC.
- * Mask all except the cascade and read
- * back the value we just wrote. If we don't
- * have a PIC, we will read 0xff as opposed to the
- * value we wrote.
+ * Check to see if we have a PIC. Mask all except the cascade and
+ * read back the value we just wrote. If we don't have a PIC, we
+ * will read 0xff as opposed to the value we wrote.
*/
raw_spin_lock_irqsave(&i8259A_lock, flags);
@@ -429,5 +447,9 @@ static int __init i8259A_init_ops(void)
return 0;
}
-
device_initcall(i8259A_init_ops);
+
+void __init legacy_pic_pcat_compat(void)
+{
+ pcat_compat = true;
+}
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index dcf325b7b022..ccb0915e84e1 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -632,6 +632,23 @@ fail:
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
+static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
+ unsigned long address,
+ bool write)
+{
+ if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_USER;
+ ctxt->fi.cr2 = address;
+ if (write)
+ ctxt->fi.error_code |= X86_PF_WRITE;
+
+ return ES_EXCEPTION;
+ }
+
+ return ES_OK;
+}
+
static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
void *src, char *buf,
unsigned int data_size,
@@ -639,7 +656,12 @@ static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
bool backwards)
{
int i, b = backwards ? -1 : 1;
- enum es_result ret = ES_OK;
+ unsigned long address = (unsigned long)src;
+ enum es_result ret;
+
+ ret = vc_insn_string_check(ctxt, address, false);
+ if (ret != ES_OK)
+ return ret;
for (i = 0; i < count; i++) {
void *s = src + (i * data_size * b);
@@ -660,7 +682,12 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
bool backwards)
{
int i, s = backwards ? -1 : 1;
- enum es_result ret = ES_OK;
+ unsigned long address = (unsigned long)dst;
+ enum es_result ret;
+
+ ret = vc_insn_string_check(ctxt, address, true);
+ if (ret != ES_OK)
+ return ret;
for (i = 0; i < count; i++) {
void *d = dst + (i * data_size * s);
@@ -696,6 +723,9 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
{
struct insn *insn = &ctxt->insn;
+ size_t size;
+ u64 port;
+
*exitinfo = 0;
switch (insn->opcode.bytes[0]) {
@@ -704,7 +734,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0x6d:
*exitinfo |= IOIO_TYPE_INS;
*exitinfo |= IOIO_SEG_ES;
- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ port = ctxt->regs->dx & 0xffff;
break;
/* OUTS opcodes */
@@ -712,41 +742,43 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0x6f:
*exitinfo |= IOIO_TYPE_OUTS;
*exitinfo |= IOIO_SEG_DS;
- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ port = ctxt->regs->dx & 0xffff;
break;
/* IN immediate opcodes */
case 0xe4:
case 0xe5:
*exitinfo |= IOIO_TYPE_IN;
- *exitinfo |= (u8)insn->immediate.value << 16;
+ port = (u8)insn->immediate.value & 0xffff;
break;
/* OUT immediate opcodes */
case 0xe6:
case 0xe7:
*exitinfo |= IOIO_TYPE_OUT;
- *exitinfo |= (u8)insn->immediate.value << 16;
+ port = (u8)insn->immediate.value & 0xffff;
break;
/* IN register opcodes */
case 0xec:
case 0xed:
*exitinfo |= IOIO_TYPE_IN;
- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ port = ctxt->regs->dx & 0xffff;
break;
/* OUT register opcodes */
case 0xee:
case 0xef:
*exitinfo |= IOIO_TYPE_OUT;
- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ port = ctxt->regs->dx & 0xffff;
break;
default:
return ES_DECODE_FAILED;
}
+ *exitinfo |= port << 16;
+
switch (insn->opcode.bytes[0]) {
case 0x6c:
case 0x6e:
@@ -756,12 +788,15 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0xee:
/* Single byte opcodes */
*exitinfo |= IOIO_DATA_8;
+ size = 1;
break;
default:
/* Length determined by instruction parsing */
*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
: IOIO_DATA_32;
+ size = (insn->opnd_bytes == 2) ? 2 : 4;
}
+
switch (insn->addr_bytes) {
case 2:
*exitinfo |= IOIO_ADDR_16;
@@ -777,7 +812,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
if (insn_has_rep_prefix(insn))
*exitinfo |= IOIO_REP;
- return ES_OK;
+ return vc_ioio_check(ctxt, (u16)port, size);
}
static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index d8c1e3be74c0..6395bfd87b68 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -524,6 +524,33 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
return ES_OK;
}
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+{
+ BUG_ON(size > 4);
+
+ if (user_mode(ctxt->regs)) {
+ struct thread_struct *t = &current->thread;
+ struct io_bitmap *iobm = t->io_bitmap;
+ size_t idx;
+
+ if (!iobm)
+ goto fault;
+
+ for (idx = port; idx < port + size; ++idx) {
+ if (test_bit(idx, iobm->bitmap))
+ goto fault;
+ }
+ }
+
+ return ES_OK;
+
+fault:
+ ctxt->fi.vector = X86_TRAP_GP;
+ ctxt->fi.error_code = 0;
+
+ return ES_EXCEPTION;
+}
+
/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"
@@ -1508,6 +1535,9 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ES_DECODE_FAILED;
}
+ if (user_mode(ctxt->regs))
+ return ES_UNSUPPORTED;
+
switch (mmio) {
case INSN_MMIO_WRITE:
memcpy(ghcb->shared_buffer, reg_data, bytes);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 6eb06d001bcc..96a771f9f930 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
}
/*
- * Disable virtualization, APIC etc. and park the CPU in a HLT loop
+ * this function calls the 'stop' function on all other CPUs in the system.
*/
DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
@@ -172,17 +172,13 @@ static void native_stop_other_cpus(int wait)
* 2) Wait for all other CPUs to report that they reached the
* HLT loop in stop_this_cpu()
*
- * 3) If the system uses INIT/STARTUP for CPU bringup, then
- * send all present CPUs an INIT vector, which brings them
- * completely out of the way.
+ * 3) If #2 timed out send an NMI to the CPUs which did not
+ * yet report
*
- * 4) If #3 is not possible and #2 timed out send an NMI to the
- * CPUs which did not yet report
- *
- * 5) Wait for all other CPUs to report that they reached the
+ * 4) Wait for all other CPUs to report that they reached the
* HLT loop in stop_this_cpu()
*
- * #4 can obviously race against a CPU reaching the HLT loop late.
+ * #3 can obviously race against a CPU reaching the HLT loop late.
* That CPU will have reported already and the "have all CPUs
* reached HLT" condition will be true despite the fact that the
* other CPU is still handling the NMI. Again, there is no
@@ -198,7 +194,7 @@ static void native_stop_other_cpus(int wait)
/*
* Don't wait longer than a second for IPI completion. The
* wait request is not checked here because that would
- * prevent an NMI/INIT shutdown in case that not all
+ * prevent an NMI shutdown attempt in case that not all
* CPUs reach shutdown state.
*/
timeout = USEC_PER_SEC;
@@ -206,27 +202,7 @@ static void native_stop_other_cpus(int wait)
udelay(1);
}
- /*
- * Park all other CPUs in INIT including "offline" CPUs, if
- * possible. That's a safe place where they can't resume execution
- * of HLT and then execute the HLT loop from overwritten text or
- * page tables.
- *
- * The only downside is a broadcast MCE, but up to the point where
- * the kexec() kernel brought all APs online again an MCE will just
- * make HLT resume and handle the MCE. The machine crashes and burns
- * due to overwritten text, page tables and data. So there is a
- * choice between fire and frying pan. The result is pretty much
- * the same. Chose frying pan until x86 provides a sane mechanism
- * to park a CPU.
- */
- if (smp_park_other_cpus_in_init())
- goto done;
-
- /*
- * If park with INIT was not possible and the REBOOT_VECTOR didn't
- * take all secondary CPUs offline, try with the NMI.
- */
+ /* if the REBOOT_VECTOR didn't work, try with the NMI */
if (!cpumask_empty(&cpus_stop_mask)) {
/*
* If NMI IPI is enabled, try to register the stop handler
@@ -249,7 +225,6 @@ static void native_stop_other_cpus(int wait)
udelay(1);
}
-done:
local_irq_save(flags);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 48e040618731..2a187c0cbd5b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1240,33 +1240,6 @@ void arch_thaw_secondary_cpus_end(void)
cache_aps_init();
}
-bool smp_park_other_cpus_in_init(void)
-{
- unsigned int cpu, this_cpu = smp_processor_id();
- unsigned int apicid;
-
- if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu)
- return false;
-
- /*
- * If this is a crash stop which does not execute on the boot CPU,
- * then this cannot use the INIT mechanism because INIT to the boot
- * CPU will reset the machine.
- */
- if (this_cpu)
- return false;
-
- for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) {
- if (cpu == this_cpu)
- continue;
- apicid = apic->cpu_present_to_apicid(cpu);
- if (apicid == BAD_APICID)
- continue;
- send_init_sequence(apicid);
- }
- return true;
-}
-
/*
* Early setup to make printk work.
*/
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index ca004e2e4469..0bab03130033 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -54,7 +54,7 @@ void arch_unregister_cpu(int num)
EXPORT_SYMBOL(arch_unregister_cpu);
#else /* CONFIG_HOTPLUG_CPU */
-static int __init arch_register_cpu(int num)
+int __init arch_register_cpu(int num)
{
return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index bbc440c93e08..1123ef3ccf90 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -15,6 +15,7 @@
* ( The serial nature of the boot logic and the CPU hotplug lock
* protects against more than 2 CPUs entering this code. )
*/
+#include <linux/workqueue.h>
#include <linux/topology.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
@@ -342,6 +343,13 @@ static inline unsigned int loop_timeout(int cpu)
return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20;
}
+static void tsc_sync_mark_tsc_unstable(struct work_struct *work)
+{
+ mark_tsc_unstable("check_tsc_sync_source failed");
+}
+
+static DECLARE_WORK(tsc_sync_work, tsc_sync_mark_tsc_unstable);
+
/*
* The freshly booted CPU initiates this via an async SMP function call.
*/
@@ -395,7 +403,7 @@ retry:
"turning off TSC clock.\n", max_warp);
if (random_warps)
pr_warn("TSC warped randomly between CPUs\n");
- mark_tsc_unstable("check_tsc_sync_source failed");
+ schedule_work(&tsc_sync_work);
}
/*
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f15fb71f280e..54a5596adaa6 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -139,10 +139,7 @@ SECTIONS
STATIC_CALL_TEXT
ALIGN_ENTRY_TEXT_BEGIN
-#ifdef CONFIG_CPU_SRSO
*(.text..__x86.rethunk_untrain)
-#endif
-
ENTRY_TEXT
#ifdef CONFIG_CPU_SRSO
@@ -520,12 +517,12 @@ INIT_PER_CPU(irq_stack_backing_store);
"fixed_percpu_data is not at start of per-cpu area");
#endif
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_CPU_UNRET_ENTRY
. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
-. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
#endif
#ifdef CONFIG_CPU_SRSO
+. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
/*
* GNU ld cannot do XOR until 2.41.
* https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0544e30b4946..773132c3bf5a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -360,14 +360,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 =
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
- /*
- * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
- * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
- * supported by the host.
- */
- vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
- XFEATURE_MASK_FPSSE;
-
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index dcd60b39e794..3e977dbbf993 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2759,13 +2759,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
u32 reg = kvm_lapic_get_reg(apic, lvt_type);
int vector, mode, trig_mode;
+ int r;
if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
vector = reg & APIC_VECTOR_MASK;
mode = reg & APIC_MODE_MASK;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
- return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
- NULL);
+
+ r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
+ if (r && lvt_type == APIC_LVTPC)
+ kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
+ return r;
}
return 0;
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index edb89b51b383..9ae07db6f0f6 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
-static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
-{
- struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
- struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
-
- kvm_pmu_deliver_pmi(vcpu);
-}
-
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
}
- if (!pmc->intr || skip_pmi)
- return;
-
- /*
- * Inject PMI. If vcpu was in a guest mode during NMI PMI
- * can be ejected on a guest mode re-entry. Otherwise we can't
- * be sure that vcpu wasn't executing hlt instruction at the
- * time of vmexit and is not going to re-enter guest mode until
- * woken up. So we should wake it, but this is impossible from
- * NMI context. Do it from irq work instead.
- */
- if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
- irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
- else
+ if (pmc->intr && !skip_pmi)
kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
@@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-
- irq_work_sync(&pmu->irq_work);
static_call(kvm_x86_pmu_reset)(vcpu);
}
@@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
memset(pmu, 0, sizeof(*pmu));
static_call(kvm_x86_pmu_init)(vcpu);
- init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
pmu->event_count = 0;
pmu->need_cleanup = false;
kvm_pmu_refresh(vcpu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7d9ba301c090..1d64113de488 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
return counter & pmc_bitmask(pmc);
}
+static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
+{
+ pmc->counter += val - pmc_read_counter(pmc);
+ pmc->counter &= pmc_bitmask(pmc);
+}
+
static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
{
if (pmc->perf_event) {
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 2092db892d7d..4b74ea91f4e6 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
+ case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR:
+ /* Invalid IPI with vector < 16 */
+ break;
default:
- pr_err("Unknown IPI interception\n");
+ vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n");
}
return 1;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index dd496c9e5f91..3fea8c47679e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1253,6 +1253,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
+
+ if (kvm_apicv_activated(vcpu->kvm))
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index cef5a3d0abd0..373ff6a6687b 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* MSR_PERFCTRn */
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
return 0;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9507df93f410..beea99c8e8e0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -691,7 +691,7 @@ static int svm_hardware_enable(void)
*/
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
struct sev_es_save_area *hostsa;
- u32 msr_hi;
+ u32 __maybe_unused msr_hi;
hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
@@ -913,8 +913,7 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
if (intercept == svm->x2avic_msrs_intercepted)
return;
- if (!x2avic_enabled ||
- !apic_x2apic_mode(svm->vcpu.arch.apic))
+ if (!x2avic_enabled)
return;
for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) {
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f2efa0bf7ae8..820d3e1f6b4f 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -436,11 +436,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated &&
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f18b06bbda6..41cce5031126 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5382,26 +5382,37 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return 0;
}
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
- struct kvm_xsave *guest_xsave)
-{
- if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return;
-
- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
- guest_xsave->region,
- sizeof(guest_xsave->region),
- vcpu->arch.pkru);
-}
static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
u8 *state, unsigned int size)
{
+ /*
+ * Only copy state for features that are enabled for the guest. The
+ * state itself isn't problematic, but setting bits in the header for
+ * features that are supported in *this* host but not exposed to the
+ * guest can result in KVM_SET_XSAVE failing when live migrating to a
+ * compatible host without the features that are NOT exposed to the
+ * guest.
+ *
+ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
+ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
+ * supported by the host.
+ */
+ u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
+ XFEATURE_MASK_FPSSE;
+
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
- state, size, vcpu->arch.pkru);
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
+ supported_xcr0, vcpu->arch.pkru);
+}
+
+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
+{
+ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
+ sizeof(guest_xsave->region));
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
@@ -12843,6 +12854,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return true;
#endif
+ if (kvm_test_request(KVM_REQ_PMI, vcpu))
+ return true;
+
if (kvm_arch_interrupt_allowed(vcpu) &&
(kvm_cpu_has_interrupt(vcpu) ||
kvm_guest_apic_has_interrupt(vcpu)))
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
index 80efd45a7761..6e8b7e600def 100644
--- a/arch/x86/lib/copy_mc.c
+++ b/arch/x86/lib/copy_mc.c
@@ -70,23 +70,23 @@ unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigne
}
EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
-unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
+unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, unsigned len)
{
unsigned long ret;
if (copy_mc_fragile_enabled) {
__uaccess_begin();
- ret = copy_mc_fragile(dst, src, len);
+ ret = copy_mc_fragile((__force void *)dst, src, len);
__uaccess_end();
return ret;
}
if (static_cpu_has(X86_FEATURE_ERMS)) {
__uaccess_begin();
- ret = copy_mc_enhanced_fast_string(dst, src, len);
+ ret = copy_mc_enhanced_fast_string((__force void *)dst, src, len);
__uaccess_end();
return ret;
}
- return copy_user_generic(dst, src, len);
+ return copy_user_generic((__force void *)dst, src, len);
}
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index cd86aeb5fdd3..a48077c5ca61 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -126,11 +126,19 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#endif
+
+#ifdef CONFIG_RETHUNK
+
/*
- * This function name is magical and is used by -mfunction-return=thunk-extern
- * for the compiler to generate JMPs to it.
+ * Be careful here: that label cannot really be removed because in
+ * some configurations and toolchains, the JMP __x86_return_thunk the
+ * compiler issues is either a short one or the compiler doesn't use
+ * relocations for same-section JMPs and that breaks the returns
+ * detection logic in apply_returns() and in objtool.
*/
-#ifdef CONFIG_RETHUNK
+ .section .text..__x86.return_thunk
+
+#ifdef CONFIG_CPU_SRSO
/*
* srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
@@ -147,29 +155,18 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
*
* As a result, srso_alias_safe_ret() becomes a safe return.
*/
-#ifdef CONFIG_CPU_SRSO
- .section .text..__x86.rethunk_untrain
-
-SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ .pushsection .text..__x86.rethunk_untrain
+SYM_CODE_START_NOALIGN(srso_alias_untrain_ret)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
ASM_NOP2
lfence
jmp srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
-__EXPORT_THUNK(srso_alias_untrain_ret)
-
- .section .text..__x86.rethunk_safe
-#else
-/* dummy definition for alternatives */
-SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
- ANNOTATE_UNRET_SAFE
- ret
- int3
-SYM_FUNC_END(srso_alias_untrain_ret)
-#endif
+ .popsection
-SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ .pushsection .text..__x86.rethunk_safe
+SYM_CODE_START_NOALIGN(srso_alias_safe_ret)
lea 8(%_ASM_SP), %_ASM_SP
UNWIND_HINT_FUNC
ANNOTATE_UNRET_SAFE
@@ -177,14 +174,63 @@ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
int3
SYM_FUNC_END(srso_alias_safe_ret)
- .section .text..__x86.return_thunk
-
-SYM_CODE_START(srso_alias_return_thunk)
+SYM_CODE_START_NOALIGN(srso_alias_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
call srso_alias_safe_ret
ud2
SYM_CODE_END(srso_alias_return_thunk)
+ .popsection
+
+/*
+ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
+ * above. On kernel entry, srso_untrain_ret() is executed which is a
+ *
+ * movabs $0xccccc30824648d48,%rax
+ *
+ * and when the return thunk executes the inner label srso_safe_ret()
+ * later, it is a stack manipulation and a RET which is mispredicted and
+ * thus a "safe" one to use.
+ */
+ .align 64
+ .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
+SYM_CODE_START_LOCAL_NOALIGN(srso_untrain_ret)
+ ANNOTATE_NOENDBR
+ .byte 0x48, 0xb8
+
+/*
+ * This forces the function return instruction to speculate into a trap
+ * (UD2 in srso_return_thunk() below). This RET will then mispredict
+ * and execution will continue at the return site read from the top of
+ * the stack.
+ */
+SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+ lea 8(%_ASM_SP), %_ASM_SP
+ ret
+ int3
+ int3
+ /* end of movabs */
+ lfence
+ call srso_safe_ret
+ ud2
+SYM_CODE_END(srso_safe_ret)
+SYM_FUNC_END(srso_untrain_ret)
+
+SYM_CODE_START(srso_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ call srso_safe_ret
+ ud2
+SYM_CODE_END(srso_return_thunk)
+
+#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret"
+#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret"
+#else /* !CONFIG_CPU_SRSO */
+#define JMP_SRSO_UNTRAIN_RET "ud2"
+#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2"
+#endif /* CONFIG_CPU_SRSO */
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
/*
* Some generic notes on the untraining sequences:
@@ -216,7 +262,7 @@ SYM_CODE_END(srso_alias_return_thunk)
*/
.align 64
.skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
-SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+SYM_CODE_START_LOCAL_NOALIGN(retbleed_untrain_ret)
ANNOTATE_NOENDBR
/*
* As executed from retbleed_untrain_ret, this is:
@@ -264,72 +310,27 @@ SYM_CODE_END(retbleed_return_thunk)
jmp retbleed_return_thunk
int3
SYM_FUNC_END(retbleed_untrain_ret)
-__EXPORT_THUNK(retbleed_untrain_ret)
-/*
- * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
- * above. On kernel entry, srso_untrain_ret() is executed which is a
- *
- * movabs $0xccccc30824648d48,%rax
- *
- * and when the return thunk executes the inner label srso_safe_ret()
- * later, it is a stack manipulation and a RET which is mispredicted and
- * thus a "safe" one to use.
- */
- .align 64
- .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
-SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
- ANNOTATE_NOENDBR
- .byte 0x48, 0xb8
+#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret"
+#else /* !CONFIG_CPU_UNRET_ENTRY */
+#define JMP_RETBLEED_UNTRAIN_RET "ud2"
+#endif /* CONFIG_CPU_UNRET_ENTRY */
-/*
- * This forces the function return instruction to speculate into a trap
- * (UD2 in srso_return_thunk() below). This RET will then mispredict
- * and execution will continue at the return site read from the top of
- * the stack.
- */
-SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
- lea 8(%_ASM_SP), %_ASM_SP
- ret
- int3
- int3
- /* end of movabs */
- lfence
- call srso_safe_ret
- ud2
-SYM_CODE_END(srso_safe_ret)
-SYM_FUNC_END(srso_untrain_ret)
-__EXPORT_THUNK(srso_untrain_ret)
-
-SYM_CODE_START(srso_return_thunk)
- UNWIND_HINT_FUNC
- ANNOTATE_NOENDBR
- call srso_safe_ret
- ud2
-SYM_CODE_END(srso_return_thunk)
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
SYM_FUNC_START(entry_untrain_ret)
- ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
- "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
- "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+ ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \
+ JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \
+ JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
-SYM_CODE_START(__x86_return_thunk)
- UNWIND_HINT_FUNC
- ANNOTATE_NOENDBR
- ANNOTATE_UNRET_SAFE
- ret
- int3
-SYM_CODE_END(__x86_return_thunk)
-EXPORT_SYMBOL(__x86_return_thunk)
-
-#endif /* CONFIG_RETHUNK */
+#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */
#ifdef CONFIG_CALL_DEPTH_TRACKING
.align 64
-SYM_FUNC_START(__x86_return_skl)
+SYM_FUNC_START(call_depth_return_thunk)
ANNOTATE_NOENDBR
/*
* Keep the hotpath in a 16byte I-fetch for the non-debug
@@ -356,6 +357,33 @@ SYM_FUNC_START(__x86_return_skl)
ANNOTATE_UNRET_SAFE
ret
int3
-SYM_FUNC_END(__x86_return_skl)
+SYM_FUNC_END(call_depth_return_thunk)
#endif /* CONFIG_CALL_DEPTH_TRACKING */
+
+/*
+ * This function name is magical and is used by -mfunction-return=thunk-extern
+ * for the compiler to generate JMPs to it.
+ *
+ * This code is only used during kernel boot or module init. All
+ * 'JMP __x86_return_thunk' sites are changed to something else by
+ * apply_returns().
+ *
+ * This should be converted eventually to call a warning function which
+ * should scream loudly when the default return thunk is called after
+ * alternatives have been applied.
+ *
+ * That warning function cannot BUG() because the bug splat cannot be
+ * displayed in all possible configurations, leading to users not really
+ * knowing why the machine froze.
+ */
+SYM_CODE_START(__x86_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_CODE_END(__x86_return_thunk)
+EXPORT_SYMBOL(__x86_return_thunk)
+
+#endif /* CONFIG_RETHUNK */
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 78414c6d1b5e..5dd733944629 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -69,6 +69,7 @@ static void __init pti_print_if_secure(const char *reason)
pr_info("%s\n", reason);
}
+/* Assume mode is auto unless overridden via cmdline below. */
static enum pti_mode {
PTI_AUTO = 0,
PTI_FORCE_OFF,
@@ -77,50 +78,49 @@ static enum pti_mode {
void __init pti_check_boottime_disable(void)
{
- char arg[5];
- int ret;
-
- /* Assume mode is auto unless overridden. */
- pti_mode = PTI_AUTO;
-
if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
pti_mode = PTI_FORCE_OFF;
pti_print_if_insecure("disabled on XEN PV.");
return;
}
- ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
- if (ret > 0) {
- if (ret == 3 && !strncmp(arg, "off", 3)) {
- pti_mode = PTI_FORCE_OFF;
- pti_print_if_insecure("disabled on command line.");
- return;
- }
- if (ret == 2 && !strncmp(arg, "on", 2)) {
- pti_mode = PTI_FORCE_ON;
- pti_print_if_secure("force enabled on command line.");
- goto enable;
- }
- if (ret == 4 && !strncmp(arg, "auto", 4)) {
- pti_mode = PTI_AUTO;
- goto autosel;
- }
- }
-
- if (cmdline_find_option_bool(boot_command_line, "nopti") ||
- cpu_mitigations_off()) {
+ if (cpu_mitigations_off())
pti_mode = PTI_FORCE_OFF;
+ if (pti_mode == PTI_FORCE_OFF) {
pti_print_if_insecure("disabled on command line.");
return;
}
-autosel:
- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ if (pti_mode == PTI_FORCE_ON)
+ pti_print_if_secure("force enabled on command line.");
+
+ if (pti_mode == PTI_AUTO && !boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
return;
-enable:
+
setup_force_cpu_cap(X86_FEATURE_PTI);
}
+static int __init pti_parse_cmdline(char *arg)
+{
+ if (!strcmp(arg, "off"))
+ pti_mode = PTI_FORCE_OFF;
+ else if (!strcmp(arg, "on"))
+ pti_mode = PTI_FORCE_ON;
+ else if (!strcmp(arg, "auto"))
+ pti_mode = PTI_AUTO;
+ else
+ return -EINVAL;
+ return 0;
+}
+early_param("pti", pti_parse_cmdline);
+
+static int __init pti_parse_cmdline_nopti(char *arg)
+{
+ pti_mode = PTI_FORCE_OFF;
+ return 0;
+}
+early_param("nopti", pti_parse_cmdline_nopti);
+
pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
/*