summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2024-04-02 19:26:15 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2024-04-02 19:26:15 +0300
commit52b761b48f8e23399fafe3834a173c990357b8de (patch)
tree06f7fe191b277dde269ecb11ff3e089da6f2aff9 /arch/x86/kernel
parent0d1756482e66f326eb65fe08eed24ce2efabb168 (diff)
parentd96c66ab9fb3ad8b243669cf6b41e68d0f7f9ecd (diff)
downloadlinux-52b761b48f8e23399fafe3834a173c990357b8de.tar.xz
Merge tag 'kvmarm-fixes-6.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 6.9, part #1 - Ensure perf events programmed to count during guest execution are actually enabled before entering the guest in the nVHE configuration. - Restore out-of-range handler for stage-2 translation faults. - Several fixes to stage-2 TLB invalidations to avoid stale translations, possibly including partial walk caches. - Fix early handling of architectural VHE-only systems to ensure E2H is appropriately set. - Correct a format specifier warning in the arch_timer selftest. - Make the KVM banner message correctly handle all of the possible configurations.
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/common.c9
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c93
-rw-r--r--arch/x86/kernel/cpu/topology.c11
-rw-r--r--arch/x86/kernel/cpu/topology_common.c12
-rw-r--r--arch/x86/kernel/e820.c17
-rw-r--r--arch/x86/kernel/fpu/xstate.c5
-rw-r--r--arch/x86/kernel/fpu/xstate.h14
-rw-r--r--arch/x86/kernel/head64.c18
-rw-r--r--arch/x86/kernel/kprobes/core.c11
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/setup.c10
-rw-r--r--arch/x86/kernel/smpboot.c32
12 files changed, 114 insertions, 128 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ba8cf5e9ce56..5c1e6d6be267 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2307,6 +2307,8 @@ void arch_smt_update(void)
void __init arch_cpu_finalize_init(void)
{
+ struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info);
+
identify_boot_cpu();
select_idle_routine();
@@ -2345,6 +2347,13 @@ void __init arch_cpu_finalize_init(void)
fpu__init_system();
fpu__init_cpu();
+ /*
+ * Ensure that access to the per CPU representation has the initial
+ * boot CPU configuration.
+ */
+ *c = boot_cpu_data;
+ c->initialized = true;
+
alternative_instructions();
if (IS_ENABLED(CONFIG_X86_64)) {
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 303fef824167..e0fd57a8ba84 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -45,70 +45,70 @@ bool hyperv_paravisor_present __ro_after_init;
EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
#if IS_ENABLED(CONFIG_HYPERV)
-static inline unsigned int hv_get_nested_reg(unsigned int reg)
+static inline unsigned int hv_get_nested_msr(unsigned int reg)
{
- if (hv_is_sint_reg(reg))
- return reg - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0;
+ if (hv_is_sint_msr(reg))
+ return reg - HV_X64_MSR_SINT0 + HV_X64_MSR_NESTED_SINT0;
switch (reg) {
- case HV_REGISTER_SIMP:
- return HV_REGISTER_NESTED_SIMP;
- case HV_REGISTER_SIEFP:
- return HV_REGISTER_NESTED_SIEFP;
- case HV_REGISTER_SVERSION:
- return HV_REGISTER_NESTED_SVERSION;
- case HV_REGISTER_SCONTROL:
- return HV_REGISTER_NESTED_SCONTROL;
- case HV_REGISTER_EOM:
- return HV_REGISTER_NESTED_EOM;
+ case HV_X64_MSR_SIMP:
+ return HV_X64_MSR_NESTED_SIMP;
+ case HV_X64_MSR_SIEFP:
+ return HV_X64_MSR_NESTED_SIEFP;
+ case HV_X64_MSR_SVERSION:
+ return HV_X64_MSR_NESTED_SVERSION;
+ case HV_X64_MSR_SCONTROL:
+ return HV_X64_MSR_NESTED_SCONTROL;
+ case HV_X64_MSR_EOM:
+ return HV_X64_MSR_NESTED_EOM;
default:
return reg;
}
}
-u64 hv_get_non_nested_register(unsigned int reg)
+u64 hv_get_non_nested_msr(unsigned int reg)
{
u64 value;
- if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present)
+ if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present)
hv_ivm_msr_read(reg, &value);
else
rdmsrl(reg, value);
return value;
}
-EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
+EXPORT_SYMBOL_GPL(hv_get_non_nested_msr);
-void hv_set_non_nested_register(unsigned int reg, u64 value)
+void hv_set_non_nested_msr(unsigned int reg, u64 value)
{
- if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) {
+ if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) {
hv_ivm_msr_write(reg, value);
/* Write proxy bit via wrmsl instruction */
- if (hv_is_sint_reg(reg))
+ if (hv_is_sint_msr(reg))
wrmsrl(reg, value | 1 << 20);
} else {
wrmsrl(reg, value);
}
}
-EXPORT_SYMBOL_GPL(hv_set_non_nested_register);
+EXPORT_SYMBOL_GPL(hv_set_non_nested_msr);
-u64 hv_get_register(unsigned int reg)
+u64 hv_get_msr(unsigned int reg)
{
if (hv_nested)
- reg = hv_get_nested_reg(reg);
+ reg = hv_get_nested_msr(reg);
- return hv_get_non_nested_register(reg);
+ return hv_get_non_nested_msr(reg);
}
-EXPORT_SYMBOL_GPL(hv_get_register);
+EXPORT_SYMBOL_GPL(hv_get_msr);
-void hv_set_register(unsigned int reg, u64 value)
+void hv_set_msr(unsigned int reg, u64 value)
{
if (hv_nested)
- reg = hv_get_nested_reg(reg);
+ reg = hv_get_nested_msr(reg);
- hv_set_non_nested_register(reg, value);
+ hv_set_non_nested_msr(reg, value);
}
-EXPORT_SYMBOL_GPL(hv_set_register);
+EXPORT_SYMBOL_GPL(hv_set_msr);
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
@@ -352,13 +352,24 @@ static void __init reduced_hw_init(void)
x86_init.irqs.pre_vector_init = x86_init_noop;
}
+int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
+{
+ unsigned int hv_max_functions;
+
+ hv_max_functions = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS);
+ if (hv_max_functions < HYPERV_CPUID_VERSION) {
+ pr_err("%s: Could not detect Hyper-V version\n", __func__);
+ return -ENODEV;
+ }
+
+ cpuid(HYPERV_CPUID_VERSION, &info->eax, &info->ebx, &info->ecx, &info->edx);
+
+ return 0;
+}
+
static void __init ms_hyperv_init_platform(void)
{
int hv_max_functions_eax;
- int hv_host_info_eax;
- int hv_host_info_ebx;
- int hv_host_info_ecx;
- int hv_host_info_edx;
#ifdef CONFIG_PARAVIRT
pv_info.name = "Hyper-V";
@@ -409,21 +420,6 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: running on a nested hypervisor\n");
}
- /*
- * Extract host information.
- */
- if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) {
- hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
- hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
- hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
- hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION);
-
- pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
- hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF,
- hv_host_info_eax, hv_host_info_edx & 0xFFFFFF,
- hv_host_info_ecx, hv_host_info_edx >> 24);
- }
-
if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
x86_platform.calibrate_tsc = hv_get_tsc_khz;
@@ -456,7 +452,7 @@ static void __init ms_hyperv_init_platform(void)
/* To be supported: more work is required. */
ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE;
- /* HV_REGISTER_CRASH_CTL is unsupported. */
+ /* HV_MSR_CRASH_CTL is unsupported. */
ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
/* Don't trust Hyper-V's TLB-flushing hypercalls. */
@@ -648,6 +644,7 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
.init.x2apic_available = ms_hyperv_x2apic_available,
.init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id,
.init.init_platform = ms_hyperv_init_platform,
+ .init.guest_late_init = ms_hyperv_late_init,
#ifdef CONFIG_AMD_MEM_ENCRYPT
.runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare,
.runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish,
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 3259b1d4fefe..aaca8d235dc2 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -415,6 +415,17 @@ void __init topology_init_possible_cpus(void)
unsigned int total = assigned + disabled;
u32 apicid, firstid;
+ /*
+ * If there was no APIC registered, then fake one so that the
+ * topology bitmap is populated. That ensures that the code below
+ * is valid and the various query interfaces can be used
+ * unconditionally. This does not affect the actual APIC code in
+ * any way because either the local APIC address has not been
+ * registered or the local APIC was disabled on the command line.
+ */
+ if (topo_info.boot_cpu_apic_id == BAD_APICID)
+ topology_register_boot_apic(0);
+
if (!restrict_to_up()) {
if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
disabled += assigned - nr_cpu_ids;
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
index a50ae8d63d1c..9a6069e7133c 100644
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -140,7 +140,7 @@ static void parse_topology(struct topo_scan *tscan, bool early)
}
}
-static void topo_set_ids(struct topo_scan *tscan)
+static void topo_set_ids(struct topo_scan *tscan, bool early)
{
struct cpuinfo_x86 *c = tscan->c;
u32 apicid = c->topo.apicid;
@@ -148,8 +148,10 @@ static void topo_set_ids(struct topo_scan *tscan)
c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN);
c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN);
- c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
- c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
+ if (!early) {
+ c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
+ c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
+ }
/* Package relative core ID */
c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >>
@@ -187,7 +189,7 @@ void cpu_parse_topology(struct cpuinfo_x86 *c)
tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]);
}
- topo_set_ids(&tscan);
+ topo_set_ids(&tscan, false);
}
void __init cpu_init_topology(struct cpuinfo_x86 *c)
@@ -208,7 +210,7 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c)
x86_topo_system.dom_size[dom] = 1U << sft;
}
- topo_set_ids(&tscan);
+ topo_set_ids(&tscan, true);
/*
* AMD systems have Nodes per package which cannot be mapped to
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index b66f540de054..6f1b379e3b38 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1016,17 +1016,6 @@ void __init e820__reserve_setup_data(void)
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
- /*
- * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
- * kexec and do not need to be reserved.
- */
- if (data->type != SETUP_EFI &&
- data->type != SETUP_IMA &&
- data->type != SETUP_RNG_SEED)
- e820__range_update_kexec(pa_data,
- sizeof(*data) + data->len,
- E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-
if (data->type == SETUP_INDIRECT) {
len += data->len;
early_memunmap(data, sizeof(*data));
@@ -1038,12 +1027,9 @@ void __init e820__reserve_setup_data(void)
indirect = (struct setup_indirect *)data->data;
- if (indirect->type != SETUP_INDIRECT) {
+ if (indirect->type != SETUP_INDIRECT)
e820__range_update(indirect->addr, indirect->len,
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
- e820__range_update_kexec(indirect->addr, indirect->len,
- E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
- }
}
pa_data = pa_next;
@@ -1051,7 +1037,6 @@ void __init e820__reserve_setup_data(void)
}
e820__update_table(e820_table);
- e820__update_table(e820_table_kexec);
pr_info("extended physical RAM map:\n");
e820__print_table("reserve setup_data");
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 117e74c44e75..33a214b1a4ce 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -178,10 +178,11 @@ void fpu__init_cpu_xstate(void)
* Must happen after CR4 setup and before xsetbv() to allow KVM
* lazy passthrough. Write independent of the dynamic state static
* key as that does not work on the boot CPU. This also ensures
- * that any stale state is wiped out from XFD.
+ * that any stale state is wiped out from XFD. Reset the per CPU
+ * xfd cache too.
*/
if (cpu_feature_enabled(X86_FEATURE_XFD))
- wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
+ xfd_set_state(init_fpstate.xfd);
/*
* XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 3518fb26d06b..19ca623ffa2a 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs
#endif
#ifdef CONFIG_X86_64
+static inline void xfd_set_state(u64 xfd)
+{
+ wrmsrl(MSR_IA32_XFD, xfd);
+ __this_cpu_write(xfd_state, xfd);
+}
+
static inline void xfd_update_state(struct fpstate *fpstate)
{
if (fpu_state_size_dynamic()) {
u64 xfd = fpstate->xfd;
- if (__this_cpu_read(xfd_state) != xfd) {
- wrmsrl(MSR_IA32_XFD, xfd);
- __this_cpu_write(xfd_state, xfd);
- }
+ if (__this_cpu_read(xfd_state) != xfd)
+ xfd_set_state(xfd);
}
}
extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
#else
+static inline void xfd_set_state(u64 xfd) { }
+
static inline void xfd_update_state(struct fpstate *fpstate) { }
static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) {
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 212e8e06aeba..a817ed0724d1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -81,6 +81,13 @@ static inline bool check_la57_support(void)
if (!(native_read_cr4() & X86_CR4_LA57))
return false;
+ RIP_REL_REF(__pgtable_l5_enabled) = 1;
+ RIP_REL_REF(pgdir_shift) = 48;
+ RIP_REL_REF(ptrs_per_p4d) = 512;
+ RIP_REL_REF(page_offset_base) = __PAGE_OFFSET_BASE_L5;
+ RIP_REL_REF(vmalloc_base) = __VMALLOC_BASE_L5;
+ RIP_REL_REF(vmemmap_base) = __VMEMMAP_BASE_L5;
+
return true;
}
@@ -175,7 +182,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt);
p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
- pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC;
+ pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
}
RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
@@ -431,15 +438,6 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
- if (check_la57_support()) {
- __pgtable_l5_enabled = 1;
- pgdir_shift = 48;
- ptrs_per_p4d = 512;
- page_offset_base = __PAGE_OFFSET_BASE_L5;
- vmalloc_base = __VMALLOC_BASE_L5;
- vmemmap_base = __VMEMMAP_BASE_L5;
- }
-
cr4_init_shadow();
/* Kill off the identity-map trampoline */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 091b3ab76a18..d0e49bd7c6f3 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -373,7 +373,16 @@ out:
kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
bool *on_func_entry)
{
- if (is_endbr(*(u32 *)addr)) {
+ u32 insn;
+
+ /*
+ * Since 'addr' is not guaranteed to be safe to access, use
+ * copy_from_kernel_nofault() to read the instruction:
+ */
+ if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32)))
+ return NULL;
+
+ if (is_endbr(insn)) {
*on_func_entry = !offset || offset == 4;
if (*on_func_entry)
offset = 4;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1ccd30c8246f..e89171b0347a 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -197,12 +197,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
if (!smp_check_mpc(mpc, oem, str))
return 0;
- /* Initialize the lapic mapping */
- if (!acpi_lapic)
- register_lapic_address(mpc->lapic);
-
- if (early)
+ if (early) {
+ /* Initialize the lapic mapping */
+ if (!acpi_lapic)
+ register_lapic_address(mpc->lapic);
return 1;
+ }
/* Now process the configuration blocks. */
while (count < mpc->length) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3e1e96efadfe..ef206500ed6f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1206,16 +1206,6 @@ void __init i386_reserve_resources(void)
#endif /* CONFIG_X86_32 */
-#ifndef CONFIG_SMP
-void __init smp_prepare_boot_cpu(void)
-{
- struct cpuinfo_x86 *c = &cpu_data(0);
-
- *c = boot_cpu_data;
- c->initialized = true;
-}
-#endif
-
static struct notifier_block kernel_offset_notifier = {
.notifier_call = dump_kernel_offset
};
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fe355c89f6c1..76bb65045c64 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -313,14 +313,6 @@ static void notrace start_secondary(void *unused)
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
-static void __init smp_store_boot_cpu_info(void)
-{
- struct cpuinfo_x86 *c = &cpu_data(0);
-
- *c = boot_cpu_data;
- c->initialized = true;
-}
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
@@ -1039,29 +1031,15 @@ static __init void disable_smp(void)
cpumask_set_cpu(0, topology_die_cpumask(0));
}
-static void __init smp_cpu_index_default(void)
-{
- int i;
- struct cpuinfo_x86 *c;
-
- for_each_possible_cpu(i) {
- c = &cpu_data(i);
- /* mark all to hotplug */
- c->cpu_index = nr_cpu_ids;
- }
-}
-
void __init smp_prepare_cpus_common(void)
{
unsigned int i;
- smp_cpu_index_default();
-
- /*
- * Setup boot CPU information
- */
- smp_store_boot_cpu_info(); /* Final full version of the data */
- mb();
+ /* Mark all except the boot CPU as hotpluggable */
+ for_each_possible_cpu(i) {
+ if (i)
+ per_cpu(cpu_info.cpu_index, i) = nr_cpu_ids;
+ }
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);