summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-11-02 23:12:51 +0300
committerDavid S. Miller <davem@davemloft.net>2019-11-02 23:54:56 +0300
commitd31e95585ca697fb31440c6fe30113adc85ecfbd (patch)
tree4936ea0aaa6b2aeeee4db51e3c60d938c9b9ed96 /arch/x86
parentc23fcbbc6aa4e0bb615e8a7f23e1f32aec235a1c (diff)
parent1204c70d9dcba31164f78ad5d8c88c42335d51f8 (diff)
downloadlinux-d31e95585ca697fb31440c6fe30113adc85ecfbd.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
The only slightly tricky merge conflict was the netdevsim because the mutex locking fix overlapped a lot of driver reload reorganization. The rest were (relatively) trivial in nature. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/acpi.c48
-rw-r--r--arch/x86/boot/compressed/eboot.c4
-rw-r--r--arch/x86/boot/compressed/misc.c25
-rw-r--r--arch/x86/events/amd/ibs.c8
-rw-r--r--arch/x86/events/intel/pt.c2
-rw-r--r--arch/x86/events/intel/uncore.c44
-rw-r--r--arch/x86/events/intel/uncore.h12
-rw-r--r--arch/x86/hyperv/hv_apic.c20
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/vmware.h14
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c3
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c4
-rw-r--r--arch/x86/kernel/head64.c22
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/lapic.c5
-rw-r--r--arch/x86/kvm/lapic.h5
-rw-r--r--arch/x86/kvm/svm.c16
-rw-r--r--arch/x86/kvm/vmx/nested.c64
-rw-r--r--arch/x86/kvm/vmx/nested.h13
-rw-r--r--arch/x86/kvm/vmx/vmx.c26
-rw-r--r--arch/x86/kvm/x86.c19
-rw-r--r--arch/x86/xen/enlighten_pv.c8
22 files changed, 227 insertions, 139 deletions
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 149795c369f2..25019d42ae93 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -21,30 +21,6 @@
struct mem_vector immovable_mem[MAX_NUMNODES*2];
/*
- * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
- * digits, and '\0' for termination.
- */
-#define MAX_ADDR_LEN 19
-
-static acpi_physical_address get_cmdline_acpi_rsdp(void)
-{
- acpi_physical_address addr = 0;
-
-#ifdef CONFIG_KEXEC
- char val[MAX_ADDR_LEN] = { };
- int ret;
-
- ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
- if (ret < 0)
- return 0;
-
- if (kstrtoull(val, 16, &addr))
- return 0;
-#endif
- return addr;
-}
-
-/*
* Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and
* ACPI_TABLE_GUID are found, take the former, which has more features.
*/
@@ -298,6 +274,30 @@ acpi_physical_address get_rsdp_addr(void)
}
#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE)
+/*
+ * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
+ * digits, and '\0' for termination.
+ */
+#define MAX_ADDR_LEN 19
+
+static acpi_physical_address get_cmdline_acpi_rsdp(void)
+{
+ acpi_physical_address addr = 0;
+
+#ifdef CONFIG_KEXEC
+ char val[MAX_ADDR_LEN] = { };
+ int ret;
+
+ ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
+ if (ret < 0)
+ return 0;
+
+ if (kstrtoull(val, 16, &addr))
+ return 0;
+#endif
+ return addr;
+}
+
/* Compute SRAT address from RSDP. */
static unsigned long get_acpi_srat_table(void)
{
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index d6662fdef300..82bc60c8acb2 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -13,6 +13,7 @@
#include <asm/e820/types.h>
#include <asm/setup.h>
#include <asm/desc.h>
+#include <asm/boot.h>
#include "../string.h"
#include "eboot.h"
@@ -813,7 +814,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
status = efi_relocate_kernel(sys_table, &bzimage_addr,
hdr->init_size, hdr->init_size,
hdr->pref_address,
- hdr->kernel_alignment);
+ hdr->kernel_alignment,
+ LOAD_PHYSICAL_ADDR);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "efi_relocate_kernel() failed!\n");
goto fail;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 53ac0cb2396d..9652d5c2afda 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -345,6 +345,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
{
const unsigned long kernel_total_size = VO__end - VO__text;
unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+ unsigned long needed_size;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
@@ -379,26 +380,38 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
+ /*
+ * The memory hole needed for the kernel is the larger of either
+ * the entire decompressed kernel plus relocation table, or the
+ * entire decompressed kernel plus .bss and .brk sections.
+ *
+ * On X86_64, the memory is mapped with PMD pages. Round the
+ * size up so that the full extent of PMD pages mapped is
+ * included in the check against the valid memory table
+ * entries. This ensures the full mapped area is usable RAM
+ * and doesn't include any reserved areas.
+ */
+ needed_size = max(output_len, kernel_total_size);
+#ifdef CONFIG_X86_64
+ needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
+#endif
+
/* Report initial kernel position details. */
debug_putaddr(input_data);
debug_putaddr(input_len);
debug_putaddr(output);
debug_putaddr(output_len);
debug_putaddr(kernel_total_size);
+ debug_putaddr(needed_size);
#ifdef CONFIG_X86_64
/* Report address of 32-bit trampoline */
debug_putaddr(trampoline_32bit);
#endif
- /*
- * The memory hole needed for the kernel is the larger of either
- * the entire decompressed kernel plus relocation table, or the
- * entire decompressed kernel plus .bss and .brk sections.
- */
choose_random_location((unsigned long)input_data, input_len,
(unsigned long *)&output,
- max(output_len, kernel_total_size),
+ needed_size,
&virt_addr);
/* Validate memory location choices. */
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 5b35b7ea5d72..26c36357c4c9 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -377,7 +377,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
config &= ~perf_ibs->cnt_mask;
- wrmsrl(hwc->config_base, config);
+ if (boot_cpu_data.x86 == 0x10)
+ wrmsrl(hwc->config_base, config);
config &= ~perf_ibs->enable_mask;
wrmsrl(hwc->config_base, config);
}
@@ -553,7 +554,8 @@ static struct perf_ibs perf_ibs_op = {
},
.msr = MSR_AMD64_IBSOPCTL,
.config_mask = IBS_OP_CONFIG_MASK,
- .cnt_mask = IBS_OP_MAX_CNT,
+ .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
+ IBS_OP_CUR_CNT_RAND,
.enable_mask = IBS_OP_ENABLE,
.valid_mask = IBS_OP_VAL,
.max_period = IBS_OP_MAX_CNT << 4,
@@ -614,7 +616,7 @@ fail:
if (event->attr.sample_type & PERF_SAMPLE_RAW)
offset_max = perf_ibs->offset_max;
else if (check_rip)
- offset_max = 2;
+ offset_max = 3;
else
offset_max = 1;
do {
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 74e80ed9c6c4..05e43d0f430b 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -627,7 +627,7 @@ static struct topa *topa_alloc(int cpu, gfp_t gfp)
* link as the 2nd entry in the table
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
- TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p);
+ TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT;
TOPA_ENTRY(&tp->topa, 1)->end = 1;
}
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 6fc2e06ab4c6..86467f85c383 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -502,10 +502,8 @@ void uncore_pmu_event_start(struct perf_event *event, int flags)
local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
uncore_enable_event(box, event);
- if (box->n_active == 1) {
- uncore_enable_box(box);
+ if (box->n_active == 1)
uncore_pmu_start_hrtimer(box);
- }
}
void uncore_pmu_event_stop(struct perf_event *event, int flags)
@@ -529,10 +527,8 @@ void uncore_pmu_event_stop(struct perf_event *event, int flags)
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
- if (box->n_active == 0) {
- uncore_disable_box(box);
+ if (box->n_active == 0)
uncore_pmu_cancel_hrtimer(box);
- }
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
@@ -778,6 +774,40 @@ static int uncore_pmu_event_init(struct perf_event *event)
return ret;
}
+static void uncore_pmu_enable(struct pmu *pmu)
+{
+ struct intel_uncore_pmu *uncore_pmu;
+ struct intel_uncore_box *box;
+
+ uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
+ if (!uncore_pmu)
+ return;
+
+ box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
+ if (!box)
+ return;
+
+ if (uncore_pmu->type->ops->enable_box)
+ uncore_pmu->type->ops->enable_box(box);
+}
+
+static void uncore_pmu_disable(struct pmu *pmu)
+{
+ struct intel_uncore_pmu *uncore_pmu;
+ struct intel_uncore_box *box;
+
+ uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
+ if (!uncore_pmu)
+ return;
+
+ box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
+ if (!box)
+ return;
+
+ if (uncore_pmu->type->ops->disable_box)
+ uncore_pmu->type->ops->disable_box(box);
+}
+
static ssize_t uncore_get_attr_cpumask(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -803,6 +833,8 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
pmu->pmu = (struct pmu) {
.attr_groups = pmu->type->attr_groups,
.task_ctx_nr = perf_invalid_context,
+ .pmu_enable = uncore_pmu_enable,
+ .pmu_disable = uncore_pmu_disable,
.event_init = uncore_pmu_event_init,
.add = uncore_pmu_event_add,
.del = uncore_pmu_event_del,
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index f36f7bebbc1b..bbfdaa720b45 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -441,18 +441,6 @@ static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box,
return -EINVAL;
}
-static inline void uncore_disable_box(struct intel_uncore_box *box)
-{
- if (box->pmu->type->ops->disable_box)
- box->pmu->type->ops->disable_box(box);
-}
-
-static inline void uncore_enable_box(struct intel_uncore_box *box)
-{
- if (box->pmu->type->ops->enable_box)
- box->pmu->type->ops->enable_box(box);
-}
-
static inline void uncore_disable_event(struct intel_uncore_box *box,
struct perf_event *event)
{
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 5c056b8aebef..e01078e93dd3 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -260,11 +260,21 @@ void __init hv_apic_init(void)
}
if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
- pr_info("Hyper-V: Using MSR based APIC access\n");
+ pr_info("Hyper-V: Using enlightened APIC (%s mode)",
+ x2apic_enabled() ? "x2apic" : "xapic");
+ /*
+ * With x2apic, architectural x2apic MSRs are equivalent to the
+ * respective synthetic MSRs, so there's no need to override
+ * the apic accessors. The only exception is
+ * hv_apic_eoi_write, because it benefits from lazy EOI when
+ * available, but it works for both xapic and x2apic modes.
+ */
apic_set_eoi_write(hv_apic_eoi_write);
- apic->read = hv_apic_read;
- apic->write = hv_apic_write;
- apic->icr_write = hv_apic_icr_write;
- apic->icr_read = hv_apic_icr_read;
+ if (!x2apic_enabled()) {
+ apic->read = hv_apic_read;
+ apic->write = hv_apic_write;
+ apic->icr_write = hv_apic_icr_write;
+ apic->icr_read = hv_apic_icr_read;
+ }
}
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 50eb430b0ad8..24d6598dea29 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1189,7 +1189,7 @@ struct kvm_x86_ops {
int (*set_nested_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state);
- void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+ bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
index e00c9e875933..ac9fc51e2b18 100644
--- a/arch/x86/include/asm/vmware.h
+++ b/arch/x86/include/asm/vmware.h
@@ -4,6 +4,7 @@
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
+#include <linux/stringify.h>
/*
* The hypercall definitions differ in the low word of the %edx argument
@@ -20,8 +21,8 @@
*/
/* Old port-based version */
-#define VMWARE_HYPERVISOR_PORT "0x5658"
-#define VMWARE_HYPERVISOR_PORT_HB "0x5659"
+#define VMWARE_HYPERVISOR_PORT 0x5658
+#define VMWARE_HYPERVISOR_PORT_HB 0x5659
/* Current vmcall / vmmcall version */
#define VMWARE_HYPERVISOR_HB BIT(0)
@@ -29,7 +30,8 @@
/* The low bandwidth call. The low word of edx is presumed clear. */
#define VMWARE_HYPERCALL \
- ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT ", %%dx; inl (%%dx)", \
+ ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \
+ "inl (%%dx), %%eax", \
"vmcall", X86_FEATURE_VMCALL, \
"vmmcall", X86_FEATURE_VMW_VMMCALL)
@@ -38,7 +40,8 @@
* HB and OUT bits set.
*/
#define VMWARE_HYPERCALL_HB_OUT \
- ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep outsb", \
+ ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
+ "rep outsb", \
"vmcall", X86_FEATURE_VMCALL, \
"vmmcall", X86_FEATURE_VMW_VMMCALL)
@@ -47,7 +50,8 @@
* HB bit set.
*/
#define VMWARE_HYPERCALL_HB_IN \
- ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep insb", \
+ ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
+ "rep insb", \
"vmcall", X86_FEATURE_VMCALL, \
"vmmcall", X86_FEATURE_VMW_VMMCALL)
#endif
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 45e92cba92f5..b0889c48a2ac 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -156,7 +156,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu)
{
struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
- cpumask_clear_cpu(dead_cpu, &cmsk->mask);
+ if (cmsk)
+ cpumask_clear_cpu(dead_cpu, &cmsk->mask);
free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
return 0;
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 267daad8c036..c656d92cd708 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -216,6 +216,10 @@ static void __init ms_hyperv_init_platform(void)
int hv_host_info_ecx;
int hv_host_info_edx;
+#ifdef CONFIG_PARAVIRT
+ pv_info.name = "Hyper-V";
+#endif
+
/*
* Extract the features and hints
*/
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 29ffa495bd1c..206a4b6144c2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -222,13 +222,31 @@ unsigned long __head __startup_64(unsigned long physaddr,
* we might write invalid pmds, when the kernel is relocated
* cleanup_highmap() fixes this up along with the mappings
* beyond _end.
+ *
+ * Only the region occupied by the kernel image has so far
+ * been checked against the table of usable memory regions
+ * provided by the firmware, so invalidate pages outside that
+ * region. A page table entry that maps to a reserved area of
+ * memory would allow processor speculation into that area,
+ * and on some hardware (particularly the UV platform) even
+ * speculative access to some reserved areas is caught as an
+ * error, causing the BIOS to halt the system.
*/
pmd = fixup_pointer(level2_kernel_pgt, physaddr);
- for (i = 0; i < PTRS_PER_PMD; i++) {
+
+ /* invalidate pages before the kernel image */
+ for (i = 0; i < pmd_index((unsigned long)_text); i++)
+ pmd[i] &= ~_PAGE_PRESENT;
+
+ /* fixup pages that are part of the kernel image */
+ for (; i <= pmd_index((unsigned long)_end); i++)
if (pmd[i] & _PAGE_PRESENT)
pmd[i] += load_delta;
- }
+
+ /* invalidate pages after the kernel image */
+ for (; i < PTRS_PER_PMD; i++)
+ pmd[i] &= ~_PAGE_PRESENT;
/*
* Fixup phys_base - remove the memory encryption mask to obtain
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9c5029cf6f3f..f68c0c753c38 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -363,7 +363,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 87b0fcc23ef8..b29d00b661ff 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -111,11 +111,6 @@ static inline int apic_enabled(struct kvm_lapic *apic)
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
-static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
-{
- return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
-}
-
static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
{
return apic->vcpu->vcpu_id;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2aad7e226fc0..1f5014852e20 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -242,4 +242,9 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
}
+static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
+{
+ return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
+}
+
#endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f8ecb6df5106..c5673bda4b66 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -734,8 +734,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu)
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
vcpu->arch.efer = efer;
- if (!npt_enabled && !(efer & EFER_LMA))
- efer &= ~EFER_LME;
+
+ if (!npt_enabled) {
+ /* Shadow paging assumes NX to be available. */
+ efer |= EFER_NX;
+
+ if (!(efer & EFER_LMA))
+ efer &= ~EFER_LME;
+ }
to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
@@ -4591,6 +4597,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
int ret = 0;
struct vcpu_svm *svm = to_svm(vcpu);
u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
+ u32 id = kvm_xapic_id(vcpu->arch.apic);
if (ldr == svm->ldr_reg)
return 0;
@@ -4598,7 +4605,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
avic_invalidate_logical_id_entry(vcpu);
if (ldr)
- ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr);
+ ret = avic_ldr_write(vcpu, id, ldr);
if (!ret)
svm->ldr_reg = ldr;
@@ -4610,8 +4617,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
{
u64 *old, *new;
struct vcpu_svm *svm = to_svm(vcpu);
- u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
- u32 id = (apic_id_reg >> 24) & 0xff;
+ u32 id = kvm_xapic_id(vcpu->arch.apic);
if (vcpu->vcpu_id == id)
return 0;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e76eb4f07f6c..0e7c9301fe86 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2917,7 +2917,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12);
-static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
+static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2937,19 +2937,18 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
vmx->nested.apic_access_page = NULL;
}
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
- /*
- * If translation failed, no matter: This feature asks
- * to exit when accessing the given address, and if it
- * can never be accessed, this feature won't do
- * anything anyway.
- */
if (!is_error_page(page)) {
vmx->nested.apic_access_page = page;
hpa = page_to_phys(vmx->nested.apic_access_page);
vmcs_write64(APIC_ACCESS_ADDR, hpa);
} else {
- secondary_exec_controls_clearbit(vmx,
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+ pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
+ __func__);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return false;
}
}
@@ -2994,6 +2993,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
else
exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
+ return true;
}
/*
@@ -3032,13 +3032,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
/*
* If from_vmentry is false, this is being called from state restore (either RSM
* or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
-+ *
-+ * Returns:
-+ * 0 - success, i.e. proceed with actual VMEnter
-+ * 1 - consistency check VMExit
-+ * -1 - consistency check VMFail
+ *
+ * Returns:
+ * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode
+ * NVMX_ENTRY_VMFAIL: Consistency check VMFail
+ * NVMX_ENTRY_VMEXIT: Consistency check VMExit
+ * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error
*/
-int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
+enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ bool from_vmentry)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -3081,11 +3083,12 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
prepare_vmcs02_early(vmx, vmcs12);
if (from_vmentry) {
- nested_get_vmcs12_pages(vcpu);
+ if (unlikely(!nested_get_vmcs12_pages(vcpu)))
+ return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
if (nested_vmx_check_vmentry_hw(vcpu)) {
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- return -1;
+ return NVMX_VMENTRY_VMFAIL;
}
if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
@@ -3149,7 +3152,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
* returned as far as L1 is concerned. It will only return (and set
* the success flag) when L2 exits (see nested_vmx_vmexit()).
*/
- return 0;
+ return NVMX_VMENTRY_SUCCESS;
/*
* A failed consistency check that leads to a VMExit during L1's
@@ -3165,14 +3168,14 @@ vmentry_fail_vmexit:
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
if (!from_vmentry)
- return 1;
+ return NVMX_VMENTRY_VMEXIT;
load_vmcs12_host_state(vcpu, vmcs12);
vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
vmcs12->exit_qualification = exit_qual;
if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
vmx->nested.need_vmcs12_to_shadow_sync = true;
- return 1;
+ return NVMX_VMENTRY_VMEXIT;
}
/*
@@ -3182,9 +3185,9 @@ vmentry_fail_vmexit:
static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
{
struct vmcs12 *vmcs12;
+ enum nvmx_vmentry_status status;
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
- int ret;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -3244,13 +3247,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* the nested entry.
*/
vmx->nested.nested_run_pending = 1;
- ret = nested_vmx_enter_non_root_mode(vcpu, true);
- vmx->nested.nested_run_pending = !ret;
- if (ret > 0)
- return 1;
- else if (ret)
- return nested_vmx_failValid(vcpu,
- VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ status = nested_vmx_enter_non_root_mode(vcpu, true);
+ if (unlikely(status != NVMX_VMENTRY_SUCCESS))
+ goto vmentry_failed;
/* Hide L1D cache contents from the nested guest. */
vmx->vcpu.arch.l1tf_flush_l1d = true;
@@ -3281,6 +3280,15 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return kvm_vcpu_halt(vcpu);
}
return 1;
+
+vmentry_failed:
+ vmx->nested.nested_run_pending = 0;
+ if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
+ return 0;
+ if (status == NVMX_VMENTRY_VMEXIT)
+ return 1;
+ WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
+ return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
}
/*
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 187d39bf0bf1..6280f33e5fa6 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -6,6 +6,16 @@
#include "vmcs12.h"
#include "vmx.h"
+/*
+ * Status returned by nested_vmx_enter_non_root_mode():
+ */
+enum nvmx_vmentry_status {
+ NVMX_VMENTRY_SUCCESS, /* Entered VMX non-root mode */
+ NVMX_VMENTRY_VMFAIL, /* Consistency check VMFail */
+ NVMX_VMENTRY_VMEXIT, /* Consistency check VMExit */
+ NVMX_VMENTRY_KVM_INTERNAL_ERROR,/* KVM internal error */
+};
+
void vmx_leave_nested(struct kvm_vcpu *vcpu);
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
bool apicv);
@@ -13,7 +23,8 @@ void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_vcpu_setup(void);
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
-int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
+enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ bool from_vmentry);
bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
u32 exit_intr_info, unsigned long exit_qualification);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e7970a2e8eae..5d21a4ab28cf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -969,17 +969,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
u64 guest_efer = vmx->vcpu.arch.efer;
u64 ignore_bits = 0;
- if (!enable_ept) {
- /*
- * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
- * host CPUID is more efficient than testing guest CPUID
- * or CR4. Host SMEP is anyway a requirement for guest SMEP.
- */
- if (boot_cpu_has(X86_FEATURE_SMEP))
- guest_efer |= EFER_NX;
- else if (!(guest_efer & EFER_NX))
- ignore_bits |= EFER_NX;
- }
+ /* Shadow paging assumes NX to be available. */
+ if (!enable_ept)
+ guest_efer |= EFER_NX;
/*
* LMA and LME handled by hardware; SCE meaningless outside long mode.
@@ -5543,14 +5535,6 @@ static int handle_encls(struct kvm_vcpu *vcpu)
return 1;
}
-static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu)
-{
- kvm_skip_emulated_instruction(vcpu);
- WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x",
- vmcs_read32(VM_EXIT_REASON));
- return 1;
-}
-
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -5602,15 +5586,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_INVVPID] = handle_vmx_instruction,
[EXIT_REASON_RDRAND] = handle_invalid_op,
[EXIT_REASON_RDSEED] = handle_invalid_op,
- [EXIT_REASON_XSAVES] = handle_unexpected_vmexit,
- [EXIT_REASON_XRSTORS] = handle_unexpected_vmexit,
[EXIT_REASON_PML_FULL] = handle_pml_full,
[EXIT_REASON_INVPCID] = handle_invpcid,
[EXIT_REASON_VMFUNC] = handle_vmx_instruction,
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
[EXIT_REASON_ENCLS] = handle_encls,
- [EXIT_REASON_UMWAIT] = handle_unexpected_vmexit,
- [EXIT_REASON_TPAUSE] = handle_unexpected_vmexit,
};
static const int kvm_vmx_max_exit_handlers =
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 661e2bf38526..ff395f812719 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -360,8 +360,7 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base);
asmlinkage __visible void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
- if (!kvm_rebooting)
- BUG();
+ BUG_ON(!kvm_rebooting);
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);
@@ -2537,6 +2536,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
vcpu->arch.pv_time_enabled = false;
+ vcpu->arch.time = 0;
}
static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
@@ -2702,8 +2702,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_KVM_SYSTEM_TIME: {
struct kvm_arch *ka = &vcpu->kvm->arch;
- kvmclock_reset(vcpu);
-
if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
@@ -2717,14 +2715,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
/* we verify if the enable bit is set... */
+ vcpu->arch.pv_time_enabled = false;
if (!(data & 1))
break;
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.pv_time, data & ~1ULL,
sizeof(struct pvclock_vcpu_time_info)))
- vcpu->arch.pv_time_enabled = false;
- else
vcpu->arch.pv_time_enabled = true;
break;
@@ -7941,8 +7938,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
- if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
- kvm_x86_ops->get_vmcs12_pages(vcpu);
+ if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
+ if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
+ r = 0;
+ goto out;
+ }
+ }
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 58f79ab32358..5bfea374a160 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -117,6 +117,14 @@ static void __init xen_banner(void)
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
+
+#ifdef CONFIG_X86_32
+ pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
+ "Support for running as 32-bit PV-guest under Xen will soon be removed\n"
+ "from the Linux kernel!\n"
+ "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
+ "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
+#endif
}
static void __init xen_pv_init_platform(void)