Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/kvm_host.h	2
-rw-r--r--	arch/x86/kernel/amd_nb.c	3
-rw-r--r--	arch/x86/kernel/apic/apic.c	4
-rw-r--r--	arch/x86/kernel/cpu/mce/severity.c	4
-rw-r--r--	arch/x86/kvm/mmu.c	101
-rw-r--r--	arch/x86/kvm/vmx/nested.c	4
-rw-r--r--	arch/x86/kvm/x86.c	7
-rw-r--r--	arch/x86/purgatory/Makefile	35
8 files changed, 139 insertions, 21 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74e88e5edd9c..bdc16b0aa7c6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -335,6 +335,7 @@ struct kvm_mmu_page {
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
+ unsigned long mmu_valid_gen;
DECLARE_BITMAP(unsync_child_bitmap, 512);
#ifdef CONFIG_X86_32
@@ -856,6 +857,7 @@ struct kvm_arch {
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
+ unsigned long mmu_valid_gen;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index d63e63b7d1d9..251c795b4eb3 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -21,6 +21,7 @@
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
+#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
static DEFINE_MUTEX(smn_mutex);
@@ -50,6 +51,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{}
};
EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -63,6 +65,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{}
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index aa5495d0f478..08fb79f37793 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -834,6 +834,10 @@ bool __init apic_needs_pit(void)
if (!boot_cpu_has(X86_FEATURE_APIC))
return true;
+ /* Virt guests may lack ARAT, but still have DEADLINE */
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
+ return true;
+
/* Deadline timer is based on TSC so no further PIT action required */
if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
return false;
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 210f1f5db5f7..87bcdc6dc2f0 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -107,11 +107,11 @@ static struct severity {
*/
MCESEV(
AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
+ SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
),
MCESEV(
AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
+ SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
),
/* ignore OVER for UCNA */
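
For readers less familiar with the severity table, a small self-contained sketch (plain C, not part of the patch; the bit positions and names are illustrative placeholders, not the real mce.h definitions) of how a MASK(mask, result) entry is evaluated, and why dropping MCI_STATUS_OVER from the mask makes the entry match regardless of the OVER bit:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder status bits for illustration only. */
#define ST_UC   (1ULL << 61)	/* uncorrected error */
#define ST_OVER (1ULL << 62)	/* overflow          */

/* A severity entry matches when the masked status equals the expected result. */
static bool entry_matches(uint64_t status, uint64_t mask, uint64_t result)
{
	return (status & mask) == result;
}

int main(void)
{
	uint64_t status = ST_UC | ST_OVER;	/* an AO-style error with OVER set */

	/* With ST_OVER in the mask, the entry only matches when OVER is clear... */
	printf("%d\n", entry_matches(status, ST_OVER | ST_UC, ST_UC));	/* prints 0 */
	/* ...dropping it from the mask makes the match ignore OVER entirely. */
	printf("%d\n", entry_matches(status, ST_UC, ST_UC));		/* prints 1 */
	return 0;
}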
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 218b277bfda3..a63964e7cec7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2095,6 +2095,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
if (!direct)
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+
+ /*
+ * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
+ * depends on valid pages being added to the head of the list. See
+ * comments in kvm_zap_obsolete_pages().
+ */
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
kvm_mod_used_mmu_pages(vcpu->kvm, +1);
return sp;
@@ -2244,7 +2250,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
#define for_each_valid_sp(_kvm, _sp, _gfn) \
hlist_for_each_entry(_sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
- if ((_sp)->role.invalid) { \
+ if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \
} else
#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
@@ -2301,6 +2307,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
static void mmu_audit_disable(void) { }
#endif
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
@@ -2525,6 +2536,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (level > PT_PAGE_TABLE_LEVEL && need_sync)
flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
}
+ sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
clear_page(sp->spt);
trace_kvm_mmu_get_page(sp, true);
@@ -4233,6 +4245,13 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
return false;
if (cached_root_available(vcpu, new_cr3, new_role)) {
+ /*
+ * It is possible that the cached previous root page is
+ * obsolete because of a change in the MMU generation
+ * number. However, changing the generation number is
+ * accompanied by KVM_REQ_MMU_RELOAD, which will free
+ * the root set here and allocate a new one.
+ */
kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
if (!skip_tlb_flush) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -5649,11 +5668,89 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
return alloc_mmu_pages(vcpu);
}
+
+static void kvm_zap_obsolete_pages(struct kvm *kvm)
+{
+ struct kvm_mmu_page *sp, *node;
+ LIST_HEAD(invalid_list);
+ int ign;
+
+restart:
+ list_for_each_entry_safe_reverse(sp, node,
+ &kvm->arch.active_mmu_pages, link) {
+ /*
+ * No obsolete valid page exists before a newly created page
+ * since active_mmu_pages is a FIFO list.
+ */
+ if (!is_obsolete_sp(kvm, sp))
+ break;
+
+ /*
+ * Do not repeatedly zap a root page to avoid unnecessary
+ * KVM_REQ_MMU_RELOAD, otherwise we may not be able to
+ * progress:
+ * vcpu 0 vcpu 1
+ * call vcpu_enter_guest():
+ * 1): handle KVM_REQ_MMU_RELOAD
+ * and require mmu-lock to
+ * load mmu
+ * repeat:
+ * 1): zap root page and
+ * send KVM_REQ_MMU_RELOAD
+ *
+ * 2): if (cond_resched_lock(mmu-lock))
+ *
+ * 2): hold mmu-lock and load mmu
+ *
+ * 3): see KVM_REQ_MMU_RELOAD bit
+ * on vcpu->requests is set
+ * then return 1 to call
+ * vcpu_enter_guest() again.
+ * goto repeat;
+ *
+ * Since we are reversely walking the list and the invalid
+ * list will be moved to the head, skip the invalid page
+ * can help us to avoid the infinity list walking.
+ */
+ if (sp->role.invalid)
+ continue;
+
+ if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ cond_resched_lock(&kvm->mmu_lock);
+ goto restart;
+ }
+
+ if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
+ goto restart;
+ }
+
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+}
+
+/*
+ * Fast-invalidate all shadow pages, using a lock-break technique
+ * to zap the obsolete pages.
+ *
+ * This is required when a memslot is being deleted or the VM is
+ * being destroyed; in these cases we must ensure that the KVM MMU
+ * no longer uses any resource of the slot being deleted (or, on VM
+ * destruction, of any slot) after the function returns.
+ */
+static void kvm_mmu_zap_all_fast(struct kvm *kvm)
+{
+ spin_lock(&kvm->mmu_lock);
+ kvm->arch.mmu_valid_gen++;
+
+ kvm_zap_obsolete_pages(kvm);
+ spin_unlock(&kvm->mmu_lock);
+}
+
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot,
struct kvm_page_track_notifier_node *node)
{
- kvm_mmu_zap_all(kvm);
+ kvm_mmu_zap_all_fast(kvm);
}
void kvm_mmu_init_vm(struct kvm *kvm)
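
A minimal, self-contained sketch (plain C, not part of the patch; struct vm, struct shadow_page and page_is_obsolete() are hypothetical stand-ins for kvm_arch, kvm_mmu_page and is_obsolete_sp()) of the generation-number scheme the mmu.c hunks above rely on: each page records the generation it was created under, bumping the per-VM generation marks every existing page obsolete at once, and kvm_zap_obsolete_pages() can then reclaim them lazily, stopping its reverse walk of the FIFO list at the first non-obsolete entry.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, simplified stand-ins for kvm_arch and kvm_mmu_page. */
struct vm          { unsigned long mmu_valid_gen; };
struct shadow_page { unsigned long mmu_valid_gen; };

/* Mirrors is_obsolete_sp(): a page tagged with an older generation is stale. */
static bool page_is_obsolete(const struct vm *vm, const struct shadow_page *sp)
{
	return sp->mmu_valid_gen != vm->mmu_valid_gen;
}

int main(void)
{
	struct vm vm = { .mmu_valid_gen = 0 };
	struct shadow_page sp = { .mmu_valid_gen = vm.mmu_valid_gen };	/* tagged at creation */

	vm.mmu_valid_gen++;	/* "fast zap all": every existing page is now obsolete */
	printf("obsolete: %d\n", page_is_obsolete(&vm, &sp));	/* prints 1 */
	return 0;
}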
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ced9fba32598..a3cba321b5c5 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4540,6 +4540,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
int len;
gva_t gva = 0;
struct vmcs12 *vmcs12;
+ struct x86_exception e;
short offset;
if (!nested_vmx_check_permission(vcpu))
@@ -4588,7 +4589,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
vmx_instruction_info, true, len, &gva))
return 1;
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
- kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL);
+ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e))
+ kvm_inject_page_fault(vcpu, &e);
}
return nested_vmx_succeed(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 290c3c3efb87..91602d310a3f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5312,6 +5312,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
/* kvm_write_guest_virt_system can pull in tons of pages. */
vcpu->arch.l1tf_flush_l1d = true;
+ /*
+ * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
+ * is returned, but our callers are not ready for that and they blindly
+ * call kvm_inject_page_fault. Ensure that they at least do not leak
+ * uninitialized kernel stack memory into cr2 and error code.
+ */
+ memset(exception, 0, sizeof(*exception));
return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
PFERR_WRITE_MASK, exception);
}
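
The FIXME above describes a general defensive pattern: zero an out-parameter before use so callers that consume it unconditionally never see uninitialized stack memory. A self-contained sketch (plain C, not part of the patch; fault_info and do_guest_write are hypothetical stand-ins for x86_exception and kvm_write_guest_virt_system()):

#include <stdio.h>
#include <string.h>

/* Hypothetical out-parameter that a callee may fill only partially on error. */
struct fault_info {
	unsigned long address;
	unsigned int  error_code;
};

static int do_guest_write(struct fault_info *fault)
{
	/* Zero the out-parameter up front, mirroring the memset() added above:
	 * even if an error path fills it only partially (or not at all), the
	 * caller never observes stale stack bytes.
	 */
	memset(fault, 0, sizeof(*fault));

	/* ... attempt the write; on failure fill *fault and return nonzero ... */
	return 0;
}

int main(void)
{
	struct fault_info e;	/* uninitialized stack memory, like the caller's x86_exception */

	if (do_guest_write(&e))
		printf("fault at %#lx, error %#x\n", e.address, e.error_code);
	return 0;
}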
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 8901a1f89cf5..10fb42da0007 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -18,37 +18,40 @@ targets += purgatory.ro
KASAN_SANITIZE := n
KCOV_INSTRUMENT := n
+# These are adjustments to the compiler flags used for objects that
+# make up the standalone purgatory.ro
+
+PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
+PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
# in turn leaves some undefined symbols like __fentry__ in purgatory and not
# sure how to relocate those.
ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_sha256.o += $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_purgatory.o += $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_string.o += $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_kexec-purgatory.o += $(CC_FLAGS_FTRACE)
+PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_FTRACE)
endif
ifdef CONFIG_STACKPROTECTOR
-CFLAGS_REMOVE_sha256.o += -fstack-protector
-CFLAGS_REMOVE_purgatory.o += -fstack-protector
-CFLAGS_REMOVE_string.o += -fstack-protector
-CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector
+PURGATORY_CFLAGS_REMOVE += -fstack-protector
endif
ifdef CONFIG_STACKPROTECTOR_STRONG
-CFLAGS_REMOVE_sha256.o += -fstack-protector-strong
-CFLAGS_REMOVE_purgatory.o += -fstack-protector-strong
-CFLAGS_REMOVE_string.o += -fstack-protector-strong
-CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector-strong
+PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong
endif
ifdef CONFIG_RETPOLINE
-CFLAGS_REMOVE_sha256.o += $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_purgatory.o += $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_string.o += $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_kexec-purgatory.o += $(RETPOLINE_CFLAGS)
+PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS)
endif
+CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_purgatory.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_sha256.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_string.o += $(PURGATORY_CFLAGS)
+
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)