From 218bf772bddd221489c38dde6ef8e917131161f6 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 2 Jun 2021 13:52:24 -0700 Subject: kvm: LAPIC: Restore guard to prevent illegal APIC register access Per the SDM, "any access that touches bytes 4 through 15 of an APIC register may cause undefined behavior and must not be executed." Worse, such an access in kvm_lapic_reg_read can result in a leak of kernel stack contents. Prior to commit 01402cf81051 ("kvm: LAPIC: write down valid APIC registers"), such an access was explicitly disallowed. Restore the guard that was removed in that commit. Fixes: 01402cf81051 ("kvm: LAPIC: write down valid APIC registers") Signed-off-by: Jim Mattson Reported-by: syzbot Message-Id: <20210602205224.3189316-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6d72d8f43310..17fa4ab1b834 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, if (!apic_x2apic_mode(apic)) valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); + if (alignment + len > 4) + return 1; + if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) return 1; -- cgit v1.2.3 From 02ffbe6351f5c88337143bcbc649832ded7445c0 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Wed, 9 Jun 2021 20:22:17 +0800 Subject: KVM: SVM: fix doc warnings Fix kernel-doc warnings: arch/x86/kvm/svm/avic.c:233: warning: Function parameter or member 'activate' not described in 'avic_update_access_page' arch/x86/kvm/svm/avic.c:233: warning: Function parameter or member 'kvm' not described in 'avic_update_access_page' arch/x86/kvm/svm/avic.c:781: warning: Function parameter or member 'e' not described in 'get_pi_vcpu_info' arch/x86/kvm/svm/avic.c:781: warning: Function parameter or member 'kvm' not described in 'get_pi_vcpu_info' arch/x86/kvm/svm/avic.c:781: warning: Function parameter or member 'svm' not described in 'get_pi_vcpu_info' arch/x86/kvm/svm/avic.c:781: warning: Function parameter or member 'vcpu_info' not described in 'get_pi_vcpu_info' arch/x86/kvm/svm/avic.c:1009: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Signed-off-by: ChenXiaoSong Message-Id: <20210609122217.2967131-1-chenxiaosong2@huawei.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 0e62e6a2438c..5e7e920113f3 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -221,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, return &avic_physical_id_table[index]; } -/** +/* * Note: * AVIC hardware walks the nested page table to check permissions, * but does not use the SPA address specified in the leaf page @@ -764,7 +764,7 @@ out: return ret; } -/** +/* * Note: * The HW cannot support posting multicast/broadcast * interrupts to a vCPU. So, we still use legacy interrupt @@ -1005,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } -/** +/* * This function is called during VCPU halt/unhalt. */ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) -- cgit v1.2.3 From 551912d286e940e63abe9e005f434691ee24fd15 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 May 2021 15:07:56 -0500 Subject: KVM: x86: Fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix a couple of warnings by explicitly adding break statements instead of just letting the code fall through to the next case. Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva Message-Id: <20210528200756.GA39320@embeddedor> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 1 + arch/x86/kvm/vmx/vmx.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9a48f138832d..b4da665bb892 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -655,6 +655,7 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) entry->ecx = F(RDPID); ++array->nent; + break; default: break; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 50b42d7a8a11..c2a779b688e6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6247,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) switch (kvm_get_apic_mode(vcpu)) { case LAPIC_MODE_INVALID: WARN_ONCE(true, "Invalid local APIC state"); + break; case LAPIC_MODE_DISABLED: break; case LAPIC_MODE_XAPIC: -- cgit v1.2.3 From 78fcb2c91adfec8ce3a2ba6b4d0dda89f2f4a7c6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 9 Jun 2021 11:56:11 -0700 Subject: KVM: x86: Immediately reset the MMU context when the SMM flag is cleared Immediately reset the MMU context when the vCPU's SMM flag is cleared so that the SMM flag in the MMU role is always synchronized with the vCPU's flag. If RSM fails (which isn't correctly emulated), KVM will bail without calling post_leave_smm() and leave the MMU in a bad state. The bad MMU role can lead to a NULL pointer dereference when grabbing a shadow page's rmap for a page fault as the initial lookups for the gfn will happen with the vCPU's SMM flag (=0), whereas the rmap lookup will use the shadow page's SMM flag, which comes from the MMU (=1). SMM has an entirely different set of memslots, and so the initial lookup can find a memslot (SMM=0) and then explode on the rmap memslot lookup (SMM=1). general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 PID: 8410 Comm: syz-executor382 Not tainted 5.13.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__gfn_to_rmap arch/x86/kvm/mmu/mmu.c:935 [inline] RIP: 0010:gfn_to_rmap+0x2b0/0x4d0 arch/x86/kvm/mmu/mmu.c:947 Code: <42> 80 3c 20 00 74 08 4c 89 ff e8 f1 79 a9 00 4c 89 fb 4d 8b 37 44 RSP: 0018:ffffc90000ffef98 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888015b9f414 RCX: ffff888019669c40 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000001 RBP: 0000000000000001 R08: ffffffff811d9cdb R09: ffffed10065a6002 R10: ffffed10065a6002 R11: 0000000000000000 R12: dffffc0000000000 R13: 0000000000000003 R14: 0000000000000001 R15: 0000000000000000 FS: 000000000124b300(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000028e31000 CR4: 00000000001526e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rmap_add arch/x86/kvm/mmu/mmu.c:965 [inline] mmu_set_spte+0x862/0xe60 arch/x86/kvm/mmu/mmu.c:2604 __direct_map arch/x86/kvm/mmu/mmu.c:2862 [inline] direct_page_fault+0x1f74/0x2b70 arch/x86/kvm/mmu/mmu.c:3769 kvm_mmu_do_page_fault arch/x86/kvm/mmu.h:124 [inline] kvm_mmu_page_fault+0x199/0x1440 arch/x86/kvm/mmu/mmu.c:5065 vmx_handle_exit+0x26/0x160 arch/x86/kvm/vmx/vmx.c:6122 vcpu_enter_guest+0x3bdd/0x9630 arch/x86/kvm/x86.c:9428 vcpu_run+0x416/0xc20 arch/x86/kvm/x86.c:9494 kvm_arch_vcpu_ioctl_run+0x4e8/0xa40 arch/x86/kvm/x86.c:9722 kvm_vcpu_ioctl+0x70f/0xbb0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3460 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:1069 [inline] __se_sys_ioctl+0xfb/0x170 fs/ioctl.c:1055 do_syscall_64+0x3f/0xb0 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x440ce9 Cc: stable@vger.kernel.org Reported-by: syzbot+fb0b6a7e8713aeb0319c@syzkaller.appspotmail.com Fixes: 9ec19493fb86 ("KVM: x86: clear SMM flags before loading state while leaving SMM") Signed-off-by: Sean Christopherson Message-Id: <20210609185619.992058-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9dd23bdfc6cc..54d212fe9b15 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7106,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) { - emul_to_vcpu(ctxt)->arch.hflags = emul_flags; + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + + vcpu->arch.hflags = emul_flags; + kvm_mmu_reset_context(vcpu); } static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3 From 934002cd660b035b926438244b4294e647507e13 Mon Sep 17 00:00:00 2001 From: Alper Gun Date: Thu, 10 Jun 2021 17:46:04 +0000 Subject: KVM: SVM: Call SEV Guest Decommission if ASID binding fails Send SEV_CMD_DECOMMISSION command to PSP firmware if ASID binding fails. If a failure happens after a successful LAUNCH_START command, a decommission command should be executed. Otherwise, guest context will be unfreed inside the AMD SP. After the firmware will not have memory to allocate more SEV guest context, LAUNCH_START command will begin to fail with SEV_RET_RESOURCE_LIMIT error. The existing code calls decommission inside sev_unbind_asid, but it is not called if a failure happens before guest activation succeeds. If sev_bind_asid fails, decommission is never called. PSP firmware has a limit for the number of guests. If sev_asid_binding fails many times, PSP firmware will not have resources to create another guest context. Cc: stable@vger.kernel.org Fixes: 59414c989220 ("KVM: SVM: Add support for KVM_SEV_LAUNCH_START command") Reported-by: Peter Gonda Signed-off-by: Alper Gun Reviewed-by: Marc Orr Signed-off-by: Paolo Bonzini Message-Id: <20210610174604.2554090-1-alpergun@google.com> --- arch/x86/kvm/svm/sev.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index e0ce5da97fc2..8d36f0c73071 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev) sev->misc_cg = NULL; } -static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +static void sev_decommission(unsigned int handle) { struct sev_data_decommission decommission; + + if (!handle) + return; + + decommission.handle = handle; + sev_guest_decommission(&decommission, NULL); +} + +static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +{ struct sev_data_deactivate deactivate; if (!handle) @@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) sev_guest_deactivate(&deactivate, NULL); up_read(&sev_deactivate_lock); - /* decommission handle */ - decommission.handle = handle; - sev_guest_decommission(&decommission, NULL); + sev_decommission(handle); } static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) @@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) /* Bind ASID to this guest */ ret = sev_bind_asid(kvm, start.handle, error); - if (ret) + if (ret) { + sev_decommission(start.handle); goto e_free_session; + } /* return handle to userspace */ params.handle = start.handle; -- cgit v1.2.3 From dfdc0a714d241bfbf951886c373cd1ae463fcc25 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 10 Jun 2021 21:59:33 -0700 Subject: KVM: X86: Fix x86_emulator slab cache leak Commit c9b8b07cded58 (KVM: x86: Dynamically allocate per-vCPU emulation context) tries to allocate per-vCPU emulation context dynamically, however, the x86_emulator slab cache is still exiting after the kvm module is unload as below after destroying the VM and unloading the kvm module. grep x86_emulator /proc/slabinfo x86_emulator 36 36 2672 12 8 : tunables 0 0 0 : slabdata 3 3 0 This patch fixes this slab cache leak by destroying the x86_emulator slab cache when the kvm module is unloaded. Fixes: c9b8b07cded58 (KVM: x86: Dynamically allocate per-vCPU emulation context) Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li Message-Id: <1623387573-5969-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54d212fe9b15..6d425310054b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8261,6 +8261,7 @@ void kvm_arch_exit(void) kvm_x86_ops.hardware_enable = NULL; kvm_mmu_module_exit(); free_percpu(user_return_msrs); + kmem_cache_destroy(x86_emulator_cache); kmem_cache_destroy(x86_fpu_cache); #ifdef CONFIG_KVM_XEN static_key_deferred_flush(&kvm_xen_enabled); -- cgit v1.2.3 From 654430efde27248be563df9a88631204b5fe2df2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Jun 2021 15:00:26 -0700 Subject: KVM: x86/mmu: Calculate and check "full" mmu_role for nested MMU Calculate and check the full mmu_role when initializing the MMU context for the nested MMU, where "full" means the bits and pieces of the role that aren't handled by kvm_calc_mmu_role_common(). While the nested MMU isn't used for shadow paging, things like the number of levels in the guest's page tables are surprisingly important when walking the guest page tables. Failure to reinitialize the nested MMU context if L2's paging mode changes can result in unexpected and/or missed page faults, and likely other explosions. E.g. if an L1 vCPU is running both a 32-bit PAE L2 and a 64-bit L2, the "common" role calculation will yield the same role for both L2s. If the 64-bit L2 is run after the 32-bit PAE L2, L0 will fail to reinitialize the nested MMU context, ultimately resulting in a bad walk of L2's page tables as the MMU will still have a guest root_level of PT32E_ROOT_LEVEL. WARNING: CPU: 4 PID: 167334 at arch/x86/kvm/vmx/vmx.c:3075 ept_save_pdptrs+0x15/0xe0 [kvm_intel] Modules linked in: kvm_intel] CPU: 4 PID: 167334 Comm: CPU 3/KVM Not tainted 5.13.0-rc1-d849817d5673-reqs #185 Hardware name: ASUS Q87M-E/Q87M-E, BIOS 1102 03/03/2014 RIP: 0010:ept_save_pdptrs+0x15/0xe0 [kvm_intel] Code: <0f> 0b c3 f6 87 d8 02 00f RSP: 0018:ffffbba702dbba00 EFLAGS: 00010202 RAX: 0000000000000011 RBX: 0000000000000002 RCX: ffffffff810a2c08 RDX: ffff91d7bc30acc0 RSI: 0000000000000011 RDI: ffff91d7bc30a600 RBP: ffff91d7bc30a600 R08: 0000000000000010 R09: 0000000000000007 R10: 0000000000000000 R11: 0000000000000000 R12: ffff91d7bc30a600 R13: ffff91d7bc30acc0 R14: ffff91d67c123460 R15: 0000000115d7e005 FS: 00007fe8e9ffb700(0000) GS:ffff91d90fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000029f15a001 CR4: 00000000001726e0 Call Trace: kvm_pdptr_read+0x3a/0x40 [kvm] paging64_walk_addr_generic+0x327/0x6a0 [kvm] paging64_gva_to_gpa_nested+0x3f/0xb0 [kvm] kvm_fetch_guest_virt+0x4c/0xb0 [kvm] __do_insn_fetch_bytes+0x11a/0x1f0 [kvm] x86_decode_insn+0x787/0x1490 [kvm] x86_decode_emulated_instruction+0x58/0x1e0 [kvm] x86_emulate_instruction+0x122/0x4f0 [kvm] vmx_handle_exit+0x120/0x660 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xe25/0x1cb0 [kvm] kvm_vcpu_ioctl+0x211/0x5a0 [kvm] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x40/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae Cc: Vitaly Kuznetsov Cc: stable@vger.kernel.org Fixes: bf627a928837 ("x86/kvm/mmu: check if MMU reconfiguration is needed in init_kvm_nested_mmu()") Signed-off-by: Sean Christopherson Message-Id: <20210610220026.1364486-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0144c40d09c7..8d5876dfc6b7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu) context->inject_page_fault = kvm_inject_page_fault; } +static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu) +{ + union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false); + + /* + * Nested MMUs are used only for walking L2's gva->gpa, they never have + * shadow pages of their own and so "direct" has no meaning. Set it + * to "true" to try to detect bogus usage of the nested MMU. + */ + role.base.direct = true; + + if (!is_paging(vcpu)) + role.base.level = 0; + else if (is_long_mode(vcpu)) + role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL : + PT64_ROOT_4LEVEL; + else if (is_pae(vcpu)) + role.base.level = PT32E_ROOT_LEVEL; + else + role.base.level = PT32_ROOT_LEVEL; + + return role; +} + static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) { - union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false); + union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu); struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; if (new_role.as_u64 == g_context->mmu_role.as_u64) -- cgit v1.2.3