From bf672720e83cf04c007aa11c242229e70985135b Mon Sep 17 00:00:00 2001
From: Maxim Levitsky
Date: Wed, 26 Jul 2023 16:59:45 +0300
Subject: KVM: x86: check the kvm_cpu_get_interrupt result before using it

The code was blindly assuming that kvm_cpu_get_interrupt never returns -1
when there is a pending interrupt.  While this should be true, a bug in
KVM can still cause it.

If -1 is returned, the code before this patch converted it to 0xFF and
injected vector 0xFF into the guest, which resulted in an issue that was
hard to debug.

Add a WARN_ON_ONCE to catch this case and skip the injection if it
happens again.

Signed-off-by: Maxim Levitsky
Message-Id: <20230726135945.260841-4-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a6b9bea62fb8..00b87fcf6da4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10203,9 +10203,13 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
 		if (r < 0)
 			goto out;
 		if (r) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
-			static_call(kvm_x86_inject_irq)(vcpu, false);
-			WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+			int irq = kvm_cpu_get_interrupt(vcpu);
+
+			if (!WARN_ON_ONCE(irq == -1)) {
+				kvm_queue_interrupt(vcpu, irq, false);
+				static_call(kvm_x86_inject_irq)(vcpu, false);
+				WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+			}
 		}
 		if (kvm_cpu_has_injectable_intr(vcpu))
 			static_call(kvm_x86_enable_irq_window)(vcpu);
--
cgit v1.2.3

From 5e1fe4a21c0c2a69419d97d62d3213e8f843920d Mon Sep 17 00:00:00 2001
From: Like Xu
Date: Mon, 24 Jul 2023 19:12:36 +0800
Subject: KVM: x86/irq: Conditionally register IRQ bypass consumer again

As was attempted in commit 14717e203186 ("kvm: Conditionally register IRQ
bypass consumer"): if we don't support a mechanism for bypassing IRQs,
don't register as a consumer.  Initially this applied to AMD processors,
but when AVIC support was implemented for assigned devices,
kvm_arch_has_irq_bypass() was always returning true.

We can still skip registering the consumer where enable_apicv or the
posted-interrupts capability is unsupported or globally disabled.  This
eliminates meaningless dev_info()s when the connect fails between
producer and consumer, such as on Linux hosts where enable_apicv or the
posted-interrupts capability is unsupported or globally disabled.
Cc: Alex Williamson
Reported-by: Yong He
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217379
Signed-off-by: Like Xu
Message-Id: <20230724111236.76570-1-likexu@tencent.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00b87fcf6da4..43e0c51da65e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13189,7 +13189,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
 bool kvm_arch_has_irq_bypass(void)
 {
-	return true;
+	return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
 }
 
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
--
cgit v1.2.3

From 3f2739bd1e0b7e9669333852b4e618294d5a1e54 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 21 Jul 2023 15:43:36 -0700
Subject: KVM: x86: Acquire SRCU read lock when handling fastpath MSR writes

Temporarily acquire kvm->srcu for read when potentially emulating WRMSR
in the VM-Exit fastpath handler, as several of the common helpers used
during emulation expect the caller to provide SRCU protection.  E.g. if
the guest is counting instructions retired, KVM will query the PMU event
filter when stepping over the WRMSR.

  dump_stack+0x85/0xdf
  lockdep_rcu_suspicious+0x109/0x120
  pmc_event_is_allowed+0x165/0x170
  kvm_pmu_trigger_event+0xa5/0x190
  handle_fastpath_set_msr_irqoff+0xca/0x1e0
  svm_vcpu_run+0x5c3/0x7b0 [kvm_amd]
  vcpu_enter_guest+0x2108/0x2580

Alternatively, check_pmu_event_filter() could acquire kvm->srcu, but this
isn't the first bug of this nature, e.g. see commit 5c30e8101e8d ("KVM:
SVM: Skip WRMSR fastpath on VM-Exit if next RIP isn't valid").  Providing
protection for the entirety of WRMSR emulation will allow reverting the
aforementioned commit, and will avoid having to play whack-a-mole when
new uses of SRCU-protected structures are inevitably added in common
emulation helpers.

Fixes: dfdeda67ea2d ("KVM: x86/pmu: Prevent the PMU from counting disallowed events")
Reported-by: Greg Thelen
Reported-by: Aaron Lewis
Signed-off-by: Sean Christopherson
Message-Id: <20230721224337.2335137-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43e0c51da65e..27080bd986b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2172,6 +2172,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
 	u64 data;
 	fastpath_t ret = EXIT_FASTPATH_NONE;
 
+	kvm_vcpu_srcu_read_lock(vcpu);
+
 	switch (msr) {
 	case APIC_BASE_MSR + (APIC_ICR >> 4):
 		data = kvm_read_edx_eax(vcpu);
@@ -2194,6 +2196,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
 	if (ret != EXIT_FASTPATH_NONE)
 		trace_kvm_msr_write(msr, data);
 
+	kvm_vcpu_srcu_read_unlock(vcpu);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
--
cgit v1.2.3

From 26a0652cb453c72f6aab0974bc4939e9b14f886b Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Tue, 13 Jun 2023 13:30:35 -0700
Subject: KVM: x86: Disallow KVM_SET_SREGS{2} if incoming CR0 is invalid

Reject KVM_SET_SREGS{2} with -EINVAL if the incoming CR0 is invalid, e.g.
due to setting bits 63:32, illegal combinations, or to a value that isn't
allowed in VMX (non-)root mode.
The VMX checks in particular are "fun" as failure to disallow Real Mode
for an L2 that is configured with unrestricted guest disabled, when KVM
itself has unrestricted guest enabled, will result in KVM forcing VM86
mode to virtualize Real Mode for L2, but then fail to unwind the related
metadata when synthesizing a nested VM-Exit back to L1 (which has
unrestricted guest enabled).

Opportunistically fix a benign typo in the prototype for is_valid_cr4().

Cc: stable@vger.kernel.org
Reported-by: syzbot+5feef0b9ee9c8e9e5689@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/000000000000f316b705fdf6e2b4@google.com
Signed-off-by: Sean Christopherson
Message-Id: <20230613203037.1968489-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  3 ++-
 arch/x86/kvm/svm/svm.c             |  6 ++++++
 arch/x86/kvm/vmx/vmx.c             | 28 +++++++++++++++++++++-------
 arch/x86/kvm/x86.c                 | 34 ++++++++++++++++++++------------
 5 files changed, 52 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 13bc212cd4bc..e3054e3e46d5 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -37,6 +37,7 @@ KVM_X86_OP(get_segment)
 KVM_X86_OP(get_cpl)
 KVM_X86_OP(set_segment)
 KVM_X86_OP(get_cs_db_l_bits)
+KVM_X86_OP(is_valid_cr0)
 KVM_X86_OP(set_cr0)
 KVM_X86_OP_OPTIONAL(post_set_cr3)
 KVM_X86_OP(is_valid_cr4)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 28bd38303d70..3bc146dfd38d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1566,9 +1566,10 @@ struct kvm_x86_ops {
 	void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+	bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
 	void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
 	void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
-	bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
+	bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
 	void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
 	int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
 	void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index cea08e5fa69b..956726d867aa 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1786,6 +1786,11 @@ static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	}
 }
 
+static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+	return true;
+}
+
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4809,6 +4814,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.set_segment = svm_set_segment,
 	.get_cpl = svm_get_cpl,
 	.get_cs_db_l_bits = svm_get_cs_db_l_bits,
+	.is_valid_cr0 = svm_is_valid_cr0,
 	.set_cr0 = svm_set_cr0,
 	.post_set_cr3 = sev_post_set_cr3,
 	.is_valid_cr4 = svm_is_valid_cr4,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0dd4612b0ca2..3d011d62d969 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3047,6 +3047,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
 
+	/*
+	 * KVM should never use VM86 to virtualize Real Mode when L2 is active,
+	 * as using VM86 is unnecessary if unrestricted guest is enabled, and
+	 * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
+	 * should VM-Fail and KVM should reject userspace attempts to stuff
+	 * CR0.PG=0 when L2 is active.
+	 */
+	WARN_ON_ONCE(is_guest_mode(vcpu));
+
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
@@ -3236,6 +3245,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 #define CR3_EXITING_BITS	(CPU_BASED_CR3_LOAD_EXITING | \
 				 CPU_BASED_CR3_STORE_EXITING)
 
+static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+	if (is_guest_mode(vcpu))
+		return nested_guest_cr0_valid(vcpu, cr0);
+
+	if (to_vmx(vcpu)->nested.vmxon)
+		return nested_host_cr0_valid(vcpu, cr0);
+
+	return true;
+}
+
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5375,18 +5395,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 		val = (val & ~vmcs12->cr0_guest_host_mask) |
 			(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
 
-		if (!nested_guest_cr0_valid(vcpu, val))
-			return 1;
-
 		if (kvm_set_cr0(vcpu, val))
 			return 1;
 		vmcs_writel(CR0_READ_SHADOW, orig_val);
 		return 0;
 	} else {
-		if (to_vmx(vcpu)->nested.vmxon &&
-		    !nested_host_cr0_valid(vcpu, val))
-			return 1;
-
 		return kvm_set_cr0(vcpu, val);
 	}
 }
@@ -8214,6 +8227,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.set_segment = vmx_set_segment,
 	.get_cpl = vmx_get_cpl,
 	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+	.is_valid_cr0 = vmx_is_valid_cr0,
 	.set_cr0 = vmx_set_cr0,
 	.is_valid_cr4 = vmx_is_valid_cr4,
 	.set_cr4 = vmx_set_cr4,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 27080bd986b5..278dbd37dab2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -906,6 +906,22 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 }
 EXPORT_SYMBOL_GPL(load_pdptrs);
 
+static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+#ifdef CONFIG_X86_64
+	if (cr0 & 0xffffffff00000000UL)
+		return false;
+#endif
+
+	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
+		return false;
+
+	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
+		return false;
+
+	return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
+}
+
 void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
 {
 	/*
@@ -952,20 +968,13 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	unsigned long old_cr0 = kvm_read_cr0(vcpu);
 
-	cr0 |= X86_CR0_ET;
-
-#ifdef CONFIG_X86_64
-	if (cr0 & 0xffffffff00000000UL)
+	if (!kvm_is_valid_cr0(vcpu, cr0))
 		return 1;
-#endif
-
-	cr0 &= ~CR0_RESERVED_BITS;
 
-	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
-		return 1;
+	cr0 |= X86_CR0_ET;
 
-	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
-		return 1;
+	/* Write to CR0 reserved bits are ignored, even on Intel. */
+	cr0 &= ~CR0_RESERVED_BITS;
 
 #ifdef CONFIG_X86_64
 	if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
@@ -11468,7 +11477,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 			return false;
 	}
 
-	return kvm_is_valid_cr4(vcpu, sregs->cr4);
+	return kvm_is_valid_cr4(vcpu, sregs->cr4) &&
+	       kvm_is_valid_cr0(vcpu, sregs->cr0);
 }
 
 static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
--
cgit v1.2.3
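
As an illustration (not part of the series above), here is a minimal userspace
sketch of the behavior change from the last patch.  It assumes a host with
/dev/kvm, uses only standard KVM ioctls (KVM_CREATE_VM, KVM_CREATE_VCPU,
KVM_GET_SREGS, KVM_SET_SREGS), and defines the CR0 bit masks locally for the
example.  With kvm_is_valid_cr0() wired into kvm_is_valid_sregs(),
KVM_SET_SREGS is expected to fail with EINVAL when userspace tries to stuff an
illegal CR0 combination such as PG=1 with PE=0, instead of the value being
accepted and blowing up later.

/* Hypothetical test sketch, not taken from the patch series. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define CR0_PE (1ULL << 0)	/* Protected Mode Enable */
#define CR0_PG (1ULL << 31)	/* Paging */

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	struct kvm_sregs sregs;

	if (kvm < 0 || vm < 0 || vcpu < 0) {
		perror("setup");
		return 1;
	}

	if (ioctl(vcpu, KVM_GET_SREGS, &sregs) < 0) {
		perror("KVM_GET_SREGS");
		return 1;
	}

	/* PG=1 with PE=0 is an illegal combination per the SDM. */
	sregs.cr0 |= CR0_PG;
	sregs.cr0 &= ~CR0_PE;

	/* With the series applied, this ioctl should fail with EINVAL. */
	if (ioctl(vcpu, KVM_SET_SREGS, &sregs) < 0)
		perror("KVM_SET_SREGS rejected invalid CR0 (expected)");
	else
		printf("KVM_SET_SREGS accepted invalid CR0 (pre-fix behavior)\n");

	return 0;
}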