From baea2ce53f8c7448b4b8dadb563eb0df1bfdfb33 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Aug 2022 14:55:27 -0400 Subject: selftests: kvm: fix compilation Commit 49de12ba06ef ("selftests: drop KSFT_KHDR_INSTALL make target") dropped from tools/testing/selftests/lib.mk the code related to KSFT_KHDR_INSTALL, but in doing so it also dropped the definition of the ARCH variable. The ARCH variable is used in several subdirectories, but kvm/ is the only one of these that was using KSFT_KHDR_INSTALL. As a result, kvm selftests cannot be built anymore: In file included from include/x86_64/vmx.h:12, from x86_64/vmx_pmu_caps_test.c:18: include/x86_64/processor.h:15:10: fatal error: asm/msr-index.h: No such file or directory 15 | #include <asm/msr-index.h> | ^~~~~~~~~~~~~~~~~ In file included from ../../../../tools/include/asm/atomic.h:6, from ../../../../tools/include/linux/atomic.h:5, from rseq_test.c:15: ../../../../tools/include/asm/../../arch/x86/include/asm/atomic.h:11:10: fatal error: asm/cmpxchg.h: No such file or directory 11 | #include <asm/cmpxchg.h> | ^~~~~~~~~~~~~~~ Fix it by including the definition that was present in lib.mk. Fixes: 49de12ba06ef ("selftests: drop KSFT_KHDR_INSTALL make target") Cc: Guillaume Tucker Cc: Anders Roxell Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c7f47429d6cd..07e354690745 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -4,6 +4,8 @@ include ../../../build/Build.include all: top_srcdir = ../../../.. +include $(top_srcdir)/scripts/subarch.include +ARCH ?= $(SUBARCH) # For cross-builds to work, UNAME_M has to map to ARCH and arch specific # directories and targets in this Makefile. "uname -m" doesn't map to -- cgit v1.2.3 From 901d3765fa804ce42812f1d5b1f3de2dfbb26723 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Aug 2022 15:28:32 +0200 Subject: KVM: x86: revalidate steal time cache if MSR value changes Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status", 2021-11-11) open coded the previous call to kvm_map_gfn, but in doing so it dropped the comparison between the cached guest physical address and the one in the MSR. This causes an incorrect cache hit if the guest modifies the steal time address while the memslots remain the same. This can happen with kexec, in which case the steal time data is written at the address used by the old kernel instead of the new one. While at it, rename the variable from gfn to gpa since it is a plain physical address and not a right-shifted one. Reported-by: Dave Young Reported-by: Xiaoying Yan Analyzed-by: Dr.
David Alan Gilbert Cc: David Woodhouse Cc: stable@vger.kernel.org Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 33560bfa0cac..0f3c2e034740 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3414,6 +3414,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; struct kvm_steal_time __user *st; struct kvm_memslots *slots; + gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; u64 steal; u32 version; @@ -3431,13 +3432,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu) slots = kvm_memslots(vcpu->kvm); if (unlikely(slots->generation != ghc->generation || + gpa != ghc->gpa || kvm_is_error_hva(ghc->hva) || !ghc->memslot)) { - gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; - /* We rely on the fact that it fits in a single page. */ BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS); - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) || + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) || kvm_is_error_hva(ghc->hva) || !ghc->memslot) return; } -- cgit v1.2.3 From c3c28d24d910a746b02f496d190e0e8c6560224b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Aug 2022 15:28:32 +0200 Subject: KVM: x86: do not report preemption if the steal time cache is stale Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status", 2021-11-11) open coded the previous call to kvm_map_gfn, but in doing so it dropped the comparison between the cached guest physical address and the one in the MSR. This causes an incorrect cache hit if the guest modifies the steal time address while the memslots remain the same. This can happen with kexec, in which case the preempted bit is written at the address used by the old kernel instead of the new one. Cc: David Woodhouse Cc: stable@vger.kernel.org Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0f3c2e034740..8ee4698cb90a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4715,6 +4715,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) struct kvm_steal_time __user *st; struct kvm_memslots *slots; static const u8 preempted = KVM_VCPU_PREEMPTED; + gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; /* * The vCPU can be marked preempted if and only if the VM-Exit was on @@ -4742,6 +4743,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) slots = kvm_memslots(vcpu->kvm); if (unlikely(slots->generation != ghc->generation || + gpa != ghc->gpa || kvm_is_error_hva(ghc->hva) || !ghc->memslot)) return; -- cgit v1.2.3 From 70c8327c11c6d4080d4a774f531c6a6521592568 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 4 Aug 2022 23:50:28 +0000 Subject: KVM: x86: Bug the VM if an accelerated x2APIC trap occurs on a "bad" reg Bug the VM if retrieving the x2APIC MSR/register while processing an accelerated vAPIC trap VM-Exit fails. In theory it's impossible for the lookup to fail as hardware has already validated the register, but bugs happen, and not checking the result of kvm_lapic_msr_read() would result in consuming the uninitialized "val" if a KVM or hardware bug occurs.
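To illustrate the hazard, here is a minimal sketch (paraphrasing the pre-patch code visible in the diff below; not new KVM API):

	u64 val;	/* never initialized */

	if (apic_x2apic_mode(apic))
		kvm_lapic_msr_read(apic, offset, &val);	/* return value ignored */
	else
		val = kvm_lapic_get_reg(apic, offset);

	/* if the read above failed, everything past this point consumes garbage */

If kvm_lapic_msr_read() fails, "val" is used uninitialized; the patch converts that silent failure into a KVM_BUG_ON() that bugs the VM instead.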
Fixes: 1bd9dfec9fd4 ("KVM: x86: Do not block APIC write for non ICR registers") Reported-by: Dan Carpenter Cc: Suravee Suthikulpanit Signed-off-by: Sean Christopherson Message-Id: <20220804235028.1766253-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e2ce3556915e..9dda989a1cf0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2284,10 +2284,12 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) struct kvm_lapic *apic = vcpu->arch.apic; u64 val; - if (apic_x2apic_mode(apic)) - kvm_lapic_msr_read(apic, offset, &val); - else + if (apic_x2apic_mode(apic)) { + if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm)) + return; + } else { val = kvm_lapic_get_reg(apic, offset); + } /* * ICR is a single 64-bit register when x2APIC is enabled. For legacy -- cgit v1.2.3 From 2bc685e63332957f906446d281162f93b5d523fa Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 18 Jul 2022 15:47:56 +0800 Subject: KVM: X86: avoid uninitialized 'fault.async_page_fault' from fixed-up #PF kvm_fixup_and_inject_pf_error() was introduced to fix up the error code (e.g., to add the RSVD flag) and inject the #PF to the guest, when guest MAXPHYADDR is smaller than the host one. When it comes to nested, L0 is expected to intercept and fix up the #PF and then inject to L2 directly if - L2.MAXPHYADDR < L0.MAXPHYADDR and - L1 has no intention to intercept L2's #PF (e.g., L2 and L1 have the same MAXPHYADDR value && L1 is using EPT for L2), instead of constructing a #PF VM Exit to L1. Currently, with PFEC_MASK and PFEC_MATCH both set to 0 in vmcs02, the interception and injection may happen on all L2 #PFs. However, failing to initialize 'fault' in kvm_fixup_and_inject_pf_error() may result in fault.async_page_fault NOT being zeroed, and later the #PF being treated as a nested async page fault, and then being injected to L1. Instead of zeroing 'fault' at the beginning of this function, we manually set the value of 'fault.async_page_fault', because false is the value we really expect. Fixes: 897861479c064 ("KVM: x86: Add helper functions for illegal GPA checking and page fault injection") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216178 Reported-by: Yang Lixiao Signed-off-by: Yu Zhang Reviewed-by: Sean Christopherson Message-Id: <20220718074756.53788-1-yu.c.zhang@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8ee4698cb90a..924016d54fbf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13022,6 +13022,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c fault.error_code = error_code; fault.nested_page_fault = false; fault.address = gva; + fault.async_page_fault = false; } vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault); } -- cgit v1.2.3 From 4ac5b4237793a6db791999edd53f0396c04053cd Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Fri, 29 Jul 2022 15:48:01 +0200 Subject: KVM: x86: emulator: Fix illegal LEA handling The emulator mishandles LEA with a register source operand. Even though such a LEA is illegal, it can be encoded and fed to the CPU, in which case real hardware throws #UD. The emulator, instead, returns the address of x86_emulate_ctxt._regs. This info leak hurts the host's kASLR. Tell the decoder that illegal LEA is not to be emulated.
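As a concrete illustration (bytes chosen here for the example, not taken from the patch), opcode 0x8d with a ModRM byte whose mod field is 11b encodes LEA with a register "source", which real hardware rejects:

	/* 0x8d 0xc0 == "lea eax, eax": ModRM.mod == 3, so #UD on real CPUs */
	static const u8 illegal_lea[] = { 0x8d, 0xc0 };

Pre-patch, emulating such bytes returned the address of x86_emulate_ctxt._regs rather than injecting #UD, leaking a host kernel pointer.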
Signed-off-by: Michal Luczaj Message-Id: <20220729134801.1120-1-mhal@rbox.co> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 047c583596bb..b4eeb7c75dfa 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4578,6 +4578,10 @@ static const struct mode_dual mode_dual_63 = { N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd) }; +static const struct instr_dual instr_dual_8d = { + D(DstReg | SrcMem | ModRM | NoAccess), N +}; + static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ F6ALU(Lock, em_add), @@ -4634,7 +4638,7 @@ static const struct opcode opcode_table[256] = { I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg), - D(ModRM | SrcMem | NoAccess | DstReg), + ID(0, &instr_dual_8d), I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm), G(0, group1A), /* 0x90 - 0x97 */ -- cgit v1.2.3 From 982bae43f11c37b51d2f1961bb25ef7cac3746fa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 3 Aug 2022 22:49:55 +0000 Subject: KVM: x86: Tag kvm_mmu_x86_module_init() with __init Mark kvm_mmu_x86_module_init() with __init, the entire reason it exists is to initialize variables when kvm.ko is loaded, i.e. it must never be called after module initialization. Fixes: 1d0e84806047 ("KVM: x86/mmu: Resolve nx_huge_pages when kvm.ko is loaded") Cc: stable@vger.kernel.org Reviewed-by: Kai Huang Tested-by: Michael Roth Signed-off-by: Sean Christopherson Message-Id: <20220803224957.1285926-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu/mmu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e8281d64a431..5ffa578cafe1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1704,7 +1704,7 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) #define kvm_arch_pmi_in_guest(vcpu) \ ((vcpu) && (vcpu)->arch.handling_intr_from_guest) -void kvm_mmu_x86_module_init(void); +void __init kvm_mmu_x86_module_init(void); int kvm_mmu_vendor_module_init(void); void kvm_mmu_vendor_module_exit(void); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 06ac8c7cef67..300927fd7b53 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6698,7 +6698,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as * its default value of -1 is technically undefined behavior for a boolean. */ -void kvm_mmu_x86_module_init(void) +void __init kvm_mmu_x86_module_init(void) { if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); -- cgit v1.2.3 From c3e0c8c2e8b17bae30d5978bc2decdd4098f0f99 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 3 Aug 2022 22:49:56 +0000 Subject: KVM: x86/mmu: Fully re-evaluate MMIO caching when SPTE masks change Fully re-evaluate whether or not MMIO caching can be enabled when SPTE masks change; simply clearing enable_mmio_caching when a configuration isn't compatible with caching fails to handle the scenario where the masks are updated, e.g. by VMX for EPT or by SVM to account for the C-bit location, and toggle compatibility from false=>true. 
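A hedged sketch of that sequence (the mask variables are made up for illustration; the function signature matches the patch below):

	/* early setup installs a mask that is incompatible with MMIO caching */
	kvm_mmu_set_mmio_spte_mask(mmio_value, incompatible_mask, access_mask);

	/* later, e.g. SVM accounting for the C-bit, installs a compatible mask */
	kvm_mmu_set_mmio_spte_mask(mmio_value, compatible_mask, access_mask);

If the first call permanently cleared enable_mmio_caching, the second call could never re-enable caching even though the final configuration supports it.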
Snapshot the original module param so that re-evaluating MMIO caching preserves userspace's desire to allow caching. Use a snapshot approach so that enable_mmio_caching still reflects KVM's actual behavior. Fixes: 8b9e74bfbf8c ("KVM: x86/mmu: Use enable_mmio_caching to track if MMIO caching is enabled") Reported-by: Michael Roth Cc: Tom Lendacky Cc: stable@vger.kernel.org Tested-by: Michael Roth Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Message-Id: <20220803224957.1285926-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++++ arch/x86/kvm/mmu/spte.c | 19 +++++++++++++++++++ arch/x86/kvm/mmu/spte.h | 1 + 3 files changed, 24 insertions(+) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 300927fd7b53..918ff23c0bc3 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6697,11 +6697,15 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) /* * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as * its default value of -1 is technically undefined behavior for a boolean. + * Forward the module init call to SPTE code so that it too can handle module + * params that need to be resolved/snapshot. */ void __init kvm_mmu_x86_module_init(void) { if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); + + kvm_mmu_spte_module_init(); } /* diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 7314d27d57a4..66f76f5a15bd 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -20,6 +20,7 @@ #include bool __read_mostly enable_mmio_caching = true; +static bool __ro_after_init allow_mmio_caching; module_param_named(mmio_caching, enable_mmio_caching, bool, 0444); u64 __read_mostly shadow_host_writable_mask; @@ -43,6 +44,18 @@ u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; u8 __read_mostly shadow_phys_bits; +void __init kvm_mmu_spte_module_init(void) +{ + /* + * Snapshot userspace's desire to allow MMIO caching. Whether or not + * KVM can actually enable MMIO caching depends on vendor-specific + * hardware capabilities and other module params that can't be resolved + * until the vendor module is loaded, i.e. enable_mmio_caching can and + * will change when the vendor module is (re)loaded. + */ + allow_mmio_caching = enable_mmio_caching; +} + static u64 generation_mmio_spte_mask(u64 gen) { u64 mask; @@ -340,6 +353,12 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask) BUG_ON((u64)(unsigned)access_mask != access_mask); WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); + /* + * Reset to the original module param value to honor userspace's desire + * to (dis)allow MMIO caching. Update the param itself so that + * userspace can see whether or not KVM is actually using MMIO caching. 
+ */ + enable_mmio_caching = allow_mmio_caching; if (!enable_mmio_caching) mmio_value = 0; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index cabe3fbb4f39..26b144ffd146 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -450,6 +450,7 @@ static inline u64 restore_acc_track_spte(u64 spte) u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn); +void __init kvm_mmu_spte_module_init(void); void kvm_mmu_reset_all_pte_masks(void); #endif -- cgit v1.2.3 From 0c29397ac1fdd64ae59941a477511a05e61a4754 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 3 Aug 2022 22:49:57 +0000 Subject: KVM: SVM: Disable SEV-ES support if MMIO caching is disabled Disable SEV-ES if MMIO caching is disabled as SEV-ES relies on MMIO SPTEs generating #NPF(RSVD), which are reflected by the CPU into the guest as a #VC. With SEV-ES, the untrusted host, a.k.a. KVM, doesn't have access to the guest instruction stream or register state and so can't directly emulate in response to a #NPF on an emulated MMIO GPA. Disabling MMIO caching means guest accesses to emulated MMIO ranges cause #NPF(!PRESENT), and those flavors of #NPF cause automatic VM-Exits, not #VC. Adjust KVM's MMIO masks to account for the C-bit location prior to doing SEV(-ES) setup, and document that dependency between adjusting the MMIO SPTE mask and SEV(-ES) setup. Fixes: b09763da4dd8 ("KVM: x86/mmu: Add module param to disable MMIO caching (for testing)") Reported-by: Michael Roth Tested-by: Michael Roth Cc: Tom Lendacky Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220803224957.1285926-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.h | 2 ++ arch/x86/kvm/mmu/spte.c | 1 + arch/x86/kvm/mmu/spte.h | 2 -- arch/x86/kvm/svm/sev.c | 10 ++++++++++ arch/x86/kvm/svm/svm.c | 9 ++++++--- 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index a99acec925eb..6bdaacb6faa0 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -6,6 +6,8 @@ #include "kvm_cache_regs.h" #include "cpuid.h" +extern bool __read_mostly enable_mmio_caching; + #define PT_WRITABLE_SHIFT 1 #define PT_USER_SHIFT 2 diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 66f76f5a15bd..03ca740bf721 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -22,6 +22,7 @@ bool __read_mostly enable_mmio_caching = true; static bool __ro_after_init allow_mmio_caching; module_param_named(mmio_caching, enable_mmio_caching, bool, 0444); +EXPORT_SYMBOL_GPL(enable_mmio_caching); u64 __read_mostly shadow_host_writable_mask; u64 __read_mostly shadow_mmu_writable_mask; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 26b144ffd146..9a9414b8d1d6 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -5,8 +5,6 @@ #include "mmu_internal.h" -extern bool __read_mostly enable_mmio_caching; - /* * A MMU present SPTE is backed by actual memory and may or may not be present * in hardware. E.g. MMIO SPTEs are not considered present.
Use bit 11, as it diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index b0e793e7d85c..28064060413a 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -22,6 +22,7 @@ #include #include +#include "mmu.h" #include "x86.h" #include "svm.h" #include "svm_ops.h" @@ -2221,6 +2222,15 @@ void __init sev_hardware_setup(void) if (!sev_es_enabled) goto out; + /* + * SEV-ES requires MMIO caching as KVM doesn't have access to the guest + * instruction stream, i.e. can't emulate in response to a #NPF and + * instead relies on #NPF(RSVD) being reflected into the guest as #VC + * (the guest can then do a #VMGEXIT to request MMIO emulation). + */ + if (!enable_mmio_caching) + goto out; + /* Does the CPU support SEV-ES? */ if (!boot_cpu_has(X86_FEATURE_SEV_ES)) goto out; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 38f873cb6f2c..f3813dbacb9f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5034,13 +5034,16 @@ static __init int svm_hardware_setup(void) /* Setup shadow_me_value and shadow_me_mask */ kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask); - /* Note, SEV setup consumes npt_enabled. */ + svm_adjust_mmio_mask(); + + /* + * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which + * may be modified by svm_adjust_mmio_mask()). + */ sev_hardware_setup(); svm_hv_hardware_setup(); - svm_adjust_mmio_mask(); - for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) -- cgit v1.2.3 From af735db31285fa699384c649be72a9f32ecbb665 Mon Sep 17 00:00:00 2001 From: Coleman Dietsch Date: Mon, 8 Aug 2022 14:06:06 -0500 Subject: KVM: x86/xen: Initialize Xen timer only once Add a check for existing xen timers before initializing a new one. Currently kvm_xen_init_timer() is called on every KVM_XEN_VCPU_ATTR_TYPE_TIMER, which is causing the following ODEBUG crash when vcpu->arch.xen.timer is already set. ODEBUG: init active (active state 0) object type: hrtimer hint: xen_timer_callbac0 RIP: 0010:debug_print_object+0x16e/0x250 lib/debugobjects.c:502 Call Trace: __debug_object_init debug_hrtimer_init debug_init hrtimer_init kvm_xen_init_timer kvm_xen_vcpu_set_attr kvm_arch_vcpu_ioctl kvm_vcpu_ioctl vfs_ioctl Fixes: 536395260582 ("KVM: x86/xen: handle PV timers oneshot mode") Cc: stable@vger.kernel.org Link: https://syzkaller.appspot.com/bug?id=8234a9dfd3aafbf092cc5a7cd9842e3ebc45fc42 Reported-by: syzbot+e54f930ed78eb0f85281@syzkaller.appspotmail.com Signed-off-by: Coleman Dietsch Reviewed-by: Sean Christopherson Message-Id: <20220808190607.323899-2-dietschc@csp.edu> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index a0c05ccbf4b1..6e554041e862 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -713,7 +713,9 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } vcpu->arch.xen.timer_virq = data->u.timer.port; - kvm_xen_init_timer(vcpu); + + if (!vcpu->arch.xen.timer.function) + kvm_xen_init_timer(vcpu); /* Restart the timer if it's set */ if (data->u.timer.expires_ns) -- cgit v1.2.3 From c036899136355758dcd88878145036ab4d9c1f26 Mon Sep 17 00:00:00 2001 From: Coleman Dietsch Date: Mon, 8 Aug 2022 14:06:07 -0500 Subject: KVM: x86/xen: Stop Xen timer before changing IRQ Stop Xen timer (if it's running) prior to changing the IRQ vector and potentially (re)starting the timer. Changing the IRQ vector while the timer is still running can result in KVM injecting a garbage event, e.g. 
kvm_xen_inject_timer_irqs() could see a non-zero xen.timer_pending from a previous timer but inject the new xen.timer_virq. Fixes: 536395260582 ("KVM: x86/xen: handle PV timers oneshot mode") Cc: stable@vger.kernel.org Link: https://syzkaller.appspot.com/bug?id=8234a9dfd3aafbf092cc5a7cd9842e3ebc45fc42 Reported-by: syzbot+e54f930ed78eb0f85281@syzkaller.appspotmail.com Signed-off-by: Coleman Dietsch Reviewed-by: Sean Christopherson Acked-by: David Woodhouse Message-Id: <20220808190607.323899-3-dietschc@csp.edu> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 6e554041e862..280cb5dc7341 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -707,26 +707,25 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; case KVM_XEN_VCPU_ATTR_TYPE_TIMER: - if (data->u.timer.port) { - if (data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) { - r = -EINVAL; - break; - } - vcpu->arch.xen.timer_virq = data->u.timer.port; - - if (!vcpu->arch.xen.timer.function) - kvm_xen_init_timer(vcpu); - - /* Restart the timer if it's set */ - if (data->u.timer.expires_ns) - kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, - data->u.timer.expires_ns - - get_kvmclock_ns(vcpu->kvm)); - } else if (kvm_xen_timer_enabled(vcpu)) { - kvm_xen_stop_timer(vcpu); - vcpu->arch.xen.timer_virq = 0; + if (data->u.timer.port && + data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) { + r = -EINVAL; + break; } + if (!vcpu->arch.xen.timer.function) + kvm_xen_init_timer(vcpu); + + /* Stop the timer (if it's running) before changing the vector */ + kvm_xen_stop_timer(vcpu); + vcpu->arch.xen.timer_virq = data->u.timer.port; + + /* Start the timer if the new value has a valid vector+expiry. */ + if (data->u.timer.port && data->u.timer.expires_ns) + kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, + data->u.timer.expires_ns - + get_kvmclock_ns(vcpu->kvm)); + r = 0; break; -- cgit v1.2.3 From 1685c0f32554a7f35962061d17155c58454f1cd2 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Sun, 7 Aug 2022 05:21:41 +0000 Subject: KVM: x86/mmu: rename trace function name for asynchronous page fault Rename the tracepoint function from trace_kvm_async_pf_doublefault() to trace_kvm_async_pf_repeated_fault() to make its purpose clear, since double fault has nothing to do with this trace function. Asynchronous Page Fault (APF) is an artifact generated by KVM when it cannot find a physical page to satisfy an EPT violation. KVM uses APF to tell the guest OS to do something else such as scheduling other guest processes to make forward progress. However, when another guest process also touches a previously APFed page, KVM halts the vCPU instead of generating a repeated APF to avoid wasting cycles. Double fault (#DF) clearly has a different meaning and a different consequence when triggered. #DF requires two nested contributory exceptions instead of two page faults faulting at the same address. A previous bug on APF indicates that it may trigger a double fault in the guest [1] and clearly this trace function has nothing to do with it. So renaming this function is a valid choice. No functional change intended.
[1] https://www.spinics.net/lists/kvm/msg214957.html Signed-off-by: Mingwei Zhang Message-Id: <20220807052141.69186-1-mizhang@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- include/trace/events/kvm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 918ff23c0bc3..eccddb136954 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4164,7 +4164,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(fault->addr, fault->gfn); if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) { - trace_kvm_async_pf_doublefault(fault->addr, fault->gfn); + trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return RET_PF_RETRY; } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) { diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 37e1e1a2d67d..3bd31ea23fee 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -282,7 +282,7 @@ DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page, TP_ARGS(gva, gfn) ); -DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_doublefault, +DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_repeated_fault, TP_PROTO(u64 gva, u64 gfn), -- cgit v1.2.3 From 8bad4606acc91523f54fe4f254fd9f187cd21204 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 5 Aug 2022 19:41:33 +0000 Subject: KVM: x86/mmu: Add sanity check that MMIO SPTE mask doesn't overlap gen Add compile-time and init-time sanity checks to ensure that the MMIO SPTE mask doesn't overlap the MMIO SPTE generation or the MMU-present bit. The generation currently avoids using bit 63, but that's as much coincidence as it is strictly necessary. That will change in the future, as TDX support will require setting bit 63 (SUPPRESS_VE) in the mask. Explicitly carve out the bits that are allowed in the mask so that any future shuffling of SPTE bits doesn't silently break MMIO caching (KVM has broken MMIO caching more than once due to overlapping the generation with other things). Suggested-by: Kai Huang Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Message-Id: <20220805194133.86299-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/spte.c | 8 ++++++++ arch/x86/kvm/mmu/spte.h | 14 ++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 03ca740bf721..2e08b2a45361 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -363,6 +363,14 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask) if (!enable_mmio_caching) mmio_value = 0; + /* + * The mask must contain only bits that are carved out specifically for + * the MMIO SPTE mask, e.g. to ensure there's no overlap with the MMIO + * generation.
+ */ + if (WARN_ON(mmio_mask & ~SPTE_MMIO_ALLOWED_MASK)) + mmio_value = 0; + /* * Disable MMIO caching if the MMIO value collides with the bits that * are used to hold the relocated GFN when the L1TF mitigation is diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 9a9414b8d1d6..f3744eea45f5 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -123,6 +123,20 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK)); static_assert(!(SPTE_MMU_PRESENT_MASK & (MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK))); +/* + * The SPTE MMIO mask must NOT overlap the MMIO generation bits or the + * MMU-present bit. The generation obviously co-exists with the magic MMIO + * mask/value, and MMIO SPTEs are considered !MMU-present. + * + * The SPTE MMIO mask is allowed to use hardware "present" bits (i.e. all EPT + * RWX bits), all physical address bits (legal PA bits are used for "fast" MMIO + * and so they're off-limits for generation; additional checks ensure the mask + * doesn't overlap legal PA bits), and bit 63 (carved out for future usage). + */ +#define SPTE_MMIO_ALLOWED_MASK (BIT_ULL(63) | GENMASK_ULL(51, 12) | GENMASK_ULL(2, 0)) +static_assert(!(SPTE_MMIO_ALLOWED_MASK & + (SPTE_MMU_PRESENT_MASK | MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK))); + #define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1) #define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1) -- cgit v1.2.3 From f2759c08d8f4d584e88d129e7392260fb4079ec2 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 20 Jul 2022 09:22:47 +0000 Subject: KVM: Shove vm stats_id init into kvm_create_vm() Initialize stats_id alongside other struct kvm fields to make it more difficult to unintentionally access stats_id before it's set. While at it, move the format string to the first line of the call and fix the indentation of the second line. No functional change intended. Signed-off-by: Oliver Upton Message-Id: <20220720092259.3491733-2-oliver.upton@linux.dev> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 32896c845ffe..8230f22cc01c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1155,6 +1155,9 @@ static struct kvm *kvm_create_vm(unsigned long type) */ kvm->debugfs_dentry = ERR_PTR(-ENOENT); + snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d", + task_pid_nr(current)); + if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; if (init_srcu_struct(&kvm->irq_srcu)) @@ -4902,9 +4905,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) if (r < 0) goto put_kvm; - snprintf(kvm->stats_id, sizeof(kvm->stats_id), - "kvm-%d", task_pid_nr(current)); - file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (IS_ERR(file)) { put_unused_fd(r); -- cgit v1.2.3 From 58fc116645c53db117c8e6c7b549b228e24a6c73 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 20 Jul 2022 09:22:48 +0000 Subject: KVM: Shove vcpu stats_id init into kvm_vcpu_init() Initialize stats_id alongside other kvm_vcpu fields to make it more difficult to unintentionally access stats_id before it's set. No functional change intended. 
Signed-off-by: Oliver Upton Message-Id: <20220720092259.3491733-3-oliver.upton@linux.dev> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8230f22cc01c..f601428e0cc8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -484,6 +484,10 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->ready = false; preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); vcpu->last_used_slot = NULL; + + /* Fill the stats id string for the vcpu */ + snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", + task_pid_nr(current), id); } static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -3919,10 +3923,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (r) goto unlock_vcpu_destroy; - /* Fill the stats id string for the vcpu */ - snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", - task_pid_nr(current), id); - /* Now it's all set up, let userspace reach it */ kvm_get_kvm(kvm); r = create_vcpu_fd(vcpu); -- cgit v1.2.3 From 20020f4cf2f92115c34b7bbaea02fcc810709a7e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 20 Jul 2022 09:22:49 +0000 Subject: KVM: Get an fd before creating the VM Allocate a VM's fd at the very beginning of kvm_dev_ioctl_create_vm() so that KVM can use the fd value to generate strings, e.g. for debugfs, when creating and initializing the VM. Signed-off-by: Oliver Upton Message-Id: <20220720092259.3491733-4-oliver.upton@linux.dev> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f601428e0cc8..eeb084a9cb70 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4889,25 +4889,27 @@ EXPORT_SYMBOL_GPL(file_is_kvm); static int kvm_dev_ioctl_create_vm(unsigned long type) { - int r; + int r, fd; struct kvm *kvm; struct file *file; + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return fd; + kvm = kvm_create_vm(type); - if (IS_ERR(kvm)) - return PTR_ERR(kvm); + if (IS_ERR(kvm)) { + r = PTR_ERR(kvm); + goto put_fd; + } + #ifdef CONFIG_KVM_MMIO r = kvm_coalesced_mmio_init(kvm); if (r < 0) goto put_kvm; #endif - r = get_unused_fd_flags(O_CLOEXEC); - if (r < 0) - goto put_kvm; - file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (IS_ERR(file)) { - put_unused_fd(r); r = PTR_ERR(file); goto put_kvm; } @@ -4918,18 +4920,20 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) * cases it will be called by the final fput(file) and will take * care of doing kvm_put_kvm(kvm). */ - if (kvm_create_vm_debugfs(kvm, r) < 0) { - put_unused_fd(r); + if (kvm_create_vm_debugfs(kvm, fd) < 0) { fput(file); - return -ENOMEM; + r = -ENOMEM; + goto put_fd; } kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm); - fd_install(r, file); - return r; + fd_install(fd, file); + return fd; put_kvm: kvm_put_kvm(kvm); +put_fd: + put_unused_fd(fd); return r; } -- cgit v1.2.3 From 59f82aad5940ffd0fedfe07ff755168722193d56 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 20 Jul 2022 09:22:50 +0000 Subject: KVM: Pass the name of the VM fd to kvm_create_vm_debugfs() At the time the VM fd is used in kvm_create_vm_debugfs(), the fd has been allocated but not yet installed. It is only really useful as an identifier in strings for the VM (such as debugfs).
Treat it exactly as such by passing the string name of the fd to kvm_create_vm_debugfs(), futureproofing against possible misuse of the VM fd. Suggested-by: Sean Christopherson Signed-off-by: Oliver Upton Message-Id: <20220720092259.3491733-5-oliver.upton@linux.dev> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eeb084a9cb70..8e08f66ddbd4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1021,7 +1021,7 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) } } -static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) +static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) { static DEFINE_MUTEX(kvm_debugfs_lock); struct dentry *dent; @@ -1035,7 +1035,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) if (!debugfs_initialized()) return 0; - snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); + snprintf(dir_name, sizeof(dir_name), "%d-%s", task_pid_nr(current), fdname); mutex_lock(&kvm_debugfs_lock); dent = debugfs_lookup(dir_name, kvm_debugfs_dir); if (dent) { @@ -4889,6 +4889,7 @@ EXPORT_SYMBOL_GPL(file_is_kvm); static int kvm_dev_ioctl_create_vm(unsigned long type) { + char fdname[ITOA_MAX_LEN + 1]; int r, fd; struct kvm *kvm; struct file *file; @@ -4897,6 +4898,8 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) if (fd < 0) return fd; + snprintf(fdname, sizeof(fdname), "%d", fd); + kvm = kvm_create_vm(type); if (IS_ERR(kvm)) { r = PTR_ERR(kvm); @@ -4920,7 +4923,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) * cases it will be called by the final fput(file) and will take * care of doing kvm_put_kvm(kvm). */ - if (kvm_create_vm_debugfs(kvm, fd) < 0) { + if (kvm_create_vm_debugfs(kvm, fdname) < 0) { fput(file); r = -ENOMEM; goto put_fd; -- cgit v1.2.3 From b74ed7a68ec183403763571a54ca3dcc24cf19eb Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 20 Jul 2022 09:22:51 +0000 Subject: KVM: Actually create debugfs in kvm_create_vm() Doing debugfs creation after vm creation leaves things in a quasi-initialized state for a while. This is further complicated by the fact that we tear down debugfs from kvm_destroy_vm(). Align debugfs and stats init/destroy with the vm init/destroy pattern to avoid any headaches. Note that a fix for a benign mistake in the error handling for calls to kvm_arch_create_vm_debugfs() is rolled in. Since all implementations of the function return 0 unconditionally, it isn't actually a bug at the moment. Lastly, tear down debugfs/stats data in the kvm_create_vm_debugfs() error path. Previously it was safe to assume that kvm_destroy_vm() would take out the garbage; that is no longer the case.
Signed-off-by: Oliver Upton Message-Id: <20220720092259.3491733-6-oliver.upton@linux.dev> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8e08f66ddbd4..515dfe9d3bcf 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1028,7 +1028,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; const struct _kvm_stats_desc *pdesc; - int i, ret; + int i, ret = -ENOMEM; int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; @@ -1054,13 +1054,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) sizeof(*kvm->debugfs_stat_data), GFP_KERNEL_ACCOUNT); if (!kvm->debugfs_stat_data) - return -ENOMEM; + goto out_err; for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) { pdesc = &kvm_vm_stats_desc[i]; stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); if (!stat_data) - return -ENOMEM; + goto out_err; stat_data->kvm = kvm; stat_data->desc = pdesc; @@ -1075,7 +1075,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) pdesc = &kvm_vcpu_stats_desc[i]; stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); if (!stat_data) - return -ENOMEM; + goto out_err; stat_data->kvm = kvm; stat_data->desc = pdesc; @@ -1087,12 +1087,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) } ret = kvm_arch_create_vm_debugfs(kvm); - if (ret) { - kvm_destroy_vm_debugfs(kvm); - return i; - } + if (ret) + goto out_err; return 0; +out_err: + kvm_destroy_vm_debugfs(kvm); + return ret; } /* @@ -1123,7 +1124,7 @@ int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm) return 0; } -static struct kvm *kvm_create_vm(unsigned long type) +static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) { struct kvm *kvm = kvm_arch_alloc_vm(); struct kvm_memslots *slots; @@ -1212,7 +1213,7 @@ static struct kvm *kvm_create_vm(unsigned long type) r = kvm_arch_post_init_vm(kvm); if (r) - goto out_err; + goto out_err_mmu_notifier; mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); @@ -1228,12 +1229,18 @@ static struct kvm *kvm_create_vm(unsigned long type) */ if (!try_module_get(kvm_chardev_ops.owner)) { r = -ENODEV; - goto out_err; + goto out_err_mmu_notifier; } + r = kvm_create_vm_debugfs(kvm, fdname); + if (r) + goto out_err; + return kvm; out_err: + module_put(kvm_chardev_ops.owner); +out_err_mmu_notifier: #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) if (kvm->mmu_notifier.ops) mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); @@ -4900,7 +4907,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) snprintf(fdname, sizeof(fdname), "%d", fd); - kvm = kvm_create_vm(type); + kvm = kvm_create_vm(type, fdname); if (IS_ERR(kvm)) { r = PTR_ERR(kvm); goto put_fd; @@ -4923,11 +4930,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) * cases it will be called by the final fput(file) and will take * care of doing kvm_put_kvm(kvm). 
*/ - if (kvm_create_vm_debugfs(kvm, fdname) < 0) { - fput(file); - r = -ENOMEM; - goto put_fd; - } kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm); fd_install(fd, file); -- cgit v1.2.3 From 66d42ac73fc680dbd7a1402f8b44967426522d0f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 10 Aug 2022 18:41:13 +0800 Subject: KVM: selftests: Make rseq compatible with glibc-2.35 The rseq information is registered by TLS, starting from glibc-2.35. In this case, the test always fails due to syscall(__NR_rseq). For example, on RHEL9.1 where upstream glibc-2.35 features are enabled on downstream glibc-2.34, the test fails like below. # ./rseq_test ==== Test Assertion Failure ==== rseq_test.c:60: !r pid=112043 tid=112043 errno=22 - Invalid argument 1 0x0000000000401973: main at rseq_test.c:226 2 0x0000ffff84b6c79b: ?? ??:0 3 0x0000ffff84b6c86b: ?? ??:0 4 0x0000000000401b6f: _start at ??:? rseq failed, errno = 22 (Invalid argument) # rpm -aq | grep glibc-2 glibc-2.34-39.el9.aarch64 Fix the issue by using "../rseq/rseq.c" to fetch the rseq information, registered by TLS if it exists. Otherwise, we're going to register our own rseq information as before. Reported-by: Yihuang Yu Suggested-by: Florian Weimer Suggested-by: Mathieu Desnoyers Suggested-by: Paolo Bonzini Signed-off-by: Gavin Shan Message-Id: <20220810104114.6838-2-gshan@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 5 +++-- tools/testing/selftests/kvm/rseq_test.c | 26 ++++++-------------------- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 07e354690745..4c122f1b1737 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -199,7 +199,8 @@ endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$( Date: Wed, 10 Aug 2022 18:41:14 +0800 Subject: KVM: selftests: Use getcpu() instead of sched_getcpu() in rseq_test sched_getcpu() is glibc dependent and it can simply return the CPU ID from the registered rseq information, as Florian Weimer pointed out. In this case, it's pointless to compare the return value from sched_getcpu() and that fetched from the registered rseq information. Fix the issue by replacing sched_getcpu() with getcpu(), as Florian suggested. The comments are modified accordingly by replacing "sched_getcpu()" with "getcpu()". Reported-by: Yihuang Yu Suggested-by: Florian Weimer Suggested-by: Mathieu Desnoyers Suggested-by: Sean Christopherson Signed-off-by: Gavin Shan Message-Id: <20220810104114.6838-3-gshan@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/rseq_test.c | 42 +++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 49f6018e57bc..fac248a43666 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -41,6 +41,18 @@ static void guest_code(void) GUEST_SYNC(0); } +/* + * We have to perform direct system call for getcpu() because it's
+ */ +static void sys_getcpu(unsigned *cpu) +{ + int r; + + r = syscall(__NR_getcpu, cpu, NULL, NULL); + TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)", errno, strerror(errno)); +} + static int next_cpu(int cpu) { /* @@ -85,7 +97,7 @@ static void *migration_worker(void *__rseq_tid) atomic_inc(&seq_cnt); /* - * Ensure the odd count is visible while sched_getcpu() isn't + * Ensure the odd count is visible while getcpu() isn't * stable, i.e. while changing affinity is in-progress. */ smp_wmb(); @@ -126,10 +138,10 @@ static void *migration_worker(void *__rseq_tid) * check completes. * * 3. To ensure the read-side makes efficient forward progress, - * e.g. if sched_getcpu() involves a syscall. Stalling the - * read-side means the test will spend more time waiting for - * sched_getcpu() to stabilize and less time trying to hit - * the timing-dependent bug. + * e.g. if getcpu() involves a syscall. Stalling the read-side + * means the test will spend more time waiting for getcpu() + * to stabilize and less time trying to hit the timing-dependent + * bug. * * Because any bug in this area is likely to be timing-dependent, * run with a range of delays at 1us intervals from 1us to 10us @@ -224,9 +236,9 @@ int main(int argc, char *argv[]) /* * Verify rseq's CPU matches sched's CPU. Ensure migration - * doesn't occur between sched_getcpu() and reading the rseq - * cpu_id by rereading both if the sequence count changes, or - * if the count is odd (migration in-progress). + * doesn't occur between getcpu() and reading the rseq cpu_id + * by rereading both if the sequence count changes, or if the + * count is odd (migration in-progress). */ do { /* @@ -236,12 +248,12 @@ int main(int argc, char *argv[]) snapshot = atomic_read(&seq_cnt) & ~1; /* - * Ensure reading sched_getcpu() and rseq.cpu_id - * complete in a single "no migration" window, i.e. are - * not reordered across the seq_cnt reads. + * Ensure calling getcpu() and reading rseq.cpu_id complete + * in a single "no migration" window, i.e. are not reordered + * across the seq_cnt reads. */ smp_rmb(); - cpu = sched_getcpu(); + sys_getcpu(&cpu); rseq_cpu = rseq_current_cpu_raw(); smp_rmb(); } while (snapshot != atomic_read(&seq_cnt)); @@ -253,9 +265,9 @@ int main(int argc, char *argv[]) /* * Sanity check that the test was able to enter the guest a reasonable * number of times, e.g. didn't get stalled too often/long waiting for - * sched_getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a - * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs - * than migrations given the 1us+ delay in the migration task. + * getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly + * conservative ratio on x86-64, which can do _more_ KVM_RUNs than + * migrations given the 1us+ delay in the migration task. */ TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), "Only performed %d KVM_RUNs, task stalled too much?\n", i); -- cgit v1.2.3 From 9d27d46160737fc0da7c6b7b9b44ec5135322d2c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 4 Aug 2022 12:18:15 -0700 Subject: KVM: selftests: Test all possible "invalid" PERF_CAPABILITIES.LBR_FMT vals Test all possible input values to verify that KVM rejects all values except the exact host value. Due to the LBR format affecting the core functionality of LBRs, KVM can't emulate "other" formats, so even though there are a variety of legal values, KVM should reject anything but an exact host match. 
Suggested-by: Like Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 6ec901dab61e..069589c52f41 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -59,6 +59,7 @@ int main(int argc, char *argv[]) int ret; union cpuid10_eax eax; union perf_capabilities host_cap; + uint64_t val; host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES); host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT); @@ -91,11 +92,17 @@ int main(int argc, char *argv[]) vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format); ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format); - /* testcase 3, check invalid LBR format is rejected */ - /* Note, on Arch LBR capable platforms, LBR_FMT in perf capability msr is 0x3f, - * to avoid the failure, use a true invalid format 0x30 for the test. */ - ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0x30); - TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); + /* + * Testcase 3, check that an "invalid" LBR format is rejected. Only an + * exact match of the host's format (and 0/disabled) is allowed. + */ + for (val = 1; val <= PMU_CAP_LBR_FMT; val++) { + if (val == (host_cap.capabilities & PMU_CAP_LBR_FMT)) + continue; + + ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val); + TEST_ASSERT(!ret, "Bad LBR FMT = 0x%lx didn't fail", val); + } printf("Completed perf capability tests.\n"); kvm_vm_free(vm); -- cgit v1.2.3 From 17a024a8b981243fa1d17cf0084de43cc5f9b603 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Jul 2022 23:34:22 +0000 Subject: KVM: x86: Refresh PMU after writes to MSR_IA32_PERF_CAPABILITIES Refresh the PMU if userspace modifies MSR_IA32_PERF_CAPABILITIES. KVM consumes the vCPU's PERF_CAPABILITIES when enumerating PEBS support, but relies on CPUID updates to refresh the PMU. I.e. KVM will do the wrong thing if userspace stuffs PERF_CAPABILITIES _after_ setting guest CPUID. Opportunistically fix a curly-brace indentation. Fixes: c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS") Cc: Like Xu Signed-off-by: Sean Christopherson Message-Id: <20220727233424.2968356-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 924016d54fbf..132d662d9713 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3546,9 +3546,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vcpu->arch.perf_capabilities = data; - + kvm_pmu_refresh(vcpu); return 0; - } + } case MSR_EFER: return set_efer(vcpu, msr_info); case MSR_K7_HWCR: -- cgit v1.2.3 From 7de8e5b6b139a3a94aca24e8d8d35b77dc33e0ac Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Jul 2022 23:34:23 +0000 Subject: KVM: VMX: Use proper type-safe functions for vCPU => LBRs helpers Turn vcpu_to_lbr_desc() and vcpu_to_lbr_records() into functions in order to provide type safety, to document exactly what they return, and to allow consuming the helpers in vmx.h. Move the definitions as necessary (the macros "reference" to_vmx() before its definition). 
No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20220727233424.2968356-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index fb8e3480a9d7..f57a527cc589 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -6,6 +6,7 @@ #include #include +#include #include "capabilities.h" #include "../kvm_cache_regs.h" @@ -104,15 +105,6 @@ static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu) return pmu->version > 1; } -#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc) -#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records) - -void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu); -bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu); - -int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu); -void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu); - struct lbr_desc { /* Basic info about guest LBR records. */ struct x86_pmu_lbr records; @@ -542,6 +534,22 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) return container_of(vcpu, struct vcpu_vmx, vcpu); } +static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu) +{ + return &to_vmx(vcpu)->lbr_desc; +} + +static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu) +{ + return &vcpu_to_lbr_desc(vcpu)->records; +} + +void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu); +bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu); + +int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu); +void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu); + static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); -- cgit v1.2.3 From 6348aafa8d24c156124f76b5a1507079c3213112 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Jul 2022 23:34:24 +0000 Subject: KVM: VMX: Adjust number of LBR records for PERF_CAPABILITIES at refresh Now that the PMU is refreshed when MSR_IA32_PERF_CAPABILITIES is written by host userspace, zero out the number of LBR records for a vCPU during PMU refresh if PMU_CAP_LBR_FMT is not set in PERF_CAPABILITIES instead of handling the check at run-time. guest_cpuid_has() is expensive due to the linear search of guest CPUID entries, intel_pmu_lbr_is_enabled() is checked on every VM-Enter, _and_ simply enumerating the same "Model" as the host causes KVM to set the number of LBR records to a non-zero value. 
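With the record count zeroed at refresh time, the per-VM-Enter query (taken from the vmx.h hunk below) collapses to a single load, with no guest CPUID walk and no PERF_CAPABILITIES read on the hot path:

	static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
	{
		return !!vcpu_to_lbr_records(vcpu)->nr;
	}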
Signed-off-by: Sean Christopherson Message-Id: <20220727233424.2968356-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/pmu_intel.c | 12 +++--------- arch/x86/kvm/vmx/vmx.h | 7 +++++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 862c1a4d971b..c399637a3a79 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -171,13 +171,6 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr) return get_gp_pmc(pmu, msr, MSR_IA32_PMC0); } -bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu) -{ - struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu); - - return lbr->nr && (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_LBR_FMT); -} - static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index) { struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu); @@ -592,7 +585,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); - if (cpuid_model_is_consistent(vcpu)) + perf_capabilities = vcpu_get_perf_capabilities(vcpu); + if (cpuid_model_is_consistent(vcpu) && + (perf_capabilities & PMU_CAP_LBR_FMT)) x86_perf_get_lbr(&lbr_desc->records); else lbr_desc->records.nr = 0; @@ -600,7 +595,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) if (lbr_desc->records.nr) bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1); - perf_capabilities = vcpu_get_perf_capabilities(vcpu); if (perf_capabilities & PERF_CAP_PEBS_FORMAT) { if (perf_capabilities & PERF_CAP_PEBS_BASELINE) { pmu->pebs_enable_mask = counter_mask; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f57a527cc589..24d58c2ffaa3 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -544,9 +544,12 @@ static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu) return &vcpu_to_lbr_desc(vcpu)->records; } -void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu); -bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu); +static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu) +{ + return !!vcpu_to_lbr_records(vcpu)->nr; +} +void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu); int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu); void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From b4aed4d85ff82848eab4637ee06f532852721bba Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 27 Jun 2022 16:51:50 +0700 Subject: Documentation: KVM: extend KVM_CAP_VM_DISABLE_NX_HUGE_PAGES heading underline Extend heading underline for KVM_CAP_VM_DISABLE_NX_HUGE_PAGES to match the heading text length. Link: https://lore.kernel.org/lkml/20220627181937.3be67263@canb.auug.org.au/ Fixes: 084cc29f8bbb03 ("KVM: x86/MMU: Allow NX huge pages to be disabled on a per-vm basis") Reported-by: Stephen Rothwell Cc: Paolo Bonzini Cc: Jonathan Corbet Cc: David Matlack Cc: Ben Gardon Cc: Peter Xu Cc: kvm@vger.kernel.org Cc: linux-next@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Bagas Sanjaya Message-Id: <20220627095151.19339-2-bagasdotme@gmail.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9788b19f9ff7..4003dd93e700 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8262,7 +8262,7 @@ dump related UV data.
Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is available and supports the `KVM_PV_DUMP_CPU` subcommand. 8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES ---------------------------- +------------------------------------- :Capability KVM_CAP_VM_DISABLE_NX_HUGE_PAGES :Architectures: x86 -- cgit v1.2.3 From 19a7cc817a380f7a412d7d76e145e9e2bc47e52f Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 27 Jun 2022 16:51:51 +0700 Subject: KVM: x86/MMU: properly format KVM_CAP_VM_DISABLE_NX_HUGE_PAGES capability table There is an unexpected warning on the KVM_CAP_VM_DISABLE_NX_HUGE_PAGES capability table, which causes the table to be rendered as paragraph text instead. The warning is due to missing colons after the capability name and the returns keyword, as well as improper alignment of the multi-line returns field. Fix the warning by adding the missing colons and aligning the field. Link: https://lore.kernel.org/lkml/20220627181937.3be67263@canb.auug.org.au/ Fixes: 084cc29f8bbb03 ("KVM: x86/MMU: Allow NX huge pages to be disabled on a per-vm basis") Reported-by: Stephen Rothwell Cc: Paolo Bonzini Cc: Jonathan Corbet Cc: David Matlack Cc: Ben Gardon Cc: Peter Xu Cc: kvm@vger.kernel.org Cc: linux-next@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Bagas Sanjaya Message-Id: <20220627095151.19339-3-bagasdotme@gmail.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 4003dd93e700..abd7c32126ce 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8264,13 +8264,13 @@ available and supports the `KVM_PV_DUMP_CPU` subcommand. 8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES ------------------------------------- -:Capability KVM_CAP_VM_DISABLE_NX_HUGE_PAGES +:Capability: KVM_CAP_VM_DISABLE_NX_HUGE_PAGES :Architectures: x86 :Type: vm :Parameters: arg[0] must be 0. -:Returns 0 on success, -EPERM if the userspace process does not - have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been - created. +:Returns: 0 on success, -EPERM if the userspace process does not + have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been + created. This capability disables the NX huge pages mitigation for iTLB MULTIHIT. -- cgit v1.2.3