From 12aad9164763bb4890277d3b2a4664f443a48e3f Mon Sep 17 00:00:00 2001
From: Mathias Krause
Date: Fri, 17 Feb 2023 20:33:35 +0100
Subject: KVM: x86: Shrink struct kvm_pmu

Move the 'version' member to the beginning of the structure to reuse an
existing hole instead of introducing another one. This allows us to save
8 bytes for 64 bit builds.

Signed-off-by: Mathias Krause
Link: https://lore.kernel.org/r/20230217193336.15278-2-minipli@grsecurity.net
Signed-off-by: Sean Christopherson
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a45de1118a42..a440ceae06fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -515,6 +515,7 @@ struct kvm_pmc {
 #define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
 #define KVM_AMD_PMC_MAX_GENERIC	6
 struct kvm_pmu {
+	u8 version;
 	unsigned nr_arch_gp_counters;
 	unsigned nr_arch_fixed_counters;
 	unsigned available_event_types;
@@ -527,7 +528,6 @@ struct kvm_pmu {
 	u64 global_ovf_ctrl_mask;
 	u64 reserved_bits;
 	u64 raw_event_mask;
-	u8 version;
 	struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
 	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
 	struct irq_work irq_work;
--
cgit v1.2.3

From 7e768ce8278bafe43e2a4771a82b61856190a3fc Mon Sep 17 00:00:00 2001
From: Like Xu
Date: Tue, 4 Apr 2023 15:17:59 +0800
Subject: KVM: x86/pmu: Zero out pmu->all_valid_pmc_idx each time it's refreshed

The kvm_pmu_refresh() may be called repeatedly (e.g. configure guest
CPUID repeatedly or update MSR_IA32_PERF_CAPABILITIES) and each call
will use the last pmu->all_valid_pmc_idx value, with the residual bits
introducing additional overhead later in the vPMU emulation.

Fixes: b35e5548b411 ("KVM: x86/vPMU: Add lazy mechanism to release perf_event per vPMC")
Suggested-by: Sean Christopherson
Signed-off-by: Like Xu
Link: https://lore.kernel.org/r/20230404071759.75376-1-likexu@tencent.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/pmu.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 612e6c70ce2e..29492c2a0c82 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -589,6 +589,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
+	bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
 	static_call(kvm_x86_pmu_refresh)(vcpu);
 }
--
cgit v1.2.3

From 098f4c061ea10b777033b71c10bd9fd706820ee9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Sat, 28 Jan 2023 00:14:27 +0000
Subject: KVM: x86/pmu: Disallow legacy LBRs if architectural LBRs are available

Disallow enabling LBR support if the CPU supports architectural LBRs.
Traditional LBR support is absent on CPU models that have architectural
LBRs, and KVM doesn't yet support arch LBRs, i.e. KVM will pass through
non-existent MSRs if userspace enables LBRs for the guest.
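To illustrate the resulting policy, here is a minimal sketch (not the
kernel code itself; the helper name is hypothetical, and PMU_CAP_LBR_FMT
is the LBR-format field, MSR_IA32_PERF_CAPABILITIES[5:0]):

	/* Sketch: advertise the legacy LBR format only when arch LBRs are absent. */
	#define PMU_CAP_LBR_FMT	0x3f	/* PERF_CAPABILITIES[5:0] */

	static u64 sketch_lbr_perf_caps(u64 host_perf_cap, bool host_has_arch_lbr,
					int host_lbr_nr)
	{
		/* CPUs with architectural LBRs have no legacy LBR MSRs. */
		if (host_has_arch_lbr)
			return 0;

		/* Otherwise mirror the host's LBR format iff the host has LBRs. */
		return host_lbr_nr ? (host_perf_cap & PMU_CAP_LBR_FMT) : 0;
	}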
Cc: stable@vger.kernel.org
Cc: Yang Weijiang
Cc: Like Xu
Reported-by: Paolo Bonzini
Fixes: be635e34c284 ("KVM: vmx/pmu: Expose LBR_FMT in the MSR_IA32_PERF_CAPABILITIES")
Tested-by: Like Xu
Link: https://lore.kernel.org/r/20230128001427.2548858-1-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/vmx/vmx.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d7bf14abdba1..c18f74899f01 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7793,9 +7793,11 @@ static u64 vmx_get_perf_capabilities(void)
 	if (boot_cpu_has(X86_FEATURE_PDCM))
 		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
 
-	x86_perf_get_lbr(&lbr);
-	if (lbr.nr)
-		perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+	if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
+		x86_perf_get_lbr(&lbr);
+		if (lbr.nr)
+			perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+	}
 
 	if (vmx_pebs_supported()) {
 		perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
--
cgit v1.2.3

From b1932c5c19ddcbe9140b0583a0931b620c21ca02 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 10 Mar 2023 16:45:58 -0800
Subject: KVM: x86: Rename kvm_init_msr_list() to clarify it inits multiple lists

Rename kvm_init_msr_list() to kvm_init_msr_lists() to clarify that it
initializes multiple lists: MSRs to save, emulated MSRs, and feature
MSRs.

No functional change intended.

Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20230311004618.920745-2-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 237c483b1230..087497aebded 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7084,7 +7084,7 @@ static void kvm_probe_msr_to_save(u32 msr_index)
 	msrs_to_save[num_msrs_to_save++] = msr_index;
 }
 
-static void kvm_init_msr_list(void)
+static void kvm_init_msr_lists(void)
 {
 	unsigned i;
 
@@ -9452,7 +9452,7 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 		kvm_caps.max_guest_tsc_khz = max;
 	}
 	kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
-	kvm_init_msr_list();
+	kvm_init_msr_lists();
 
 	return 0;
 
 out_unwind_ops:
--
cgit v1.2.3

From fb3146b4dc3bc6d0c0402a75f21d628eccf9bf8c Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 10 Mar 2023 16:45:59 -0800
Subject: KVM: x86: Add a helper to query whether or not a vCPU has ever run

Add a helper to query if a vCPU has run so that KVM doesn't have to open
code the check on last_vmentry_cpu being set to a magic value.

No functional change intended.

Suggested-by: Xiaoyao Li
Cc: Like Xu
Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20230311004618.920745-3-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/cpuid.c   | 2 +-
 arch/x86/kvm/mmu/mmu.c | 2 +-
 arch/x86/kvm/x86.h     | 5 +++++
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9583a110cf5f..b736ddb42088 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -420,7 +420,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 	 * KVM_SET_CPUID{,2} again. To support this legacy behavior, check
 	 * whether the supplied CPUID data is equal to what's already set.
 	 */
-	if (vcpu->arch.last_vmentry_cpu != -1) {
+	if (kvm_vcpu_has_run(vcpu)) {
 		r = kvm_cpuid_check_equal(vcpu, e2, nent);
 		if (r)
 			return r;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 144c5a01cd77..8ced48797d59 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5393,7 +5393,7 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
 	 * kvm_arch_vcpu_ioctl().
 	 */
-	KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
+	KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm);
 }
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a8167b47b8c8..754190af1791 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -83,6 +83,11 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
 void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
 int kvm_check_nested_events(struct kvm_vcpu *vcpu);
 
+static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.last_vmentry_cpu != -1;
+}
+
 static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.exception.pending ||
--
cgit v1.2.3

From 5757f5b9562244e4b63f65576c16ca9eb21f81d2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 10 Mar 2023 16:46:00 -0800
Subject: KVM: x86: Add macros to track first...last VMX feature MSRs

Add macros to track the range of VMX feature MSRs that are emulated by
KVM to reduce the maintenance cost of extending the set of emulated MSRs.

Note, KVM doesn't necessarily emulate all known/consumed VMX MSRs, e.g.
PROCBASED_CTLS3 is consumed by KVM to enable IPI virtualization, but is
not emulated as KVM doesn't emulate/virtualize IPI virtualization for
nested guests.

No functional change intended.

Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20230311004618.920745-4-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/svm/svm.c | 2 +-
 arch/x86/kvm/vmx/vmx.c | 8 ++++----
 arch/x86/kvm/x86.h     | 8 ++++++++
 3 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 70183d2271b5..7584eb85410b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4124,7 +4124,7 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
 {
 	switch (index) {
 	case MSR_IA32_MCG_EXT_CTL:
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		return false;
 	case MSR_IA32_SMBASE:
 		if (!IS_ENABLED(CONFIG_KVM_SMM))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c18f74899f01..e89340dfa322 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1945,7 +1945,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
 static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 {
 	switch (msr->index) {
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		if (!nested)
 			return 1;
 		return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
@@ -2030,7 +2030,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
 			[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
 		break;
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
 		if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
@@ -2384,7 +2384,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vmx->msr_ia32_sgxlepubkeyhash
 			[msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
 		break;
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		if (!msr_info->host_initiated)
 			return 1; /* they are read-only */
 		if (!nested_vmx_allowed(vcpu))
@@ -6974,7 +6974,7 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
 		 * real mode.
 		 */
 		return enable_unrestricted_guest || emulate_invalid_guest_state;
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		return nested;
 	case MSR_AMD64_VIRT_SPEC_CTRL:
 	case MSR_AMD64_TSC_RATIO:
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 754190af1791..4bc483d082ee 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -40,6 +40,14 @@ void kvm_spurious_fault(void);
 	failed;								\
 })
 
+/*
+ * The first...last VMX feature MSRs that are emulated by KVM.  This may or may
+ * not cover all known VMX MSRs, as KVM doesn't emulate an MSR until there's an
+ * associated feature that KVM supports for nested virtualization.
+ */
+#define KVM_FIRST_EMULATED_VMX_MSR	MSR_IA32_VMX_BASIC
+#define KVM_LAST_EMULATED_VMX_MSR	MSR_IA32_VMX_VMFUNC
+
 #define KVM_DEFAULT_PLE_GAP		128
 #define KVM_VMX_DEFAULT_PLE_WINDOW	4096
 #define KVM_DEFAULT_PLE_WINDOW_GROW	2
--
cgit v1.2.3

From 9eb6ba31db27253a11441368d2801c1eedc48b4f Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 10 Mar 2023 16:46:01 -0800
Subject: KVM: x86: Generate set of VMX feature MSRs using first/last definitions

Add VMX MSRs to the runtime list of feature MSRs by iterating over the
range of emulated MSRs instead of manually defining each MSR in the "all"
list.  Using the range definition reduces the cost of emulating a new VMX
MSR, e.g. prevents forgetting to add an MSR to the list.

Extracting the VMX MSRs from the "all" list, which is a compile-time
constant, also shrinks the list to the point where the compiler can
heavily optimize code that iterates over the list.

No functional change intended.

Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20230311004618.920745-5-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/x86.c | 53 ++++++++++++++++++++++-------------------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 087497aebded..6b667c651951 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1543,36 +1543,19 @@ static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
 static unsigned num_emulated_msrs;
 
 /*
- * List of msr numbers which are used to expose MSR-based features that
- * can be used by a hypervisor to validate requested CPU features.
+ * List of MSRs that control the existence of MSR-based features, i.e. MSRs
+ * that are effectively CPUID leafs.  VMX MSRs are also included in the set of
+ * feature MSRs, but are handled separately to allow expedited lookups.
  */
-static const u32 msr_based_features_all[] = {
-	MSR_IA32_VMX_BASIC,
-	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
-	MSR_IA32_VMX_PINBASED_CTLS,
-	MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
-	MSR_IA32_VMX_PROCBASED_CTLS,
-	MSR_IA32_VMX_TRUE_EXIT_CTLS,
-	MSR_IA32_VMX_EXIT_CTLS,
-	MSR_IA32_VMX_TRUE_ENTRY_CTLS,
-	MSR_IA32_VMX_ENTRY_CTLS,
-	MSR_IA32_VMX_MISC,
-	MSR_IA32_VMX_CR0_FIXED0,
-	MSR_IA32_VMX_CR0_FIXED1,
-	MSR_IA32_VMX_CR4_FIXED0,
-	MSR_IA32_VMX_CR4_FIXED1,
-	MSR_IA32_VMX_VMCS_ENUM,
-	MSR_IA32_VMX_PROCBASED_CTLS2,
-	MSR_IA32_VMX_EPT_VPID_CAP,
-	MSR_IA32_VMX_VMFUNC,
-
+static const u32 msr_based_features_all_except_vmx[] = {
 	MSR_AMD64_DE_CFG,
 	MSR_IA32_UCODE_REV,
 	MSR_IA32_ARCH_CAPABILITIES,
 	MSR_IA32_PERF_CAPABILITIES,
 };
 
-static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
+			      (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
 static unsigned int num_msr_based_features;
 
 /*
@@ -7009,6 +6992,18 @@ out:
 	return r;
 }
 
+static void kvm_probe_feature_msr(u32 msr_index)
+{
+	struct kvm_msr_entry msr = {
+		.index = msr_index,
+	};
+
+	if (kvm_get_msr_feature(&msr))
+		return;
+
+	msr_based_features[num_msr_based_features++] = msr_index;
+}
+
 static void kvm_probe_msr_to_save(u32 msr_index)
 {
 	u32 dummy[2];
@@ -7110,15 +7105,11 @@ static void kvm_init_msr_lists(void)
 			emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
 	}
 
-	for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
-		struct kvm_msr_entry msr;
+	for (i = KVM_FIRST_EMULATED_VMX_MSR; i <= KVM_LAST_EMULATED_VMX_MSR; i++)
+		kvm_probe_feature_msr(i);
 
-		msr.index = msr_based_features_all[i];
-		if (kvm_get_msr_feature(&msr))
-			continue;
-
-		msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
-	}
+	for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++)
+		kvm_probe_feature_msr(msr_based_features_all_except_vmx[i]);
 }
 
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
--
cgit v1.2.3

From 0094f62c7eaaaf53a011a4e46f9f32e5f3295e8c Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 10 Mar 2023 16:46:03 -0800
Subject: KVM: x86: Disallow writes to immutable feature MSRs after KVM_RUN

Disallow writes to feature MSRs after KVM_RUN to prevent userspace from
changing the vCPU model after running the vCPU.  Similar to guest CPUID,
KVM uses feature MSRs to configure intercepts, determine what operations
are/aren't allowed, etc.  Changing the capabilities while the vCPU is
active will at best yield unpredictable guest behavior, and at worst
could be dangerous to KVM.

Allow writing the current value, e.g. so that userspace can blindly set
all MSRs when emulating RESET, and unconditionally allow writes to
MSR_IA32_UCODE_REV so that userspace can emulate patch loads.

Special case the VMX MSRs to keep the generic list small, i.e. so that
KVM can do a linear walk of the generic list without incurring meaningful
overhead.
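The intended semantics can be sketched as follows (hypothetical names,
not KVM's actual code; the prototypes stand in for the real lookup and
write paths):

	struct sketch_vcpu { bool has_run; /* ... */ };
	static bool sketch_msr_is_immutable(u32 msr);
	static int sketch_get_feature_msr(struct sketch_vcpu *v, u32 msr, u64 *val);
	static int sketch_write_feature_msr(struct sketch_vcpu *v, u32 msr, u64 val);

	/*
	 * Sketch: once the vCPU has run, a write to an immutable feature MSR
	 * is accepted only if it matches the current value (a no-op), so
	 * userspace can still blindly stuff all MSRs when emulating RESET.
	 */
	static int sketch_set_feature_msr(struct sketch_vcpu *v, u32 msr, u64 new_val)
	{
		u64 cur;

		if (v->has_run && sketch_msr_is_immutable(msr)) {
			if (sketch_get_feature_msr(v, msr, &cur) || new_val != cur)
				return -EINVAL;
			return 0;
		}

		return sketch_write_feature_msr(v, msr, new_val);
	}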
Cc: Like Xu
Cc: Yu Zhang
Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20230311004618.920745-7-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/x86.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b667c651951..cf33b7d910a6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1558,6 +1558,25 @@ static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
 			      (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
 static unsigned int num_msr_based_features;
 
+/*
+ * All feature MSRs except uCode revID, which tracks the currently loaded uCode
+ * patch, are immutable once the vCPU model is defined.
+ */
+static bool kvm_is_immutable_feature_msr(u32 msr)
+{
+	int i;
+
+	if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR)
+		return true;
+
+	for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) {
+		if (msr == msr_based_features_all_except_vmx[i])
+			return msr != MSR_IA32_UCODE_REV;
+	}
+
+	return false;
+}
+
 /*
  * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
  * does not yet virtualize. These include:
@@ -2175,6 +2194,22 @@ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 
 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 {
+	u64 val;
+
+	/*
+	 * Disallow writes to immutable feature MSRs after KVM_RUN.  KVM does
+	 * not support modifying the guest vCPU model on the fly, e.g. changing
+	 * the nVMX capabilities while L2 is running is nonsensical.  Ignore
+	 * writes of the same value, e.g. to allow userspace to blindly stuff
+	 * all MSRs when emulating RESET.
+	 */
+	if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
+		if (do_get_msr(vcpu, index, &val) || *data != val)
+			return -EINVAL;
+
+		return 0;
+	}
+
 	return kvm_set_msr_ignored_check(vcpu, index, *data, true);
 }
--
cgit v1.2.3

From 3a6de51a437fb4d2433f8a99fb59f43866cdbb98 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 10 Mar 2023 16:46:04 -0800
Subject: KVM: x86/pmu: WARN and bug the VM if PMU is refreshed after vCPU has run

Now that KVM disallows changing feature MSRs, i.e. PERF_CAPABILITIES,
after running a vCPU, WARN and bug the VM if the PMU is refreshed after
the vCPU has run.

Note, KVM has disallowed CPUID updates after running a vCPU since commit
feb627e8d6f6 ("KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN"), i.e.
PERF_CAPABILITIES was the only remaining way to trigger a PMU refresh
after KVM_RUN.
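From userspace's perspective, the enforced ordering looks roughly like
this (abbreviated KVM API usage; vCPU setup and error handling omitted):

	/* Configure the vCPU model strictly before the first KVM_RUN. */
	ioctl(vcpu_fd, KVM_SET_CPUID2, &cpuid);	/* allowed: vCPU has not run */
	ioctl(vcpu_fd, KVM_SET_MSRS, &msrs);	/* e.g. PERF_CAPABILITIES */

	ioctl(vcpu_fd, KVM_RUN, 0);

	/*
	 * After KVM_RUN, writing a *different* PERF_CAPABILITIES value fails;
	 * rewriting the same value is still accepted and short-circuits before
	 * the PMU refresh, so the new KVM_BUG_ON() cannot fire post-run.
	 */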
Cc: Like Xu
Link: https://lore.kernel.org/r/20230311004618.920745-8-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/pmu.c |  3 +++
 arch/x86/kvm/x86.c | 10 +++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 29492c2a0c82..136184fe9e92 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -589,6 +589,9 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
+	if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
+		return;
+
 	bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
 	static_call(kvm_x86_pmu_refresh)(vcpu);
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf33b7d910a6..a86ad45a53b8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3632,9 +3632,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (data & ~kvm_caps.supported_perf_cap)
 			return 1;
 
+		/*
+		 * Note, this is not just a performance optimization!  KVM
+		 * disallows changing feature MSRs after the vCPU has run; PMU
+		 * refresh will bug the VM if called after the vCPU has run.
+		 */
+		if (vcpu->arch.perf_capabilities == data)
+			break;
+
 		vcpu->arch.perf_capabilities = data;
 		kvm_pmu_refresh(vcpu);
-		return 0;
+		break;
 	case MSR_EFER:
 		return set_efer(vcpu, msr_info);
 	case MSR_K7_HWCR:
--
cgit v1.2.3

From 957d0f70e97bb2257d8bf746ff2f524b793751b3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 10 Mar 2023 16:46:05 -0800
Subject: KVM: x86/pmu: Zero out LBR capabilities during PMU refresh

Zero out the LBR capabilities during PMU refresh to avoid exposing LBRs
to the guest against userspace's wishes.  If userspace modifies the
guest's CPUID model or invokes KVM_CAP_PMU_CAPABILITY to disable vPMU
after an initial KVM_SET_CPUID2, but before the first KVM_RUN, KVM will
retain the previous LBR info due to bailing before refreshing the LBR
descriptor.

Note, this is a very theoretical bug, there is no known use case where a
VMM would deliberately enable the vPMU via KVM_SET_CPUID2, and then later
disable the vPMU.

Link: https://lore.kernel.org/r/20230311004618.920745-9-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/vmx/pmu_intel.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index e8a3be0b9df9..d889bb2a1de5 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -531,6 +531,16 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->pebs_enable_mask = ~0ull;
 	pmu->pebs_data_cfg_mask = ~0ull;
 
+	memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
+
+	/*
+	 * Setting passthrough of LBR MSRs is done only in the VM-Entry loop,
+	 * and PMU refresh is disallowed after the vCPU has run, i.e. this code
+	 * should never be reached while KVM is passing through MSRs.
+	 */
+	if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm))
+		return;
+
 	entry = kvm_find_cpuid_entry(vcpu, 0xa);
 	if (!entry || !vcpu->kvm->arch.enable_pmu)
 		return;
--
cgit v1.2.3

From cdd2fbf6360e84c02ac4cea11733083c2ca8dace Mon Sep 17 00:00:00 2001
From: Like Xu
Date: Tue, 14 Feb 2023 13:07:46 +0800
Subject: KVM: x86/pmu: Rename pmc_is_enabled() to pmc_is_globally_enabled()

The name of the function pmc_is_enabled() is a bit misleading. A PMC can
be disabled either by PERF_GLOBAL_CTRL or by its corresponding EVTSEL.
Append global semantics to its name.
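The distinction can be sketched as follows (illustrative only; the struct
and helper below are hypothetical, but the bit position follows the SDM):

	#define EVTSEL_EN (1ULL << 22)	/* IA32_PERFEVTSELx.EN */

	struct sketch_pmc {
		u64 eventsel;		/* per-counter IA32_PERFEVTSELx */
		bool global_ctrl_bit;	/* its bit in IA32_PERF_GLOBAL_CTRL */
	};

	/*
	 * A GP counter counts only when BOTH enables are set: "globally" via
	 * PERF_GLOBAL_CTRL and "locally" via its EVTSEL enable bit.
	 */
	static bool sketch_pmc_is_counting(const struct sketch_pmc *pmc)
	{
		return pmc->global_ctrl_bit && (pmc->eventsel & EVTSEL_EN);
	}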
Suggested-by: Jim Mattson
Signed-off-by: Like Xu
Link: https://lore.kernel.org/r/20230214050757.9623-2-likexu@tencent.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/pmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 136184fe9e92..35d80151b853 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -93,7 +93,7 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
 #undef __KVM_X86_PMU_OP
 }
 
-static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
+static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
 {
 	return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
 }
@@ -409,7 +409,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 	pmc_pause_counter(pmc);
 
-	if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
+	if (!pmc_speculative_in_use(pmc) || !pmc_is_globally_enabled(pmc))
 		goto reprogram_complete;
 
 	if (!check_pmu_event_filter(pmc))
@@ -688,7 +688,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
 	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
 		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
 
-		if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
+		if (!pmc || !pmc_is_globally_enabled(pmc) || !pmc_speculative_in_use(pmc))
 			continue;
 
 		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
--
cgit v1.2.3

From 8bca8c5ce40b03862d782b9991da146909199327 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 26 Jan 2023 17:08:03 -0800
Subject: KVM: VMX: Refactor intel_pmu_{g,s}et_msr() to align with other helpers

Invert the flows in intel_pmu_{g,s}et_msr()'s case statements so that
they follow the kernel's preferred style of:

	if ()
		return

	return

which is also the style used by every other {g,s}et_msr() helper (except
AMD's PMU variant, which doesn't use a switch statement).

Modify the "set" paths with costly side effects, i.e. that reprogram
counters, to skip only the side effects, i.e. to perform reserved bits
checks even if the value is unchanged.  None of the reserved bits checks
are expensive, so there's no strong justification for skipping them, and
guarding only the side effect makes it slightly more obvious what is
being skipped and why.

No functional change intended (assuming no reserved bit bugs).
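The target shape can be sketched generically (hypothetical names; not the
actual KVM code):

	/*
	 * Sketch: reject invalid values unconditionally, then guard only the
	 * costly side effect, so reserved-bit checks always run while an
	 * unchanged value skips the expensive reprogramming.
	 */
	static int sketch_set_ctrl_msr(u64 *cached, u64 data, u64 reserved_mask)
	{
		if (data & reserved_mask)
			return 1;	/* validation is never skipped */

		if (*cached != data) {	/* skip only the reprogram side effect */
			*cached = data;
			/* reprogram_counters(...) would go here */
		}

		return 0;
	}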
Link: https://lkml.kernel.org/r/Y%2B6cfen%2FCpO3%2FdLO%40google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/vmx/pmu_intel.c | 109 ++++++++++++++++++++++---------------------
 1 file changed, 57 insertions(+), 52 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index d889bb2a1de5..c45bd10f80a1 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -351,45 +351,47 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	switch (msr) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
 		msr_info->data = pmu->fixed_ctr_ctrl;
-		return 0;
+		break;
 	case MSR_CORE_PERF_GLOBAL_STATUS:
 		msr_info->data = pmu->global_status;
-		return 0;
+		break;
 	case MSR_CORE_PERF_GLOBAL_CTRL:
 		msr_info->data = pmu->global_ctrl;
-		return 0;
+		break;
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		msr_info->data = 0;
-		return 0;
+		break;
 	case MSR_IA32_PEBS_ENABLE:
 		msr_info->data = pmu->pebs_enable;
-		return 0;
+		break;
 	case MSR_IA32_DS_AREA:
 		msr_info->data = pmu->ds_area;
-		return 0;
+		break;
 	case MSR_PEBS_DATA_CFG:
 		msr_info->data = pmu->pebs_data_cfg;
-		return 0;
+		break;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
 			u64 val = pmc_read_counter(pmc);
 			msr_info->data =
 				val & pmu->counter_bitmask[KVM_PMC_GP];
-			return 0;
+			break;
 		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
 			u64 val = pmc_read_counter(pmc);
 			msr_info->data =
 				val & pmu->counter_bitmask[KVM_PMC_FIXED];
-			return 0;
+			break;
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
 			msr_info->data = pmc->eventsel;
-			return 0;
-		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
-			return 0;
+			break;
+		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) {
+			break;
+		}
+		return 1;
 	}
 
-	return 1;
+	return 0;
 }
 
 static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -402,44 +404,43 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	switch (msr) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
-		if (pmu->fixed_ctr_ctrl == data)
-			return 0;
-		if (!(data & pmu->fixed_ctr_ctrl_mask)) {
+		if (data & pmu->fixed_ctr_ctrl_mask)
+			return 1;
+
+		if (pmu->fixed_ctr_ctrl != data)
 			reprogram_fixed_counters(pmu, data);
-			return 0;
-		}
 		break;
 	case MSR_CORE_PERF_GLOBAL_STATUS:
-		if (msr_info->host_initiated) {
-			pmu->global_status = data;
-			return 0;
-		}
-		break; /* RO MSR */
+		if (!msr_info->host_initiated)
+			return 1; /* RO MSR */
+
+		pmu->global_status = data;
+		break;
 	case MSR_CORE_PERF_GLOBAL_CTRL:
-		if (pmu->global_ctrl == data)
-			return 0;
-		if (kvm_valid_perf_global_ctrl(pmu, data)) {
+		if (!kvm_valid_perf_global_ctrl(pmu, data))
+			return 1;
+
+		if (pmu->global_ctrl != data) {
 			diff = pmu->global_ctrl ^ data;
 			pmu->global_ctrl = data;
 			reprogram_counters(pmu, diff);
-			return 0;
 		}
 		break;
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
-		if (!(data & pmu->global_ovf_ctrl_mask)) {
-			if (!msr_info->host_initiated)
-				pmu->global_status &= ~data;
-			return 0;
-		}
+		if (data & pmu->global_ovf_ctrl_mask)
+			return 1;
+
+		if (!msr_info->host_initiated)
+			pmu->global_status &= ~data;
 		break;
 	case MSR_IA32_PEBS_ENABLE:
-		if (pmu->pebs_enable == data)
-			return 0;
-		if (!(data & pmu->pebs_enable_mask)) {
+		if (data & pmu->pebs_enable_mask)
+			return 1;
+
+		if (pmu->pebs_enable != data) {
 			diff = pmu->pebs_enable ^ data;
 			pmu->pebs_enable = data;
 			reprogram_counters(pmu, diff);
-			return 0;
 		}
 		break;
 	case MSR_IA32_DS_AREA:
@@ -447,15 +448,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		if (is_noncanonical_address(data, vcpu))
 			return 1;
+
 		pmu->ds_area = data;
-		return 0;
+		break;
 	case MSR_PEBS_DATA_CFG:
-		if (pmu->pebs_data_cfg == data)
-			return 0;
-		if (!(data & pmu->pebs_data_cfg_mask)) {
-			pmu->pebs_data_cfg = data;
-			return 0;
-		}
+		if (data & pmu->pebs_data_cfg_mask)
+			return 1;
+
+		pmu->pebs_data_cfg = data;
 		break;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
@@ -463,33 +463,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
 			    (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
 				return 1;
+
 			if (!msr_info->host_initiated &&
 			    !(msr & MSR_PMC_FULL_WIDTH_BIT))
 				data = (s64)(s32)data;
 			pmc->counter += data - pmc_read_counter(pmc);
 			pmc_update_sample_period(pmc);
-			return 0;
+			break;
 		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
 			pmc->counter += data - pmc_read_counter(pmc);
 			pmc_update_sample_period(pmc);
-			return 0;
+			break;
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
-			if (data == pmc->eventsel)
-				return 0;
 			reserved_bits = pmu->reserved_bits;
 			if ((pmc->idx == 2) &&
 			    (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
 				reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
-			if (!(data & reserved_bits)) {
+			if (data & reserved_bits)
+				return 1;
+
+			if (data != pmc->eventsel) {
 				pmc->eventsel = data;
 				kvm_pmu_request_counter_reprogam(pmc);
-				return 0;
 			}
-		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
-			return 0;
+			break;
+		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
+			break;
+		}
+		/* Not a known PMU MSR. */
+		return 1;
 	}
 
-	return 1;
+	return 0;
 }
 
 static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
--
cgit v1.2.3

From 649bccd7fac98225525c79cf4b1cecc4bafdfc54 Mon Sep 17 00:00:00 2001
From: Like Xu
Date: Tue, 14 Feb 2023 13:07:48 +0800
Subject: KVM: x86/pmu: Rewrite reprogram_counters() to improve performance

A valid pmc is always tested before using pmu->reprogram_pmi.  Eliminate
this part of the redundancy by setting the counter's bitmask directly,
and in addition, trigger KVM_REQ_PMU only once to save more cpu cycles.

Signed-off-by: Like Xu
Link: https://lore.kernel.org/r/20230214050757.9623-4-likexu@tencent.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/vmx/pmu_intel.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index c45bd10f80a1..eb291dfbe4aa 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -76,13 +76,13 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
 static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
 {
 	int bit;
-	struct kvm_pmc *pmc;
 
-	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
-		pmc = intel_pmc_idx_to_pmc(pmu, bit);
-		if (pmc)
-			kvm_pmu_request_counter_reprogam(pmc);
-	}
+	if (!diff)
+		return;
+
+	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+		set_bit(bit, pmu->reprogram_pmi);
+	kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu));
 }
 
 static bool intel_hw_event_available(struct kvm_pmc *pmc)
--
cgit v1.2.3

From 4fa5843d81fdce9ba7e03f8a666e2f8e592aec2b Mon Sep 17 00:00:00 2001
From: Like Xu
Date: Fri, 10 Mar 2023 19:33:49 +0800
Subject: KVM: x86/pmu: Fix a typo in kvm_pmu_request_counter_reprogam()

Fix a "reprogam" => "reprogram" typo in kvm_pmu_request_counter_reprogam().
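Stepping back to the reprogram_counters() rewrite above, the batching
idea generalizes as follows (a standalone sketch with simplified types,
not the KVM code):

	#include <stdint.h>

	/*
	 * Sketch: record every counter touched by 'diff' in one pass over the
	 * pending bitmask, then raise a single "reprogram needed" event,
	 * instead of resolving and kicking each counter individually.
	 */
	static void sketch_reprogram_counters(uint64_t *reprogram_pending,
					      uint64_t diff,
					      void (*make_request_once)(void))
	{
		if (!diff)
			return;			    /* nothing changed, no request */

		*reprogram_pending |= diff;	    /* mark all changed counters */
		make_request_once();		    /* one KVM_REQ_PMU-style kick */
	}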
Fixes: 68fb4757e867 ("KVM: x86/pmu: Defer reprogram_counter() to kvm_pmu_handle_event()")
Signed-off-by: Like Xu
Link: https://lore.kernel.org/r/20230310113349.31799-1-likexu@tencent.com
[sean: trim the changelog]
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/pmu.c           | 2 +-
 arch/x86/kvm/pmu.h           | 2 +-
 arch/x86/kvm/svm/pmu.c       | 2 +-
 arch/x86/kvm/vmx/pmu_intel.c | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 35d80151b853..2226db9601e2 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -650,7 +650,7 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
 	pmc->prev_counter = pmc->counter;
 	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
-	kvm_pmu_request_counter_reprogam(pmc);
+	kvm_pmu_request_counter_reprogram(pmc);
 }
 
 static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index be62c16f2265..5c7bbf03b599 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -195,7 +195,7 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
 					     KVM_PMC_MAX_FIXED);
 }
 
-static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc)
+static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
 {
 	set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
 	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index cc77a0681800..5fa939e411d8 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -161,7 +161,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		data &= ~pmu->reserved_bits;
 		if (data != pmc->eventsel) {
 			pmc->eventsel = data;
-			kvm_pmu_request_counter_reprogam(pmc);
+			kvm_pmu_request_counter_reprogram(pmc);
 		}
 		return 0;
 	}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index eb291dfbe4aa..741efe2c497b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -57,7 +57,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 		pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
 
 		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
-		kvm_pmu_request_counter_reprogam(pmc);
+		kvm_pmu_request_counter_reprogram(pmc);
 	}
 }
 
@@ -484,7 +484,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 			if (data != pmc->eventsel) {
 				pmc->eventsel = data;
-				kvm_pmu_request_counter_reprogam(pmc);
+				kvm_pmu_request_counter_reprogram(pmc);
 			}
 			break;
 		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
--
cgit v1.2.3

From dfdeda67ea2dac57d2d7506d65cfe5a0878ad285 Mon Sep 17 00:00:00 2001
From: Aaron Lewis
Date: Tue, 7 Mar 2023 14:13:56 +0000
Subject: KVM: x86/pmu: Prevent the PMU from counting disallowed events

When counting "Instructions Retired" (0xc0) in a guest, KVM will
occasionally increment the PMU counter regardless of if that event is
being filtered.  This is because some PMU events are incremented via
kvm_pmu_trigger_event(), which doesn't know about the event filter.  Add
the event filter to kvm_pmu_trigger_event(), so events that are
disallowed do not increment their counters.
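For reference, a deny-list for this exact event can be installed from
userspace roughly like so (abbreviated; assumes <linux/kvm.h> and
<stdlib.h>, error handling omitted):

	/* Deny "Instructions Retired" (arch event select 0xc0) VM-wide. */
	struct kvm_pmu_event_filter *filter =
		calloc(1, sizeof(*filter) + sizeof(__u64));

	filter->action = KVM_PMU_EVENT_DENY;
	filter->nevents = 1;
	filter->events[0] = 0xc0;

	ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, filter);
	/* With this patch, kvm_pmu_trigger_event() honors the filter too. */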
Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions")
Signed-off-by: Aaron Lewis
Reviewed-by: Like Xu
Link: https://lore.kernel.org/r/20230307141400.1486314-2-aaronlewis@google.com
[sean: prepend "pmc" to the new function]
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/pmu.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 2226db9601e2..597a8f8f90b9 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -400,6 +400,12 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 	return is_fixed_event_allowed(filter, pmc->idx);
 }
 
+static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
+{
+	return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
+	       check_pmu_event_filter(pmc);
+}
+
 static void reprogram_counter(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -409,10 +415,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 	pmc_pause_counter(pmc);
 
-	if (!pmc_speculative_in_use(pmc) || !pmc_is_globally_enabled(pmc))
-		goto reprogram_complete;
-
-	if (!check_pmu_event_filter(pmc))
+	if (!pmc_event_is_allowed(pmc))
 		goto reprogram_complete;
 
 	if (pmc->counter < pmc->prev_counter)
@@ -688,7 +691,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
 	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
 		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
 
-		if (!pmc || !pmc_is_globally_enabled(pmc) || !pmc_speculative_in_use(pmc))
+		if (!pmc || !pmc_event_is_allowed(pmc))
 			continue;
 
 		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
--
cgit v1.2.3
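Taken together, whether a vPMC may count after this series boils down to
a single predicate, sketched here with the conditions passed in
explicitly (simplified; the real helpers live in arch/x86/kvm/pmu.c):

	/*
	 * A counter may count only if it is globally enabled (its bit set in
	 * PERF_GLOBAL_CTRL), locally enabled/in use via its EVTSEL, and
	 * permitted by the userspace PMU event filter.
	 */
	static bool sketch_pmc_may_count(bool globally_enabled, bool in_use,
					 bool filter_allows)
	{
		return globally_enabled && in_use && filter_allows;
	}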