From 53550b89220beda42934a76a61313ecf308b2b03 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jun 2023 18:10:47 -0700 Subject: KVM: x86/pmu: Rename global_ovf_ctrl_mask to global_status_mask Rename global_ovf_ctrl_mask to global_status_mask to avoid confusion now that Intel has renamed GLOBAL_OVF_CTRL to GLOBAL_STATUS_RESET in PMU v4. GLOBAL_OVF_CTRL and GLOBAL_STATUS_RESET are the same MSR index, i.e. are just different names for the same thing, but the SDM provides different entries in the IA-32 Architectural MSRs table, which gets really confusing when looking at PMU v4 definitions since it *looks* like GLOBAL_STATUS has bits that don't exist in GLOBAL_OVF_CTRL, but in reality the bits are simply defined in the GLOBAL_STATUS_RESET entry. No functional change intended. Cc: Like Xu Link: https://lore.kernel.org/r/20230603011058.1038821-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fb9d1f2d6136..28bd38303d70 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,7 +523,7 @@ struct kvm_pmu { u64 global_status; u64 counter_bitmask[2]; u64 global_ctrl_mask; - u64 global_ovf_ctrl_mask; + u64 global_status_mask; u64 reserved_bits; u64 raw_event_mask; struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 741efe2c497b..fb96cbfc9ae8 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -427,7 +427,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - if (data & pmu->global_ovf_ctrl_mask) + /* + * GLOBAL_OVF_CTRL, a.k.a. GLOBAL STATUS_RESET, clears bits in + * GLOBAL_STATUS, and so the set of reserved bits is the same. + */ + if (data & pmu->global_status_mask) return 1; if (!msr_info->host_initiated) @@ -531,7 +535,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->reserved_bits = 0xffffffff00200000ull; pmu->raw_event_mask = X86_RAW_EVENT_MASK; pmu->global_ctrl_mask = ~0ull; - pmu->global_ovf_ctrl_mask = ~0ull; + pmu->global_status_mask = ~0ull; pmu->fixed_ctr_ctrl_mask = ~0ull; pmu->pebs_enable_mask = ~0ull; pmu->pebs_data_cfg_mask = ~0ull; @@ -585,11 +589,17 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED)); pmu->global_ctrl_mask = counter_mask; - pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask + + /* + * GLOBAL_STATUS and GLOBAL_OVF_CONTROL (a.k.a. GLOBAL_STATUS_RESET) + * share reserved bit definitions. The kernel just happens to use + * OVF_CTRL for the names. + */ + pmu->global_status_mask = pmu->global_ctrl_mask & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF | MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD); if (vmx_pt_mode_is_host_guest()) - pmu->global_ovf_ctrl_mask &= + pmu->global_status_mask &= ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI; entry = kvm_find_cpuid_entry_index(vcpu, 7, 0); -- cgit v1.2.3 From 8de18543dfe34a6aac9deefb0256db2609cfa351 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:48 -0700 Subject: KVM: x86/pmu: Move reprogram_counters() to pmu.h Move reprogram_counters() out of Intel specific PMU code and into pmu.h so that it can be used to implement AMD PMU v2 support. No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Like Xu [sean: rewrite changelog] Link: https://lore.kernel.org/r/20230603011058.1038821-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.h | 12 ++++++++++++ arch/x86/kvm/vmx/pmu_intel.c | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 5c7bbf03b599..986563aeeef8 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -201,6 +201,18 @@ static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc) kvm_make_request(KVM_REQ_PMU, pmc->vcpu); } +static inline void reprogram_counters(struct kvm_pmu *pmu, u64 diff) +{ + int bit; + + if (!diff) + return; + + for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) + set_bit(bit, pmu->reprogram_pmi); + kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu)); +} + void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index fb96cbfc9ae8..edcf8670eb4e 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -73,18 +73,6 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) } } -static void reprogram_counters(struct kvm_pmu *pmu, u64 diff) -{ - int bit; - - if (!diff) - return; - - for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) - set_bit(bit, pmu->reprogram_pmi); - kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu)); -} - static bool intel_hw_event_available(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); -- cgit v1.2.3 From 30dab5c0b65ea580878f36d6ae7ff85f06813387 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:49 -0700 Subject: KVM: x86/pmu: Reject userspace attempts to set reserved GLOBAL_STATUS bits Reject userspace writes to MSR_CORE_PERF_GLOBAL_STATUS that attempt to set reserved bits. Allowing userspace to stuff reserved bits doesn't harm KVM itself, but it's architecturally wrong and the guest can't clear the unsupported bits, e.g. makes the guest's PMI handler very confused. Signed-off-by: Like Xu [sean: rewrite changelog to avoid use of #GP, rebase on name change] Link: https://lore.kernel.org/r/20230603011058.1038821-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/pmu_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index edcf8670eb4e..efd113f24c1b 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -402,6 +402,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) return 1; /* RO MSR */ + if (data & pmu->global_status_mask) + return 1; + pmu->global_status = data; break; case MSR_CORE_PERF_GLOBAL_CTRL: -- cgit v1.2.3 From c85cdc1cc1ea27573ab15c76f13a06f12530aa54 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:50 -0700 Subject: KVM: x86/pmu: Move handling PERF_GLOBAL_CTRL and friends to common x86 Move the handling of GLOBAL_CTRL, GLOBAL_STATUS, and GLOBAL_OVF_CTRL, a.k.a. GLOBAL_STATUS_RESET, from Intel PMU code to generic x86 PMU code. AMD PerfMonV2 defines three registers that have the same semantics as Intel's variants, just with different names and indices. Conveniently, since KVM virtualizes GLOBAL_CTRL on Intel only for PMU v2 and above, and AMD's version shows up in v2, KVM can use common code for the existence check as well. Signed-off-by: Like Xu Co-developed-by: Sean Christopherson Link: https://lore.kernel.org/r/20230603011058.1038821-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.c | 71 ++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/pmu.h | 14 +++++++++ arch/x86/kvm/vmx/nested.c | 4 +-- arch/x86/kvm/vmx/pmu_intel.c | 47 ++--------------------------- arch/x86/kvm/vmx/vmx.h | 12 -------- 5 files changed, 86 insertions(+), 62 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 1690d41c1830..c720cc186ab4 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -562,6 +562,14 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) { + switch (msr) { + case MSR_CORE_PERF_GLOBAL_STATUS: + case MSR_CORE_PERF_GLOBAL_CTRL: + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + return kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)); + default: + break; + } return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) || static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr); } @@ -577,13 +585,70 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr) int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info); + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + u32 msr = msr_info->index; + + switch (msr) { + case MSR_CORE_PERF_GLOBAL_STATUS: + msr_info->data = pmu->global_status; + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + msr_info->data = pmu->global_ctrl; + break; + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + msr_info->data = 0; + break; + default: + return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info); + } + + return 0; } int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); - return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info); + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + u32 msr = msr_info->index; + u64 data = msr_info->data; + u64 diff; + + switch (msr) { + case MSR_CORE_PERF_GLOBAL_STATUS: + if (!msr_info->host_initiated) + return 1; /* RO MSR */ + + if (data & pmu->global_status_mask) + return 1; + + pmu->global_status = data; + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + if (!kvm_valid_perf_global_ctrl(pmu, data)) + return 1; + + if (pmu->global_ctrl != data) { + diff = pmu->global_ctrl ^ data; + pmu->global_ctrl = data; + reprogram_counters(pmu, diff); + } + break; + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + /* + * GLOBAL_OVF_CTRL, a.k.a. GLOBAL STATUS_RESET, clears bits in + * GLOBAL_STATUS, and so the set of reserved bits is the same. + */ + if (data & pmu->global_status_mask) + return 1; + + if (!msr_info->host_initiated) + pmu->global_status &= ~data; + break; + default: + kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); + return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info); + } + + return 0; } /* refresh PMU settings. This function generally is called when underlying diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 986563aeeef8..7c2c64142443 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -41,6 +41,20 @@ struct kvm_pmu_ops { void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops); +static inline bool kvm_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu) +{ + /* + * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is + * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is + * greater than zero. However, KVM only exposes and emulates the MSR + * to/for the guest if the guest PMU supports at least "Architectural + * Performance Monitoring Version 2". + * + * AMD's version of PERF_GLOBAL_CTRL conveniently shows up with v2. + */ + return pmu->version > 1; +} + static inline u64 pmc_bitmask(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e35cf0bd0df9..ba2ed6d87364 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2649,7 +2649,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && - intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) && + kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) && WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, vmcs12->guest_ia32_perf_global_ctrl))) { *entry_failure_code = ENTRY_FAIL_DEFAULT; @@ -4524,7 +4524,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.pat = vmcs12->host_ia32_pat; } if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && - intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu))) + kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu))) WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, vmcs12->host_ia32_perf_global_ctrl)); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index efd113f24c1b..ff2f52d1e22f 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -100,7 +100,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); - if (!intel_pmu_has_perf_global_ctrl(pmu)) + if (!kvm_pmu_has_perf_global_ctrl(pmu)) return true; return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); @@ -186,11 +186,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: - case MSR_CORE_PERF_GLOBAL_STATUS: - case MSR_CORE_PERF_GLOBAL_CTRL: - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - return intel_pmu_has_perf_global_ctrl(pmu); - break; + return kvm_pmu_has_perf_global_ctrl(pmu); case MSR_IA32_PEBS_ENABLE: ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT; break; @@ -340,15 +336,6 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_CORE_PERF_FIXED_CTR_CTRL: msr_info->data = pmu->fixed_ctr_ctrl; break; - case MSR_CORE_PERF_GLOBAL_STATUS: - msr_info->data = pmu->global_status; - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - msr_info->data = pmu->global_ctrl; - break; - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - msr_info->data = 0; - break; case MSR_IA32_PEBS_ENABLE: msr_info->data = pmu->pebs_enable; break; @@ -398,36 +385,6 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (pmu->fixed_ctr_ctrl != data) reprogram_fixed_counters(pmu, data); break; - case MSR_CORE_PERF_GLOBAL_STATUS: - if (!msr_info->host_initiated) - return 1; /* RO MSR */ - - if (data & pmu->global_status_mask) - return 1; - - pmu->global_status = data; - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if (!kvm_valid_perf_global_ctrl(pmu, data)) - return 1; - - if (pmu->global_ctrl != data) { - diff = pmu->global_ctrl ^ data; - pmu->global_ctrl = data; - reprogram_counters(pmu, diff); - } - break; - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - /* - * GLOBAL_OVF_CTRL, a.k.a. GLOBAL STATUS_RESET, clears bits in - * GLOBAL_STATUS, and so the set of reserved bits is the same. - */ - if (data & pmu->global_status_mask) - return 1; - - if (!msr_info->host_initiated) - pmu->global_status &= ~data; - break; case MSR_IA32_PEBS_ENABLE: if (data & pmu->pebs_enable_mask) return 1; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 9e66531861cf..32384ba38499 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -93,18 +93,6 @@ union vmx_exit_reason { u32 full; }; -static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu) -{ - /* - * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is - * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is - * greater than zero. However, KVM only exposes and emulates the MSR - * to/for the guest if the guest PMU supports at least "Architectural - * Performance Monitoring Version 2". - */ - return pmu->version > 1; -} - struct lbr_desc { /* Basic info about guest LBR records. */ struct x86_pmu_lbr records; -- cgit v1.2.3 From 13afa29ae489d9b7c1038179f1e2bec74873e471 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:51 -0700 Subject: KVM: x86/pmu: Provide Intel PMU's pmc_is_enabled() as generic x86 code Move the Intel PMU implementation of pmc_is_enabled() to common x86 code as pmc_is_globally_enabled(), and drop AMD's implementation. AMD PMU currently supports only v1, and thus not PERF_GLOBAL_CONTROL, thus the semantics for AMD are unchanged. And when support for AMD PMU v2 comes along, the common behavior will also Just Work. Signed-off-by: Like Xu Co-developed-by: Sean Christopherson Link: https://lore.kernel.org/r/20230603011058.1038821-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm-x86-pmu-ops.h | 1 - arch/x86/kvm/pmu.c | 5 ----- arch/x86/kvm/pmu.h | 16 +++++++++++++++- arch/x86/kvm/svm/pmu.c | 9 --------- arch/x86/kvm/vmx/pmu_intel.c | 14 +------------- 5 files changed, 16 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h index c17e3e96fc1d..6c98f4bb4228 100644 --- a/arch/x86/include/asm/kvm-x86-pmu-ops.h +++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h @@ -13,7 +13,6 @@ BUILD_BUG_ON(1) * at the call sites. */ KVM_X86_PMU_OP(hw_event_available) -KVM_X86_PMU_OP(pmc_is_enabled) KVM_X86_PMU_OP(pmc_idx_to_pmc) KVM_X86_PMU_OP(rdpmc_ecx_to_pmc) KVM_X86_PMU_OP(msr_idx_to_pmc) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index c720cc186ab4..4315f46aabfb 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -93,11 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) #undef __KVM_X86_PMU_OP } -static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc) -{ - return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc); -} - static void kvm_pmi_trigger_fn(struct irq_work *irq_work) { struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work); diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 7c2c64142443..f77bfc7ede42 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -20,7 +20,6 @@ struct kvm_pmu_ops { bool (*hw_event_available)(struct kvm_pmc *pmc); - bool (*pmc_is_enabled)(struct kvm_pmc *pmc); struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu, unsigned int idx, u64 *mask); @@ -227,6 +226,21 @@ static inline void reprogram_counters(struct kvm_pmu *pmu, u64 diff) kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu)); } +/* + * Check if a PMC is enabled by comparing it against global_ctrl bits. + * + * If the vPMU doesn't have global_ctrl MSR, all vPMCs are enabled. + */ +static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc) +{ + struct kvm_pmu *pmu = pmc_to_pmu(pmc); + + if (!kvm_pmu_has_perf_global_ctrl(pmu)) + return true; + + return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); +} + void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 5fa939e411d8..70143275e0a7 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -78,14 +78,6 @@ static bool amd_hw_event_available(struct kvm_pmc *pmc) return true; } -/* check if a PMC is enabled by comparing it against global_ctrl bits. Because - * AMD CPU doesn't have global_ctrl MSR, all PMCs are enabled (return TRUE). - */ -static bool amd_pmc_is_enabled(struct kvm_pmc *pmc) -{ - return true; -} - static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -220,7 +212,6 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmu_ops amd_pmu_ops __initdata = { .hw_event_available = amd_hw_event_available, - .pmc_is_enabled = amd_pmc_is_enabled, .pmc_idx_to_pmc = amd_pmc_idx_to_pmc, .rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc, .msr_idx_to_pmc = amd_msr_idx_to_pmc, diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index ff2f52d1e22f..b2f279f934b1 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -95,17 +95,6 @@ static bool intel_hw_event_available(struct kvm_pmc *pmc) return true; } -/* check if a PMC is enabled by comparing it with globl_ctrl bits. */ -static bool intel_pmc_is_enabled(struct kvm_pmc *pmc) -{ - struct kvm_pmu *pmu = pmc_to_pmu(pmc); - - if (!kvm_pmu_has_perf_global_ctrl(pmu)) - return true; - - return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); -} - static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -759,7 +748,7 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu) pmc = intel_pmc_idx_to_pmc(pmu, bit); if (!pmc || !pmc_speculative_in_use(pmc) || - !intel_pmc_is_enabled(pmc) || !pmc->perf_event) + !pmc_is_globally_enabled(pmc) || !pmc->perf_event) continue; /* @@ -774,7 +763,6 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu) struct kvm_pmu_ops intel_pmu_ops __initdata = { .hw_event_available = intel_hw_event_available, - .pmc_is_enabled = intel_pmc_is_enabled, .pmc_idx_to_pmc = intel_pmc_idx_to_pmc, .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc, .msr_idx_to_pmc = intel_msr_idx_to_pmc, -- cgit v1.2.3 From 6593039d33c119968b9e2d85eaca853b391e8a55 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:52 -0700 Subject: KVM: x86: Explicitly zero cpuid "0xa" leaf when PMU is disabled Add an explicit !enable_pmu check as relying on kvm_pmu_cap to be zeroed isn't obvious. Although when !enable_pmu, KVM will have zero-padded kvm_pmu_cap to do subsequent CPUID leaf assignments. Suggested-by: Sean Christopherson Signed-off-by: Like Xu Link: https://lore.kernel.org/r/20230603011058.1038821-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0c9660a07b23..61bc71882f07 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -948,7 +948,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) union cpuid10_eax eax; union cpuid10_edx edx; - if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) { + if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } -- cgit v1.2.3 From 6a08083f294cb623fbc882417b28c5646b01f4bf Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:53 -0700 Subject: KVM: x86/pmu: Disable vPMU if the minimum num of counters isn't met Disable PMU support when running on AMD and perf reports fewer than four general purpose counters. All AMD PMUs must define at least four counters due to AMD's legacy architecture hardcoding the number of counters without providing a way to enumerate the number of counters to software, e.g. from AMD's APM: The legacy architecture defines four performance counters (PerfCtrn) and corresponding event-select registers (PerfEvtSeln). Virtualizing fewer than four counters can lead to guest instability as software expects four counters to be available. Rather than bleed AMD details into the common code, just define a const unsigned int and provide a convenient location to document why Intel and AMD have different mins (in particular, AMD's lack of any way to enumerate less than four counters to the guest). Keep the minimum number of counters at Intel at one, even though old P6 and Core Solo/Duo processor effectively require a minimum of two counters. KVM can, and more importantly has up until this point, supported a vPMU so long as the CPU has at least one counter. Perf's support for P6/Core CPUs does require two counters, but perf will happily chug along with a single counter when running on a modern CPU. Cc: Jim Mattson Suggested-by: Sean Christopherson Signed-off-by: Like Xu [sean: set Intel min to '1', not '2'] Link: https://lore.kernel.org/r/20230603011058.1038821-8-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.h | 14 ++++++++++---- arch/x86/kvm/svm/pmu.c | 1 + arch/x86/kvm/vmx/pmu_intel.c | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index f77bfc7ede42..7d9ba301c090 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -36,6 +36,7 @@ struct kvm_pmu_ops { const u64 EVENTSEL_EVENT; const int MAX_NR_GP_COUNTERS; + const int MIN_NR_GP_COUNTERS; }; void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops); @@ -174,6 +175,7 @@ extern struct x86_pmu_capability kvm_pmu_cap; static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) { bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; + int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS; /* * Hybrid PMUs don't play nice with virtualization without careful @@ -188,11 +190,15 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) perf_get_x86_pmu_capability(&kvm_pmu_cap); /* - * For Intel, only support guest architectural pmu - * on a host with architectural pmu. + * WARN if perf did NOT disable hardware PMU if the number of + * architecturally required GP counters aren't present, i.e. if + * there are a non-zero number of counters, but fewer than what + * is architecturally required. */ - if ((is_intel && !kvm_pmu_cap.version) || - !kvm_pmu_cap.num_counters_gp) + if (!kvm_pmu_cap.num_counters_gp || + WARN_ON_ONCE(kvm_pmu_cap.num_counters_gp < min_nr_gp_ctrs)) + enable_pmu = false; + else if (is_intel && !kvm_pmu_cap.version) enable_pmu = false; } diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 70143275e0a7..e5c69062a909 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -224,4 +224,5 @@ struct kvm_pmu_ops amd_pmu_ops __initdata = { .reset = amd_pmu_reset, .EVENTSEL_EVENT = AMD64_EVENTSEL_EVENT, .MAX_NR_GP_COUNTERS = KVM_AMD_PMC_MAX_GENERIC, + .MIN_NR_GP_COUNTERS = AMD64_NUM_COUNTERS, }; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index b2f279f934b1..30ec9ccdea47 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -777,4 +777,5 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = { .cleanup = intel_pmu_cleanup, .EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT, .MAX_NR_GP_COUNTERS = KVM_INTEL_PMC_MAX_GENERIC, + .MIN_NR_GP_COUNTERS = 1, }; -- cgit v1.2.3 From d338d8789e64b2d8ed2f82f9364c415d6efa118d Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:54 -0700 Subject: KVM: x86/pmu: Advertise PERFCTR_CORE iff the min nr of counters is met Enable and advertise PERFCTR_CORE if and only if the minimum number of required counters are available, i.e. if perf says there are less than six general purpose counters. Opportunistically, use kvm_cpu_cap_check_and_set() instead of open coding the check for host support. Suggested-by: Sean Christopherson Signed-off-by: Like Xu [sean: massage shortlog and changelog] Link: https://lore.kernel.org/r/20230603011058.1038821-9-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ca32389f3c36..d9669e3cc00a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5025,9 +5025,18 @@ static __init void svm_set_cpu_caps(void) boot_cpu_has(X86_FEATURE_AMD_SSBD)) kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); - /* AMD PMU PERFCTR_CORE CPUID */ - if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) - kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE); + if (enable_pmu) { + /* + * Enumerate support for PERFCTR_CORE if and only if KVM has + * access to enough counters to virtualize "core" support, + * otherwise limit vPMU support to the legacy number of counters. + */ + if (kvm_pmu_cap.num_counters_gp < AMD64_NUM_COUNTERS_CORE) + kvm_pmu_cap.num_counters_gp = min(AMD64_NUM_COUNTERS, + kvm_pmu_cap.num_counters_gp); + else + kvm_cpu_cap_check_and_set(X86_FEATURE_PERFCTR_CORE); + } /* CPUID 0x8000001F (SME/SEV features) */ sev_set_cpu_caps(); -- cgit v1.2.3 From 1c2bf8a6b045a6ac4e75a7a07fde70db63e5a380 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:55 -0700 Subject: KVM: x86/pmu: Constrain the num of guest counters with kvm_pmu_cap Cap the number of general purpose counters enumerated on AMD to what KVM actually supports, i.e. don't allow userspace to coerce KVM into thinking there are more counters than actually exist, e.g. by enumerating X86_FEATURE_PERFCTR_CORE in guest CPUID when its not supported. Suggested-by: Sean Christopherson Signed-off-by: Like Xu [sean: massage changelog] Link: https://lore.kernel.org/r/20230603011058.1038821-10-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/pmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index e5c69062a909..c03958063a76 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -170,6 +170,9 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) else pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; + pmu->nr_arch_gp_counters = min_t(unsigned int, pmu->nr_arch_gp_counters, + kvm_pmu_cap.num_counters_gp); + pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xfffffff000280000ull; pmu->raw_event_mask = AMD64_RAW_EVENT_MASK; -- cgit v1.2.3 From fe8d76c1a6f04387953727ee3e63469956e7360d Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:56 -0700 Subject: KVM: x86/cpuid: Add a KVM-only leaf to redirect AMD PerfMonV2 flag Add a KVM-only leaf for AMD's PerfMonV2 to redirect the kernel's scattered version to its architectural location, e.g. so that KVM can query guest support via guest_cpuid_has(). Suggested-by: Sean Christopherson Signed-off-by: Like Xu [sean: massage changelog] Link: https://lore.kernel.org/r/20230603011058.1038821-11-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/reverse_cpuid.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index a5717282bb9c..56cbdb24400a 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -15,6 +15,7 @@ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, CPUID_7_1_EDX, CPUID_8000_0007_EDX, + CPUID_8000_0022_EAX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -47,6 +48,9 @@ enum kvm_only_cpuid_leafs { /* CPUID level 0x80000007 (EDX). */ #define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8) +/* CPUID level 0x80000022 (EAX) */ +#define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0) + struct cpuid_reg { u32 function; u32 index; @@ -74,6 +78,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX}, [CPUID_8000_0007_EDX] = {0x80000007, 0, CPUID_EDX}, [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, + [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, }; /* @@ -108,6 +113,8 @@ static __always_inline u32 __feature_translate(int x86_feature) return KVM_X86_FEATURE_SGX_EDECCSSA; else if (x86_feature == X86_FEATURE_CONSTANT_TSC) return KVM_X86_FEATURE_CONSTANT_TSC; + else if (x86_feature == X86_FEATURE_PERFMON_V2) + return KVM_X86_FEATURE_PERFMON_V2; return x86_feature; } -- cgit v1.2.3 From 4a2771895ca63a055df815be5e307cce8e85308c Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:57 -0700 Subject: KVM: x86/svm/pmu: Add AMD PerfMonV2 support If AMD Performance Monitoring Version 2 (PerfMonV2) is detected by the guest, it can use a new scheme to manage the Core PMCs using the new global control and status registers. In addition to benefiting from the PerfMonV2 functionality in the same way as the host (higher precision), the guest also can reduce the number of vm-exits by lowering the total number of MSRs accesses. In terms of implementation details, amd_is_valid_msr() is resurrected since three newly added MSRs could not be mapped to one vPMC. The possibility of emulating PerfMonV2 on the mainframe has also been eliminated for reasons of precision. Co-developed-by: Sandipan Das Signed-off-by: Sandipan Das Signed-off-by: Like Xu [sean: drop "Based on the observed HW." comments] Link: https://lore.kernel.org/r/20230603011058.1038821-12-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.c | 18 ++++++++++++++++- arch/x86/kvm/svm/pmu.c | 55 +++++++++++++++++++++++++++++++++++++++++--------- arch/x86/kvm/x86.c | 10 +++++++++ 3 files changed, 72 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 4315f46aabfb..bf653df86112 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -585,11 +585,14 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_CORE_PERF_GLOBAL_STATUS: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: msr_info->data = pmu->global_status; break; + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: case MSR_CORE_PERF_GLOBAL_CTRL: msr_info->data = pmu->global_ctrl; break; + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: case MSR_CORE_PERF_GLOBAL_OVF_CTRL: msr_info->data = 0; break; @@ -607,16 +610,28 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u64 data = msr_info->data; u64 diff; + /* + * Note, AMD ignores writes to reserved bits and read-only PMU MSRs, + * whereas Intel generates #GP on attempts to write reserved/RO MSRs. + */ switch (msr) { case MSR_CORE_PERF_GLOBAL_STATUS: if (!msr_info->host_initiated) return 1; /* RO MSR */ + fallthrough; + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: + /* Per PPR, Read-only MSR. Writes are ignored. */ + if (!msr_info->host_initiated) + break; if (data & pmu->global_status_mask) return 1; pmu->global_status = data; break; + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: + data &= ~pmu->global_ctrl_mask; + fallthrough; case MSR_CORE_PERF_GLOBAL_CTRL: if (!kvm_valid_perf_global_ctrl(pmu, data)) return 1; @@ -634,7 +649,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) */ if (data & pmu->global_status_mask) return 1; - + fallthrough; + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: if (!msr_info->host_initiated) pmu->global_status &= ~data; break; diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index c03958063a76..cef5a3d0abd0 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -94,12 +94,6 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30)); } -static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) -{ - /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */ - return false; -} - static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -111,6 +105,29 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) return pmc; } +static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + + switch (msr) { + case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3: + return pmu->version > 0; + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: + return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE); + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: + return pmu->version > 1; + default: + if (msr > MSR_F15H_PERF_CTR5 && + msr < MSR_F15H_PERF_CTL0 + 2 * pmu->nr_arch_gp_counters) + return pmu->version > 1; + break; + } + + return amd_msr_idx_to_pmc(vcpu, msr); +} + static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -164,23 +181,39 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void amd_pmu_refresh(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + union cpuid_0x80000022_ebx ebx; - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) + pmu->version = 1; + if (guest_cpuid_has(vcpu, X86_FEATURE_PERFMON_V2)) { + pmu->version = 2; + /* + * Note, PERFMON_V2 is also in 0x80000022.0x0, i.e. the guest + * CPUID entry is guaranteed to be non-NULL. + */ + BUILD_BUG_ON(x86_feature_cpuid(X86_FEATURE_PERFMON_V2).function != 0x80000022 || + x86_feature_cpuid(X86_FEATURE_PERFMON_V2).index); + ebx.full = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0)->ebx; + pmu->nr_arch_gp_counters = ebx.split.num_core_pmc; + } else if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE; - else + } else { pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; + } pmu->nr_arch_gp_counters = min_t(unsigned int, pmu->nr_arch_gp_counters, kvm_pmu_cap.num_counters_gp); + if (pmu->version > 1) { + pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1); + pmu->global_status_mask = pmu->global_ctrl_mask; + } + pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xfffffff000280000ull; pmu->raw_event_mask = AMD64_RAW_EVENT_MASK; - pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->nr_arch_fixed_counters = 0; - pmu->global_status = 0; bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); } @@ -211,6 +244,8 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) pmc_stop_counter(pmc); pmc->counter = pmc->prev_counter = pmc->eventsel = 0; } + + pmu->global_ctrl = pmu->global_status = 0; } struct kvm_pmu_ops amd_pmu_ops __initdata = { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c0778ca39650..abfba3cae0ba 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1483,6 +1483,10 @@ static const u32 msrs_to_save_pmu[] = { MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, + + MSR_AMD64_PERF_CNTR_GLOBAL_CTL, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, }; static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + @@ -7150,6 +7154,12 @@ static void kvm_probe_msr_to_save(u32 msr_index) kvm_pmu_cap.num_counters_fixed) return; break; + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: + if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) + return; + break; case MSR_IA32_XFD: case MSR_IA32_XFD_ERR: if (!kvm_cpu_cap_has(X86_FEATURE_XFD)) -- cgit v1.2.3 From 94cdeebd82111d7b7da5bd4da053eed9e0f65d72 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 2 Jun 2023 18:10:58 -0700 Subject: KVM: x86/cpuid: Add AMD CPUID ExtPerfMonAndDbg leaf 0x80000022 CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some new performance monitoring features for AMD processors. Bit 0 of EAX indicates support for Performance Monitoring Version 2 (PerfMonV2) features. If found to be set during PMU initialization, the EBX bits of the same CPUID function can be used to determine the number of available PMCs for different PMU types. Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so that guests can make use of the PerfMonV2 features. Co-developed-by: Sandipan Das Signed-off-by: Sandipan Das Signed-off-by: Like Xu Link: https://lore.kernel.org/r/20230603011058.1038821-13-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 28 +++++++++++++++++++++++++++- arch/x86/kvm/svm/svm.c | 4 ++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 61bc71882f07..0e5584f4acd7 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -734,6 +734,10 @@ void kvm_set_cpu_caps(void) F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ ); + kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX, + F(PERFMON_V2) + ); + /* * Synthesize "LFENCE is serializing" into the AMD-defined entry in * KVM's supported CPUID if the feature is reported as supported by the @@ -1128,7 +1132,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = 0; break; case 0x80000000: - entry->eax = min(entry->eax, 0x80000021); + entry->eax = min(entry->eax, 0x80000022); /* * Serializing LFENCE is reported in a multitude of ways, and * NullSegClearsBase is not reported in CPUID on Zen2; help @@ -1233,6 +1237,28 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ebx = entry->ecx = entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0021_EAX); break; + /* AMD Extended Performance Monitoring and Debug */ + case 0x80000022: { + union cpuid_0x80000022_ebx ebx; + + entry->ecx = entry->edx = 0; + if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) { + entry->eax = entry->ebx; + break; + } + + cpuid_entry_override(entry, CPUID_8000_0022_EAX); + + if (kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) + ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp; + else if (kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE)) + ebx.split.num_core_pmc = AMD64_NUM_COUNTERS_CORE; + else + ebx.split.num_core_pmc = AMD64_NUM_COUNTERS; + + entry->ebx = ebx.full; + break; + } /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: /*Just support up to 0xC0000004 now*/ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d9669e3cc00a..ff48cdea1fbf 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5036,6 +5036,10 @@ static __init void svm_set_cpu_caps(void) kvm_pmu_cap.num_counters_gp); else kvm_cpu_cap_check_and_set(X86_FEATURE_PERFCTR_CORE); + + if (kvm_pmu_cap.version != 2 || + !kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE)) + kvm_cpu_cap_clear(X86_FEATURE_PERFMON_V2); } /* CPUID 0x8000001F (SME/SEV features) */ -- cgit v1.2.3