summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/vmx
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-05-01 22:06:20 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-05-01 22:06:20 +0300
commitc8c655c34e33544aec9d64b660872ab33c29b5f1 (patch)
tree4aad88f698f04cef9e5d9d573a6df6283085dadd /arch/x86/kvm/vmx
parentd75439d64a1e2b35e0f08906205b00279753cbed (diff)
parentb3c98052d46948a8d65d2778c7f306ff38366aac (diff)
downloadlinux-c8c655c34e33544aec9d64b660872ab33c29b5f1.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "s390: - More phys_to_virt conversions - Improvement of AP management for VSIE (nested virtualization) ARM64: - Numerous fixes for the pathological lock inversion issue that plagued KVM/arm64 since... forever. - New framework allowing SMCCC-compliant hypercalls to be forwarded to userspace, hopefully paving the way for some more features being moved to VMMs rather than be implemented in the kernel. - Large rework of the timer code to allow a VM-wide offset to be applied to both virtual and physical counters as well as a per-timer, per-vcpu offset that complements the global one. This last part allows the NV timer code to be implemented on top. - A small set of fixes to make sure that we don't change anything affecting the EL1&0 translation regime just after having having taken an exception to EL2 until we have executed a DSB. This ensures that speculative walks started in EL1&0 have completed. - The usual selftest fixes and improvements. x86: - Optimize CR0.WP toggling by avoiding an MMU reload when TDP is enabled, and by giving the guest control of CR0.WP when EPT is enabled on VMX (VMX-only because SVM doesn't support per-bit controls) - Add CR0/CR4 helpers to query single bits, and clean up related code where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long" return as a bool - Move AMD_PSFD to cpufeatures.h and purge KVM's definition - Avoid unnecessary writes+flushes when the guest is only adding new PTEs - Overhaul .sync_page() and .invlpg() to utilize .sync_page()'s optimizations when emulating invalidations - Clean up the range-based flushing APIs - Revamp the TDP MMU's reaping of Accessed/Dirty bits to clear a single A/D bit using a LOCK AND instead of XCHG, and skip all of the "handle changed SPTE" overhead associated with writing the entire entry - Track the number of "tail" entries in a pte_list_desc to avoid having to walk (potentially) all descriptors during insertion and deletion, which gets quite expensive if the guest is spamming fork() - Disallow virtualizing legacy LBRs if architectural LBRs are available, the two are mutually exclusive in hardware - Disallow writes to immutable feature MSRs (notably PERF_CAPABILITIES) after KVM_RUN, similar to CPUID features - Overhaul the vmx_pmu_caps selftest to better validate PERF_CAPABILITIES - Apply PMU filters to emulated events and add test coverage to the pmu_event_filter selftest - AMD SVM: - Add support for virtual NMIs - Fixes for edge cases related to virtual interrupts - Intel AMX: - Don't advertise XTILE_CFG in KVM_GET_SUPPORTED_CPUID if XTILE_DATA is not being reported due to userspace not opting in via prctl() - Fix a bug in emulation of ENCLS in compatibility mode - Allow emulation of NOP and PAUSE for L2 - AMX selftests improvements - Misc cleanups MIPS: - Constify MIPS's internal callbacks (a leftover from the hardware enabling rework that landed in 6.3) Generic: - Drop unnecessary casts from "void *" throughout kvm_main.c - Tweak the layout of "struct kvm_mmu_memory_cache" to shrink the struct size by 8 bytes on 64-bit kernels by utilizing a padding hole Documentation: - Fix goof introduced by the conversion to rST" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (211 commits) KVM: s390: pci: fix virtual-physical confusion on module unload/load KVM: s390: vsie: clarifications on setting the APCB KVM: s390: interrupt: fix virtual-physical confusion for next alert GISA KVM: arm64: Have kvm_psci_vcpu_on() use WRITE_ONCE() to update mp_state KVM: arm64: Acquire mp_state_lock in kvm_arch_vcpu_ioctl_vcpu_init() KVM: selftests: Test the PMU event "Instructions retired" KVM: selftests: Copy full counter values from guest in PMU event filter test KVM: selftests: Use error codes to signal errors in PMU event filter test KVM: selftests: Print detailed info in PMU event filter asserts KVM: selftests: Add helpers for PMC asserts in PMU event filter test KVM: selftests: Add a common helper for the PMU event filter guest code KVM: selftests: Fix spelling mistake "perrmited" -> "permitted" KVM: arm64: vhe: Drop extra isb() on guest exit KVM: arm64: vhe: Synchronise with page table walker on MMU update KVM: arm64: pkvm: Document the side effects of kvm_flush_dcache_to_poc() KVM: arm64: nvhe: Synchronise with page table walker on TLBI KVM: arm64: Handle 32bit CNTPCTSS traps KVM: arm64: nvhe: Synchronise with page table walker on vcpu run KVM: arm64: vgic: Don't acquire its_lock before config_lock KVM: selftests: Add test to verify KVM's supported XCR0 ...
Diffstat (limited to 'arch/x86/kvm/vmx')
-rw-r--r--arch/x86/kvm/vmx/hyperv.c107
-rw-r--r--arch/x86/kvm/vmx/hyperv.h115
-rw-r--r--arch/x86/kvm/vmx/nested.c126
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c135
-rw-r--r--arch/x86/kvm/vmx/sgx.c4
-rw-r--r--arch/x86/kvm/vmx/vmx.c96
-rw-r--r--arch/x86/kvm/vmx/vmx.h20
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h22
8 files changed, 351 insertions, 274 deletions
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c
index 22daca752797..79450e1ed7cf 100644
--- a/arch/x86/kvm/vmx/hyperv.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -13,7 +13,110 @@
#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
-DEFINE_STATIC_KEY_FALSE(enable_evmcs);
+/*
+ * Enlightened VMCSv1 doesn't support these:
+ *
+ * POSTED_INTR_NV = 0x00000002,
+ * GUEST_INTR_STATUS = 0x00000810,
+ * APIC_ACCESS_ADDR = 0x00002014,
+ * POSTED_INTR_DESC_ADDR = 0x00002016,
+ * EOI_EXIT_BITMAP0 = 0x0000201c,
+ * EOI_EXIT_BITMAP1 = 0x0000201e,
+ * EOI_EXIT_BITMAP2 = 0x00002020,
+ * EOI_EXIT_BITMAP3 = 0x00002022,
+ * GUEST_PML_INDEX = 0x00000812,
+ * PML_ADDRESS = 0x0000200e,
+ * VM_FUNCTION_CONTROL = 0x00002018,
+ * EPTP_LIST_ADDRESS = 0x00002024,
+ * VMREAD_BITMAP = 0x00002026,
+ * VMWRITE_BITMAP = 0x00002028,
+ *
+ * TSC_MULTIPLIER = 0x00002032,
+ * PLE_GAP = 0x00004020,
+ * PLE_WINDOW = 0x00004022,
+ * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ *
+ * Currently unsupported in KVM:
+ * GUEST_IA32_RTIT_CTL = 0x00002814,
+ */
+#define EVMCS1_SUPPORTED_PINCTRL \
+ (PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
+ PIN_BASED_EXT_INTR_MASK | \
+ PIN_BASED_NMI_EXITING | \
+ PIN_BASED_VIRTUAL_NMIS)
+
+#define EVMCS1_SUPPORTED_EXEC_CTRL \
+ (CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
+ CPU_BASED_HLT_EXITING | \
+ CPU_BASED_CR3_LOAD_EXITING | \
+ CPU_BASED_CR3_STORE_EXITING | \
+ CPU_BASED_UNCOND_IO_EXITING | \
+ CPU_BASED_MOV_DR_EXITING | \
+ CPU_BASED_USE_TSC_OFFSETTING | \
+ CPU_BASED_MWAIT_EXITING | \
+ CPU_BASED_MONITOR_EXITING | \
+ CPU_BASED_INVLPG_EXITING | \
+ CPU_BASED_RDPMC_EXITING | \
+ CPU_BASED_INTR_WINDOW_EXITING | \
+ CPU_BASED_CR8_LOAD_EXITING | \
+ CPU_BASED_CR8_STORE_EXITING | \
+ CPU_BASED_RDTSC_EXITING | \
+ CPU_BASED_TPR_SHADOW | \
+ CPU_BASED_USE_IO_BITMAPS | \
+ CPU_BASED_MONITOR_TRAP_FLAG | \
+ CPU_BASED_USE_MSR_BITMAPS | \
+ CPU_BASED_NMI_WINDOW_EXITING | \
+ CPU_BASED_PAUSE_EXITING | \
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+
+#define EVMCS1_SUPPORTED_2NDEXEC \
+ (SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
+ SECONDARY_EXEC_WBINVD_EXITING | \
+ SECONDARY_EXEC_ENABLE_VPID | \
+ SECONDARY_EXEC_ENABLE_EPT | \
+ SECONDARY_EXEC_UNRESTRICTED_GUEST | \
+ SECONDARY_EXEC_DESC | \
+ SECONDARY_EXEC_ENABLE_RDTSCP | \
+ SECONDARY_EXEC_ENABLE_INVPCID | \
+ SECONDARY_EXEC_XSAVES | \
+ SECONDARY_EXEC_RDSEED_EXITING | \
+ SECONDARY_EXEC_RDRAND_EXITING | \
+ SECONDARY_EXEC_TSC_SCALING | \
+ SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
+ SECONDARY_EXEC_PT_USE_GPA | \
+ SECONDARY_EXEC_PT_CONCEAL_VMX | \
+ SECONDARY_EXEC_BUS_LOCK_DETECTION | \
+ SECONDARY_EXEC_NOTIFY_VM_EXITING | \
+ SECONDARY_EXEC_ENCLS_EXITING)
+
+#define EVMCS1_SUPPORTED_3RDEXEC (0ULL)
+
+#define EVMCS1_SUPPORTED_VMEXIT_CTRL \
+ (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | \
+ VM_EXIT_SAVE_DEBUG_CONTROLS | \
+ VM_EXIT_ACK_INTR_ON_EXIT | \
+ VM_EXIT_HOST_ADDR_SPACE_SIZE | \
+ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_EXIT_SAVE_IA32_PAT | \
+ VM_EXIT_LOAD_IA32_PAT | \
+ VM_EXIT_SAVE_IA32_EFER | \
+ VM_EXIT_LOAD_IA32_EFER | \
+ VM_EXIT_CLEAR_BNDCFGS | \
+ VM_EXIT_PT_CONCEAL_PIP | \
+ VM_EXIT_CLEAR_IA32_RTIT_CTL)
+
+#define EVMCS1_SUPPORTED_VMENTRY_CTRL \
+ (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | \
+ VM_ENTRY_LOAD_DEBUG_CONTROLS | \
+ VM_ENTRY_IA32E_MODE | \
+ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_ENTRY_LOAD_IA32_PAT | \
+ VM_ENTRY_LOAD_IA32_EFER | \
+ VM_ENTRY_LOAD_BNDCFGS | \
+ VM_ENTRY_PT_CONCEAL_PIP | \
+ VM_ENTRY_LOAD_IA32_RTIT_CTL)
+
+#define EVMCS1_SUPPORTED_VMFUNC (0)
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
@@ -506,6 +609,8 @@ int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
}
#if IS_ENABLED(CONFIG_HYPERV)
+DEFINE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
+
/*
* KVM on Hyper-V always uses the latest known eVMCSv1 revision, the assumption
* is: in case a feature has corresponding fields in eVMCS described and it was
diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h
index 78d17667e7ec..9623fe1651c4 100644
--- a/arch/x86/kvm/vmx/hyperv.h
+++ b/arch/x86/kvm/vmx/hyperv.h
@@ -16,117 +16,10 @@
struct vmcs_config;
-DECLARE_STATIC_KEY_FALSE(enable_evmcs);
-
#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
#define KVM_EVMCS_VERSION 1
-/*
- * Enlightened VMCSv1 doesn't support these:
- *
- * POSTED_INTR_NV = 0x00000002,
- * GUEST_INTR_STATUS = 0x00000810,
- * APIC_ACCESS_ADDR = 0x00002014,
- * POSTED_INTR_DESC_ADDR = 0x00002016,
- * EOI_EXIT_BITMAP0 = 0x0000201c,
- * EOI_EXIT_BITMAP1 = 0x0000201e,
- * EOI_EXIT_BITMAP2 = 0x00002020,
- * EOI_EXIT_BITMAP3 = 0x00002022,
- * GUEST_PML_INDEX = 0x00000812,
- * PML_ADDRESS = 0x0000200e,
- * VM_FUNCTION_CONTROL = 0x00002018,
- * EPTP_LIST_ADDRESS = 0x00002024,
- * VMREAD_BITMAP = 0x00002026,
- * VMWRITE_BITMAP = 0x00002028,
- *
- * TSC_MULTIPLIER = 0x00002032,
- * PLE_GAP = 0x00004020,
- * PLE_WINDOW = 0x00004022,
- * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
- *
- * Currently unsupported in KVM:
- * GUEST_IA32_RTIT_CTL = 0x00002814,
- */
-#define EVMCS1_SUPPORTED_PINCTRL \
- (PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
- PIN_BASED_EXT_INTR_MASK | \
- PIN_BASED_NMI_EXITING | \
- PIN_BASED_VIRTUAL_NMIS)
-
-#define EVMCS1_SUPPORTED_EXEC_CTRL \
- (CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
- CPU_BASED_HLT_EXITING | \
- CPU_BASED_CR3_LOAD_EXITING | \
- CPU_BASED_CR3_STORE_EXITING | \
- CPU_BASED_UNCOND_IO_EXITING | \
- CPU_BASED_MOV_DR_EXITING | \
- CPU_BASED_USE_TSC_OFFSETTING | \
- CPU_BASED_MWAIT_EXITING | \
- CPU_BASED_MONITOR_EXITING | \
- CPU_BASED_INVLPG_EXITING | \
- CPU_BASED_RDPMC_EXITING | \
- CPU_BASED_INTR_WINDOW_EXITING | \
- CPU_BASED_CR8_LOAD_EXITING | \
- CPU_BASED_CR8_STORE_EXITING | \
- CPU_BASED_RDTSC_EXITING | \
- CPU_BASED_TPR_SHADOW | \
- CPU_BASED_USE_IO_BITMAPS | \
- CPU_BASED_MONITOR_TRAP_FLAG | \
- CPU_BASED_USE_MSR_BITMAPS | \
- CPU_BASED_NMI_WINDOW_EXITING | \
- CPU_BASED_PAUSE_EXITING | \
- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
-
-#define EVMCS1_SUPPORTED_2NDEXEC \
- (SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
- SECONDARY_EXEC_WBINVD_EXITING | \
- SECONDARY_EXEC_ENABLE_VPID | \
- SECONDARY_EXEC_ENABLE_EPT | \
- SECONDARY_EXEC_UNRESTRICTED_GUEST | \
- SECONDARY_EXEC_DESC | \
- SECONDARY_EXEC_ENABLE_RDTSCP | \
- SECONDARY_EXEC_ENABLE_INVPCID | \
- SECONDARY_EXEC_XSAVES | \
- SECONDARY_EXEC_RDSEED_EXITING | \
- SECONDARY_EXEC_RDRAND_EXITING | \
- SECONDARY_EXEC_TSC_SCALING | \
- SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
- SECONDARY_EXEC_PT_USE_GPA | \
- SECONDARY_EXEC_PT_CONCEAL_VMX | \
- SECONDARY_EXEC_BUS_LOCK_DETECTION | \
- SECONDARY_EXEC_NOTIFY_VM_EXITING | \
- SECONDARY_EXEC_ENCLS_EXITING)
-
-#define EVMCS1_SUPPORTED_3RDEXEC (0ULL)
-
-#define EVMCS1_SUPPORTED_VMEXIT_CTRL \
- (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | \
- VM_EXIT_SAVE_DEBUG_CONTROLS | \
- VM_EXIT_ACK_INTR_ON_EXIT | \
- VM_EXIT_HOST_ADDR_SPACE_SIZE | \
- VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
- VM_EXIT_SAVE_IA32_PAT | \
- VM_EXIT_LOAD_IA32_PAT | \
- VM_EXIT_SAVE_IA32_EFER | \
- VM_EXIT_LOAD_IA32_EFER | \
- VM_EXIT_CLEAR_BNDCFGS | \
- VM_EXIT_PT_CONCEAL_PIP | \
- VM_EXIT_CLEAR_IA32_RTIT_CTL)
-
-#define EVMCS1_SUPPORTED_VMENTRY_CTRL \
- (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | \
- VM_ENTRY_LOAD_DEBUG_CONTROLS | \
- VM_ENTRY_IA32E_MODE | \
- VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
- VM_ENTRY_LOAD_IA32_PAT | \
- VM_ENTRY_LOAD_IA32_EFER | \
- VM_ENTRY_LOAD_BNDCFGS | \
- VM_ENTRY_PT_CONCEAL_PIP | \
- VM_ENTRY_LOAD_IA32_RTIT_CTL)
-
-#define EVMCS1_SUPPORTED_VMFUNC (0)
-
struct evmcs_field {
u16 offset;
u16 clean_field;
@@ -174,6 +67,13 @@ static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs,
#if IS_ENABLED(CONFIG_HYPERV)
+DECLARE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
+
+static __always_inline bool kvm_is_using_evmcs(void)
+{
+ return static_branch_unlikely(&__kvm_is_using_evmcs);
+}
+
static __always_inline int get_evmcs_offset(unsigned long field,
u16 *clean_field)
{
@@ -263,6 +163,7 @@ static inline void evmcs_load(u64 phys_addr)
void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
#else /* !IS_ENABLED(CONFIG_HYPERV) */
+static __always_inline bool kvm_is_using_evmcs(void) { return false; }
static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
static __always_inline void evmcs_write32(unsigned long field, u32 value) {}
static __always_inline void evmcs_write16(unsigned long field, u16 value) {}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 768487611db7..e35cf0bd0df9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -358,6 +358,7 @@ static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
gpa_t addr)
{
+ unsigned long roots = 0;
uint i;
struct kvm_mmu_root_info *cached_root;
@@ -368,8 +369,10 @@ static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
eptp))
- vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
+ roots |= KVM_MMU_ROOT_PREVIOUS(i);
}
+ if (roots)
+ kvm_mmu_invalidate_addr(vcpu, vcpu->arch.mmu, addr, roots);
}
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
@@ -654,6 +657,9 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_PRED_CMD, MSR_TYPE_W);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
+
kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
vmx->nested.force_msr_bitmap_recalc = false;
@@ -4483,7 +4489,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* CR0_GUEST_HOST_MASK is already set in the original vmcs01
* (KVM doesn't change it);
*/
- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmx_set_cr0(vcpu, vmcs12->host_cr0);
/* Same as above - no reason to call set_cr4_guest_host_mask(). */
@@ -4634,7 +4640,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
*/
vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
@@ -5156,7 +5162,7 @@ static int handle_vmxon(struct kvm_vcpu *vcpu)
* does force CR0.PE=1, but only to also force VM86 in order to emulate
* Real Mode, and so there's no need to check CR0.PE manually.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
+ if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_VMXE)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -6755,36 +6761,9 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void)
return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT;
}
-/*
- * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
- * returned for the various VMX controls MSRs when nested VMX is enabled.
- * The same values should also be used to verify that vmcs12 control fields are
- * valid during nested entry from L1 to L2.
- * Each of these control msrs has a low and high 32-bit half: A low bit is on
- * if the corresponding bit in the (32-bit) control field *must* be on, and a
- * bit in the high half is on if the corresponding bit in the control field
- * may be on. See also vmx_control_verify().
- */
-void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
+static void nested_vmx_setup_pinbased_ctls(struct vmcs_config *vmcs_conf,
+ struct nested_vmx_msrs *msrs)
{
- struct nested_vmx_msrs *msrs = &vmcs_conf->nested;
-
- /*
- * Note that as a general rule, the high half of the MSRs (bits in
- * the control fields which may be 1) should be initialized by the
- * intersection of the underlying hardware's MSR (i.e., features which
- * can be supported) and the list of features we want to expose -
- * because they are known to be properly supported in our code.
- * Also, usually, the low half of the MSRs (bits which must be 1) can
- * be set to 0, meaning that L1 may turn off any of these bits. The
- * reason is that if one of these bits is necessary, it will appear
- * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
- * fields of vmcs01 and vmcs02, will turn these bits off - and
- * nested_vmx_l1_wants_exit() will not pass related exits to L1.
- * These rules have exceptions below.
- */
-
- /* pin-based controls */
msrs->pinbased_ctls_low =
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
@@ -6797,8 +6776,11 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
+}
- /* exit controls */
+static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf,
+ struct nested_vmx_msrs *msrs)
+{
msrs->exit_ctls_low =
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
@@ -6817,8 +6799,11 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
/* We support free control of debug control saving. */
msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
+}
- /* entry controls */
+static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf,
+ struct nested_vmx_msrs *msrs)
+{
msrs->entry_ctls_low =
VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
@@ -6834,8 +6819,11 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
/* We support free control of debug control loading. */
msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+}
- /* cpu-based controls */
+static void nested_vmx_setup_cpubased_ctls(struct vmcs_config *vmcs_conf,
+ struct nested_vmx_msrs *msrs)
+{
msrs->procbased_ctls_low =
CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
@@ -6867,12 +6855,12 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
/* We support free control of CR3 access interception. */
msrs->procbased_ctls_low &=
~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
+}
- /*
- * secondary cpu-based controls. Do not include those that
- * depend on CPUID bits, they are added later by
- * vmx_vcpu_after_set_cpuid.
- */
+static void nested_vmx_setup_secondary_ctls(u32 ept_caps,
+ struct vmcs_config *vmcs_conf,
+ struct nested_vmx_msrs *msrs)
+{
msrs->secondary_ctls_low = 0;
msrs->secondary_ctls_high = vmcs_conf->cpu_based_2nd_exec_ctrl;
@@ -6950,8 +6938,11 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
if (enable_sgx)
msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING;
+}
- /* miscellaneous data */
+static void nested_vmx_setup_misc_data(struct vmcs_config *vmcs_conf,
+ struct nested_vmx_msrs *msrs)
+{
msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA;
msrs->misc_low |=
MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
@@ -6959,7 +6950,10 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
VMX_MISC_ACTIVITY_HLT |
VMX_MISC_ACTIVITY_WAIT_SIPI;
msrs->misc_high = 0;
+}
+static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs)
+{
/*
* This MSR reports some information about VMX support. We
* should return information about the VMX we emulate for the
@@ -6974,7 +6968,10 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
if (cpu_has_vmx_basic_inout())
msrs->basic |= VMX_BASIC_INOUT;
+}
+static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs)
+{
/*
* These MSRs specify bits which the guest must keep fixed on
* while L1 is in VMXON mode (in L1's root mode, or running an L2).
@@ -6991,6 +6988,51 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
if (vmx_umip_emulated())
msrs->cr4_fixed1 |= X86_CR4_UMIP;
+}
+
+/*
+ * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
+ * returned for the various VMX controls MSRs when nested VMX is enabled.
+ * The same values should also be used to verify that vmcs12 control fields are
+ * valid during nested entry from L1 to L2.
+ * Each of these control msrs has a low and high 32-bit half: A low bit is on
+ * if the corresponding bit in the (32-bit) control field *must* be on, and a
+ * bit in the high half is on if the corresponding bit in the control field
+ * may be on. See also vmx_control_verify().
+ */
+void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
+{
+ struct nested_vmx_msrs *msrs = &vmcs_conf->nested;
+
+ /*
+ * Note that as a general rule, the high half of the MSRs (bits in
+ * the control fields which may be 1) should be initialized by the
+ * intersection of the underlying hardware's MSR (i.e., features which
+ * can be supported) and the list of features we want to expose -
+ * because they are known to be properly supported in our code.
+ * Also, usually, the low half of the MSRs (bits which must be 1) can
+ * be set to 0, meaning that L1 may turn off any of these bits. The
+ * reason is that if one of these bits is necessary, it will appear
+ * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
+ * fields of vmcs01 and vmcs02, will turn these bits off - and
+ * nested_vmx_l1_wants_exit() will not pass related exits to L1.
+ * These rules have exceptions below.
+ */
+ nested_vmx_setup_pinbased_ctls(vmcs_conf, msrs);
+
+ nested_vmx_setup_exit_ctls(vmcs_conf, msrs);
+
+ nested_vmx_setup_entry_ctls(vmcs_conf, msrs);
+
+ nested_vmx_setup_cpubased_ctls(vmcs_conf, msrs);
+
+ nested_vmx_setup_secondary_ctls(ept_caps, vmcs_conf, msrs);
+
+ nested_vmx_setup_misc_data(vmcs_conf, msrs);
+
+ nested_vmx_setup_basic(msrs);
+
+ nested_vmx_setup_cr_fixed(msrs);
msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index e8a3be0b9df9..741efe2c497b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -57,7 +57,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
- kvm_pmu_request_counter_reprogam(pmc);
+ kvm_pmu_request_counter_reprogram(pmc);
}
}
@@ -76,13 +76,13 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
{
int bit;
- struct kvm_pmc *pmc;
- for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
- pmc = intel_pmc_idx_to_pmc(pmu, bit);
- if (pmc)
- kvm_pmu_request_counter_reprogam(pmc);
- }
+ if (!diff)
+ return;
+
+ for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+ set_bit(bit, pmu->reprogram_pmi);
+ kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu));
}
static bool intel_hw_event_available(struct kvm_pmc *pmc)
@@ -351,45 +351,47 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
msr_info->data = pmu->fixed_ctr_ctrl;
- return 0;
+ break;
case MSR_CORE_PERF_GLOBAL_STATUS:
msr_info->data = pmu->global_status;
- return 0;
+ break;
case MSR_CORE_PERF_GLOBAL_CTRL:
msr_info->data = pmu->global_ctrl;
- return 0;
+ break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
msr_info->data = 0;
- return 0;
+ break;
case MSR_IA32_PEBS_ENABLE:
msr_info->data = pmu->pebs_enable;
- return 0;
+ break;
case MSR_IA32_DS_AREA:
msr_info->data = pmu->ds_area;
- return 0;
+ break;
case MSR_PEBS_DATA_CFG:
msr_info->data = pmu->pebs_data_cfg;
- return 0;
+ break;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
u64 val = pmc_read_counter(pmc);
msr_info->data =
val & pmu->counter_bitmask[KVM_PMC_GP];
- return 0;
+ break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
u64 val = pmc_read_counter(pmc);
msr_info->data =
val & pmu->counter_bitmask[KVM_PMC_FIXED];
- return 0;
+ break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
msr_info->data = pmc->eventsel;
- return 0;
- } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
- return 0;
+ break;
+ } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) {
+ break;
+ }
+ return 1;
}
- return 1;
+ return 0;
}
static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -402,44 +404,43 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
- if (pmu->fixed_ctr_ctrl == data)
- return 0;
- if (!(data & pmu->fixed_ctr_ctrl_mask)) {
+ if (data & pmu->fixed_ctr_ctrl_mask)
+ return 1;
+
+ if (pmu->fixed_ctr_ctrl != data)
reprogram_fixed_counters(pmu, data);
- return 0;
- }
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
- if (msr_info->host_initiated) {
- pmu->global_status = data;
- return 0;
- }
- break; /* RO MSR */
+ if (!msr_info->host_initiated)
+ return 1; /* RO MSR */
+
+ pmu->global_status = data;
+ break;
case MSR_CORE_PERF_GLOBAL_CTRL:
- if (pmu->global_ctrl == data)
- return 0;
- if (kvm_valid_perf_global_ctrl(pmu, data)) {
+ if (!kvm_valid_perf_global_ctrl(pmu, data))
+ return 1;
+
+ if (pmu->global_ctrl != data) {
diff = pmu->global_ctrl ^ data;
pmu->global_ctrl = data;
reprogram_counters(pmu, diff);
- return 0;
}
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- if (!(data & pmu->global_ovf_ctrl_mask)) {
- if (!msr_info->host_initiated)
- pmu->global_status &= ~data;
- return 0;
- }
+ if (data & pmu->global_ovf_ctrl_mask)
+ return 1;
+
+ if (!msr_info->host_initiated)
+ pmu->global_status &= ~data;
break;
case MSR_IA32_PEBS_ENABLE:
- if (pmu->pebs_enable == data)
- return 0;
- if (!(data & pmu->pebs_enable_mask)) {
+ if (data & pmu->pebs_enable_mask)
+ return 1;
+
+ if (pmu->pebs_enable != data) {
diff = pmu->pebs_enable ^ data;
pmu->pebs_enable = data;
reprogram_counters(pmu, diff);
- return 0;
}
break;
case MSR_IA32_DS_AREA:
@@ -447,15 +448,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
if (is_noncanonical_address(data, vcpu))
return 1;
+
pmu->ds_area = data;
- return 0;
+ break;
case MSR_PEBS_DATA_CFG:
- if (pmu->pebs_data_cfg == data)
- return 0;
- if (!(data & pmu->pebs_data_cfg_mask)) {
- pmu->pebs_data_cfg = data;
- return 0;
- }
+ if (data & pmu->pebs_data_cfg_mask)
+ return 1;
+
+ pmu->pebs_data_cfg = data;
break;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
@@ -463,33 +463,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
(data & ~pmu->counter_bitmask[KVM_PMC_GP]))
return 1;
+
if (!msr_info->host_initiated &&
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
pmc_update_sample_period(pmc);
- return 0;
+ break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc->counter += data - pmc_read_counter(pmc);
pmc_update_sample_period(pmc);
- return 0;
+ break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
- if (data == pmc->eventsel)
- return 0;
reserved_bits = pmu->reserved_bits;
if ((pmc->idx == 2) &&
(pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
- if (!(data & reserved_bits)) {
+ if (data & reserved_bits)
+ return 1;
+
+ if (data != pmc->eventsel) {
pmc->eventsel = data;
- kvm_pmu_request_counter_reprogam(pmc);
- return 0;
+ kvm_pmu_request_counter_reprogram(pmc);
}
- } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
- return 0;
+ break;
+ } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
+ break;
+ }
+ /* Not a known PMU MSR. */
+ return 1;
}
- return 1;
+ return 0;
}
static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
@@ -531,6 +536,16 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
+ memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
+
+ /*
+ * Setting passthrough of LBR MSRs is done only in the VM-Entry loop,
+ * and PMU refresh is disallowed after the vCPU has run, i.e. this code
+ * should never be reached while KVM is passing through MSRs.
+ */
+ if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm))
+ return;
+
entry = kvm_find_cpuid_entry(vcpu, 0xa);
if (!entry || !vcpu->kvm->arch.enable_pmu)
return;
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index aa53c98034bf..0574030b071f 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -29,14 +29,14 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
*gva = offset;
- if (!is_long_mode(vcpu)) {
+ if (!is_64_bit_mode(vcpu)) {
vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
*gva += s.base;
}
if (!IS_ALIGNED(*gva, alignment)) {
fault = true;
- } else if (likely(is_long_mode(vcpu))) {
+ } else if (likely(is_64_bit_mode(vcpu))) {
fault = is_noncanonical_address(*gva, vcpu);
} else {
*gva &= 0xffffffff;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d2d6e1b6c788..44fb619803b8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -164,6 +164,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_IA32_SPEC_CTRL,
MSR_IA32_PRED_CMD,
+ MSR_IA32_FLUSH_CMD,
MSR_IA32_TSC,
#ifdef CONFIG_X86_64
MSR_FS_BASE,
@@ -579,7 +580,7 @@ static __init void hv_init_evmcs(void)
if (enlightened_vmcs) {
pr_info("Using Hyper-V Enlightened VMCS\n");
- static_branch_enable(&enable_evmcs);
+ static_branch_enable(&__kvm_is_using_evmcs);
}
if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
@@ -595,7 +596,7 @@ static void hv_reset_evmcs(void)
{
struct hv_vp_assist_page *vp_ap;
- if (!static_branch_unlikely(&enable_evmcs))
+ if (!kvm_is_using_evmcs())
return;
/*
@@ -1945,7 +1946,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
{
switch (msr->index) {
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested)
return 1;
return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
@@ -2030,7 +2031,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
break;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested_vmx_allowed(vcpu))
return 1;
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
@@ -2285,33 +2286,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
return 1;
goto find_uret_msr;
- case MSR_IA32_PRED_CMD:
- if (!msr_info->host_initiated &&
- !guest_has_pred_cmd_msr(vcpu))
- return 1;
-
- if (data & ~PRED_CMD_IBPB)
- return 1;
- if (!boot_cpu_has(X86_FEATURE_IBPB))
- return 1;
- if (!data)
- break;
-
- wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
-
- /*
- * For non-nested:
- * When it's written (to non-zero) for the first time, pass
- * it through.
- *
- * For nested:
- * The handling of the MSR bitmap for L2 guests is done in
- * nested_vmx_prepare_msr_bitmap. We should not touch the
- * vmcs02.msr_bitmap here since it gets completely overwritten
- * in the merging.
- */
- vmx_disable_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W);
- break;
case MSR_IA32_CR_PAT:
if (!kvm_pat_valid(data))
return 1;
@@ -2366,7 +2340,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->msr_ia32_sgxlepubkeyhash
[msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
break;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!msr_info->host_initiated)
return 1; /* they are read-only */
if (!nested_vmx_allowed(vcpu))
@@ -2816,8 +2790,7 @@ static int vmx_hardware_enable(void)
* This can happen if we hot-added a CPU but failed to allocate
* VP assist page for it.
*/
- if (static_branch_unlikely(&enable_evmcs) &&
- !hv_get_vp_assist_page(cpu))
+ if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu))
return -EFAULT;
intel_pt_handle_vmx(1);
@@ -2869,7 +2842,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
memset(vmcs, 0, vmcs_config.size);
/* KVM supports Enlightened VMCS v1 only */
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
else
vmcs->hdr.revision_id = vmcs_config.revision_id;
@@ -2964,7 +2937,7 @@ static __init int alloc_kvm_area(void)
* still be marked with revision_id reported by
* physical CPU.
*/
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
vmcs->hdr.revision_id = vmcs_config.revision_id;
per_cpu(vmxarea, cpu) = vmcs;
@@ -3931,7 +3904,7 @@ static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
* 'Enlightened MSR Bitmap' feature L0 needs to know that MSR
* bitmap has changed.
*/
- if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs)) {
+ if (kvm_is_using_evmcs()) {
struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
if (evmcs->hv_enlightenments_control.msr_bitmap)
@@ -4773,7 +4746,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
/* 22.2.1, 20.8.1 */
vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
- vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
set_cr4_guest_host_mask(vmx);
@@ -5163,7 +5136,7 @@ bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
return true;
- return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
+ return vmx_get_cpl(vcpu) == 3 && kvm_is_cr0_bit_set(vcpu, X86_CR0_AM) &&
(kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
}
@@ -5500,7 +5473,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
break;
case 3: /* lmsw */
val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
- trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
+ trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
kvm_lmsw(vcpu, val);
return kvm_skip_emulated_instruction(vcpu);
@@ -6957,7 +6930,7 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
* real mode.
*/
return enable_unrestricted_guest || emulate_invalid_guest_state;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
return nested;
case MSR_AMD64_VIRT_SPEC_CTRL:
case MSR_AMD64_TSC_RATIO:
@@ -7310,7 +7283,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
/* All fields are clean at this point */
- if (static_branch_unlikely(&enable_evmcs)) {
+ if (kvm_is_using_evmcs()) {
current_evmcs->hv_clean_fields |=
HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
@@ -7440,7 +7413,7 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
* feature only for vmcs01, KVM currently isn't equipped to realize any
* performance benefits from enabling it for vmcs02.
*/
- if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) &&
+ if (kvm_is_using_evmcs() &&
(ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
@@ -7558,7 +7531,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
- if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
+ if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
cache = MTRR_TYPE_WRBACK;
else
@@ -7744,6 +7717,13 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
!guest_cpuid_has(vcpu, X86_FEATURE_XFD));
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+ !guest_has_pred_cmd_msr(vcpu));
+
+ if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+ !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
set_cr4_guest_host_mask(vmx);
@@ -7776,9 +7756,11 @@ static u64 vmx_get_perf_capabilities(void)
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
- x86_perf_get_lbr(&lbr);
- if (lbr.nr)
- perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
+ x86_perf_get_lbr(&lbr);
+ if (lbr.nr)
+ perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ }
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
@@ -7918,6 +7900,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
/* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
break;
+ case x86_intercept_pause:
+ /*
+ * PAUSE is a single-byte NOP with a REPE prefix, i.e. collides
+ * with vanilla NOPs in the emulator. Apply the interception
+ * check only to actual PAUSE instructions. Don't check
+ * PAUSE-loop-exiting, software can't expect a given PAUSE to
+ * exit, i.e. KVM is within its rights to allow L2 to execute
+ * the PAUSE.
+ */
+ if ((info->rep_prefix != REPE_PREFIX) ||
+ !nested_cpu_has2(vmcs12, CPU_BASED_PAUSE_EXITING))
+ return X86EMUL_CONTINUE;
+
+ break;
+
/* TODO: check more intercepts... */
default:
break;
@@ -8415,9 +8412,8 @@ static __init int hardware_setup(void)
#if IS_ENABLED(CONFIG_HYPERV)
if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
&& enable_ept) {
- vmx_x86_ops.tlb_remote_flush = hv_remote_flush_tlb;
- vmx_x86_ops.tlb_remote_flush_with_range =
- hv_remote_flush_tlb_with_range;
+ vmx_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
+ vmx_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
}
#endif
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 2acdc54bc34b..9e66531861cf 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -369,7 +369,7 @@ struct vcpu_vmx {
struct lbr_desc lbr_desc;
/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 15
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS 16
struct {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
@@ -640,6 +640,24 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
(1 << VCPU_EXREG_EXIT_INFO_1) | \
(1 << VCPU_EXREG_EXIT_INFO_2))
+static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
+{
+ unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+
+ /*
+ * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
+ * in order to construct shadow PTEs with the correct protections.
+ * Note! CR0.WP technically can be passed through to the guest if
+ * paging is disabled, but checking CR0.PG would generate a cyclical
+ * dependency of sorts due to forcing the caller to ensure CR0 holds
+ * the correct value prior to determining which CR0 bits can be owned
+ * by L1. Keep it simple and limit the optimization to EPT.
+ */
+ if (!enable_ept)
+ bits &= ~X86_CR0_WP;
+ return bits;
+}
+
static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
return container_of(kvm, struct kvm_vmx, kvm);
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index db95bde52998..ce47dc265f89 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -147,7 +147,7 @@ do_exception:
static __always_inline u16 vmcs_read16(unsigned long field)
{
vmcs_check16(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_read16(field);
return __vmcs_readl(field);
}
@@ -155,7 +155,7 @@ static __always_inline u16 vmcs_read16(unsigned long field)
static __always_inline u32 vmcs_read32(unsigned long field)
{
vmcs_check32(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_read32(field);
return __vmcs_readl(field);
}
@@ -163,7 +163,7 @@ static __always_inline u32 vmcs_read32(unsigned long field)
static __always_inline u64 vmcs_read64(unsigned long field)
{
vmcs_check64(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_read64(field);
#ifdef CONFIG_X86_64
return __vmcs_readl(field);
@@ -175,7 +175,7 @@ static __always_inline u64 vmcs_read64(unsigned long field)
static __always_inline unsigned long vmcs_readl(unsigned long field)
{
vmcs_checkl(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_read64(field);
return __vmcs_readl(field);
}
@@ -222,7 +222,7 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
static __always_inline void vmcs_write16(unsigned long field, u16 value)
{
vmcs_check16(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write16(field, value);
__vmcs_writel(field, value);
@@ -231,7 +231,7 @@ static __always_inline void vmcs_write16(unsigned long field, u16 value)
static __always_inline void vmcs_write32(unsigned long field, u32 value)
{
vmcs_check32(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write32(field, value);
__vmcs_writel(field, value);
@@ -240,7 +240,7 @@ static __always_inline void vmcs_write32(unsigned long field, u32 value)
static __always_inline void vmcs_write64(unsigned long field, u64 value)
{
vmcs_check64(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write64(field, value);
__vmcs_writel(field, value);
@@ -252,7 +252,7 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)
static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
{
vmcs_checkl(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write64(field, value);
__vmcs_writel(field, value);
@@ -262,7 +262,7 @@ static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_clear_bits does not support 64-bit fields");
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write32(field, evmcs_read32(field) & ~mask);
__vmcs_writel(field, __vmcs_readl(field) & ~mask);
@@ -272,7 +272,7 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_set_bits does not support 64-bit fields");
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write32(field, evmcs_read32(field) | mask);
__vmcs_writel(field, __vmcs_readl(field) | mask);
@@ -289,7 +289,7 @@ static inline void vmcs_load(struct vmcs *vmcs)
{
u64 phys_addr = __pa(vmcs);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_load(phys_addr);
vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr);