author     Linus Torvalds <torvalds@linux-foundation.org>  2023-03-16 21:32:12 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2023-03-16 21:32:12 +0300
commit     0ddc84d2dd43e2c2c3f634baa05ea10abd31197e (patch)
tree       8f49aea42e45692574290f7b2e12022e32409521 /arch/x86
parent     9c1bec9c0b08abeac72ed6214b723adc224013bf (diff)
parent     f3e707413dbe3920a972d0c2b51175180e7de36b (diff)
download   linux-0ddc84d2dd43e2c2c3f634baa05ea10abd31197e.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "ARM64:

   - Address a rather annoying bug w.r.t. guest timer offsetting. The
     synchronization of timer offsets between vCPUs was broken, leading
     to inconsistent timer reads within the VM.

  x86:

   - New tests for the slow path of the EVTCHNOP_send Xen hypercall

   - Add missing nVMX consistency checks for CR0 and CR4

   - Fix bug that broke AMD GATag on 512 vCPU machines

  Selftests:

   - Skip hugetlb tests if huge pages are not available

   - Sync KVM exit reasons"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: selftests: Sync KVM exit reasons in selftests
  KVM: selftests: Add macro to generate KVM exit reason strings
  KVM: selftests: Print expected and actual exit reason in KVM exit reason assert
  KVM: selftests: Make vCPU exit reason test assertion common
  KVM: selftests: Add EVTCHNOP_send slow path test to xen_shinfo_test
  KVM: selftests: Use enum for test numbers in xen_shinfo_test
  KVM: selftests: Add helpers to make Xen-style VMCALL/VMMCALL hypercalls
  KVM: selftests: Move the guts of kvm_hypercall() to a separate macro
  KVM: SVM: WARN if GATag generation drops VM or vCPU ID information
  KVM: SVM: Modify AVIC GATag to support max number of 512 vCPUs
  KVM: SVM: Fix a benign off-by-one bug in AVIC physical table mask
  selftests: KVM: skip hugetlb tests if huge pages are not available
  KVM: VMX: Use tabs instead of spaces for indentation
  KVM: VMX: Fix indentation coding style issue
  KVM: nVMX: remove unnecessary #ifdef
  KVM: nVMX: add missing consistency checks for CR0 and CR4
  KVM: arm64: timers: Convert per-vcpu virtual offset to a global value
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/svm.h   12
-rw-r--r--  arch/x86/kvm/svm/avic.c      37
-rw-r--r--  arch/x86/kvm/vmx/nested.c    18
-rw-r--r--  arch/x86/kvm/vmx/vmenter.S    4
-rw-r--r--  arch/x86/kvm/vmx/vmx.c       12
5 files changed, 52 insertions, 31 deletions
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index cb1ee53ad3b1..770dcf75eaa9 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -261,20 +261,22 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
};
-#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(9, 0)
+#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
/*
- * For AVIC, the max index allowed for physical APIC ID
- * table is 0xff (255).
+ * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
+ * 0xff is a broadcast to all CPUs, i.e. can't be targeted individually.
*/
#define AVIC_MAX_PHYSICAL_ID 0XFEULL
/*
- * For x2AVIC, the max index allowed for physical APIC ID
- * table is 0x1ff (511).
+ * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
*/
#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL
+static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
+static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
+
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
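
Note (illustrative, not part of the patch): the mask narrows from GENMASK_ULL(9, 0) to GENMASK_ULL(8, 0) because the physical APIC ID table index only needs nine bits, which is enough for x2AVIC's max index 0x1ff (511), and the new static_asserts enforce that both max IDs fit in the mask. A minimal userspace sketch of the same invariant, with GENMASK_ULL re-derived locally rather than taken from the kernel headers:

    #include <assert.h>
    #include <stdint.h>

    /* Local re-definition for illustration; the kernel's version lives in bits.h. */
    #define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

    #define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0) /* 0x1FF */
    #define AVIC_MAX_PHYSICAL_ID         0xFEULL           /* 254  */
    #define X2AVIC_MAX_PHYSICAL_ID       0x1FFULL          /* 511  */

    int main(void)
    {
            /* Both max IDs must be representable within the index mask. */
            assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
            assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
            return 0;
    }
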
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index ca684979e90d..cfc8ab773025 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -27,19 +27,38 @@
#include "irq.h"
#include "svm.h"
-/* AVIC GATAG is encoded using VM and VCPU IDs */
-#define AVIC_VCPU_ID_BITS 8
-#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
+/*
+ * Encode the arbitrary VM ID and the vCPU's default APIC ID, i.e the vCPU ID,
+ * into the GATag so that KVM can retrieve the correct vCPU from a GALog entry
+ * if an interrupt can't be delivered, e.g. because the vCPU isn't running.
+ *
+ * For the vCPU ID, use however many bits are currently allowed for the max
+ * guest physical APIC ID (limited by the size of the physical ID table), and
+ * use whatever bits remain to assign arbitrary AVIC IDs to VMs. Note, the
+ * size of the GATag is defined by hardware (32 bits), but is an opaque value
+ * as far as hardware is concerned.
+ */
+#define AVIC_VCPU_ID_MASK AVIC_PHYSICAL_MAX_INDEX_MASK
-#define AVIC_VM_ID_BITS 24
-#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS)
-#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1)
+#define AVIC_VM_ID_SHIFT HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK)
+#define AVIC_VM_ID_MASK (GENMASK(31, AVIC_VM_ID_SHIFT) >> AVIC_VM_ID_SHIFT)
-#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
- (y & AVIC_VCPU_ID_MASK))
-#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
+#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VM_ID_SHIFT) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
+#define __AVIC_GATAG(vm_id, vcpu_id) ((((vm_id) & AVIC_VM_ID_MASK) << AVIC_VM_ID_SHIFT) | \
+ ((vcpu_id) & AVIC_VCPU_ID_MASK))
+#define AVIC_GATAG(vm_id, vcpu_id) \
+({ \
+ u32 ga_tag = __AVIC_GATAG(vm_id, vcpu_id); \
+ \
+ WARN_ON_ONCE(AVIC_GATAG_TO_VCPUID(ga_tag) != (vcpu_id)); \
+ WARN_ON_ONCE(AVIC_GATAG_TO_VMID(ga_tag) != (vm_id)); \
+ ga_tag; \
+})
+
+static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_ID_MASK) == -1u);
+
static bool force_avic;
module_param_unsafe(force_avic, bool, 0444);
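
Note (illustrative, not part of the patch): with the new layout, the low nine bits of the GATag (HWEIGHT32 of AVIC_PHYSICAL_MAX_INDEX_MASK) carry the vCPU ID and the remaining 23 bits carry the VM ID, and AVIC_GATAG() warns if either ID fails the encode/decode round trip. A minimal userspace sketch of that round trip, using locally defined stand-ins for the kernel macros:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the kernel macros above (same values, not the real headers). */
    #define VCPU_ID_BITS 9                                 /* HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK) */
    #define VCPU_ID_MASK ((1u << VCPU_ID_BITS) - 1)        /* 0x1FF */
    #define VM_ID_MASK   (0xFFFFFFFFu >> VCPU_ID_BITS)     /* remaining 23 bits */

    static uint32_t gatag_encode(uint32_t vm_id, uint32_t vcpu_id)
    {
            return ((vm_id & VM_ID_MASK) << VCPU_ID_BITS) | (vcpu_id & VCPU_ID_MASK);
    }

    int main(void)
    {
            uint32_t tag = gatag_encode(42, 511);

            /* Round trip: decoding must give back the original IDs. */
            assert((tag & VCPU_ID_MASK) == 511);
            assert(((tag >> VCPU_ID_BITS) & VM_ID_MASK) == 42);

            /* All-ones IDs fill the 32-bit tag exactly, mirroring the static_assert. */
            assert(gatag_encode(VM_ID_MASK, VCPU_ID_MASK) == 0xFFFFFFFFu);
            printf("GATag: 0x%08x\n", (unsigned)tag);
            return 0;
    }
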
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7c4f5ca405c7..1bc2b80273c9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2903,7 +2903,7 @@ static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- bool ia32e;
+ bool ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
@@ -2923,12 +2923,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
vmcs12->host_ia32_perf_global_ctrl)))
return -EINVAL;
-#ifdef CONFIG_X86_64
- ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
-#else
- ia32e = false;
-#endif
-
if (ia32e) {
if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
return -EINVAL;
@@ -3022,7 +3016,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12,
enum vm_entry_failure_code *entry_failure_code)
{
- bool ia32e;
+ bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
*entry_failure_code = ENTRY_FAIL_DEFAULT;
@@ -3048,6 +3042,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
vmcs12->guest_ia32_perf_global_ctrl)))
return -EINVAL;
+ if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
+ return -EINVAL;
+
+ if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
+ CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
+ return -EINVAL;
+
/*
* If the load IA32_EFER VM-entry control is 1, the following checks
* are performed on the field for the IA32_EFER MSR:
@@ -3059,7 +3060,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
*/
if (to_vmx(vcpu)->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
- ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
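
Note (illustrative, not part of the patch): the added guest-state checks encode two architectural rules: CR0.PG=1 requires CR0.PE=1, and an IA-32e mode entry additionally requires CR4.PAE=1 and CR0.PG=1. A minimal sketch of the same predicate in isolation, with the CR bit values written out locally:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Architectural bit positions; names mirror the kernel's X86_CR0_*/X86_CR4_* flags. */
    #define CR0_PE  (1ull << 0)
    #define CR0_PG  (1ull << 31)
    #define CR4_PAE (1ull << 5)

    /* Returns true if guest CR0/CR4 are consistent for the requested entry mode. */
    static bool guest_cr_state_valid(uint64_t cr0, uint64_t cr4, bool ia32e)
    {
            /* Paging without protected mode is never valid. */
            if ((cr0 & (CR0_PG | CR0_PE)) == CR0_PG)
                    return false;

            /* IA-32e mode entries require PAE and paging to be enabled. */
            if (ia32e && (!(cr4 & CR4_PAE) || !(cr0 & CR0_PG)))
                    return false;

            return true;
    }

    int main(void)
    {
            /* 64-bit guest with PAE paging enabled: valid. */
            assert(guest_cr_state_valid(CR0_PE | CR0_PG, CR4_PAE, true));
            /* PG set without PE is rejected, as is IA-32e mode without PAE. */
            assert(!guest_cr_state_valid(CR0_PG, CR4_PAE, true));
            assert(!guest_cr_state_valid(CR0_PE | CR0_PG, 0, true));
            return 0;
    }
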
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index f550540ed54e..631fd7da2bc3 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -262,7 +262,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
* eIBRS has its own protection against poisoned RSB, so it doesn't
* need the RSB filling sequence. But it does need to be enabled, and a
* single call to retire, before the first unbalanced RET.
- */
+ */
FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
X86_FEATURE_RSB_VMEXIT_LITE
@@ -311,7 +311,7 @@ SYM_FUNC_END(vmx_do_nmi_irqoff)
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
* @field: VMCS field encoding that failed
* @fault: %true if the VMREAD faulted, %false if it failed
-
+ *
* Save and restore volatile registers across a call to vmread_error(). Note,
* all parameters are passed on the stack.
*/
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bcac3efcde41..d2d6e1b6c788 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -874,7 +874,7 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
- else {
+ else {
int mask = 0, match = 0;
if (enable_ept && (eb & (1u << PF_VECTOR))) {
@@ -1282,7 +1282,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
}
}
- if (vmx->nested.need_vmcs12_to_shadow_sync)
+ if (vmx->nested.need_vmcs12_to_shadow_sync)
nested_sync_vmcs12_to_shadow(vcpu);
if (vmx->guest_state_loaded)
@@ -5049,10 +5049,10 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (to_vmx(vcpu)->nested.nested_run_pending)
return -EBUSY;
- /*
- * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
- * e.g. if the IRQ arrived asynchronously after checking nested events.
- */
+ /*
+ * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+ * e.g. if the IRQ arrived asynchronously after checking nested events.
+ */
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
return -EBUSY;