From a9466078687fb740298a52a095ee4832738efbea Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Feb 2024 14:16:59 -0800 Subject: KVM: x86: Move nEPT exit_qualification field from kvm_vcpu_arch to x86_exception Move the exit_qualification field that is used to track information about in-flight nEPT violations from "struct kvm_vcpu_arch" to "x86_exception", i.e. associate the information with the actual nEPT violation instead of the vCPU. To handle bits that are pulled from vmcs.EXIT_QUALIFICATION, i.e. that are propagated from the "original" EPT violation VM-Exit, simply grab them from the VMCS on-demand when injecting a nEPT Violation or a PML Full VM-exit. Aside from being ugly, having an exit_qualification field in kvm_vcpu_arch is outright dangerous, e.g. see commit d7f0a00e438d ("KVM: VMX: Report up-to-date exit qualification to userspace"). Opportunstically add a comment to call out that PML Full and EPT Violation VM-Exits use the same bit to report NMI blocking information. Link: https://lore.kernel.org/r/20240209221700.393189-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 16e07a2eee19..de03d9d90f2c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -993,9 +993,6 @@ struct kvm_vcpu_arch { u64 msr_kvm_poll_control; - /* set at EPT violation at this point */ - unsigned long exit_qualification; - /* pv related host specific info */ struct { bool pv_unhalted; -- cgit v1.2.3 From 546d714b0880f20572d21ece5b9544c0e37a7a49 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Apr 2024 08:13:14 -0400 Subject: KVM: introduce new vendor op for KVM_GET_DEVICE_ATTR Allow vendor modules to provide their own attributes on /dev/kvm. To avoid proliferation of vendor ops, implement KVM_HAS_DEVICE_ATTR and KVM_GET_DEVICE_ATTR in terms of the same function. You're not supposed to use KVM_GET_DEVICE_ATTR to do complicated computations, especially on /dev/kvm. Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini Reviewed-by: Isaku Yamahata Message-ID: <20240404121327.3107131-5-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 38 ++++++++++++++++++++++++-------------- 3 files changed, 26 insertions(+), 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 110d7f29ca9a..5187fcf4b610 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -121,6 +121,7 @@ KVM_X86_OP(enter_smm) KVM_X86_OP(leave_smm) KVM_X86_OP(enable_smi_window) #endif +KVM_X86_OP_OPTIONAL(dev_get_attr) KVM_X86_OP_OPTIONAL(mem_enc_ioctl) KVM_X86_OP_OPTIONAL(mem_enc_register_region) KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 16e07a2eee19..04c430eb25cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1778,6 +1778,7 @@ struct kvm_x86_ops { void (*enable_smi_window)(struct kvm_vcpu *vcpu); #endif + int (*dev_get_attr)(u32 group, u64 attr, u64 *val); int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3d2029402513..3934e7682734 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4842,34 +4842,44 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } -static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) +static int __kvm_x86_dev_get_attr(struct kvm_device_attr *attr, u64 *val) { - u64 __user *uaddr = u64_to_user_ptr(attr->addr); - - if (attr->group) + if (attr->group) { + if (kvm_x86_ops.dev_get_attr) + return static_call(kvm_x86_dev_get_attr)(attr->group, attr->attr, val); return -ENXIO; + } switch (attr->attr) { case KVM_X86_XCOMP_GUEST_SUPP: - if (put_user(kvm_caps.supported_xcr0, uaddr)) - return -EFAULT; + *val = kvm_caps.supported_xcr0; return 0; default: return -ENXIO; } } +static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) +{ + u64 __user *uaddr = u64_to_user_ptr(attr->addr); + int r; + u64 val; + + r = __kvm_x86_dev_get_attr(attr, &val); + if (r < 0) + return r; + + if (put_user(val, uaddr)) + return -EFAULT; + + return 0; +} + static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr) { - if (attr->group) - return -ENXIO; + u64 val; - switch (attr->attr) { - case KVM_X86_XCOMP_GUEST_SUPP: - return 0; - default: - return -ENXIO; - } + return __kvm_x86_dev_get_attr(attr, &val); } long kvm_arch_dev_ioctl(struct file *filp, -- cgit v1.2.3 From ac5c48027bacb1b5525120db6d013373e0520b50 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Apr 2024 08:13:15 -0400 Subject: KVM: SEV: publish supported VMSA features Compute the set of features to be stored in the VMSA when KVM is initialized; move it from there into kvm_sev_info when SEV is initialized, and then into the initial VMSA. The new variable can then be used to return the set of supported features to userspace, via the KVM_GET_DEVICE_ATTR ioctl. Signed-off-by: Paolo Bonzini Reviewed-by: Isaku Yamahata Message-ID: <20240404121327.3107131-6-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- .../virt/kvm/x86/amd-memory-encryption.rst | 12 +++++++++++ arch/x86/include/uapi/asm/kvm.h | 9 ++++++-- arch/x86/kvm/svm/sev.c | 24 ++++++++++++++++++++-- arch/x86/kvm/svm/svm.c | 1 + arch/x86/kvm/svm/svm.h | 2 ++ 5 files changed, 44 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index 84335d119ff1..2ea648e4c97a 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -425,6 +425,18 @@ issued by the hypervisor to make the guest ready for execution. Returns: 0 on success, -negative on error +Device attribute API +==================== + +Attributes of the SEV implementation can be retrieved through the +``KVM_HAS_DEVICE_ATTR`` and ``KVM_GET_DEVICE_ATTR`` ioctls on the ``/dev/kvm`` +device node, using group ``KVM_X86_GRP_SEV``. + +Currently only one attribute is implemented: + +* ``KVM_X86_SEV_VMSA_FEATURES``: return the set of all bits that + are accepted in the ``vmsa_features`` of ``KVM_SEV_INIT2``. + Firmware Management =================== diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ef11aa4cab42..b7dc515f4c27 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -457,8 +457,13 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 -/* attributes for system fd (group 0) */ -#define KVM_X86_XCOMP_GUEST_SUPP 0 +/* vendor-independent attributes for system fd (group 0) */ +#define KVM_X86_GRP_SYSTEM 0 +# define KVM_X86_XCOMP_GUEST_SUPP 0 + +/* vendor-specific groups and attributes for system fd */ +#define KVM_X86_GRP_SEV 1 +# define KVM_X86_SEV_VMSA_FEATURES 0 struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 5d41f27a8af5..5055935dfd1d 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -46,6 +46,7 @@ module_param_named(sev_es, sev_es_enabled, bool, 0444); /* enable/disable SEV-ES DebugSwap support */ static bool sev_es_debug_swap_enabled = false; module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444); +static u64 sev_supported_vmsa_features; static u8 sev_enc_bit; static DECLARE_RWSEM(sev_deactivate_lock); @@ -603,8 +604,8 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xss = svm->vcpu.arch.ia32_xss; save->dr6 = svm->vcpu.arch.dr6; - if (sev_es_debug_swap_enabled) { - save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP; + if (sev_supported_vmsa_features) { + save->sev_features = sev_supported_vmsa_features; pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. " "This will not work starting with Linux 6.10\n"); } @@ -1843,6 +1844,21 @@ out_fput: return ret; } +int sev_dev_get_attr(u32 group, u64 attr, u64 *val) +{ + if (group != KVM_X86_GRP_SEV) + return -ENXIO; + + switch (attr) { + case KVM_X86_SEV_VMSA_FEATURES: + *val = sev_supported_vmsa_features; + return 0; + + default: + return -ENXIO; + } +} + int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) { struct kvm_sev_cmd sev_cmd; @@ -2275,6 +2291,10 @@ out: if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) || !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP)) sev_es_debug_swap_enabled = false; + + sev_supported_vmsa_features = 0; + if (sev_es_debug_swap_enabled) + sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP; } void sev_hardware_unsetup(void) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e7f47a1f3eb1..450535d6757f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5026,6 +5026,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { #endif #ifdef CONFIG_KVM_AMD_SEV + .dev_get_attr = sev_dev_get_attr, .mem_enc_ioctl = sev_mem_enc_ioctl, .mem_enc_register_region = sev_mem_enc_register_region, .mem_enc_unregister_region = sev_mem_enc_unregister_region, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index ec8ca7d92cf1..1c6601a9cbbf 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -692,6 +692,7 @@ void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); +int sev_dev_get_attr(u32 group, u64 attr, u64 *val); extern unsigned int max_sev_asid; #else static inline struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) { @@ -704,6 +705,7 @@ static inline void __init sev_set_cpu_caps(void) {} static inline void __init sev_hardware_setup(void) {} static inline void sev_hardware_unsetup(void) {} static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; } +static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; } #define max_sev_asid 0 #endif -- cgit v1.2.3 From 517987e3fb1909415b9231ef167e2a79208aaa73 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Apr 2024 08:13:17 -0400 Subject: KVM: x86: add fields to struct kvm_arch for CoCo features Some VM types have characteristics in common; in fact, the only use of VM types right now is kvm_arch_has_private_mem and it assumes that _all_ nonzero VM types have private memory. We will soon introduce a VM type for SEV and SEV-ES VMs, and at that point we will have two special characteristics of confidential VMs that depend on the VM type: not just if memory is private, but also whether guest state is protected. For the latter we have kvm->arch.guest_state_protected, which is only set on a fully initialized VM. For VM types with protected guest state, we can actually fix a problem in the SEV-ES implementation, where ioctls to set registers do not cause an error even if the VM has been initialized and the guest state encrypted. Make sure that when using VM types that will become an error. Signed-off-by: Paolo Bonzini Message-Id: <20240209183743.22030-7-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini Reviewed-by: Isaku Yamahata Message-ID: <20240404121327.3107131-8-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 7 +++- arch/x86/kvm/x86.c | 93 ++++++++++++++++++++++++++++++++--------- 2 files changed, 79 insertions(+), 21 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 04c430eb25cf..3d56b5bb10e9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1279,12 +1279,14 @@ enum kvm_apicv_inhibit { }; struct kvm_arch { - unsigned long vm_type; unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; u8 mmu_valid_gen; + u8 vm_type; + bool has_private_mem; + bool has_protected_state; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; @@ -2153,8 +2155,9 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); + #ifdef CONFIG_KVM_PRIVATE_MEM -#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM) +#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) #else #define kvm_arch_has_private_mem(kvm) false #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3934e7682734..d4a8d896798f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5555,11 +5555,15 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return 0; } -static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, - struct kvm_debugregs *dbgregs) +static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, + struct kvm_debugregs *dbgregs) { unsigned int i; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + memset(dbgregs, 0, sizeof(*dbgregs)); BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db)); @@ -5568,6 +5572,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, dbgregs->dr6 = vcpu->arch.dr6; dbgregs->dr7 = vcpu->arch.dr7; + return 0; } static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, @@ -5575,6 +5580,10 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, { unsigned int i; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + if (dbgregs->flags) return -EINVAL; @@ -5595,8 +5604,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, } -static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, - u8 *state, unsigned int size) +static int kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, + u8 *state, unsigned int size) { /* * Only copy state for features that are enabled for the guest. The @@ -5614,24 +5623,25 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, XFEATURE_MASK_FPSSE; if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, supported_xcr0, vcpu->arch.pkru); + return 0; } -static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, - struct kvm_xsave *guest_xsave) +static int kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, + struct kvm_xsave *guest_xsave) { - kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, - sizeof(guest_xsave->region)); + return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return 0; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu, guest_xsave->region, @@ -5639,18 +5649,23 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, &vcpu->arch.pkru); } -static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, - struct kvm_xcrs *guest_xcrs) +static int kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, + struct kvm_xcrs *guest_xcrs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { guest_xcrs->nr_xcrs = 0; - return; + return 0; } guest_xcrs->nr_xcrs = 1; guest_xcrs->flags = 0; guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; + return 0; } static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, @@ -5658,6 +5673,10 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, { int i, r = 0; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -EINVAL; @@ -6040,7 +6059,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_DEBUGREGS: { struct kvm_debugregs dbgregs; - kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); + r = kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, &dbgregs, @@ -6070,7 +6091,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!u.xsave) break; - kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); + r = kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) @@ -6099,7 +6122,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!u.xsave) break; - kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size); + r = kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, u.xsave, size)) @@ -6115,7 +6140,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!u.xcrs) break; - kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); + r = kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, u.xcrs, @@ -6259,6 +6286,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_GET_SREGS2: { + r = -EINVAL; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + goto out; + u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL); r = -ENOMEM; if (!u.sregs2) @@ -6271,6 +6303,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_SET_SREGS2: { + r = -EINVAL; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + goto out; + u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2)); if (IS_ERR(u.sregs2)) { r = PTR_ERR(u.sregs2); @@ -11478,6 +11515,10 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); __get_regs(vcpu, regs); vcpu_put(vcpu); @@ -11519,6 +11560,10 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); __set_regs(vcpu, regs); vcpu_put(vcpu); @@ -11591,6 +11636,10 @@ static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2) int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); __get_sregs(vcpu, sregs); vcpu_put(vcpu); @@ -11858,6 +11907,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, { int ret; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); ret = __set_sregs(vcpu, sregs); vcpu_put(vcpu); @@ -11975,7 +12028,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) struct fxregs_state *fxsave; if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return 0; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; vcpu_load(vcpu); @@ -11998,7 +12051,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) struct fxregs_state *fxsave; if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return 0; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; vcpu_load(vcpu); @@ -12524,6 +12577,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return -EINVAL; kvm->arch.vm_type = type; + kvm->arch.has_private_mem = + (type == KVM_X86_SW_PROTECTED_VM); ret = kvm_page_track_init(kvm); if (ret) -- cgit v1.2.3 From 26c44aa9e076ed83d7793c55ac5082086a89c0cd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Apr 2024 08:13:20 -0400 Subject: KVM: SEV: define VM types for SEV and SEV-ES Signed-off-by: Paolo Bonzini Message-ID: <20240404121327.3107131-11-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 ++ arch/x86/include/uapi/asm/kvm.h | 2 ++ arch/x86/kvm/svm/sev.c | 16 +++++++++++++--- arch/x86/kvm/svm/svm.c | 11 +++++++++++ arch/x86/kvm/svm/svm.h | 1 + 5 files changed, 29 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 0b5a33ee71ee..f0b76ff5030d 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8819,6 +8819,8 @@ means the VM type with value @n is supported. Possible values of @n are:: #define KVM_X86_DEFAULT_VM 0 #define KVM_X86_SW_PROTECTED_VM 1 + #define KVM_X86_SEV_VM 2 + #define KVM_X86_SEV_ES_VM 3 Note, KVM_X86_SW_PROTECTED_VM is currently only for development and testing. Do not use KVM_X86_SW_PROTECTED_VM for "real" VMs, and especially not in diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index b7dc515f4c27..ab609adacb11 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -861,5 +861,7 @@ struct kvm_hyperv_eventfd { #define KVM_X86_DEFAULT_VM 0 #define KVM_X86_SW_PROTECTED_VM 1 +#define KVM_X86_SEV_VM 2 +#define KVM_X86_SEV_ES_VM 3 #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index f98448dc8be8..1512bacd74a9 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -251,6 +251,9 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) if (kvm->created_vcpus) return -EINVAL; + if (kvm->arch.vm_type != KVM_X86_DEFAULT_VM) + return -EINVAL; + if (unlikely(sev->active)) return -EINVAL; @@ -272,6 +275,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) INIT_LIST_HEAD(&sev->regions_list); INIT_LIST_HEAD(&sev->mirror_vms); + sev->need_init = false; kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV); @@ -1808,7 +1812,8 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) if (ret) goto out_fput; - if (sev_guest(kvm) || !sev_guest(source_kvm)) { + if (kvm->arch.vm_type != source_kvm->arch.vm_type || + sev_guest(kvm) || !sev_guest(source_kvm)) { ret = -EINVAL; goto out_unlock; } @@ -2132,6 +2137,7 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd) mirror_sev->asid = source_sev->asid; mirror_sev->fd = source_sev->fd; mirror_sev->es_active = source_sev->es_active; + mirror_sev->need_init = false; mirror_sev->handle = source_sev->handle; INIT_LIST_HEAD(&mirror_sev->regions_list); INIT_LIST_HEAD(&mirror_sev->mirror_vms); @@ -2197,10 +2203,14 @@ void sev_vm_destroy(struct kvm *kvm) void __init sev_set_cpu_caps(void) { - if (sev_enabled) + if (sev_enabled) { kvm_cpu_cap_set(X86_FEATURE_SEV); - if (sev_es_enabled) + kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_VM); + } + if (sev_es_enabled) { kvm_cpu_cap_set(X86_FEATURE_SEV_ES); + kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_ES_VM); + } } void __init sev_hardware_setup(void) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c22e87ebf0de..b0038ece55cb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4086,6 +4086,9 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) { + if (to_kvm_sev_info(vcpu->kvm)->need_init) + return -EINVAL; + return 1; } @@ -4891,6 +4894,14 @@ static void svm_vm_destroy(struct kvm *kvm) static int svm_vm_init(struct kvm *kvm) { + int type = kvm->arch.vm_type; + + if (type != KVM_X86_DEFAULT_VM && + type != KVM_X86_SW_PROTECTED_VM) { + kvm->arch.has_protected_state = (type == KVM_X86_SEV_ES_VM); + to_kvm_sev_info(kvm)->need_init = true; + } + if (!pause_filter_count || !pause_filter_thresh) kvm->arch.pause_in_guest = true; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 5d5b8ed43db8..323901782547 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -79,6 +79,7 @@ enum { struct kvm_sev_info { bool active; /* SEV enabled guest */ bool es_active; /* SEV-ES enabled guest */ + bool need_init; /* waiting for SEV_INIT2 */ unsigned int asid; /* ASID used for this guest */ unsigned int handle; /* SEV firmware handle */ int fd; /* SEV device fd */ -- cgit v1.2.3 From eb4441864e03dab04754f0b5c7ebbc98ceee099c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Apr 2024 08:13:21 -0400 Subject: KVM: SEV: sync FPU and AVX state at LAUNCH_UPDATE_VMSA time SEV-ES allows passing custom contents for x87, SSE and AVX state into the VMSA. Allow userspace to do that with the usual KVM_SET_XSAVE API and only mark FPU contents as confidential after it has been copied and encrypted into the VMSA. Since the XSAVE state for AVX is the first, it does not need the compacted-state handling of get_xsave_addr(). However, there are other parts of XSAVE state in the VMSA that currently are not handled, and the validation logic of get_xsave_addr() is pointless to duplicate in KVM, so move get_xsave_addr() to public FPU API; it is really just a facility to operate on XSAVE state and does not expose any internal details of arch/x86/kernel/fpu. Acked-by: Dave Hansen Signed-off-by: Paolo Bonzini Message-ID: <20240404121327.3107131-12-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/fpu/api.h | 3 +++ arch/x86/kernel/fpu/xstate.c | 1 + arch/x86/kernel/fpu/xstate.h | 2 -- arch/x86/kvm/svm/sev.c | 50 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 8 ------- 5 files changed, 54 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index a2be3aefff9f..f86ad3335529 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -143,6 +143,9 @@ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfe extern u64 xstate_get_guest_group_perm(void); +extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); + + /* KVM specific functions */ extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 33a214b1a4ce..6d32e415b01e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -991,6 +991,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) return __raw_xsave_addr(xsave, xfeature_nr); } +EXPORT_SYMBOL_GPL(get_xsave_addr); #ifdef CONFIG_ARCH_HAS_PKEYS diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 19ca623ffa2a..05df04f39628 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -54,8 +54,6 @@ extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void extern void fpu__init_cpu_xstate(void); extern void fpu__init_system_xstate(unsigned int legacy_size); -extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); - static inline u64 xfeatures_mask_supervisor(void) { return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 1512bacd74a9..3517d6736c93 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "mmu.h" @@ -584,6 +585,10 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; struct sev_es_save_area *save = svm->sev_es.vmsa; + struct xregs_state *xsave; + const u8 *s; + u8 *d; + int i; /* Check some debug related fields before encrypting the VMSA */ if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1)) @@ -626,6 +631,44 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->sev_features = sev->vmsa_features; + /* + * Skip FPU and AVX setup with KVM_SEV_ES_INIT to avoid + * breaking older measurements. + */ + if (vcpu->kvm->arch.vm_type != KVM_X86_DEFAULT_VM) { + xsave = &vcpu->arch.guest_fpu.fpstate->regs.xsave; + save->x87_dp = xsave->i387.rdp; + save->mxcsr = xsave->i387.mxcsr; + save->x87_ftw = xsave->i387.twd; + save->x87_fsw = xsave->i387.swd; + save->x87_fcw = xsave->i387.cwd; + save->x87_fop = xsave->i387.fop; + save->x87_ds = 0; + save->x87_cs = 0; + save->x87_rip = xsave->i387.rip; + + for (i = 0; i < 8; i++) { + /* + * The format of the x87 save area is undocumented and + * definitely not what you would expect. It consists of + * an 8*8 bytes area with bytes 0-7, and an 8*2 bytes + * area with bytes 8-9 of each register. + */ + d = save->fpreg_x87 + i * 8; + s = ((u8 *)xsave->i387.st_space) + i * 16; + memcpy(d, s, 8); + save->fpreg_x87[64 + i * 2] = s[8]; + save->fpreg_x87[64 + i * 2 + 1] = s[9]; + } + memcpy(save->fpreg_xmm, xsave->i387.xmm_space, 256); + + s = get_xsave_addr(xsave, XFEATURE_YMM); + if (s) + memcpy(save->fpreg_ymm, s, 256); + else + memset(save->fpreg_ymm, 0, 256); + } + pr_debug("Virtual Machine Save Area (VMSA):\n"); print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); @@ -664,6 +707,13 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, if (ret) return ret; + /* + * SEV-ES guests maintain an encrypted version of their FPU + * state which is restored and saved on VMRUN and VMEXIT. + * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't + * do xsave/xrstor on it. + */ + fpstate_set_confidential(&vcpu->arch.guest_fpu); vcpu->arch.guest_state_protected = true; return 0; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b0038ece55cb..0f3b59da0d4a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1433,14 +1433,6 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) vmsa_page = snp_safe_alloc_page(vcpu); if (!vmsa_page) goto error_free_vmcb_page; - - /* - * SEV-ES guests maintain an encrypted version of their FPU - * state which is restored and saved on VMRUN and VMEXIT. - * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't - * do xsave/xrstor on it. - */ - fpstate_set_confidential(&vcpu->arch.guest_fpu); } err = avic_init_vcpu(svm); -- cgit v1.2.3 From 4f5defae708992dd2658a45c8d09e57517432e5a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Apr 2024 08:13:22 -0400 Subject: KVM: SEV: introduce KVM_SEV_INIT2 operation The idea that no parameter would ever be necessary when enabling SEV or SEV-ES for a VM was decidedly optimistic. In fact, in some sense it's already a parameter whether SEV or SEV-ES is desired. Another possible source of variability is the desired set of VMSA features, as that affects the measurement of the VM's initial state and cannot be changed arbitrarily by the hypervisor. Create a new sub-operation for KVM_MEMORY_ENCRYPT_OP that can take a struct, and put the new op to work by including the VMSA features as a field of the struct. The existing KVM_SEV_INIT and KVM_SEV_ES_INIT use the full set of supported VMSA features for backwards compatibility. The struct also includes the usual bells and whistles for future extensibility: a flags field that must be zero for now, and some padding at the end. Signed-off-by: Paolo Bonzini Message-ID: <20240404121327.3107131-13-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- .../virt/kvm/x86/amd-memory-encryption.rst | 40 ++++++++++++++-- arch/x86/include/uapi/asm/kvm.h | 9 ++++ arch/x86/kvm/svm/sev.c | 53 +++++++++++++++++++--- 3 files changed, 92 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index 2ea648e4c97a..3381556d596d 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -76,15 +76,49 @@ are defined in ````. KVM implements the following commands to support common lifecycle events of SEV guests, such as launching, running, snapshotting, migrating and decommissioning. -1. KVM_SEV_INIT ---------------- +1. KVM_SEV_INIT2 +---------------- -The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform +The KVM_SEV_INIT2 command is used by the hypervisor to initialize the SEV platform context. In a typical workflow, this command should be the first command issued. +For this command to be accepted, either KVM_X86_SEV_VM or KVM_X86_SEV_ES_VM +must have been passed to the KVM_CREATE_VM ioctl. A virtual machine created +with those machine types in turn cannot be run until KVM_SEV_INIT2 is invoked. + +Parameters: struct kvm_sev_init (in) Returns: 0 on success, -negative on error +:: + + struct kvm_sev_init { + __u64 vmsa_features; /* initial value of features field in VMSA */ + __u32 flags; /* must be 0 */ + __u32 pad[9]; + }; + +It is an error if the hypervisor does not support any of the bits that +are set in ``flags`` or ``vmsa_features``. ``vmsa_features`` must be +0 for SEV virtual machines, as they do not have a VMSA. + +This command replaces the deprecated KVM_SEV_INIT and KVM_SEV_ES_INIT commands. +The commands did not have any parameters (the ```data``` field was unused) and +only work for the KVM_X86_DEFAULT_VM machine type (0). + +They behave as if: + +* the VM type is KVM_X86_SEV_VM for KVM_SEV_INIT, or KVM_X86_SEV_ES_VM for + KVM_SEV_ES_INIT + +* the ``flags`` and ``vmsa_features`` fields of ``struct kvm_sev_init`` are + set to zero + +If the ``KVM_X86_SEV_VMSA_FEATURES`` attribute does not exist, the hypervisor only +supports KVM_SEV_INIT and KVM_SEV_ES_INIT. In that case, note that KVM_SEV_ES_INIT +might set the debug swap VMSA feature (bit 5) depending on the value of the +``debug_swap`` parameter of ``kvm-amd.ko``. + 2. KVM_SEV_LAUNCH_START ----------------------- diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ab609adacb11..72ad5ace118d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -694,6 +694,9 @@ enum sev_cmd_id { /* Guest Migration Extension */ KVM_SEV_SEND_CANCEL, + /* Second time is the charm; improved versions of the above ioctls. */ + KVM_SEV_INIT2, + KVM_SEV_NR_MAX, }; @@ -705,6 +708,12 @@ struct kvm_sev_cmd { __u32 sev_fd; }; +struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; + __u32 pad[9]; +}; + struct kvm_sev_launch_start { __u32 handle; __u32 policy; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3517d6736c93..2f20270be93b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -243,27 +243,31 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) sev_decommission(handle); } -static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, + struct kvm_sev_init *data, + unsigned long vm_type) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_platform_init_args init_args = {0}; + bool es_active = vm_type != KVM_X86_SEV_VM; + u64 valid_vmsa_features = es_active ? sev_supported_vmsa_features : 0; int ret; if (kvm->created_vcpus) return -EINVAL; - if (kvm->arch.vm_type != KVM_X86_DEFAULT_VM) + if (data->flags) + return -EINVAL; + + if (data->vmsa_features & ~valid_vmsa_features) return -EINVAL; if (unlikely(sev->active)) return -EINVAL; sev->active = true; - sev->es_active = argp->id == KVM_SEV_ES_INIT; - sev->vmsa_features = sev_supported_vmsa_features; - if (sev_supported_vmsa_features) - pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. " - "This will not work starting with Linux 6.10\n"); + sev->es_active = es_active; + sev->vmsa_features = data->vmsa_features; ret = sev_asid_new(sev); if (ret) @@ -293,6 +297,38 @@ e_no_asid: return ret; } +static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_init data = { + .vmsa_features = 0, + }; + unsigned long vm_type; + + if (kvm->arch.vm_type != KVM_X86_DEFAULT_VM) + return -EINVAL; + + vm_type = (argp->id == KVM_SEV_INIT ? KVM_X86_SEV_VM : KVM_X86_SEV_ES_VM); + return __sev_guest_init(kvm, argp, &data, vm_type); +} + +static int sev_guest_init2(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct kvm_sev_init data; + + if (!sev->need_init) + return -EINVAL; + + if (kvm->arch.vm_type != KVM_X86_SEV_VM && + kvm->arch.vm_type != KVM_X86_SEV_ES_VM) + return -EINVAL; + + if (copy_from_user(&data, u64_to_user_ptr(argp->data), sizeof(data))) + return -EFAULT; + + return __sev_guest_init(kvm, argp, &data, kvm->arch.vm_type); +} + static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { unsigned int asid = sev_get_asid(kvm); @@ -1960,6 +1996,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) case KVM_SEV_INIT: r = sev_guest_init(kvm, &sev_cmd); break; + case KVM_SEV_INIT2: + r = sev_guest_init2(kvm, &sev_cmd); + break; case KVM_SEV_LAUNCH_START: r = sev_launch_start(kvm, &sev_cmd); break; -- cgit v1.2.3 From e913ef159fad39dfe0b1a5f73e6b0a9c9ece2c1d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jan 2024 15:54:02 -0800 Subject: KVM: x86: Split core of hypercall emulation to helper function By necessity, TDX will use a different register ABI for hypercalls. Break out the core functionality so that it may be reused for TDX. Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Message-Id: <5134caa55ac3dec33fb2addb5545b52b3b52db02.1705965635.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 +++ arch/x86/kvm/x86.c | 56 ++++++++++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 18 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3d56b5bb10e9..01c69840647e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2142,6 +2142,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, kvm_set_or_clear_apicv_inhibit(kvm, reason, false); } +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d584f5739402..2d2619d3eee4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10080,26 +10080,15 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl) { - unsigned long nr, a0, a1, a2, a3, ret; - int op_64_bit; - - if (kvm_xen_hypercall_enabled(vcpu->kvm)) - return kvm_xen_hypercall(vcpu); - - if (kvm_hv_hypercall_enabled(vcpu)) - return kvm_hv_hypercall(vcpu); - - nr = kvm_rax_read(vcpu); - a0 = kvm_rbx_read(vcpu); - a1 = kvm_rcx_read(vcpu); - a2 = kvm_rdx_read(vcpu); - a3 = kvm_rsi_read(vcpu); + unsigned long ret; trace_kvm_hypercall(nr, a0, a1, a2, a3); - op_64_bit = is_64_bit_hypercall(vcpu); if (!op_64_bit) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; @@ -10108,7 +10097,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) a3 &= 0xFFFFFFFF; } - if (static_call(kvm_x86_get_cpl)(vcpu) != 0) { + if (cpl) { ret = -KVM_EPERM; goto out; } @@ -10169,18 +10158,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ); vcpu->arch.complete_userspace_io = complete_hypercall_exit; + /* stat is incremented on completion. */ return 0; } default: ret = -KVM_ENOSYS; break; } + out: + ++vcpu->stat.hypercalls; + return ret; +} +EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall); + +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) +{ + unsigned long nr, a0, a1, a2, a3, ret; + int op_64_bit; + int cpl; + + if (kvm_xen_hypercall_enabled(vcpu->kvm)) + return kvm_xen_hypercall(vcpu); + + if (kvm_hv_hypercall_enabled(vcpu)) + return kvm_hv_hypercall(vcpu); + + nr = kvm_rax_read(vcpu); + a0 = kvm_rbx_read(vcpu); + a1 = kvm_rcx_read(vcpu); + a2 = kvm_rdx_read(vcpu); + a3 = kvm_rsi_read(vcpu); + op_64_bit = is_64_bit_hypercall(vcpu); + cpl = static_call(kvm_x86_get_cpl)(vcpu); + + ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl); + if (nr == KVM_HC_MAP_GPA_RANGE && !ret) + /* MAP_GPA tosses the request to the user space. */ + return 0; + if (!op_64_bit) ret = (u32)ret; kvm_rax_write(vcpu, ret); - ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); -- cgit v1.2.3 From 7fa5e2929198fe728fb6ad9cbc9e395185867743 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 22 Jan 2024 15:53:14 -0800 Subject: KVM: x86/mmu: Add Suppress VE bit to EPT shadow_mmio_mask/shadow_present_mask To make use of the same value of shadow_mmio_mask and shadow_present_mask for TDX and VMX, add Suppress-VE bit to shadow_mmio_mask and shadow_present_mask so that they can be common for both VMX and TDX. TDX will require shadow_mmio_mask and shadow_present_mask to include VMX_SUPPRESS_VE for shared GPA so that EPT violation is triggered for shared GPA. For VMX, VMX_SUPPRESS_VE doesn't matter for MMIO because the spte value is defined so as to cause EPT misconfig. Signed-off-by: Isaku Yamahata Message-Id: <97cc616b3563cd8277be91aaeb3e14bce23c3649.1705965635.git.isaku.yamahata@intel.com> Reviewed-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/mmu/spte.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 4dba17363008..ac6da0a5f5e6 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -514,6 +514,7 @@ enum vmcs_field { #define VMX_EPT_IPAT_BIT (1ull << 6) #define VMX_EPT_ACCESS_BIT (1ull << 8) #define VMX_EPT_DIRTY_BIT (1ull << 9) +#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63) #define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \ VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 768aaeddf5fa..0a0e83859c27 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -413,7 +413,9 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) shadow_dirty_mask = has_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull; shadow_nx_mask = 0ull; shadow_x_mask = VMX_EPT_EXECUTABLE_MASK; - shadow_present_mask = has_exec_only ? 0ull : VMX_EPT_READABLE_MASK; + /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */ + shadow_present_mask = + (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT; /* * EPT overrides the host MTRRs, and so KVM must program the desired * memtype directly into the SPTEs. Note, this mask is just the mask @@ -430,7 +432,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) * of an EPT paging-structure entry is 110b (write/execute). */ kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, - VMX_EPT_RWX_MASK, 0); + VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT, 0); } EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks); -- cgit v1.2.3 From 949019b98289b801edce7e92e022a0e95fc4cc3b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jan 2024 15:53:15 -0800 Subject: KVM: x86/mmu: Track shadow MMIO value on a per-VM basis TDX will use a different shadow PTE entry value for MMIO from VMX. Add a member to kvm_arch and track value for MMIO per-VM instead of a global variable. By using the per-VM EPT entry value for MMIO, the existing VMX logic is kept working. Introduce a separate setter function so that guest TD can use a different value later. Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Message-Id: <229a18434e5d83f45b1fcd7bf1544d79db1becb6.1705965635.git.isaku.yamahata@intel.com> Reviewed-by: Xiaoyao Li Reviewed-by: Binbin Wu Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/mmu/mmu.c | 7 ++++--- arch/x86/kvm/mmu/spte.c | 4 ++-- arch/x86/kvm/mmu/spte.h | 4 ++-- arch/x86/kvm/mmu/tdp_mmu.c | 6 +++--- 5 files changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 01c69840647e..9f92bdb78504 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1313,6 +1313,8 @@ struct kvm_arch { */ spinlock_t mmu_unsync_pages_lock; + u64 shadow_mmio_value; + struct iommu_domain *iommu_domain; bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index fbfdc606f1f1..45b6d8f9e359 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2462,7 +2462,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, return kvm_mmu_prepare_zap_page(kvm, child, invalid_list); } - } else if (is_mmio_spte(pte)) { + } else if (is_mmio_spte(kvm, pte)) { mmu_spte_clear_no_track(spte); } return 0; @@ -4144,7 +4144,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) if (WARN_ON_ONCE(reserved)) return -EINVAL; - if (is_mmio_spte(spte)) { + if (is_mmio_spte(vcpu->kvm, spte)) { gfn_t gfn = get_mmio_spte_gfn(spte); unsigned int access = get_mmio_spte_access(spte); @@ -4760,7 +4760,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, unsigned int access) { - if (unlikely(is_mmio_spte(*sptep))) { + if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) { if (gfn != get_mmio_spte_gfn(*sptep)) { mmu_spte_clear_no_track(sptep); return true; @@ -6267,6 +6267,7 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) void kvm_mmu_init_vm(struct kvm *kvm) { + kvm->arch.shadow_mmio_value = shadow_mmio_value; INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 0a0e83859c27..a5e014d7bc62 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -74,10 +74,10 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access) u64 spte = generation_mmio_spte_mask(gen); u64 gpa = gfn << PAGE_SHIFT; - WARN_ON_ONCE(!shadow_mmio_value); + WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value); access &= shadow_mmio_access_mask; - spte |= shadow_mmio_value | access; + spte |= vcpu->kvm->arch.shadow_mmio_value | access; spte |= gpa | shadow_nonpresent_or_rsvd_mask; spte |= (gpa & shadow_nonpresent_or_rsvd_mask) << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 8056b7853a79..5dd5405fa07a 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -265,9 +265,9 @@ static inline struct kvm_mmu_page *root_to_sp(hpa_t root) return spte_to_child_sp(root); } -static inline bool is_mmio_spte(u64 spte) +static inline bool is_mmio_spte(struct kvm *kvm, u64 spte) { - return (spte & shadow_mmio_mask) == shadow_mmio_value && + return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value && likely(enable_mmio_caching); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index f5401967897a..5fd618abc243 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -495,8 +495,8 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, * impact the guest since both the former and current SPTEs * are nonpresent. */ - if (WARN_ON_ONCE(!is_mmio_spte(old_spte) && - !is_mmio_spte(new_spte) && + if (WARN_ON_ONCE(!is_mmio_spte(kvm, old_spte) && + !is_mmio_spte(kvm, new_spte) && !is_removed_spte(new_spte))) pr_err("Unexpected SPTE change! Nonpresent SPTEs\n" "should not be replaced with another,\n" @@ -1028,7 +1028,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, } /* If a MMIO SPTE is installed, the MMIO will need to be emulated. */ - if (unlikely(is_mmio_spte(new_spte))) { + if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) { vcpu->stat.pf_mmio_spte_created++; trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, new_spte); -- cgit v1.2.3 From fb29541eadb679261cdbd8c4d56444d68ee777fb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Apr 2024 13:43:29 -0400 Subject: KVM, x86: add architectural support code for #VE Dump the contents of the #VE info data structure and assert that #VE does not happen, but do not yet do anything with it. No functional change intended, separated for clarity only. Extracted from a patch by Isaku Yamahata . Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 12 ++++++++++++ arch/x86/kvm/vmx/vmx.c | 4 ++++ 2 files changed, 16 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index ac6da0a5f5e6..d77a31039f24 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -71,6 +71,7 @@ #define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) #define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) #define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE) #define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) #define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES) #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) @@ -226,6 +227,8 @@ enum vmcs_field { VMREAD_BITMAP_HIGH = 0x00002027, VMWRITE_BITMAP = 0x00002028, VMWRITE_BITMAP_HIGH = 0x00002029, + VE_INFORMATION_ADDRESS = 0x0000202A, + VE_INFORMATION_ADDRESS_HIGH = 0x0000202B, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, ENCLS_EXITING_BITMAP = 0x0000202E, @@ -631,4 +634,13 @@ enum vmx_l1d_flush_state { extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; +struct vmx_ve_information { + u32 exit_reason; + u32 delivery; + u64 exit_qualification; + u64 guest_linear_address; + u64 guest_physical_address; + u16 eptp_index; +}; + #endif diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6780313914f8..d780eee9b697 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6408,6 +6408,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu) if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) pr_err("Virtual processor ID = 0x%04x\n", vmcs_read16(VIRTUAL_PROCESSOR_ID)); + if (secondary_exec_control & SECONDARY_EXEC_EPT_VIOLATION_VE) { + pr_err("VE info address = 0x%016llx\n", + vmcs_read64(VE_INFORMATION_ADDRESS)); + } } /* -- cgit v1.2.3 From 63b6206e2f9a4ca756262a8bc20fb869d6db52be Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Feb 2024 18:41:33 -0800 Subject: KVM: x86: Remove separate "bit" defines for page fault error code masks Open code the bit number directly in the PFERR_* masks and drop the intermediate PFERR_*_BIT defines, as having to bounce through two macros just to see which flag corresponds to which bit is quite annoying, as is having to define two macros just to add recognition of a new flag. Use ternary operator to derive the bit in permission_fault(), the one function that actually needs the bit number as part of clever shifting to avoid conditional branches. Generally the compiler is able to turn it into a conditional move, and if not it's not really a big deal. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Paolo Bonzini Message-ID: <20240228024147.41573-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 32 ++++++++++---------------------- arch/x86/kvm/mmu.h | 5 ++--- 2 files changed, 12 insertions(+), 25 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 01c69840647e..065beb3b3539 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -254,28 +254,16 @@ enum x86_intercept_stage; KVM_GUESTDBG_INJECT_DB | \ KVM_GUESTDBG_BLOCKIRQ) - -#define PFERR_PRESENT_BIT 0 -#define PFERR_WRITE_BIT 1 -#define PFERR_USER_BIT 2 -#define PFERR_RSVD_BIT 3 -#define PFERR_FETCH_BIT 4 -#define PFERR_PK_BIT 5 -#define PFERR_SGX_BIT 15 -#define PFERR_GUEST_FINAL_BIT 32 -#define PFERR_GUEST_PAGE_BIT 33 -#define PFERR_IMPLICIT_ACCESS_BIT 48 - -#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) -#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) -#define PFERR_USER_MASK BIT(PFERR_USER_BIT) -#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) -#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) -#define PFERR_PK_MASK BIT(PFERR_PK_BIT) -#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) -#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) -#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) -#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) +#define PFERR_PRESENT_MASK BIT(0) +#define PFERR_WRITE_MASK BIT(1) +#define PFERR_USER_MASK BIT(2) +#define PFERR_RSVD_MASK BIT(3) +#define PFERR_FETCH_MASK BIT(4) +#define PFERR_PK_MASK BIT(5) +#define PFERR_SGX_MASK BIT(15) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(32) +#define PFERR_GUEST_PAGE_MASK BIT_ULL(33) +#define PFERR_IMPLICIT_ACCESS BIT_ULL(48) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ PFERR_WRITE_MASK | \ diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 60f21bb4c27b..2343c9f00e31 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -213,7 +213,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, */ u64 implicit_access = access & PFERR_IMPLICIT_ACCESS; bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC; - int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1; + int index = (pfec | (not_smap ? PFERR_RSVD_MASK : 0)) >> 1; u32 errcode = PFERR_PRESENT_MASK; bool fault; @@ -234,8 +234,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ - offset = (pfec & ~1) + - ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT)); + offset = (pfec & ~1) | ((pte_access & PT_USER_MASK) ? PFERR_RSVD_MASK : 0); pkru_bits &= mmu->pkru_mask >> offset; errcode |= -pkru_bits & PFERR_PK_MASK; -- cgit v1.2.3 From 9b62e03e192ce9300608f9be69be9854a166eae3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Feb 2024 18:41:34 -0800 Subject: KVM: x86: Define more SEV+ page fault error bits/flags for #NPF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define more #NPF error code flags that are relevant to SEV+ (mostly SNP) guests, as specified by the APM: * Bit 31 (RMP): Set to 1 if the fault was caused due to an RMP check or a VMPL check failure, 0 otherwise. * Bit 34 (ENC): Set to 1 if the guest’s effective C-bit was 1, 0 otherwise. * Bit 35 (SIZEM): Set to 1 if the fault was caused by a size mismatch between PVALIDATE or RMPADJUST and the RMP, 0 otherwise. * Bit 36 (VMPL): Set to 1 if the fault was caused by a VMPL permission check failure, 0 otherwise. Note, the APM is *extremely* misleading, and strongly implies that the above flags can _only_ be set for #NPF exits from SNP guests. That is a lie, as bit 34 (C-bit=1, i.e. was encrypted) can be set when running _any_ flavor of SEV guest on SNP capable hardware. Signed-off-by: Sean Christopherson Message-ID: <20240228024147.41573-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 065beb3b3539..1be4608669ea 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -261,8 +261,12 @@ enum x86_intercept_stage; #define PFERR_FETCH_MASK BIT(4) #define PFERR_PK_MASK BIT(5) #define PFERR_SGX_MASK BIT(15) +#define PFERR_GUEST_RMP_MASK BIT_ULL(31) #define PFERR_GUEST_FINAL_MASK BIT_ULL(32) #define PFERR_GUEST_PAGE_MASK BIT_ULL(33) +#define PFERR_GUEST_ENC_MASK BIT_ULL(34) +#define PFERR_GUEST_SIZEM_MASK BIT_ULL(35) +#define PFERR_GUEST_VMPL_MASK BIT_ULL(36) #define PFERR_IMPLICIT_ACCESS BIT_ULL(48) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ -- cgit v1.2.3 From dee281e4b4355286c76d1788dc8e65ec236d6e04 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 17 Apr 2024 07:30:29 -0400 Subject: KVM: x86: Move synthetic PFERR_* sanity checks to SVM's #NPF handler Move the sanity check that hardware never sets bits that collide with KVM- define synthetic bits from kvm_mmu_page_fault() to npf_interception(), i.e. make the sanity check #NPF specific. The legacy #PF path already WARNs if _any_ of bits 63:32 are set, and the error code that comes from VMX's EPT Violatation and Misconfig is 100% synthesized (KVM morphs VMX's EXIT_QUALIFICATION into error code flags). Add a compile-time assert in the legacy #PF handler to make sure that KVM- define flags are covered by its existing sanity check on the upper bits. Opportunistically add a description of PFERR_IMPLICIT_ACCESS, since we are removing the comment that defined it. Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Reviewed-by: Binbin Wu Message-ID: <20240228024147.41573-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ++++++ arch/x86/kvm/mmu/mmu.c | 14 +++----------- arch/x86/kvm/svm/svm.c | 9 +++++++++ 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1be4608669ea..0e63dcd8ac1c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -267,7 +267,13 @@ enum x86_intercept_stage; #define PFERR_GUEST_ENC_MASK BIT_ULL(34) #define PFERR_GUEST_SIZEM_MASK BIT_ULL(35) #define PFERR_GUEST_VMPL_MASK BIT_ULL(36) + +/* + * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks + * when emulating instructions that triggers implicit access. + */ #define PFERR_IMPLICIT_ACCESS BIT_ULL(48) +#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ PFERR_WRITE_MASK | \ diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 955de52e0c18..1b2ae00a7970 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4501,6 +4501,9 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, return -EFAULT; #endif + /* Ensure the above sanity check also covers KVM-defined flags. */ + BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK)); + vcpu->arch.l1tf_flush_l1d = true; if (!flags) { trace_kvm_page_fault(vcpu, fault_address, error_code); @@ -5785,17 +5788,6 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err int r, emulation_type = EMULTYPE_PF; bool direct = vcpu->arch.mmu->root_role.direct; - /* - * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP - * checks when emulating instructions that triggers implicit access. - * WARN if hardware generates a fault with an error code that collides - * with the KVM-defined value. Clear the flag and continue on, i.e. - * don't terminate the VM, as KVM can't possibly be relying on a flag - * that KVM doesn't know about. - */ - if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS)) - error_code &= ~PFERR_IMPLICIT_ACCESS; - if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0f3b59da0d4a..535018f152a3 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2047,6 +2047,15 @@ static int npf_interception(struct kvm_vcpu *vcpu) u64 fault_address = svm->vmcb->control.exit_info_2; u64 error_code = svm->vmcb->control.exit_info_1; + /* + * WARN if hardware generates a fault with an error code that collides + * with KVM-defined sythentic flags. Clear the flags and continue on, + * i.e. don't terminate the VM, as KVM can't possibly be relying on a + * flag that KVM doesn't know about. + */ + if (WARN_ON_ONCE(error_code & PFERR_SYNTHETIC_MASK)) + error_code &= ~PFERR_SYNTHETIC_MASK; + trace_kvm_page_fault(vcpu, fault_address, error_code); return kvm_mmu_page_fault(vcpu, fault_address, error_code, static_cpu_has(X86_FEATURE_DECODEASSISTS) ? -- cgit v1.2.3 From b3d5dc629c32f03d6ae0ddff628a67d999b723e0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Feb 2024 09:28:08 -0500 Subject: KVM: x86/mmu: Use synthetic page fault error code to indicate private faults Add and use a synthetic, KVM-defined page fault error code to indicate whether a fault is to private vs. shared memory. TDX and SNP have different mechanisms for reporting private vs. shared, and KVM's software-protected VMs have no mechanism at all. Usurp an error code flag to avoid having to plumb another parameter to kvm_mmu_page_fault() and friends. Alternatively, KVM could borrow AMD's PFERR_GUEST_ENC_MASK, i.e. set it for TDX and software-protected VMs as appropriate, but that would require *clearing* the flag for SEV and SEV-ES VMs, which support encrypted memory at the hardware layer, but don't utilize private memory at the KVM layer. Opportunistically add a comment to call out that the logic for software- protected VMs is (and was before this commit) broken for nested MMUs, i.e. for nested TDP, as the GPA is an L2 GPA. Punt on trying to play nice with nested MMUs as there is a _lot_ of functionality that simply doesn't work for software-protected VMs, e.g. all of the paths where KVM accesses guest memory need to be updated to be aware of private vs. shared memory. Signed-off-by: Sean Christopherson Message-Id: <20240228024147.41573-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 7 ++++++- arch/x86/kvm/mmu/mmu.c | 14 ++++++++++++++ arch/x86/kvm/mmu/mmu_internal.h | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0e63dcd8ac1c..816d248264db 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -273,7 +273,12 @@ enum x86_intercept_stage; * when emulating instructions that triggers implicit access. */ #define PFERR_IMPLICIT_ACCESS BIT_ULL(48) -#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS) +/* + * PRIVATE_ACCESS is a KVM-defined flag us to indicate that a fault occurred + * when the guest was accessing private memory. + */ +#define PFERR_PRIVATE_ACCESS BIT_ULL(49) +#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ PFERR_WRITE_MASK | \ diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 757c24de3485..34aef57f200a 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5798,6 +5798,20 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; + /* + * Except for reserved faults (emulated MMIO is shared-only), set the + * PFERR_PRIVATE_ACCESS flag for software-protected VMs based on the gfn's + * current attributes, which are the source of truth for such VMs. Note, + * this wrong for nested MMUs as the GPA is an L2 GPA, but KVM doesn't + * currently supported nested virtualization (among many other things) + * for software-protected VMs. + */ + if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && + !(error_code & PFERR_RSVD_MASK) && + vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM && + kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(cr2_or_gpa))) + error_code |= PFERR_PRIVATE_ACCESS; + r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 797b80f996a7..dfd9ff383663 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -306,7 +306,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, .max_level = KVM_MAX_HUGEPAGE_LEVEL, .req_level = PG_LEVEL_4K, .goal_level = PG_LEVEL_4K, - .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT), + .is_private = err & PFERR_PRIVATE_ACCESS, }; int r; -- cgit v1.2.3 From cd389f50700343774ae6b25b08e16247b3c7fa4c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 27 Feb 2024 09:11:28 -0500 Subject: KVM: x86/mmu: check for invalid async page faults involving private memory Right now the error code is not used when an async page fault is completed. This is not a problem in the current code, but it is untidy. For protected VMs, we will also need to check that the page attributes match the current state of the page, because asynchronous page faults can only occur on shared pages (private pages go through kvm_faultin_pfn_private() instead of __gfn_to_pfn_memslot()). Start by piping the error code from kvm_arch_setup_async_pf() to kvm_arch_async_page_ready() via the architecture-specific async page fault data. For now, it can be used to assert that there are no async page faults on private memory. Extracted from a patch by Isaku Yamahata. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu/mmu.c | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 816d248264db..0369e9efe429 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1849,6 +1849,7 @@ struct kvm_arch_async_pf { gfn_t gfn; unsigned long cr3; bool direct_map; + u64 error_code; }; extern u32 __read_mostly kvm_nr_uret_msrs; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 34aef57f200a..4bb121b5f58e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4206,24 +4206,28 @@ static u32 alloc_apf_token(struct kvm_vcpu *vcpu) return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; } -static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - gfn_t gfn) +static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) { struct kvm_arch_async_pf arch; arch.token = alloc_apf_token(vcpu); - arch.gfn = gfn; + arch.gfn = fault->gfn; + arch.error_code = fault->error_code; arch.direct_map = vcpu->arch.mmu->root_role.direct; arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu); - return kvm_setup_async_pf(vcpu, cr2_or_gpa, - kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + return kvm_setup_async_pf(vcpu, fault->addr, + kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch); } void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { int r; + if (WARN_ON_ONCE(work->arch.error_code & PFERR_PRIVATE_ACCESS)) + return; + if ((vcpu->arch.mmu->root_role.direct != work->arch.direct_map) || work->wakeup_all) return; @@ -4236,7 +4240,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu)) return; - kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL); + kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL); } static inline u8 kvm_max_level_for_order(int order) @@ -4333,7 +4337,7 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return RET_PF_RETRY; - } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) { + } else if (kvm_arch_setup_async_pf(vcpu, fault)) { return RET_PF_RETRY; } } -- cgit v1.2.3 From d916f00316b206255164392eeb2aca5f87cdb18a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 1 May 2024 02:10:45 -0500 Subject: KVM: SEV: Add support to handle AP reset MSR protocol Add support for AP Reset Hold being invoked using the GHCB MSR protocol, available in version 2 of the GHCB specification. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Ashish Kalra Signed-off-by: Michael Roth Message-ID: <20240501071048.2208265-2-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/sev-common.h | 6 +++-- arch/x86/kvm/svm/sev.c | 56 +++++++++++++++++++++++++++++++++------ arch/x86/kvm/svm/svm.h | 1 + 3 files changed, 53 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index b463fcbd4b90..01261f7054ad 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -54,8 +54,10 @@ (((unsigned long)fn) << 32)) /* AP Reset Hold */ -#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 -#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS 12 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK GENMASK_ULL(51, 0) /* GHCB GPA Register */ #define GHCB_MSR_REG_GPA_REQ 0x012 diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 598d78b4107f..6e31cb408dd8 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -49,6 +49,10 @@ static bool sev_es_debug_swap_enabled = true; module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444); static u64 sev_supported_vmsa_features; +#define AP_RESET_HOLD_NONE 0 +#define AP_RESET_HOLD_NAE_EVENT 1 +#define AP_RESET_HOLD_MSR_PROTO 2 + static u8 sev_enc_bit; static DECLARE_RWSEM(sev_deactivate_lock); static DEFINE_MUTEX(sev_bitmap_lock); @@ -2727,6 +2731,9 @@ vmgexit_err: void sev_es_unmap_ghcb(struct vcpu_svm *svm) { + /* Clear any indication that the vCPU is in a type of AP Reset Hold */ + svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NONE; + if (!svm->sev_es.ghcb) return; @@ -2938,6 +2945,22 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) GHCB_MSR_INFO_POS); break; } + case GHCB_MSR_AP_RESET_HOLD_REQ: + svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO; + ret = kvm_emulate_ap_reset_hold(&svm->vcpu); + + /* + * Preset the result to a non-SIPI return and then only set + * the result to non-zero when delivering a SIPI. + */ + set_ghcb_msr_bits(svm, 0, + GHCB_MSR_AP_RESET_HOLD_RESULT_MASK, + GHCB_MSR_AP_RESET_HOLD_RESULT_POS); + + set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP, + GHCB_MSR_INFO_MASK, + GHCB_MSR_INFO_POS); + break; case GHCB_MSR_TERM_REQ: { u64 reason_set, reason_code; @@ -3037,6 +3060,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = 1; break; case SVM_VMGEXIT_AP_HLT_LOOP: + svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NAE_EVENT; ret = kvm_emulate_ap_reset_hold(vcpu); break; case SVM_VMGEXIT_AP_JUMP_TABLE: { @@ -3280,15 +3304,31 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) return; } - /* - * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where - * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a - * non-zero value. - */ - if (!svm->sev_es.ghcb) - return; + /* Subsequent SIPI */ + switch (svm->sev_es.ap_reset_hold_type) { + case AP_RESET_HOLD_NAE_EVENT: + /* + * Return from an AP Reset Hold VMGEXIT, where the guest will + * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value. + */ + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1); + break; + case AP_RESET_HOLD_MSR_PROTO: + /* + * Return from an AP Reset Hold VMGEXIT, where the guest will + * set the CS and RIP. Set GHCB data field to a non-zero value. + */ + set_ghcb_msr_bits(svm, 1, + GHCB_MSR_AP_RESET_HOLD_RESULT_MASK, + GHCB_MSR_AP_RESET_HOLD_RESULT_POS); - ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1); + set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP, + GHCB_MSR_INFO_MASK, + GHCB_MSR_INFO_POS); + break; + default: + break; + } } struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 323901782547..6fd0f5862681 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -199,6 +199,7 @@ struct vcpu_sev_es_state { u8 valid_bitmap[16]; struct kvm_host_map ghcb_map; bool received_first_sipi; + unsigned int ap_reset_hold_type; /* SEV-ES scratch area support */ u64 sw_scratch; -- cgit v1.2.3 From ae01818398236ad6d8ecd6970334baf0b7c57409 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 1 May 2024 02:10:46 -0500 Subject: KVM: SEV: Add GHCB handling for Hypervisor Feature Support requests Version 2 of the GHCB specification introduced advertisement of features that are supported by the Hypervisor. Now that KVM supports version 2 of the GHCB specification, bump the maximum supported protocol version. Signed-off-by: Brijesh Singh Signed-off-by: Ashish Kalra Signed-off-by: Michael Roth Message-ID: <20240501071048.2208265-3-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/sev-common.h | 2 ++ arch/x86/kvm/svm/sev.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 01261f7054ad..5a8246dd532f 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -101,6 +101,8 @@ enum psc_op { /* GHCB Hypervisor Feature Request/Response */ #define GHCB_MSR_HV_FT_REQ 0x080 #define GHCB_MSR_HV_FT_RESP 0x081 +#define GHCB_MSR_HV_FT_POS 12 +#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0) #define GHCB_MSR_HV_FT_RESP_VAL(v) \ /* GHCBData[63:12] */ \ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6e31cb408dd8..37d396636b71 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -36,6 +36,8 @@ #define GHCB_VERSION_MAX 1ULL #define GHCB_VERSION_MIN 1ULL +#define GHCB_HV_FT_SUPPORTED GHCB_HV_FT_SNP + /* enable/disable SEV support */ static bool sev_enabled = true; module_param_named(sev, sev_enabled, bool, 0444); @@ -2701,6 +2703,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_AP_HLT_LOOP: case SVM_VMGEXIT_AP_JUMP_TABLE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: + case SVM_VMGEXIT_HV_FEATURES: break; default: reason = GHCB_ERR_INVALID_EVENT; @@ -2961,6 +2964,12 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS); break; + case GHCB_MSR_HV_FT_REQ: + set_ghcb_msr_bits(svm, GHCB_HV_FT_SUPPORTED, + GHCB_MSR_HV_FT_MASK, GHCB_MSR_HV_FT_POS); + set_ghcb_msr_bits(svm, GHCB_MSR_HV_FT_RESP, + GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS); + break; case GHCB_MSR_TERM_REQ: { u64 reason_set, reason_code; @@ -3085,6 +3094,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = 1; break; } + case SVM_VMGEXIT_HV_FEATURES: + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_HV_FT_SUPPORTED); + + ret = 1; + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", -- cgit v1.2.3 From 4af663c2f64a8d252e690c60cf8b8abf22dc2951 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 1 May 2024 02:10:48 -0500 Subject: KVM: SEV: Allow per-guest configuration of GHCB protocol version The GHCB protocol version may be different from one guest to the next. Add a field to track it for each KVM instance and extend KVM_SEV_INIT2 to allow it to be configured by userspace. Now that all SEV-ES support for GHCB protocol version 2 is in place, go ahead and default to it when creating SEV-ES guests through the new KVM_SEV_INIT2 interface. Keep the older KVM_SEV_ES_INIT interface restricted to GHCB protocol version 1. Suggested-by: Sean Christopherson Signed-off-by: Michael Roth Message-ID: <20240501071048.2208265-5-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- .../virt/kvm/x86/amd-memory-encryption.rst | 11 ++++++-- arch/x86/include/uapi/asm/kvm.h | 4 ++- arch/x86/kvm/svm/sev.c | 32 ++++++++++++++++++++-- arch/x86/kvm/svm/svm.h | 1 + 4 files changed, 42 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index 3381556d596d..9677a0714a39 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -95,13 +95,19 @@ Returns: 0 on success, -negative on error struct kvm_sev_init { __u64 vmsa_features; /* initial value of features field in VMSA */ __u32 flags; /* must be 0 */ - __u32 pad[9]; + __u16 ghcb_version; /* maximum guest GHCB version allowed */ + __u16 pad1; + __u32 pad2[8]; }; It is an error if the hypervisor does not support any of the bits that are set in ``flags`` or ``vmsa_features``. ``vmsa_features`` must be 0 for SEV virtual machines, as they do not have a VMSA. +``ghcb_version`` must be 0 for SEV virtual machines, as they do not issue GHCB +requests. If ``ghcb_version`` is 0 for any other guest type, then the maximum +allowed guest GHCB protocol will default to version 2. + This command replaces the deprecated KVM_SEV_INIT and KVM_SEV_ES_INIT commands. The commands did not have any parameters (the ```data``` field was unused) and only work for the KVM_X86_DEFAULT_VM machine type (0). @@ -112,7 +118,8 @@ They behave as if: KVM_SEV_ES_INIT * the ``flags`` and ``vmsa_features`` fields of ``struct kvm_sev_init`` are - set to zero + set to zero, and ``ghcb_version`` is set to 0 for KVM_SEV_INIT and 1 for + KVM_SEV_ES_INIT. If the ``KVM_X86_SEV_VMSA_FEATURES`` attribute does not exist, the hypervisor only supports KVM_SEV_INIT and KVM_SEV_ES_INIT. In that case, note that KVM_SEV_ES_INIT diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 72ad5ace118d..9fae1b73b529 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -711,7 +711,9 @@ struct kvm_sev_cmd { struct kvm_sev_init { __u64 vmsa_features; __u32 flags; - __u32 pad[9]; + __u16 ghcb_version; + __u16 pad1; + __u32 pad2[8]; }; struct kvm_sev_launch_start { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 01baa8aa7e12..a4bde1193b92 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -33,7 +33,8 @@ #include "cpuid.h" #include "trace.h" -#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MAX 2ULL +#define GHCB_VERSION_DEFAULT 2ULL #define GHCB_VERSION_MIN 1ULL #define GHCB_HV_FT_SUPPORTED GHCB_HV_FT_SNP @@ -268,12 +269,24 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, if (data->vmsa_features & ~valid_vmsa_features) return -EINVAL; + if (data->ghcb_version > GHCB_VERSION_MAX || (!es_active && data->ghcb_version)) + return -EINVAL; + if (unlikely(sev->active)) return -EINVAL; sev->active = true; sev->es_active = es_active; sev->vmsa_features = data->vmsa_features; + sev->ghcb_version = data->ghcb_version; + + /* + * Currently KVM supports the full range of mandatory features defined + * by version 2 of the GHCB protocol, so default to that for SEV-ES + * guests created via KVM_SEV_INIT2. + */ + if (sev->es_active && !sev->ghcb_version) + sev->ghcb_version = GHCB_VERSION_DEFAULT; ret = sev_asid_new(sev); if (ret) @@ -307,6 +320,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { struct kvm_sev_init data = { .vmsa_features = 0, + .ghcb_version = 0, }; unsigned long vm_type; @@ -314,6 +328,14 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) return -EINVAL; vm_type = (argp->id == KVM_SEV_INIT ? KVM_X86_SEV_VM : KVM_X86_SEV_ES_VM); + + /* + * KVM_SEV_ES_INIT has been deprecated by KVM_SEV_INIT2, so it will + * continue to only ever support the minimal GHCB protocol version. + */ + if (vm_type == KVM_X86_SEV_ES_VM) + data.ghcb_version = GHCB_VERSION_MIN; + return __sev_guest_init(kvm, argp, &data, vm_type); } @@ -2897,6 +2919,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; u64 ghcb_info; int ret = 1; @@ -2907,7 +2930,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) switch (ghcb_info) { case GHCB_MSR_SEV_INFO_REQ: - set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, GHCB_VERSION_MIN, sev_enc_bit)); break; @@ -3268,11 +3291,14 @@ void sev_init_vmcb(struct vcpu_svm *svm) void sev_es_vcpu_reset(struct vcpu_svm *svm) { + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; + /* * Set the GHCB MSR value as per the GHCB specification when emulating * vCPU RESET for an SEV-ES guest. */ - set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, GHCB_VERSION_MIN, sev_enc_bit)); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 6fd0f5862681..9ae0c57c7d20 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -87,6 +87,7 @@ struct kvm_sev_info { struct list_head regions_list; /* List of registered regions */ u64 ap_jump_table; /* SEV-ES AP Jump Table address */ u64 vmsa_features; + u16 ghcb_version; /* Highest guest GHCB protocol version allowed */ struct kvm *enc_context_owner; /* Owner of copied encryption context */ struct list_head mirror_vms; /* List of VMs mirroring */ struct list_head mirror_entry; /* Use as a list entry of mirrors */ -- cgit v1.2.3