From bc3d7c5570a03ab45bde4bae83697c80900fb714 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Thu, 7 Sep 2023 09:24:49 -0700 Subject: KVM: SVM: Update SEV-ES shutdown intercepts with more metadata Currently if an SEV-ES VM shuts down userspace sees KVM_RUN struct with only errno=EINVAL. This is a very limited amount of information to debug the situation. Instead return KVM_EXIT_SHUTDOWN to alert userspace the VM is shutting down and is not usable any further. Signed-off-by: Peter Gonda Suggested-by: Sean Christopherson Suggested-by: Tom Lendacky Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Tom Lendacky Cc: Joerg Roedel Cc: Borislav Petkov Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20230907162449.1739785-1-pgonda@google.com [sean: tweak changelog] Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86/kvm/svm/svm.c') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9507df93f410..f36a6d819280 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2203,12 +2203,6 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; struct vcpu_svm *svm = to_svm(vcpu); - /* - * The VM save area has already been encrypted so it - * cannot be reinitialized - just terminate. - */ - if (sev_es_guest(vcpu->kvm)) - return -EINVAL; /* * VMCB is undefined after a SHUTDOWN intercept. INIT the vCPU to put @@ -2217,9 +2211,14 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) * userspace. At a platform view, INIT is acceptable behavior as * there exist bare metal platforms that automatically INIT the CPU * in response to shutdown. + * + * The VM save area for SEV-ES guests has already been encrypted so it + * cannot be reinitialized, i.e. synthesizing INIT is futile. */ - clear_page(svm->vmcb); - kvm_vcpu_reset(vcpu, true); + if (!sev_es_guest(vcpu->kvm)) { + clear_page(svm->vmcb); + kvm_vcpu_reset(vcpu, true); + } kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; -- cgit v1.2.3 From aeb904f6b9f1de588cf3130dc8a2c458b236704e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 24 Aug 2023 18:36:20 -0700 Subject: KVM: x86: Refactor can_emulate_instruction() return to be more expressive Refactor and rename can_emulate_instruction() to allow vendor code to return more than true/false, e.g. to explicitly differentiate between "retry", "fault", and "unhandleable". For now, just do the plumbing, a future patch will expand SVM's implementation to signal outright failure if KVM attempts EMULTYPE_SKIP on an SEV guest. No functional change intended (or rather, none that are visible to the guest or userspace). Link: https://lore.kernel.org/r/20230825013621.2845700-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm-x86-ops.h | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm/svm.c | 31 +++++++++++++++++-------------- arch/x86/kvm/vmx/vmx.c | 12 ++++++------ arch/x86/kvm/x86.c | 15 +++++++++------ 5 files changed, 35 insertions(+), 29 deletions(-) (limited to 'arch/x86/kvm/svm/svm.c') diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index e3054e3e46d5..ee2404a559af 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -126,7 +126,7 @@ KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from) KVM_X86_OP_OPTIONAL(vm_move_enc_context_from) KVM_X86_OP_OPTIONAL(guest_memory_reclaimed) KVM_X86_OP(get_msr_feature) -KVM_X86_OP(can_emulate_instruction) +KVM_X86_OP(check_emulate_instruction) KVM_X86_OP(apic_init_signal_blocked) KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) KVM_X86_OP_OPTIONAL(migrate_timers) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 17715cb8731d..89583b410527 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1734,8 +1734,8 @@ struct kvm_x86_ops { int (*get_msr_feature)(struct kvm_msr_entry *entry); - bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, - void *insn, int insn_len); + int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f36a6d819280..c4e700f945f8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -364,8 +364,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; } -static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, - void *insn, int insn_len); +static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, bool commit_side_effects) @@ -391,7 +391,7 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, * right thing and treats "can't emulate" as outright failure * for EMULTYPE_SKIP. */ - if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0)) + if (svm_check_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0) != X86EMUL_CONTINUE) return 0; if (unlikely(!commit_side_effects)) @@ -4727,15 +4727,15 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu) } #endif -static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, - void *insn, int insn_len) +static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { bool smep, smap, is_user; u64 error_code; /* Emulation is always possible when KVM has access to all guest state. */ if (!sev_guest(vcpu->kvm)) - return true; + return X86EMUL_CONTINUE; /* #UD and #GP should never be intercepted for SEV guests. */ WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD | @@ -4747,14 +4747,14 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, * to guest register state. */ if (sev_es_guest(vcpu->kvm)) - return false; + return X86EMUL_RETRY_INSTR; /* * Emulation is possible if the instruction is already decoded, e.g. * when completing I/O after returning from userspace. */ if (emul_type & EMULTYPE_NO_DECODE) - return true; + return X86EMUL_CONTINUE; /* * Emulation is possible for SEV guests if and only if a prefilled @@ -4780,9 +4780,11 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, * success (and in practice it will work the vast majority of the time). */ if (unlikely(!insn)) { - if (!(emul_type & EMULTYPE_SKIP)) - kvm_queue_exception(vcpu, UD_VECTOR); - return false; + if (emul_type & EMULTYPE_SKIP) + return X86EMUL_RETRY_INSTR; + + kvm_queue_exception(vcpu, UD_VECTOR); + return X86EMUL_PROPAGATE_FAULT; } /* @@ -4793,7 +4795,7 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, * table used to translate CS:RIP resides in emulated MMIO. */ if (likely(insn_len)) - return true; + return X86EMUL_CONTINUE; /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. @@ -4851,6 +4853,7 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, kvm_inject_gp(vcpu, 0); else kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + return X86EMUL_PROPAGATE_FAULT; } resume_guest: @@ -4868,7 +4871,7 @@ resume_guest: * doesn't explicitly define "ignored", i.e. doing nothing and letting * the guest spin is technically "ignoring" the access. */ - return false; + return X86EMUL_RETRY_INSTR; } static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) @@ -5028,7 +5031,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .vm_copy_enc_context_from = sev_vm_copy_enc_context_from, .vm_move_enc_context_from = sev_vm_move_enc_context_from, - .can_emulate_instruction = svm_can_emulate_instruction, + .check_emulate_instruction = svm_check_emulate_instruction, .apic_init_signal_blocked = svm_apic_init_signal_blocked, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 72e3943f3693..4e453ba28320 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1657,8 +1657,8 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, - void *insn, int insn_len) +static int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { /* * Emulation of instructions in SGX enclaves is impossible as RIP does @@ -1669,9 +1669,9 @@ static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, */ if (to_vmx(vcpu)->exit_reason.enclave_mode) { kvm_queue_exception(vcpu, UD_VECTOR); - return false; + return X86EMUL_PROPAGATE_FAULT; } - return true; + return X86EMUL_CONTINUE; } static int skip_emulated_instruction(struct kvm_vcpu *vcpu) @@ -5792,7 +5792,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { gpa_t gpa; - if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0)) + if (vmx_check_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0)) return 1; /* @@ -8341,7 +8341,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .enable_smi_window = vmx_enable_smi_window, #endif - .can_emulate_instruction = vmx_can_emulate_instruction, + .check_emulate_instruction = vmx_check_emulate_instruction, .apic_init_signal_blocked = vmx_apic_init_signal_blocked, .migrate_timers = vmx_migrate_timers, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f18b06bbda6..104e6b4520a9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7474,11 +7474,11 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, } EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); -static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, - void *insn, int insn_len) +static int kvm_check_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { - return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type, - insn, insn_len); + return static_call(kvm_x86_check_emulate_instruction)(vcpu, emul_type, + insn, insn_len); } int handle_ud(struct kvm_vcpu *vcpu) @@ -7488,8 +7488,10 @@ int handle_ud(struct kvm_vcpu *vcpu) int emul_type = EMULTYPE_TRAP_UD; char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; + int r; - if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0))) + r = kvm_check_emulate_insn(vcpu, emul_type, NULL, 0); + if (r != X86EMUL_CONTINUE) return 1; if (fep_flags && @@ -8871,7 +8873,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; bool writeback = true; - if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len))) + r = kvm_check_emulate_insn(vcpu, emulation_type, insn, insn_len); + if (r != X86EMUL_CONTINUE) return 1; vcpu->arch.l1tf_flush_l1d = true; -- cgit v1.2.3 From 00682995409696866fe43984c74c8688bdf8f0a5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 24 Aug 2023 18:36:21 -0700 Subject: KVM: SVM: Treat all "skip" emulation for SEV guests as outright failures Treat EMULTYPE_SKIP failures on SEV guests as unhandleable emulation instead of simply resuming the guest, and drop the hack-a-fix which effects that behavior for the INT3/INTO injection path. If KVM can't skip an instruction for which KVM has already done partial emulation, resuming the guest is undesirable as doing so may corrupt guest state. Link: https://lore.kernel.org/r/20230825013621.2845700-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 12 +----------- arch/x86/kvm/x86.c | 9 +++++++-- 2 files changed, 8 insertions(+), 13 deletions(-) (limited to 'arch/x86/kvm/svm/svm.c') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c4e700f945f8..b7472ad183b9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -364,8 +364,6 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; } -static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, - void *insn, int insn_len); static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, bool commit_side_effects) @@ -386,14 +384,6 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, } if (!svm->next_rip) { - /* - * FIXME: Drop this when kvm_emulate_instruction() does the - * right thing and treats "can't emulate" as outright failure - * for EMULTYPE_SKIP. - */ - if (svm_check_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0) != X86EMUL_CONTINUE) - return 0; - if (unlikely(!commit_side_effects)) old_rflags = svm->vmcb->save.rflags; @@ -4781,7 +4771,7 @@ static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, */ if (unlikely(!insn)) { if (emul_type & EMULTYPE_SKIP) - return X86EMUL_RETRY_INSTR; + return X86EMUL_UNHANDLEABLE; kvm_queue_exception(vcpu, UD_VECTOR); return X86EMUL_PROPAGATE_FAULT; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 104e6b4520a9..cc7d29e9104b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8874,8 +8874,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool writeback = true; r = kvm_check_emulate_insn(vcpu, emulation_type, insn, insn_len); - if (r != X86EMUL_CONTINUE) - return 1; + if (r != X86EMUL_CONTINUE) { + if (r == X86EMUL_RETRY_INSTR || r == X86EMUL_PROPAGATE_FAULT) + return 1; + + WARN_ON_ONCE(r != X86EMUL_UNHANDLEABLE); + return handle_emulation_failure(vcpu, emulation_type); + } vcpu->arch.l1tf_flush_l1d = true; -- cgit v1.2.3 From 26951ec8623e915823985e86d2c428213f110659 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Fri, 13 Oct 2023 19:30:20 +0800 Subject: KVM: x86: Use octal for file permission Convert all module params to octal permissions to improve code readability and to make checkpatch happy: WARNING: Symbolic permissions 'S_IRUGO' are not preferred. Consider using octal permissions '0444'. Signed-off-by: Peng Hao Link: https://lore.kernel.org/r/20231013113020.77523-1-flyingpeng@tencent.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/vmx.c | 20 ++++++++++---------- arch/x86/kvm/x86.c | 18 +++++++++--------- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/x86/kvm/svm/svm.c') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9507df93f410..3872c4f3689c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -199,7 +199,7 @@ module_param_named(npt, npt_enabled, bool, 0444); /* allow nested virtualization in KVM/SVM */ static int nested = true; -module_param(nested, int, S_IRUGO); +module_param(nested, int, 0444); /* enable/disable Next RIP Save */ int nrips = true; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 72e3943f3693..f17cb5f84f4e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -82,28 +82,28 @@ bool __read_mostly enable_vpid = 1; module_param_named(vpid, enable_vpid, bool, 0444); static bool __read_mostly enable_vnmi = 1; -module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); +module_param_named(vnmi, enable_vnmi, bool, 0444); bool __read_mostly flexpriority_enabled = 1; -module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); +module_param_named(flexpriority, flexpriority_enabled, bool, 0444); bool __read_mostly enable_ept = 1; -module_param_named(ept, enable_ept, bool, S_IRUGO); +module_param_named(ept, enable_ept, bool, 0444); bool __read_mostly enable_unrestricted_guest = 1; module_param_named(unrestricted_guest, - enable_unrestricted_guest, bool, S_IRUGO); + enable_unrestricted_guest, bool, 0444); bool __read_mostly enable_ept_ad_bits = 1; -module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); +module_param_named(eptad, enable_ept_ad_bits, bool, 0444); static bool __read_mostly emulate_invalid_guest_state = true; -module_param(emulate_invalid_guest_state, bool, S_IRUGO); +module_param(emulate_invalid_guest_state, bool, 0444); static bool __read_mostly fasteoi = 1; -module_param(fasteoi, bool, S_IRUGO); +module_param(fasteoi, bool, 0444); -module_param(enable_apicv, bool, S_IRUGO); +module_param(enable_apicv, bool, 0444); bool __read_mostly enable_ipiv = true; module_param(enable_ipiv, bool, 0444); @@ -114,10 +114,10 @@ module_param(enable_ipiv, bool, 0444); * use VMX instructions. */ static bool __read_mostly nested = 1; -module_param(nested, bool, S_IRUGO); +module_param(nested, bool, 0444); bool __read_mostly enable_pml = 1; -module_param_named(pml, enable_pml, bool, S_IRUGO); +module_param_named(pml, enable_pml, bool, 0444); static bool __read_mostly error_on_inconsistent_vmcs_config = true; module_param(error_on_inconsistent_vmcs_config, bool, 0444); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 756bdc1127ff..94ccad95f7e0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -145,21 +145,21 @@ EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits); EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg); static bool __read_mostly ignore_msrs = 0; -module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); +module_param(ignore_msrs, bool, 0644); bool __read_mostly report_ignored_msrs = true; -module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); +module_param(report_ignored_msrs, bool, 0644); EXPORT_SYMBOL_GPL(report_ignored_msrs); unsigned int min_timer_period_us = 200; -module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); +module_param(min_timer_period_us, uint, 0644); static bool __read_mostly kvmclock_periodic_sync = true; -module_param(kvmclock_periodic_sync, bool, S_IRUGO); +module_param(kvmclock_periodic_sync, bool, 0444); /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ static u32 __read_mostly tsc_tolerance_ppm = 250; -module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); +module_param(tsc_tolerance_ppm, uint, 0644); /* * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables @@ -168,13 +168,13 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); * tuning, i.e. allows privileged userspace to set an exact advancement time. */ static int __read_mostly lapic_timer_advance_ns = -1; -module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); +module_param(lapic_timer_advance_ns, int, 0644); static bool __read_mostly vector_hashing = true; -module_param(vector_hashing, bool, S_IRUGO); +module_param(vector_hashing, bool, 0444); bool __read_mostly enable_vmware_backdoor = false; -module_param(enable_vmware_backdoor, bool, S_IRUGO); +module_param(enable_vmware_backdoor, bool, 0444); EXPORT_SYMBOL_GPL(enable_vmware_backdoor); /* @@ -186,7 +186,7 @@ static int __read_mostly force_emulation_prefix; module_param(force_emulation_prefix, int, 0644); int __read_mostly pi_inject_timer = -1; -module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR); +module_param(pi_inject_timer, bint, 0644); /* Enable/disable PMU virtualization */ bool __read_mostly enable_pmu = true; -- cgit v1.2.3