From 1db9d9ded771389aae5760d20dd1bac113451b9c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Oct 2020 20:48:02 +0100 Subject: KVM: arm64: Add kimg_hyp_va() helper KVM/arm64 is so far unable to deal with function pointers, as the compiler will generate the kernel's runtime VA, and not the linear mapping address, meaning that kern_hyp_va() will give the wrong result. We so far have been able to use PC-relative addressing, but that's not always easy to use, and prevents the implementation of things like the mapping of an index to a pointer. To allow this, provide a new helper that computes the required translation from the kernel image to the HYP VA space. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_mmu.h | 18 +++++++++++++++ arch/arm64/kvm/va_layout.c | 50 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 331394306cce..608c3a83e740 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -98,6 +98,24 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) +static __always_inline unsigned long __kimg_hyp_va(unsigned long v) +{ + unsigned long offset; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + kvm_update_kimg_phys_offset) + : "=r" (offset)); + + return __kern_hyp_va((v - offset) | PAGE_OFFSET); +} + +#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v)))) + +#define kimg_fn_ptr(x) (typeof(x) **)(x) + /* * We currently support using a VM-specified IPA size. For backward * compatibility, the default IPA size is fixed to 40bits. 
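To make the arithmetic in __kimg_hyp_va() easier to follow: the constant patched in by kvm_update_kimg_phys_offset() (see the va_layout.c hunk that follows) is kimage_voffset + PHYS_OFFSET, so subtracting it from a kernel-image address leaves the offset of the backing physical page from the start of RAM, and OR-ing in PAGE_OFFSET rebuilds the linear-map address that __kern_hyp_va() already knows how to convert. The stand-alone sketch below models that chain with made-up constants; the values and the simplified helper are purely illustrative, not the kernel's actual layout.

/* Illustrative user-space model of the kimg VA -> linear VA step in
 * __kimg_hyp_va(); all constants are examples, not real kernel values. */
#include <stdio.h>
#include <stdint.h>

#define EX_PAGE_OFFSET    0xffff000000000000ULL /* base of the linear map (example) */
#define EX_KIMAGE_VADDR   0xffff800010000000ULL /* where the image is mapped (example) */
#define EX_PHYS_OFFSET    0x0000000040000000ULL /* physical base of RAM (example) */
#define EX_KIMAGE_VOFFSET (EX_KIMAGE_VADDR - EX_PHYS_OFFSET) /* image VA - image PA */

/* the value generate_mov_q() encodes into the movz/movk sequence */
static const uint64_t offset = EX_KIMAGE_VOFFSET + EX_PHYS_OFFSET;

static uint64_t kimg_to_linear(uint64_t kimg_va)
{
	/* (va - offset) == pa - PHYS_OFFSET, i.e. the offset into RAM;
	 * OR-ing PAGE_OFFSET turns that into the linear-map address that
	 * __kern_hyp_va() then converts to a HYP VA. */
	return (kimg_va - offset) | EX_PAGE_OFFSET;
}

int main(void)
{
	uint64_t fn = EX_KIMAGE_VADDR + 0x1234; /* some kernel-image symbol */

	printf("kimg VA %#llx -> linear VA %#llx\n",
	       (unsigned long long)fn,
	       (unsigned long long)kimg_to_linear(fn));
	return 0;
}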
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index e0404bcab019..1d00d2cb93fd 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -11,6 +11,7 @@ #include #include #include +#include /* * The LSB of the HYP VA tag @@ -201,3 +202,52 @@ void kvm_patch_vector_branch(struct alt_instr *alt, AARCH64_INSN_BRANCH_NOLINK); *updptr++ = cpu_to_le32(insn); } + +static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u32 insn, oinsn, rd; + + BUG_ON(nr_inst != 4); + + /* Compute target register */ + oinsn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn); + + /* movz rd, #(val & 0xffff) */ + insn = aarch64_insn_gen_movewide(rd, + (u16)val, + 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 16) & 0xffff), lsl #16 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 16), + 16, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 32) & 0xffff), lsl #32 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 32), + 32, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 48) & 0xffff), lsl #48 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 48), + 48, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); +} + +void kvm_update_kimg_phys_offset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst); +} -- cgit v1.2.3 From 7cd0aaafaadcaaf280887f8b478393a9fcfc69e3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 11 Oct 2020 18:17:38 +0100 Subject: KVM: arm64: Turn host HVC handling into a dispatch table Now that we can use function pointer, use a dispatch table to call the individual HVC handlers, leading to more maintainable code. Further improvements include helpers to declare the mapping of local variables to values passed in the host context. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kernel/image-vars.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 228 ++++++++++++++++++++++--------------- 2 files changed, 135 insertions(+), 94 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index c615b285ff5b..e8c194f8de88 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -64,6 +64,7 @@ __efistub__ctype = _ctype; /* Alternative callbacks for init-time patching of nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); +KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset); /* Global kernel state accessed by nVHE hyp code. 
*/ KVM_NVHE_ALIAS(kvm_vgic_global_state); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index e2eafe2c93af..c0543b2e760e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -12,106 +12,146 @@ #include #include -#include - -static void handle_host_hcall(unsigned long func_id, - struct kvm_cpu_context *host_ctxt) -{ - unsigned long ret = 0; - - switch (func_id) { - case KVM_HOST_SMCCC_FUNC(__kvm_vcpu_run): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)r1; - - ret = __kvm_vcpu_run(kern_hyp_va(vcpu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_flush_vm_context): - __kvm_flush_vm_context(); - break; - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid_ipa): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - phys_addr_t ipa = host_ctxt->regs.regs[2]; - int level = host_ctxt->regs.regs[3]; - - __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - - __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_local_vmid): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - - __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_timer_set_cntvoff): { - u64 cntvoff = host_ctxt->regs.regs[1]; - - __kvm_timer_set_cntvoff(cntvoff); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_enable_ssbs): - __kvm_enable_ssbs(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2): - ret = __vgic_v3_get_ich_vtr_el2(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_read_vmcr): - ret = __vgic_v3_read_vmcr(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_write_vmcr): { - u32 vmcr = host_ctxt->regs.regs[1]; - - __vgic_v3_write_vmcr(vmcr); - break; - } - case KVM_HOST_SMCCC_FUNC(__vgic_v3_init_lrs): - __vgic_v3_init_lrs(); - break; - case KVM_HOST_SMCCC_FUNC(__kvm_get_mdcr_el2): - ret = __kvm_get_mdcr_el2(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_save_aprs): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; - - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); - break; - } - case KVM_HOST_SMCCC_FUNC(__vgic_v3_restore_aprs): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; - - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); - break; - } - default: - /* Invalid host HVC. 
*/ - host_ctxt->regs.regs[0] = SMCCC_RET_NOT_SUPPORTED; - return; - } - - host_ctxt->regs.regs[0] = SMCCC_RET_SUCCESS; - host_ctxt->regs.regs[1] = ret; +#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] +#define DECLARE_REG(type, name, ctxt, reg) \ + type name = (type)cpu_reg(ctxt, (reg)) + +static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); +} + +static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt) +{ + __kvm_flush_vm_context(); +} + +static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2); + DECLARE_REG(int, level, host_ctxt, 3); + + __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level); +} + +static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + + __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); +} + +static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + + __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); +} + +static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) +{ + __kvm_timer_set_cntvoff(cpu_reg(host_ctxt, 1)); +} + +static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt) +{ + __kvm_enable_ssbs(); +} + +static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2(); +} + +static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); +} + +static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) +{ + __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); +} + +static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) +{ + __vgic_v3_init_lrs(); +} + +static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); +} + +static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); +} + +static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); +} + +typedef void (*hcall_t)(struct kvm_cpu_context *); + +#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x) + +static const hcall_t *host_hcall[] = { + HANDLE_FUNC(__kvm_vcpu_run), + HANDLE_FUNC(__kvm_flush_vm_context), + HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), + HANDLE_FUNC(__kvm_tlb_flush_vmid), + HANDLE_FUNC(__kvm_tlb_flush_local_vmid), + HANDLE_FUNC(__kvm_timer_set_cntvoff), + HANDLE_FUNC(__kvm_enable_ssbs), + HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2), + HANDLE_FUNC(__vgic_v3_read_vmcr), + HANDLE_FUNC(__vgic_v3_write_vmcr), + HANDLE_FUNC(__vgic_v3_init_lrs), + HANDLE_FUNC(__kvm_get_mdcr_el2), + HANDLE_FUNC(__vgic_v3_save_aprs), + HANDLE_FUNC(__vgic_v3_restore_aprs), +}; + +static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, id, host_ctxt, 0); + const hcall_t *kfn; + hcall_t hfn; + + id -= KVM_HOST_SMCCC_ID(0); + + if (unlikely(id >= ARRAY_SIZE(host_hcall))) + goto inval; + + kfn = host_hcall[id]; + if (unlikely(!kfn)) + goto inval; + 
+ cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS; + + hfn = kimg_fn_hyp_va(kfn); + hfn(host_ctxt); + + return; +inval: + cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED; } void handle_trap(struct kvm_cpu_context *host_ctxt) { u64 esr = read_sysreg_el2(SYS_ESR); - unsigned long func_id; - if (ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64) + if (unlikely(ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64)) hyp_panic(); - func_id = host_ctxt->regs.regs[0]; - handle_host_hcall(func_id, host_ctxt); + handle_host_hcall(host_ctxt); } -- cgit v1.2.3 From c22588c99635ac4dace0ce2d55c1e2dc4f13cb54 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2020 11:56:50 +0100 Subject: KVM: arm64: Don't adjust PC on SError during SMC trap On SMC trap, the prefered return address is set to that of the SMC instruction itself. It is thus wrong to try and roll it back when an SError occurs while trapping on SMC. It is still necessary on HVC though, as HVC doesn't cause a trap, and sets ELR to returning *after* the HVC. It also became apparent that there is no 16bit encoding for an AArch32 HVC instruction, meaning that the displacement is always 4 bytes, no matter what the ISA is. Take this opportunity to simplify it. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/handle_exit.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5d690d60ccad..79a720657c47 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -245,15 +245,15 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); /* - * HVC/SMC already have an adjusted PC, which we need - * to correct in order to return to after having - * injected the SError. + * HVC already have an adjusted PC, which we need to + * correct in order to return to after having injected + * the SError. + * + * SMC, on the other hand, is *trapped*, meaning its + * preferred return address is the SMC itself. */ - if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 || - esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) { - u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2; - *vcpu_pc(vcpu) -= adj; - } + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64) + *vcpu_pc(vcpu) -= 4; return 1; } -- cgit v1.2.3 From 6ddbc281e2aa21c5917e015a373958455f5eb3c1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2020 11:14:38 +0100 Subject: KVM: arm64: Move kvm_vcpu_trap_il_is32bit into kvm_skip_instr32() There is no need to feed the result of kvm_vcpu_trap_il_is32bit() to kvm_skip_instr(), as only AArch32 has a variable length ISA, and this helper can equally be called from kvm_skip_instr32(), reducing the complexity at all the call sites. 
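The effect at the call sites is that the instruction-width decision now lives entirely inside kvm_skip_instr32(), driven by the trap syndrome rather than by a parameter threaded through every caller. A stand-alone model of that decision follows; the structure and field names are illustrative stand-ins, not the kernel's types.

/* Model of the PC advance performed by kvm_skip_instr32() after this
 * change: only Thumb has 16-bit encodings, A32 instructions are always
 * 4 bytes, and the width of the trapped instruction comes from ESR_ELx.IL. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_vcpu {
	uint64_t pc;
	bool thumb;		/* CPSR T bit */
	bool trap_il_32bit;	/* ESR_ELx.IL: trapped instruction was 32-bit */
};

static void skip_instr32(struct fake_vcpu *vcpu)
{
	if (vcpu->thumb && !vcpu->trap_il_32bit)
		vcpu->pc += 2;
	else
		vcpu->pc += 4;
}

int main(void)
{
	struct fake_vcpu narrow = { .pc = 0x1000, .thumb = true,  .trap_il_32bit = false };
	struct fake_vcpu wide   = { .pc = 0x1000, .thumb = false, .trap_il_32bit = true  };

	skip_instr32(&narrow);
	skip_instr32(&wide);
	printf("thumb16 -> %#llx, a32 -> %#llx\n",
	       (unsigned long long)narrow.pc, (unsigned long long)wide.pc);
	return 0;
}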
Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 8 ++++---- arch/arm64/kvm/handle_exit.c | 6 +++--- arch/arm64/kvm/hyp/aarch32.c | 4 ++-- arch/arm64/kvm/mmio.c | 2 +- arch/arm64/kvm/mmu.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5ef2669ccd6c..0864f425547d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -26,7 +26,7 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); -void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); +void kvm_skip_instr32(struct kvm_vcpu *vcpu); void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); @@ -472,10 +472,10 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } -static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { - kvm_skip_instr32(vcpu, is_wide_instr); + kvm_skip_instr32(vcpu); } else { *vcpu_pc(vcpu) += 4; *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; @@ -494,7 +494,7 @@ static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 79a720657c47..30bf8e22df54 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -61,7 +61,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * otherwise return to the same address... 
*/ vcpu_set_reg(vcpu, 0, ~0UL); - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); return 1; } @@ -100,7 +100,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) kvm_clear_request(KVM_REQ_UNHALT, vcpu); } - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); return 1; } @@ -221,7 +221,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu) * that fail their condition code check" */ if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); handled = 1; } else { exit_handle_fn exit_handler; diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c index ae56d8a4b382..f98cbe2626a1 100644 --- a/arch/arm64/kvm/hyp/aarch32.c +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -123,13 +123,13 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) * kvm_skip_instr - skip a trapped instruction and proceed to the next * @vcpu: The vcpu pointer */ -void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +void kvm_skip_instr32(struct kvm_vcpu *vcpu) { u32 pc = *vcpu_pc(vcpu); bool is_thumb; is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); - if (is_thumb && !is_wide_instr) + if (is_thumb && !kvm_vcpu_trap_il_is32bit(vcpu)) pc += 2; else pc += 4; diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 6a2826f1bf5e..7e8eb32ae7d2 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) * The MMIO instruction is emulated and should not be re-executed * in the guest. */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 57972bdb213a..0bec07cf8d06 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1014,7 +1014,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * cautious, and skip the instruction. */ if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); ret = 1; goto out_unlock; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index fb12d3ef423a..1232a814ca7f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2199,7 +2199,7 @@ static void perform_access(struct kvm_vcpu *vcpu, /* Skip instruction if instructed so */ if (likely(r->access(vcpu, params, r))) - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_skip_instr(vcpu); } /* -- cgit v1.2.3 From cdb5e02ed133731f8a6676a389ed40ca303cab7c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 09:29:27 +0100 Subject: KVM: arm64: Make kvm_skip_instr() and co private to HYP In an effort to remove the vcpu PC manipulations from EL1 on nVHE systems, move kvm_skip_instr() to be HYP-specific. EL1's intent to increment PC post emulation is now signalled via a flag in the vcpu structure. 
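The hand-off works as a simple producer/consumer flag: EL1 records its intent with kvm_incr_pc(), and the hypervisor applies and clears the flag in __adjust_pc() just before entering the guest. Below is a minimal stand-alone model of that pattern; the names and bit value are illustrative stand-ins for the real vcpu->arch.flags machinery.

/* Minimal model of the flag hand-off introduced here: EL1 requests the
 * skip, HYP honours it on the next guest entry.  Illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define FAKE_INCREMENT_PC	(1u << 8)

struct fake_vcpu {
	uint64_t pc;
	uint32_t flags;
};

/* EL1 side: request the skip instead of touching PC directly */
static void fake_incr_pc(struct fake_vcpu *vcpu)
{
	vcpu->flags |= FAKE_INCREMENT_PC;
}

/* HYP side: honour and clear the request before entering the guest */
static void fake_adjust_pc(struct fake_vcpu *vcpu)
{
	if (vcpu->flags & FAKE_INCREMENT_PC) {
		vcpu->pc += 4;	/* kvm_skip_instr() in the real code also handles AArch32 */
		vcpu->flags &= ~FAKE_INCREMENT_PC;
	}
}

int main(void)
{
	struct fake_vcpu vcpu = { .pc = 0x2000 };

	fake_incr_pc(&vcpu);	/* e.g. after emulating an MMIO access */
	fake_adjust_pc(&vcpu);	/* called on the guest entry path */
	printf("pc = %#llx, flags = %#x\n",
	       (unsigned long long)vcpu.pc, vcpu.flags);
	return 0;
}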
Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 27 ++------------ arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/handle_exit.c | 6 ++-- arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 56 ++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 2 ++ arch/arm64/kvm/hyp/nvhe/switch.c | 3 ++ arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 ++ arch/arm64/kvm/hyp/vgic-v3-sr.c | 2 ++ arch/arm64/kvm/hyp/vhe/switch.c | 3 ++ arch/arm64/kvm/mmio.c | 2 +- arch/arm64/kvm/mmu.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 +- 12 files changed, 77 insertions(+), 31 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/hyp/adjust_pc.h (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 0864f425547d..6d2b5d1aa7b3 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -472,32 +472,9 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } -static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) { - if (vcpu_mode_is_32bit(vcpu)) { - kvm_skip_instr32(vcpu); - } else { - *vcpu_pc(vcpu) += 4; - *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; - } - - /* advance the singlestep state machine */ - *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; -} - -/* - * Skip an instruction which has been emulated at hyp while most guest sysregs - * are live. - */ -static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) -{ - *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); - vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); - - kvm_skip_instr(vcpu); - - write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); - write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 781d029b8aa8..7991a1c13254 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -407,6 +407,7 @@ struct kvm_vcpu_arch { #define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ #define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ #define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ +#define KVM_ARM64_INCREMENT_PC (1 << 8) /* Increment PC */ #define vcpu_has_sve(vcpu) (system_supports_sve() && \ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 30bf8e22df54..d4e00a864ee6 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -61,7 +61,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * otherwise return to the same address... 
*/ vcpu_set_reg(vcpu, 0, ~0UL); - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); return 1; } @@ -100,7 +100,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) kvm_clear_request(KVM_REQ_UNHALT, vcpu); } - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); return 1; } @@ -221,7 +221,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu) * that fail their condition code check" */ if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); handled = 1; } else { exit_handle_fn exit_handler; diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h new file mode 100644 index 000000000000..d3043b07e78e --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Guest PC manipulation helpers + * + * Copyright (C) 2012,2013 - ARM Ltd + * Copyright (C) 2020 - Google LLC + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_HYP_ADJUST_PC_H__ +#define __ARM64_KVM_HYP_ADJUST_PC_H__ + +#include +#include + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) { + kvm_skip_instr32(vcpu); + } else { + *vcpu_pc(vcpu) += 4; + *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; + } + + /* advance the singlestep state machine */ + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; +} + +/* + * Skip an instruction which has been emulated at hyp while most guest sysregs + * are live. + */ +static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); + + kvm_skip_instr(vcpu); + + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); +} + +/* + * Adjust the guest PC on entry, depending on flags provided by EL1 + * for the purpose of emulation (MMIO, sysreg). + */ +static inline void __adjust_pc(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { + kvm_skip_instr(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC; + } +} + +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 1f875a8f20c4..8b2328f62a07 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -7,6 +7,8 @@ #ifndef __ARM64_KVM_HYP_SWITCH_H__ #define __ARM64_KVM_HYP_SWITCH_H__ +#include + #include #include #include diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8ae8160bc93a..3e50ff35aa4f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -4,6 +4,7 @@ * Author: Marc Zyngier */ +#include #include #include @@ -189,6 +190,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(host_ctxt); + __adjust_pc(vcpu); + /* * We must restore the 32-bit state before the sysregs, thanks * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). 
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index bd1bab551d48..8f0585640241 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -4,6 +4,8 @@ * Author: Marc Zyngier */ +#include + #include #include #include diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 452f4cacd674..80406f463c28 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -4,6 +4,8 @@ * Author: Marc Zyngier */ +#include + #include #include #include diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 62546e20b251..af8e940d0f03 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -4,6 +4,7 @@ * Author: Marc Zyngier */ +#include #include #include @@ -133,6 +134,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __load_guest_stage2(vcpu->arch.hw_mmu); __activate_traps(vcpu); + __adjust_pc(vcpu); + sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 7e8eb32ae7d2..3e2d8ba11a02 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) * The MMIO instruction is emulated and should not be re-executed * in the guest. */ - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0bec07cf8d06..2ecd0c5622a6 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1014,7 +1014,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * cautious, and skip the instruction. */ if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); ret = 1; goto out_unlock; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1232a814ca7f..6a6f06205ea7 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2199,7 +2199,7 @@ static void perform_access(struct kvm_vcpu *vcpu, /* Skip instruction if instructed so */ if (likely(r->access(vcpu, params, r))) - kvm_skip_instr(vcpu); + kvm_incr_pc(vcpu); } /* -- cgit v1.2.3 From defe21f49bc98b095300752aa1e19bb608f3e97d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 12:12:45 +0100 Subject: KVM: arm64: Move PC rollback on SError to HYP Instead of handling the "PC rollback on SError during HVC" at EL1 (which requires disclosing PC to a potentially untrusted kernel), let's move this fixup to ... fixup_guest_exit(), which is where we do all fixups. Isn't that neat? Signed-off-by: Marc Zyngier --- arch/arm64/kvm/handle_exit.c | 17 ----------------- arch/arm64/kvm/hyp/include/hyp/switch.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index d4e00a864ee6..f79137ee4274 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -241,23 +241,6 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; - if (ARM_SERROR_PENDING(exception_index)) { - u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); - - /* - * HVC already have an adjusted PC, which we need to - * correct in order to return to after having injected - * the SError. - * - * SMC, on the other hand, is *trapped*, meaning its - * preferred return address is the SMC itself. 
- */ - if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64) - *vcpu_pc(vcpu) -= 4; - - return 1; - } - exception_index = ARM_EXCEPTION_CODE(exception_index); switch (exception_index) { diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 8b2328f62a07..84473574c2e7 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -411,6 +411,21 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + if (ARM_SERROR_PENDING(*exit_code)) { + u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); + + /* + * HVC already have an adjusted PC, which we need to + * correct in order to return to after having injected + * the SError. + * + * SMC, on the other hand, is *trapped*, meaning its + * preferred return address is the SMC itself. + */ + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64) + write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR); + } + /* * We're using the raw exception code in order to only process * the trap if no SError is pending. We will come back to the -- cgit v1.2.3 From 21c810017cef75435be8b8f1da2110c6d1fd887b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 19:36:11 +0100 Subject: KVM: arm64: Move VHE direct sysreg accessors into kvm_host.h As we are about to need to access system registers from the HYP code based on their internal encoding, move the direct sysreg accessors to a common include file, with a VHE-specific guard. No functionnal change. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 91 +++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs.c | 81 ---------------------------------- 2 files changed, 91 insertions(+), 81 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7991a1c13254..0672b3db6121 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -439,6 +439,97 @@ struct kvm_vcpu_arch { u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); +static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) +{ + /* + * *** VHE ONLY *** + * + * System registers listed in the switch are not saved on every + * exit from the guest but are only saved on vcpu_put. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the guest cannot modify its + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's + * thread when emulating cross-VCPU communication. 
+ */ + if (!has_vhe()) + return false; + + switch (reg) { + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; + case PAR_EL1: *val = read_sysreg_par(); break; + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} + +static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) +{ + /* + * *** VHE ONLY *** + * + * System registers listed in the switch are not restored on every + * entry to the guest but are only restored on vcpu_load. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the MPIDR should only be set + * once, before running the VCPU, and never changed later. 
+ */ + if (!has_vhe()) + return false; + + switch (reg) { + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} + /* * CP14 and CP15 live in the same array, as they are backed by the * same system registers. diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6a6f06205ea7..26c7c25f8a6d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -64,87 +64,6 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, return false; } -static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) -{ - /* - * System registers listed in the switch are not saved on every - * exit from the guest but are only saved on vcpu_put. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the guest cannot modify its - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's - * thread when emulating cross-VCPU communication. 
- */ - switch (reg) { - case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; - case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; - case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; - case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; - case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; - case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; - case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; - case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; - case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; - case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; - case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; - case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; - case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; - case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case PAR_EL1: *val = read_sysreg_par(); break; - case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; - case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; - default: return false; - } - - return true; -} - -static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) -{ - /* - * System registers listed in the switch are not restored on every - * entry to the guest but are only restored on vcpu_load. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the MPIDR should only be set - * once, before running the VCPU, and never changed later. 
- */ - switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; - case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; - default: return false; - } - - return true; -} - u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { u64 val = 0x8badf00d8badf00d; -- cgit v1.2.3 From e650b64f1a56cbc700f0a2d2ab8d23155757e2f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 19:42:38 +0100 Subject: KVM: arm64: Add basic hooks for injecting exceptions from EL2 Add the basic infrastructure to describe injection of exceptions into a guest. So far, nothing uses this code path. 
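For orientation, the flag layout added by this patch packs everything into vcpu->arch.flags: bit 8 marks a pending exception, bits 9-11 describe its type and target EL, and KVM_ARM64_INCREMENT_PC is moved onto bit 9 so that a PC increment and a pending exception can never be requested together. A small stand-alone sketch of that encoding, using the constant values from the kvm_host.h hunk below (the macro names here are shortened for the example):

/* Sketch of the pending-exception flag encoding; values mirror the hunk
 * below, everything else is illustrative. */
#include <stdio.h>

#define PENDING_EXCEPTION	(1u << 8)
#define EXCEPT_MASK		(7u << 9)
#define EXCEPT_AA64_ELx_SYNC	(0u << 9)
#define EXCEPT_AA64_ELx_IRQ	(1u << 9)
#define EXCEPT_AA64_EL1		(0u << 11)
#define EXCEPT_AA64_EL2		(1u << 11)
#define INCREMENT_PC		(1u << 9)

int main(void)
{
	/* what inject_undef64() requests in a later patch of this series */
	unsigned int flags = PENDING_EXCEPTION |
			     EXCEPT_AA64_ELx_SYNC |
			     EXCEPT_AA64_EL1;

	if (flags & PENDING_EXCEPTION)
		printf("pending, type/EL bits = %#x\n", flags & EXCEPT_MASK);

	/* INCREMENT_PC overlaps the type field, so the two requests are
	 * mutually exclusive by construction */
	printf("INCREMENT_PC aliases the EXCEPT bits: %#x vs %#x\n",
	       INCREMENT_PC, EXCEPT_AA64_ELx_IRQ);
	return 0;
}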
Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 28 ++++++++++++++++++++++++++-- arch/arm64/kvm/hyp/exception.c | 17 +++++++++++++++++ arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 10 ++++++++-- arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/vhe/Makefile | 2 +- 5 files changed, 53 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/kvm/hyp/exception.c (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0672b3db6121..7a1faf917f3c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -407,9 +407,33 @@ struct kvm_vcpu_arch { #define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ #define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ #define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ -#define KVM_ARM64_INCREMENT_PC (1 << 8) /* Increment PC */ +#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */ +#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */ -#define vcpu_has_sve(vcpu) (system_supports_sve() && \ +/* + * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can + * take the following values: + * + * For AArch32 EL1: + */ +#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9) +#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9) +#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9) +/* For AArch64: */ +#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9) +#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11) +#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11) + +/* + * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be + * set together with an exception... + */ +#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */ + +#define vcpu_has_sve(vcpu) (system_supports_sve() && \ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) #ifdef CONFIG_ARM64_PTR_AUTH diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c new file mode 100644 index 000000000000..6533a9270850 --- /dev/null +++ b/arch/arm64/kvm/hyp/exception.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Fault injection for both 32 and 64bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + */ + +#include + +void kvm_inject_exception(struct kvm_vcpu *vcpu) +{ +} diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h index d3043b07e78e..b1f60923a8fe 100644 --- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -13,6 +13,8 @@ #include #include +void kvm_inject_exception(struct kvm_vcpu *vcpu); + static inline void kvm_skip_instr(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { @@ -43,11 +45,15 @@ static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) /* * Adjust the guest PC on entry, depending on flags provided by EL1 - * for the purpose of emulation (MMIO, sysreg). + * for the purpose of emulation (MMIO, sysreg) or exception injection. 
*/ static inline void __adjust_pc(struct kvm_vcpu *vcpu) { - if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { + if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) { + kvm_inject_exception(vcpu); + vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { kvm_skip_instr(vcpu); vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC; } diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index ddde15fe85f2..77b8c4e06f2f 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -8,7 +8,7 @@ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o + ../fpsimd.o ../hyp-entry.o ../exception.o ## ## Build rules for compiling nVHE hyp code diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 461e97c375cc..96bec0ecf9dd 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -8,4 +8,4 @@ ccflags-y := -D__KVM_VHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o + ../fpsimd.o ../hyp-entry.o ../exception.o -- cgit v1.2.3 From bb666c472ca25efb38d1163131cc01546b3a653a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 19:52:29 +0100 Subject: KVM: arm64: Inject AArch64 exceptions from HYP Move the AArch64 exception injection code from EL1 to HYP, leaving only the ESR_EL1 updates to EL1. In order to come with the differences between VHE and nVHE, two set of system register accessors are provided. SPSR, ELR, PC and PSTATE are now completely handled in the hypervisor. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 12 ++++ arch/arm64/kvm/hyp/exception.c | 136 +++++++++++++++++++++++++++++++++++ arch/arm64/kvm/inject_fault.c | 114 ++--------------------------- 3 files changed, 154 insertions(+), 108 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6d2b5d1aa7b3..736a342dadf7 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -21,6 +21,18 @@ #include #include +#define CURRENT_EL_SP_EL0_VECTOR 0x0 +#define CURRENT_EL_SP_ELx_VECTOR 0x200 +#define LOWER_EL_AArch64_VECTOR 0x400 +#define LOWER_EL_AArch32_VECTOR 0x600 + +enum exception_type { + except_type_sync = 0, + except_type_irq = 0x80, + except_type_fiq = 0x100, + except_type_serror = 0x180, +}; + unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 6533a9270850..94d6d823424d 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -11,7 +11,143 @@ */ #include +#include +#include + +#if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__) +#error Hypervisor code only! 
+#endif + +static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +{ + u64 val; + + if (__vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; + + return __vcpu_sys_reg(vcpu, reg); +} + +static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +{ + if (__vcpu_write_sys_reg_to_cpu(val, reg)) + return; + + __vcpu_sys_reg(vcpu, reg) = val; +} + +static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) +{ + write_sysreg_el1(val, SYS_SPSR); +} + +/* + * This performs the exception entry at a given EL (@target_mode), stashing PC + * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. + * The EL passed to this function *must* be a non-secure, privileged mode with + * bit 0 being set (PSTATE.SP == 1). + * + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, + enum exception_type type) +{ + unsigned long sctlr, vbar, old, new, mode; + u64 exc_offset; + + mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); + + if (mode == target_mode) + exc_offset = CURRENT_EL_SP_ELx_VECTOR; + else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) + exc_offset = CURRENT_EL_SP_EL0_VECTOR; + else if (!(mode & PSR_MODE32_BIT)) + exc_offset = LOWER_EL_AArch64_VECTOR; + else + exc_offset = LOWER_EL_AArch32_VECTOR; + + switch (target_mode) { + case PSR_MODE_EL1h: + vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1); + sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); + break; + default: + /* Don't do that */ + BUG(); + } + + *vcpu_pc(vcpu) = vbar + exc_offset + type; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. 
+ + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= target_mode; + + *vcpu_cpsr(vcpu) = new; + __vcpu_write_spsr(vcpu, old); +} void kvm_inject_exception(struct kvm_vcpu *vcpu) { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_EXCEPT_AA64_EL1): + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + break; + default: + /* + * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} + * will be implemented at some point. Everything + * else gets silently ignored. + */ + break; + } } diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 34a96ab244fa..8862431f8e3b 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,119 +14,15 @@ #include #include -#define CURRENT_EL_SP_EL0_VECTOR 0x0 -#define CURRENT_EL_SP_ELx_VECTOR 0x200 -#define LOWER_EL_AArch64_VECTOR 0x400 -#define LOWER_EL_AArch32_VECTOR 0x600 - -enum exception_type { - except_type_sync = 0, - except_type_irq = 0x80, - except_type_fiq = 0x100, - except_type_serror = 0x180, -}; - -/* - * This performs the exception entry at a given EL (@target_mode), stashing PC - * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. - * The EL passed to this function *must* be a non-secure, privileged mode with - * bit 0 being set (PSTATE.SP == 1). - * - * When an exception is taken, most PSTATE fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all - * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx - * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. - * - * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. - * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. - * - * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from - * MSB to LSB. - */ -static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, - enum exception_type type) -{ - unsigned long sctlr, vbar, old, new, mode; - u64 exc_offset; - - mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); - - if (mode == target_mode) - exc_offset = CURRENT_EL_SP_ELx_VECTOR; - else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) - exc_offset = CURRENT_EL_SP_EL0_VECTOR; - else if (!(mode & PSR_MODE32_BIT)) - exc_offset = LOWER_EL_AArch64_VECTOR; - else - exc_offset = LOWER_EL_AArch32_VECTOR; - - switch (target_mode) { - case PSR_MODE_EL1h: - vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); - sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); - break; - default: - /* Don't do that */ - BUG(); - } - - *vcpu_pc(vcpu) = vbar + exc_offset + type; - - old = *vcpu_cpsr(vcpu); - new = 0; - - new |= (old & PSR_N_BIT); - new |= (old & PSR_Z_BIT); - new |= (old & PSR_C_BIT); - new |= (old & PSR_V_BIT); - - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) - - new |= (old & PSR_DIT_BIT); - - // PSTATE.UAO is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D5-2579. - - // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 - // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented - // See ARM DDI 0487E.a, page D5-2578. - new |= (old & PSR_PAN_BIT); - if (!(sctlr & SCTLR_EL1_SPAN)) - new |= PSR_PAN_BIT; - - // PSTATE.SS is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D2-2452. 
- - // PSTATE.IL is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D1-2306. - - // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 - // See ARM DDI 0487E.a, page D13-3258 - if (sctlr & SCTLR_ELx_DSSBS) - new |= PSR_SSBS_BIT; - - // PSTATE.BTYPE is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. - - new |= PSR_D_BIT; - new |= PSR_A_BIT; - new |= PSR_I_BIT; - new |= PSR_F_BIT; - - new |= target_mode; - - *vcpu_cpsr(vcpu) = new; - vcpu_write_spsr(vcpu, old); -} - static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -156,7 +52,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu) { u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); /* * Build an unknown exception, depending on the instruction -- cgit v1.2.3 From 41613b519ce78bfe1328b8bd693944e80fc8b6c3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Oct 2020 19:53:49 +0100 Subject: KVM: arm64: Inject AArch32 exceptions from HYP Similarly to what has been done for AArch64, move the AArch32 exception injection to HYP. In order to not use the regmap selection code at EL2, simplify the code populating the target mode's LR register by useing the compatibility aliases for LR_abt and LR_und. We also introduce new accessors for SPSR_abt and SPSR_und, and move VBAR/SCTLR to using the AArch64 accessors (the use of the AArch32 names was an ARMv7 leftover). Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/aarch32.c | 149 ++---------------------------- arch/arm64/kvm/hyp/exception.c | 200 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 195 insertions(+), 154 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c index 40a62a99fbf8..ad453b47c517 100644 --- a/arch/arm64/kvm/aarch32.c +++ b/arch/arm64/kvm/aarch32.c @@ -19,20 +19,6 @@ #define DFSR_FSC_EXTABT_nLPAE 0x08 #define DFSR_LPAE BIT(9) -/* - * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. - */ -static const u8 return_offsets[8][2] = { - [0] = { 0, 0 }, /* Reset, unused */ - [1] = { 4, 2 }, /* Undefined */ - [2] = { 0, 0 }, /* SVC, unused */ - [3] = { 4, 4 }, /* Prefetch abort */ - [4] = { 8, 8 }, /* Data abort */ - [5] = { 0, 0 }, /* HVC, unused */ - [6] = { 4, 4 }, /* IRQ, unused */ - [7] = { 4, 4 }, /* FIQ, unused */ -}; - static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) { preempt_disable(); @@ -53,132 +39,10 @@ static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) } } -/* - * When an exception is taken, most CPSR fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). - * - * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with - * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was - * obsoleted by the ARMv7 virtualization extensions and is RES0. 
- * - * For the SPSR layout seen from AArch32, see: - * - ARM DDI 0406C.d, page B1-1148 - * - ARM DDI 0487E.a, page G8-6264 - * - * For the SPSR_ELx layout for AArch32 seen from AArch64, see: - * - ARM DDI 0487E.a, page C5-426 - * - * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from - * MSB to LSB. - */ -static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) -{ - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - unsigned long old, new; - - old = *vcpu_cpsr(vcpu); - new = 0; - - new |= (old & PSR_AA32_N_BIT); - new |= (old & PSR_AA32_Z_BIT); - new |= (old & PSR_AA32_C_BIT); - new |= (old & PSR_AA32_V_BIT); - new |= (old & PSR_AA32_Q_BIT); - - // CPSR.IT[7:0] are set to zero upon any exception - // See ARM DDI 0487E.a, section G1.12.3 - // See ARM DDI 0406C.d, section B1.8.3 - - new |= (old & PSR_AA32_DIT_BIT); - - // CPSR.SSBS is set to SCTLR.DSSBS upon any exception - // See ARM DDI 0487E.a, page G8-6244 - if (sctlr & BIT(31)) - new |= PSR_AA32_SSBS_BIT; - - // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 - // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented - // See ARM DDI 0487E.a, page G8-6246 - new |= (old & PSR_AA32_PAN_BIT); - if (!(sctlr & BIT(23))) - new |= PSR_AA32_PAN_BIT; - - // SS does not exist in AArch32, so ignore - - // CPSR.IL is set to zero upon any exception - // See ARM DDI 0487E.a, page G1-5527 - - new |= (old & PSR_AA32_GE_MASK); - - // CPSR.IT[7:0] are set to zero upon any exception - // See prior comment above - - // CPSR.E is set to SCTLR.EE upon any exception - // See ARM DDI 0487E.a, page G8-6245 - // See ARM DDI 0406C.d, page B4-1701 - if (sctlr & BIT(25)) - new |= PSR_AA32_E_BIT; - - // CPSR.A is unchanged upon an exception to Undefined, Supervisor - // CPSR.A is set upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_A_BIT); - if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) - new |= PSR_AA32_A_BIT; - - // CPSR.I is set upon any exception - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= PSR_AA32_I_BIT; - - // CPSR.F is set upon an exception to FIQ - // CPSR.F is unchanged upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_F_BIT); - if (mode == PSR_AA32_MODE_FIQ) - new |= PSR_AA32_F_BIT; - - // CPSR.T is set to SCTLR.TE upon any exception - // See ARM DDI 0487E.a, page G8-5514 - // See ARM DDI 0406C.d, page B1-1181 - if (sctlr & BIT(30)) - new |= PSR_AA32_T_BIT; - - new |= mode; - - return new; -} - -static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) -{ - unsigned long spsr = *vcpu_cpsr(vcpu); - bool is_thumb = (spsr & PSR_AA32_T_BIT); - u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - - *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); - - /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); - *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; - - /* Branch to exception vector */ - if (sctlr & (1 << 13)) - vect_offset += 0xffff0000; - else /* always have security exceptions */ - vect_offset += vcpu_cp15(vcpu, c12_VBAR); - - *vcpu_pc(vcpu) = vect_offset; -} - void kvm_inject_undef32(struct kvm_vcpu *vcpu) { - bool loaded = pre_fault_synchronize(vcpu); - - prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4); - post_fault_synchronize(vcpu, 
loaded); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND | + KVM_ARM64_PENDING_EXCEPTION); } /* @@ -188,7 +52,6 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu) static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) { - u32 vect_offset; u32 *far, *fsr; bool is_lpae; bool loaded; @@ -196,17 +59,17 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, loaded = pre_fault_synchronize(vcpu); if (is_pabt) { - vect_offset = 12; + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | + KVM_ARM64_PENDING_EXCEPTION); far = &vcpu_cp15(vcpu, c6_IFAR); fsr = &vcpu_cp15(vcpu, c5_IFSR); } else { /* !iabt */ - vect_offset = 16; + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | + KVM_ARM64_PENDING_EXCEPTION); far = &vcpu_cp15(vcpu, c6_DFAR); fsr = &vcpu_cp15(vcpu, c5_DFSR); } - prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); - *far = addr; /* Give the guest an IMPLEMENTATION DEFINED exception */ diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 94d6d823424d..73629094f903 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -41,6 +41,22 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) write_sysreg_el1(val, SYS_SPSR); } +static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) +{ + if (has_vhe()) + write_sysreg(val, spsr_abt); + else + vcpu->arch.ctxt.spsr_abt = val; +} + +static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val) +{ + if (has_vhe()) + write_sysreg(val, spsr_und); + else + vcpu->arch.ctxt.spsr_und = val; +} + /* * This performs the exception entry at a given EL (@target_mode), stashing PC * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. @@ -135,19 +151,181 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, __vcpu_write_spsr(vcpu, old); } -void kvm_inject_exception(struct kvm_vcpu *vcpu) +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. 
+ */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + +/* + * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. 
+ */ +static const u8 return_offsets[8][2] = { + [0] = { 0, 0 }, /* Reset, unused */ + [1] = { 4, 2 }, /* Undefined */ + [2] = { 0, 0 }, /* SVC, unused */ + [3] = { 4, 4 }, /* Prefetch abort */ + [4] = { 8, 8 }, /* Data abort */ + [5] = { 0, 0 }, /* HVC, unused */ + [6] = { 4, 4 }, /* IRQ, unused */ + [7] = { 4, 4 }, /* FIQ, unused */ +}; + +static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { - switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { - case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC | - KVM_ARM64_EXCEPT_AA64_EL1): - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); + u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + u32 return_address; + + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); + return_address = *vcpu_pc(vcpu); + return_address += return_offsets[vect_offset >> 2][is_thumb]; + + /* KVM only enters the ABT and UND modes, so only deal with those */ + switch(mode) { + case PSR_AA32_MODE_ABT: + __vcpu_write_spsr_abt(vcpu, host_spsr_to_spsr32(spsr)); + vcpu_gp_regs(vcpu)->compat_lr_abt = return_address; break; - default: - /* - * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} - * will be implemented at some point. Everything - * else gets silently ignored. - */ + + case PSR_AA32_MODE_UND: + __vcpu_write_spsr_und(vcpu, host_spsr_to_spsr32(spsr)); + vcpu_gp_regs(vcpu)->compat_lr_und = return_address; break; } + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += __vcpu_read_sys_reg(vcpu, VBAR_EL1); + + *vcpu_pc(vcpu) = vect_offset; +} + +void kvm_inject_exception(struct kvm_vcpu *vcpu) +{ + if (vcpu_el1_is_32bit(vcpu)) { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case KVM_ARM64_EXCEPT_AA32_UND: + enter_exception32(vcpu, PSR_AA32_MODE_UND, 4); + break; + case KVM_ARM64_EXCEPT_AA32_IABT: + enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12); + break; + case KVM_ARM64_EXCEPT_AA32_DABT: + enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16); + break; + default: + /* Err... */ + break; + } + } else { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_EXCEPT_AA64_EL1): + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + break; + default: + /* + * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} + * will be implemented at some point. Everything + * else gets silently ignored. + */ + break; + } + } } -- cgit v1.2.3 From 7d76b8a60350ff5e919daacd78ef3c2fe04735a2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 Oct 2020 09:24:40 +0100 Subject: KVM: arm64: Remove SPSR manipulation primitives The SPSR setting code is now completely unused, including that dealing with banked AArch32 SPSRs. Cleanup time. 
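For reference, the write path that makes these primitives redundant already lives in hyp/exception.c (added by the previous patches); the abort-mode variant there is essentially:

    static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
    {
            if (has_vhe())
                    write_sysreg(val, spsr_abt);
            else
                    vcpu->arch.ctxt.spsr_abt = val;
    }

so nothing outside of the HYP injection code needs to pick a banked SPSR any more.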
Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 26 ---------- arch/arm64/kvm/regmap.c | 96 ------------------------------------ 2 files changed, 122 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 736a342dadf7..5d957d0e7b69 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -34,8 +34,6 @@ enum exception_type { }; unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); -unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); -void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu); @@ -180,30 +178,6 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, vcpu_gp_regs(vcpu)->regs[reg_num] = val; } -static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) -{ - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_read_spsr32(vcpu); - - if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(SYS_SPSR); - else - return __vcpu_sys_reg(vcpu, SPSR_EL1); -} - -static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) -{ - if (vcpu_mode_is_32bit(vcpu)) { - vcpu_write_spsr32(vcpu, v); - return; - } - - if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, SYS_SPSR); - else - __vcpu_sys_reg(vcpu, SPSR_EL1) = v; -} - /* * The layout of SPSR for an AArch32 state is different when observed from an * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index accc1d5fba61..ae7e290bb017 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -126,99 +126,3 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) return reg_array + vcpu_reg_offsets[mode][reg_num]; } - -/* - * Return the SPSR for the current mode of the virtual CPU. 
- */ -static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu) -{ - unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; - switch (mode) { - case PSR_AA32_MODE_SVC: return KVM_SPSR_SVC; - case PSR_AA32_MODE_ABT: return KVM_SPSR_ABT; - case PSR_AA32_MODE_UND: return KVM_SPSR_UND; - case PSR_AA32_MODE_IRQ: return KVM_SPSR_IRQ; - case PSR_AA32_MODE_FIQ: return KVM_SPSR_FIQ; - default: BUG(); - } -} - -unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) -{ - int spsr_idx = vcpu_spsr32_mode(vcpu); - - if (!vcpu->arch.sysregs_loaded_on_cpu) { - switch (spsr_idx) { - case KVM_SPSR_SVC: - return __vcpu_sys_reg(vcpu, SPSR_EL1); - case KVM_SPSR_ABT: - return vcpu->arch.ctxt.spsr_abt; - case KVM_SPSR_UND: - return vcpu->arch.ctxt.spsr_und; - case KVM_SPSR_IRQ: - return vcpu->arch.ctxt.spsr_irq; - case KVM_SPSR_FIQ: - return vcpu->arch.ctxt.spsr_fiq; - } - } - - switch (spsr_idx) { - case KVM_SPSR_SVC: - return read_sysreg_el1(SYS_SPSR); - case KVM_SPSR_ABT: - return read_sysreg(spsr_abt); - case KVM_SPSR_UND: - return read_sysreg(spsr_und); - case KVM_SPSR_IRQ: - return read_sysreg(spsr_irq); - case KVM_SPSR_FIQ: - return read_sysreg(spsr_fiq); - default: - BUG(); - } -} - -void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) -{ - int spsr_idx = vcpu_spsr32_mode(vcpu); - - if (!vcpu->arch.sysregs_loaded_on_cpu) { - switch (spsr_idx) { - case KVM_SPSR_SVC: - __vcpu_sys_reg(vcpu, SPSR_EL1) = v; - break; - case KVM_SPSR_ABT: - vcpu->arch.ctxt.spsr_abt = v; - break; - case KVM_SPSR_UND: - vcpu->arch.ctxt.spsr_und = v; - break; - case KVM_SPSR_IRQ: - vcpu->arch.ctxt.spsr_irq = v; - break; - case KVM_SPSR_FIQ: - vcpu->arch.ctxt.spsr_fiq = v; - break; - } - - return; - } - - switch (spsr_idx) { - case KVM_SPSR_SVC: - write_sysreg_el1(v, SYS_SPSR); - break; - case KVM_SPSR_ABT: - write_sysreg(v, spsr_abt); - break; - case KVM_SPSR_UND: - write_sysreg(v, spsr_und); - break; - case KVM_SPSR_IRQ: - write_sysreg(v, spsr_irq); - break; - case KVM_SPSR_FIQ: - write_sysreg(v, spsr_fiq); - break; - } -} -- cgit v1.2.3 From dcfba399325f919b25854ca17ef1535f5d754fe1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 Oct 2020 09:45:24 +0100 Subject: KVM: arm64: Consolidate exception injection Move the AArch32 exception injection code back into the inject_fault.c file, removing the need for a few non-static functions now that AArch32 host support is a thing of the past. 
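All three public injection helpers end up with the same dispatch shape; condensed from the kvm_inject_undefined() hunk below (illustrative only, the real hunks are in the diff):

    void kvm_inject_undefined(struct kvm_vcpu *vcpu)
    {
            if (vcpu_el1_is_32bit(vcpu))
                    inject_undef32(vcpu);   /* now static, local to inject_fault.c */
            else
                    inject_undef64(vcpu);
    }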
Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 3 -- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/aarch32.c | 95 ------------------------------------ arch/arm64/kvm/inject_fault.c | 75 ++++++++++++++++++++++++++-- 4 files changed, 73 insertions(+), 102 deletions(-) delete mode 100644 arch/arm64/kvm/aarch32.c (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5d957d0e7b69..3105bb73f539 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -42,9 +42,6 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_undef32(struct kvm_vcpu *vcpu); -void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 1504c81fbf5d..9b32a89a25c8 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -16,7 +16,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ inject_fault.o regmap.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ - aarch32.o arch_timer.o \ + arch_timer.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c deleted file mode 100644 index ad453b47c517..000000000000 --- a/arch/arm64/kvm/aarch32.c +++ /dev/null @@ -1,95 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (not much of an) Emulation layer for 32bit guests. - * - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * based on arch/arm/kvm/emulate.c - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include -#include -#include - -#define DFSR_FSC_EXTABT_LPAE 0x10 -#define DFSR_FSC_EXTABT_nLPAE 0x08 -#define DFSR_LPAE BIT(9) - -static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - if (vcpu->arch.sysregs_loaded_on_cpu) { - kvm_arch_vcpu_put(vcpu); - return true; - } - - preempt_enable(); - return false; -} - -static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) -{ - if (loaded) { - kvm_arch_vcpu_load(vcpu, smp_processor_id()); - preempt_enable(); - } -} - -void kvm_inject_undef32(struct kvm_vcpu *vcpu) -{ - vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND | - KVM_ARM64_PENDING_EXCEPTION); -} - -/* - * Modelled after TakeDataAbortException() and TakePrefetchAbortException - * pseudocode. 
- */ -static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, - unsigned long addr) -{ - u32 *far, *fsr; - bool is_lpae; - bool loaded; - - loaded = pre_fault_synchronize(vcpu); - - if (is_pabt) { - vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | - KVM_ARM64_PENDING_EXCEPTION); - far = &vcpu_cp15(vcpu, c6_IFAR); - fsr = &vcpu_cp15(vcpu, c5_IFSR); - } else { /* !iabt */ - vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | - KVM_ARM64_PENDING_EXCEPTION); - far = &vcpu_cp15(vcpu, c6_DFAR); - fsr = &vcpu_cp15(vcpu, c5_DFSR); - } - - *far = addr; - - /* Give the guest an IMPLEMENTATION DEFINED exception */ - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) { - *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; - } else { - /* no need to shuffle FS[4] into DFSR[10] as its 0 */ - *fsr = DFSR_FSC_EXTABT_nLPAE; - } - - post_fault_synchronize(vcpu, loaded); -} - -void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, false, addr); -} - -void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, true, addr); -} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 8862431f8e3b..e2a2e48ca371 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -66,6 +66,75 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_sys_reg(vcpu, esr, ESR_EL1); } +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) + +static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (vcpu->arch.sysregs_loaded_on_cpu) { + kvm_arch_vcpu_put(vcpu); + return true; + } + + preempt_enable(); + return false; +} + +static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) +{ + if (loaded) { + kvm_arch_vcpu_load(vcpu, smp_processor_id()); + preempt_enable(); + } +} + +static void inject_undef32(struct kvm_vcpu *vcpu) +{ + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND | + KVM_ARM64_PENDING_EXCEPTION); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. 
+ */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 *far, *fsr; + bool is_lpae; + bool loaded; + + loaded = pre_fault_synchronize(vcpu); + + if (is_pabt) { + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | + KVM_ARM64_PENDING_EXCEPTION); + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | + KVM_ARM64_PENDING_EXCEPTION); + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) { + *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + *fsr = DFSR_FSC_EXTABT_nLPAE; + } + + post_fault_synchronize(vcpu, loaded); +} + /** * kvm_inject_dabt - inject a data abort into the guest * @vcpu: The VCPU to receive the data abort @@ -77,7 +146,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_dabt32(vcpu, addr); + inject_abt32(vcpu, false, addr); else inject_abt64(vcpu, false, addr); } @@ -93,7 +162,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_pabt32(vcpu, addr); + inject_abt32(vcpu, true, addr); else inject_abt64(vcpu, true, addr); } @@ -108,7 +177,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_undef32(vcpu); + inject_undef32(vcpu); else inject_undef64(vcpu); } -- cgit v1.2.3 From 90c1f934ed7141a6d4c202936d12faaeb405fb66 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 16 Oct 2020 18:41:24 +0100 Subject: KVM: arm64: Get rid of the AArch32 register mapping code The only use of the register mapping code was for the sake of the LR mapping, which we trivially solved in a previous patch. Get rid of the whole thing now. 
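The narrowing rule that replaces the old mapping is visible in the set_core_reg() hunk below; as a standalone sketch (the helper name and the explicit PSR_AA32_MODE_MASK masking are illustrative, not part of the patch):

    static int nr_gprs_to_narrow(unsigned long cpsr)
    {
            switch (cpsr & PSR_AA32_MODE_MASK) {
            case PSR_AA32_MODE_USR:
            case PSR_AA32_MODE_SYS:
                    return 15;      /* r0-r14 alias x0-x14; PC is handled separately */
            default:
                    return 31;      /* privileged modes: banked SP/LR live in the upper GPRs too */
            }
    }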
Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 2 - arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/guest.c | 28 +++++++- arch/arm64/kvm/regmap.c | 128 ----------------------------------- 4 files changed, 26 insertions(+), 134 deletions(-) delete mode 100644 arch/arm64/kvm/regmap.c (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3105bb73f539..c8f550a53516 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -33,8 +33,6 @@ enum exception_type { except_type_serror = 0x180, }; -unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); - bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 9b32a89a25c8..60fd181df624 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ - inject_fault.o regmap.o va_layout.o handle_exit.o \ + inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ arch_timer.o \ diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfb5218137ca..3f23f7478d2a 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -252,10 +252,32 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) memcpy(addr, valp, KVM_REG_SIZE(reg->id)); if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { - int i; + int i, nr_reg; + + switch (*vcpu_cpsr(vcpu)) { + /* + * Either we are dealing with user mode, and only the + * first 15 registers (+ PC) must be narrowed to 32bit. + * AArch32 r0-r14 conveniently map to AArch64 x0-x14. + */ + case PSR_AA32_MODE_USR: + case PSR_AA32_MODE_SYS: + nr_reg = 15; + break; + + /* + * Otherwide, this is a priviledged mode, and *all* the + * registers must be narrowed to 32bit. 
+ */ + default: + nr_reg = 31; + break; + } + + for (i = 0; i < nr_reg; i++) + vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i)); - for (i = 0; i < 16; i++) - *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i); + *vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu); } out: return err; diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c deleted file mode 100644 index ae7e290bb017..000000000000 --- a/arch/arm64/kvm/regmap.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * Derived from arch/arm/kvm/emulate.c: - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include -#include -#include - -#define VCPU_NR_MODES 6 -#define REG_OFFSET(_reg) \ - (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) - -#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) - -static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { - /* USR Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), - REG_OFFSET(pc) - }, - - /* FIQ Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), - REG_OFFSET(compat_r8_fiq), /* r8 */ - REG_OFFSET(compat_r9_fiq), /* r9 */ - REG_OFFSET(compat_r10_fiq), /* r10 */ - REG_OFFSET(compat_r11_fiq), /* r11 */ - REG_OFFSET(compat_r12_fiq), /* r12 */ - REG_OFFSET(compat_sp_fiq), /* r13 */ - REG_OFFSET(compat_lr_fiq), /* r14 */ - REG_OFFSET(pc) - }, - - /* IRQ Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_irq), /* r13 */ - REG_OFFSET(compat_lr_irq), /* r14 */ - REG_OFFSET(pc) - }, - - /* SVC Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_svc), /* r13 */ - REG_OFFSET(compat_lr_svc), /* r14 */ - REG_OFFSET(pc) - }, - - /* ABT Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_abt), /* r13 */ - REG_OFFSET(compat_lr_abt), /* r14 */ - REG_OFFSET(pc) - }, - - /* UND Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_und), /* r13 */ - REG_OFFSET(compat_lr_und), /* r14 */ - REG_OFFSET(pc) - }, -}; - -/* - * Return a pointer to the register number valid in the current mode of - * the virtual CPU. 
- */ -unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) -{ - unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs; - unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; - - switch (mode) { - case PSR_AA32_MODE_USR ... PSR_AA32_MODE_SVC: - mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ - break; - - case PSR_AA32_MODE_ABT: - mode = 4; - break; - - case PSR_AA32_MODE_UND: - mode = 5; - break; - - case PSR_AA32_MODE_SYS: - mode = 0; /* SYS maps to USR */ - break; - - default: - BUG(); - } - - return reg_array + vcpu_reg_offsets[mode][reg_num]; -} -- cgit v1.2.3 From ca4e514774930f30b66375a974b5edcbebaf0e7e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 10 Nov 2020 11:10:15 +0000 Subject: KVM: arm64: Introduce handling of AArch32 TTBCR2 traps ARMv8.2 introduced TTBCR2, which shares TCR_EL1 with TTBCR. Gracefully handle traps to this register when HCR_EL2.TVM is set. Cc: stable@vger.kernel.org Reported-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/sys_regs.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7a1faf917f3c..803cb2427b54 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -212,6 +212,7 @@ enum vcpu_sysreg { #define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ #define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ #define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c2_TTBCR2 (c2_TTBCR + 1) /* Translation Table Base Control R. 2 */ #define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ #define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ #define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 26c7c25f8a6d..afdf18d694cb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1925,6 +1925,7 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, c2_TTBCR2 }, { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, -- cgit v1.2.3 From 4ff3fc316d78daa2ed6de2f13616fb33a2926d8e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Oct 2020 22:23:28 +0000 Subject: KVM: arm64: Move AArch32 exceptions over to AArch64 sysregs The use of the AArch32-specific accessors have always been a bit annoying on 64bit, and it is time for a change. Let's move the AArch32 exception injection over to the AArch64 encoding, which requires us to split the two halves of FAR_EL1 into DFAR and IFAR. This enables us to drop the preempt_disable() games on VHE, and to kill the last user of the vcpu_cp15() macro. 
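The DFAR/IFAR split boils down to packing both 32bit fault addresses into FAR_EL1; a condensed sketch of what the inject_abt32() hunk below does (the helper name is illustrative):

    /* AArch32 DFAR maps to FAR_EL1[31:0], IFAR to FAR_EL1[63:32] */
    static u64 pack_aa32_far(u64 far, u32 addr, bool is_pabt)
    {
            if (is_pabt) {
                    far &= GENMASK(31, 0);          /* keep DFAR */
                    far |= (u64)addr << 32;         /* update IFAR */
            } else {
                    far &= GENMASK(63, 32);         /* keep IFAR */
                    far |= addr;                    /* update DFAR */
            }
            return far;
    }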
Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/inject_fault.c | 62 +++++++++++++-------------------------- 2 files changed, 20 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 803cb2427b54..c905c3fcaaa0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -562,7 +562,6 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) #define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) #define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) struct kvm_vm_stat { ulong remote_tlb_flush; diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index e2a2e48ca371..b47df73e98d7 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -69,26 +69,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) #define DFSR_FSC_EXTABT_LPAE 0x10 #define DFSR_FSC_EXTABT_nLPAE 0x08 #define DFSR_LPAE BIT(9) - -static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - if (vcpu->arch.sysregs_loaded_on_cpu) { - kvm_arch_vcpu_put(vcpu); - return true; - } - - preempt_enable(); - return false; -} - -static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) -{ - if (loaded) { - kvm_arch_vcpu_load(vcpu, smp_processor_id()); - preempt_enable(); - } -} +#define TTBCR_EAE BIT(31) static void inject_undef32(struct kvm_vcpu *vcpu) { @@ -100,39 +81,36 @@ static void inject_undef32(struct kvm_vcpu *vcpu) * Modelled after TakeDataAbortException() and TakePrefetchAbortException * pseudocode. */ -static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, - unsigned long addr) +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) { - u32 *far, *fsr; - bool is_lpae; - bool loaded; + u64 far; + u32 fsr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + if (vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE) { + fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + fsr = DFSR_FSC_EXTABT_nLPAE; + } - loaded = pre_fault_synchronize(vcpu); + far = vcpu_read_sys_reg(vcpu, FAR_EL1); if (is_pabt) { vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | KVM_ARM64_PENDING_EXCEPTION); - far = &vcpu_cp15(vcpu, c6_IFAR); - fsr = &vcpu_cp15(vcpu, c5_IFSR); + far &= GENMASK(31, 0); + far |= (u64)addr << 32; + vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2); } else { /* !iabt */ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | KVM_ARM64_PENDING_EXCEPTION); - far = &vcpu_cp15(vcpu, c6_DFAR); - fsr = &vcpu_cp15(vcpu, c5_DFSR); - } - - *far = addr; - - /* Give the guest an IMPLEMENTATION DEFINED exception */ - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) { - *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; - } else { - /* no need to shuffle FS[4] into DFSR[10] as its 0 */ - *fsr = DFSR_FSC_EXTABT_nLPAE; + far &= GENMASK(63, 32); + far |= addr; + vcpu_write_sys_reg(vcpu, fsr, ESR_EL1); } - post_fault_synchronize(vcpu, loaded); + vcpu_write_sys_reg(vcpu, far, FAR_EL1); } /** -- cgit v1.2.3 From 6ed6750f2b6d4a51f27615f3323d1850449299e3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:09:12 +0000 Subject: KVM: arm64: Add AArch32 mapping annotation In order to deal with the few AArch32 system registers that map to only a particular half of their AArch64 counterpart (such as DFAR and IFAR being colocated in FAR_EL1), let's add an optional 
annotation to the sysreg descriptor structure, indicating whether a register maps to the upper or lower 32bits of a register. Nothing is using these annotation yet. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 5a6fc30f5989..259864c3c76b 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -27,6 +27,12 @@ struct sys_reg_desc { /* Sysreg string for debug */ const char *name; + enum { + AA32_ZEROHIGH, + AA32_LO, + AA32_HI, + } aarch32_map; + /* MRS/MSR instruction which accesses it. */ u8 Op0; u8 Op1; @@ -153,6 +159,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, const struct sys_reg_desc table[], unsigned int num); +#define AA32(_x) .aarch32_map = AA32_##_x #define Op0(_x) .Op0 = _x #define Op1(_x) .Op1 = _x #define CRn(_x) .CRn = _x -- cgit v1.2.3 From b1ea1d760d3331da19e33650bf8c09ce028a0a49 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:14:20 +0000 Subject: KVM: arm64: Map AArch32 cp15 register to AArch64 sysregs Move all the cp15 registers over to their AArch64 counterpart. This requires the annotation of a few of them (such as the usual DFAR/IFAR vs FAR_EL1), and a new helper that generates mask/shift pairs for the various configurations. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 114 +++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index afdf18d694cb..ab66101c855e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -128,6 +128,24 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift) +{ + switch (r->aarch32_map) { + case AA32_LO: + *mask = GENMASK_ULL(31, 0); + *shift = 0; + break; + case AA32_HI: + *mask = GENMASK_ULL(63, 32); + *shift = 32; + break; + default: + *mask = GENMASK_ULL(63, 0); + *shift = 0; + break; + } +} + /* * Generic accessor for VM registers. Only called as long as HCR_TVM * is set. 
If the guest enables the MMU, we stop trapping the VM @@ -138,26 +156,21 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { bool was_enabled = vcpu_has_cache_enabled(vcpu); - u64 val; - int reg = r->reg; + u64 val, mask, shift; BUG_ON(!p->is_write); - /* See the 32bit mapping in kvm_host.h */ - if (p->is_aarch32) - reg = r->reg / 2; + get_access_mask(r, &mask, &shift); - if (!p->is_aarch32 || !p->is_32bit) { - val = p->regval; + if (~mask) { + val = vcpu_read_sys_reg(vcpu, r->reg); + val &= ~mask; } else { - val = vcpu_read_sys_reg(vcpu, reg); - if (r->reg % 2) - val = (p->regval << 32) | (u64)lower_32_bits(val); - else - val = ((u64)upper_32_bits(val) << 32) | - lower_32_bits(p->regval); + val = 0; } - vcpu_write_sys_reg(vcpu, val, reg); + + val |= (p->regval & (mask >> shift)) << shift; + vcpu_write_sys_reg(vcpu, val, r->reg); kvm_toggle_cache(vcpu, was_enabled); return true; @@ -167,17 +180,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + u64 mask, shift; + if (p->is_write) return ignore_write(vcpu, p); - p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); - - if (p->is_aarch32) { - if (r->Op2 & 2) - p->regval = upper_32_bits(p->regval); - else - p->regval = lower_32_bits(p->regval); - } + get_access_mask(r, &mask, &shift); + p->regval = (vcpu_read_sys_reg(vcpu, r->reg) & mask) >> shift; return true; } @@ -1264,10 +1273,6 @@ static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { int reg = r->reg; - /* See the 32bit mapping in kvm_host.h */ - if (p->is_aarch32) - reg = r->reg / 2; - if (p->is_write) vcpu_write_sys_reg(vcpu, p->regval, reg); else @@ -1919,20 +1924,29 @@ static const struct sys_reg_desc cp14_64_regs[] = { */ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, c2_TTBCR2 }, - { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, - { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, - { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, - { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR }, - { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR }, - { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR }, - { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, SCTLR_EL1 }, + /* ACTLR */ + { AA32(LO), Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr, NULL, ACTLR_EL1 }, + /* ACTLR2 */ + { AA32(HI), Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr, NULL, ACTLR_EL1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, TTBR1_EL1 }, + /* TTBCR */ + { AA32(LO), Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, TCR_EL1 }, + /* TTBCR2 */ + { AA32(HI), Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, TCR_EL1 }, + { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, DACR32_EL2 }, + /* DFSR */ + 
{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, ESR_EL1 }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, IFSR32_EL2 }, + /* ADFSR */ + { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, AFSR0_EL1 }, + /* AIFSR */ + { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, AFSR1_EL1 }, + /* DFAR */ + { AA32(LO), Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, FAR_EL1 }, + /* IFAR */ + { AA32(HI), Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, FAR_EL1 }, /* * DC{C,I,CI}SW operations: @@ -1958,15 +1972,19 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten }, { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs }, - { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, - { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, - { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, - { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, + /* PRRR/MAIR0 */ + { AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 }, + /* NMRR/MAIR1 */ + { AA32(HI), Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, MAIR_EL1 }, + /* AMAIR0 */ + { AA32(LO), Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, AMAIR_EL1 }, + /* AMAIR1 */ + { AA32(HI), Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, AMAIR_EL1 }, /* ICC_SRE */ { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre }, - { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, + { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, CONTEXTIDR_EL1 }, /* Arch Tmers */ { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer }, @@ -2041,14 +2059,14 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr }, { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr }, - { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR }, + { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 }, }; static const struct sys_reg_desc cp15_64_regs[] = { - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ - { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, -- cgit v1.2.3 From 1da42c34d7c42fe2840bfe3de83cd0b5aa374859 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:17:23 +0000 Subject: KVM: arm64: Map AArch32 cp14 register to AArch64 sysregs Similarly to what has been done on the cp15 front, repaint the debug registers to use their AArch64 counterparts. This results in some simplification as we can remove the 32bit-specific accessors. 
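Once annotated, the AArch32 debug accesses go through the same mask/shift helper as the VM registers; the read side is essentially (condensed from dbg_to_reg() below, the wrapper name is illustrative):

    static u64 read_mapped_half(const struct sys_reg_desc *rd, u64 reg)
    {
            u64 mask, shift;

            /* AA32(HI) yields mask = GENMASK_ULL(63, 32), shift = 32 */
            get_access_mask(rd, &mask, &shift);
            return (reg & mask) >> shift;
    }

so an AArch32 DBGBXVRn read simply returns the top half of DBGBVRn_EL1.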
Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 8 --- arch/arm64/kvm/sys_regs.c | 109 +++++++++++++------------------------- 2 files changed, 37 insertions(+), 80 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c905c3fcaaa0..0d023e4f80a0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -555,14 +555,6 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) return true; } -/* - * CP14 and CP15 live in the same array, as they are backed by the - * same system registers. - */ -#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) - -#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) - struct kvm_vm_stat { ulong remote_tlb_flush; }; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ab66101c855e..660ff6c18b2e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -366,26 +366,30 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, */ static void reg_to_dbg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *rd, u64 *dbg_reg) { - u64 val = p->regval; + u64 mask, shift, val; - if (p->is_32bit) { - val &= 0xffffffffUL; - val |= ((*dbg_reg >> 32) << 32); - } + get_access_mask(rd, &mask, &shift); + val = *dbg_reg; + val &= ~mask; + val |= (p->regval & (mask >> shift)) << shift; *dbg_reg = val; + vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; } static void dbg_to_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *rd, u64 *dbg_reg) { - p->regval = *dbg_reg; - if (p->is_32bit) - p->regval &= 0xffffffffUL; + u64 mask, shift; + + get_access_mask(rd, &mask, &shift); + p->regval = (*dbg_reg & mask) >> shift; } static bool trap_bvr(struct kvm_vcpu *vcpu, @@ -395,9 +399,9 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -437,9 +441,9 @@ static bool trap_bcr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -480,9 +484,9 @@ static bool trap_wvr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); @@ -523,9 +527,9 @@ static bool trap_wcr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -1744,66 +1748,27 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, } } -static bool trap_debug32(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (p->is_write) { - vcpu_cp14(vcpu, r->reg) = p->regval; - vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - } else { - p->regval = vcpu_cp14(vcpu, 
r->reg); - } - - return true; -} - -/* AArch32 debug register mappings +/* + * AArch32 debug register mappings * * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0] * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32] * - * All control registers and watchpoint value registers are mapped to - * the lower 32 bits of their AArch64 equivalents. We share the trap - * handlers with the above AArch64 code which checks what mode the - * system is in. + * None of the other registers share their location, so treat them as + * if they were 64bit. */ - -static bool trap_xvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; - - if (p->is_write) { - u64 val = *dbg_reg; - - val &= 0xffffffffUL; - val |= p->regval << 32; - *dbg_reg = val; - - vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - } else { - p->regval = *dbg_reg >> 32; - } - - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); - - return true; -} - -#define DBG_BCR_BVR_WCR_WVR(n) \ - /* DBGBVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ - /* DBGBCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ - /* DBGWVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ - /* DBGWCRn */ \ +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ + /* DBGWCRn */ \ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } -#define DBGBXVR(n) \ - { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n } +#define DBGBXVR(n) \ + { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n } /* * Trapped cp14 registers. We generally ignore most of the external @@ -1821,9 +1786,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug_regs, NULL, MDCCINT_EL1 }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug_regs, NULL, MDSCR_EL1 }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1838,7 +1803,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug_regs, NULL, DBGVCR32_EL2 }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), -- cgit v1.2.3 From 2d27fd784893a767ec4162afc6d8c86eec2d1bfe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:20:13 +0000 Subject: KVM: arm64: Drop is_32bit trap attribute The is_32bit attribute is now completely unused, drop it. 
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 3 --- arch/arm64/kvm/sys_regs.h | 1 - arch/arm64/kvm/vgic-sys-reg-v3.c | 2 -- 3 files changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 660ff6c18b2e..a4726cdbe16f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2176,7 +2176,6 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, int Rt2 = (esr >> 10) & 0x1f; params.is_aarch32 = true; - params.is_32bit = false; params.CRm = (esr >> 1) & 0xf; params.is_write = ((esr & 1) == 0); @@ -2227,7 +2226,6 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, int Rt = kvm_vcpu_sys_get_rt(vcpu); params.is_aarch32 = true; - params.is_32bit = true; params.CRm = (esr >> 1) & 0xf; params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = ((esr & 1) == 0); @@ -2322,7 +2320,6 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) trace_kvm_handle_sys_reg(esr); params.is_aarch32 = false; - params.is_32bit = false; params.Op0 = (esr >> 20) & 3; params.Op1 = (esr >> 14) & 0x7; params.CRn = (esr >> 10) & 0xf; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 259864c3c76b..8c4958d6b5ce 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -20,7 +20,6 @@ struct sys_reg_params { u64 regval; bool is_write; bool is_aarch32; - bool is_32bit; /* Only valid if is_aarch32 is true */ }; struct sys_reg_desc { diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 2f92bdcb1188..806d6701a7da 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -269,7 +269,6 @@ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, params.regval = *reg; params.is_write = is_write; params.is_aarch32 = false; - params.is_32bit = false; if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs))) @@ -289,7 +288,6 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, params.regval = *reg; params.is_write = is_write; params.is_aarch32 = false; - params.is_32bit = false; r = find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs)); -- cgit v1.2.3 From 50f304532770c19a127b1e1b6769c0538abda58f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:20:49 +0000 Subject: KVM: arm64: Drop is_aarch32 trap attribute is_aarch32 is only used once, and can be trivially replaced by testing Op0 instead. Drop it. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 7 ++----- arch/arm64/kvm/sys_regs.h | 1 - arch/arm64/kvm/vgic-sys-reg-v3.c | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a4726cdbe16f..64fdfb64d791 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -213,7 +213,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure * group. 
*/ - if (p->is_aarch32) { + if (p->Op0 == 0) { /* AArch32 */ switch (p->Op1) { default: /* Keep GCC quiet */ case 0: /* ICC_SGI1R */ @@ -224,7 +224,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, g1 = false; break; } - } else { + } else { /* AArch64 */ switch (p->Op2) { default: /* Keep GCC quiet */ case 5: /* ICC_SGI1R_EL1 */ @@ -2175,7 +2175,6 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, int Rt = kvm_vcpu_sys_get_rt(vcpu); int Rt2 = (esr >> 10) & 0x1f; - params.is_aarch32 = true; params.CRm = (esr >> 1) & 0xf; params.is_write = ((esr & 1) == 0); @@ -2225,7 +2224,6 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); - params.is_aarch32 = true; params.CRm = (esr >> 1) & 0xf; params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = ((esr & 1) == 0); @@ -2319,7 +2317,6 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) trace_kvm_handle_sys_reg(esr); - params.is_aarch32 = false; params.Op0 = (esr >> 20) & 3; params.Op1 = (esr >> 14) & 0x7; params.CRn = (esr >> 10) & 0xf; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 8c4958d6b5ce..416153b593a6 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -19,7 +19,6 @@ struct sys_reg_params { u8 Op2; u64 regval; bool is_write; - bool is_aarch32; }; struct sys_reg_desc { diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 806d6701a7da..07d5271e9f05 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -268,7 +268,6 @@ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, params.regval = *reg; params.is_write = is_write; - params.is_aarch32 = false; if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs))) @@ -287,7 +286,6 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, if (is_write) params.regval = *reg; params.is_write = is_write; - params.is_aarch32 = false; r = find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs)); -- cgit v1.2.3 From 5f7e02aebdf0c8d255f3ff2df8595fd220e7d5ce Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:21:37 +0000 Subject: KVM: arm64: Drop legacy copro shadow register Finally remove one of the biggest 32bit legacies: the copro shadow mapping. We won't miss it. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 48 +-------------------------------------- 1 file changed, 1 insertion(+), 47 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0d023e4f80a0..c527f9567713 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -201,49 +201,6 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! 
*/ }; -/* 32bit mapping */ -#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ -#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ -#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ -#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ -#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ -#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ -#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ -#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ -#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ -#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ -#define c2_TTBCR2 (c2_TTBCR + 1) /* Translation Table Base Control R. 2 */ -#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ -#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ -#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ -#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ -#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ -#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ -#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ -#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ -#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ -#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ -#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ -#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ -#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ -#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ -#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ -#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ -#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ -#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ - -#define cp14_DBGDSCRext (MDSCR_EL1 * 2) -#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) -#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) -#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) -#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) -#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) -#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) -#define cp14_DBGVCR (DBGVCR32_EL2 * 2) - -#define NR_COPRO_REGS (NR_SYS_REGS * 2) - struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -254,10 +211,7 @@ struct kvm_cpu_context { struct user_fpsimd_state fp_regs; - union { - u64 sys_regs[NR_SYS_REGS]; - u32 copro[NR_COPRO_REGS]; - }; + u64 sys_regs[NR_SYS_REGS]; struct kvm_vcpu *__hyp_running_vcpu; }; -- cgit v1.2.3 From 6ac4a5ac50d1d25a61aa00e660eebb21a2ff9b96 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Nov 2020 18:11:16 +0000 Subject: KVM: arm64: Drop kvm_coproc.h kvm_coproc.h used to serve as a compatibility layer for the files shared between the 32 and 64 bit ports. Another one bites the dust... 
Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_coproc.h | 38 ------------------------------------- arch/arm64/include/asm/kvm_host.h | 17 +++++++++++++++++ arch/arm64/kvm/arm.c | 3 +-- arch/arm64/kvm/guest.c | 1 - arch/arm64/kvm/handle_exit.c | 1 - arch/arm64/kvm/reset.c | 1 - arch/arm64/kvm/sys_regs.c | 1 - 7 files changed, 18 insertions(+), 44 deletions(-) delete mode 100644 arch/arm64/include/asm/kvm_coproc.h (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h deleted file mode 100644 index d6bb40122fdb..000000000000 --- a/arch/arm64/include/asm/kvm_coproc.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * Derived from arch/arm/include/asm/kvm_coproc.h - * Copyright (C) 2012 Rusty Russell IBM Corporation - */ - -#ifndef __ARM64_KVM_COPROC_H__ -#define __ARM64_KVM_COPROC_H__ - -#include - -void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); - -struct kvm_sys_reg_table { - const struct sys_reg_desc *table; - size_t num; -}; - -int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); -int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); -int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); -int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); - -#define kvm_coproc_table_init kvm_sys_reg_table_init -void kvm_sys_reg_table_init(void); - -struct kvm_one_reg; -int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); -int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); -int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); -unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); - -#endif /* __ARM64_KVM_COPROC_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c527f9567713..709f892f7a14 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -533,6 +533,12 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); + int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); @@ -595,6 +601,17 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); int handle_exit(struct kvm_vcpu *vcpu, int exception_index); void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index); +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); + +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); + +void kvm_sys_reg_table_init(void); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); diff --git a/arch/arm64/kvm/arm.c 
b/arch/arm64/kvm/arm.c index 5750ec34960e..9d69d2bf6943 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -1525,7 +1524,7 @@ static int init_subsystems(void) goto out; kvm_perf_init(); - kvm_coproc_table_init(); + kvm_sys_reg_table_init(); out: on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3f23f7478d2a..9bbd30e62799 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include "trace.h" diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index f79137ee4274..cebe39f3b1b6 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f32490229a4c..74ce92a4988c 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 64fdfb64d791..d2e1d745f067 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From d3a9e4146a6f79f19430bca3f2a4d6ebaaffe36b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:12 -0700 Subject: KVM: VMX: Drop guest CPUID check for VMXE in vmx_set_cr4() Drop vmx_set_cr4()'s somewhat hidden guest_cpuid_has() check on VMXE now that common x86 handles the check by incorporating VMXE into the CR4 reserved bits, i.e. in cr4_guest_rsvd_bits. This fixes a bug where KVM incorrectly rejects KVM_SET_SREGS with CR4.VMXE=1 if it's executed before KVM_SET_CPUID{,2}. Fixes: 5e1746d6205d ("KVM: nVMX: Allow setting the VMXE bit in CR4") Reported-by: Stas Sergeev Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-2-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 47b8357b9751..7ebf08db6747 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3129,9 +3129,10 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) * must first be able to turn on cr4.VMXE (see handle_vmon()). * So basically the check on whether to allow nested VMX * is here. We operate under the default treatment of SMM, - * so VMX cannot be enabled under SMM. + * so VMX cannot be enabled under SMM. Note, guest CPUID is + * intentionally ignored, it's handled by cr4_guest_rsvd_bits. */ - if (!nested_vmx_allowed(vcpu) || is_smm(vcpu)) + if (!nested || is_smm(vcpu)) return 1; } -- cgit v1.2.3 From a447e38a7fadb2e554c3942dda183e55cccd5df0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:13 -0700 Subject: KVM: VMX: Drop explicit 'nested' check from vmx_set_cr4() Drop vmx_set_cr4()'s explicit check on the 'nested' module param now that common x86 handles the check by incorporating VMXE into the CR4 reserved bits, via kvm_cpu_caps. X86_FEATURE_VMX is set in kvm_cpu_caps (by vmx_set_cpu_caps()), if and only if 'nested' is true. No functional change intended. 
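As a concrete illustration of the ordering bug fixed by the previous patch (KVM_SET_SREGS with CR4.VMXE=1 rejected when issued before KVM_SET_CPUID{,2}), the hedged userspace sketch below sets CR4.VMXE through KVM_SET_SREGS before any KVM_SET_CPUID2 call. It assumes an x86 host with /dev/kvm available and nested VMX enabled, and it omits error handling except for the ioctl of interest.

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

#define X86_CR4_VMXE (1UL << 13)	/* not exported to userspace headers */

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	struct kvm_sregs sregs;

	/* Flip CR4.VMXE before the VMM has issued any KVM_SET_CPUID2. */
	ioctl(vcpu, KVM_GET_SREGS, &sregs);
	sregs.cr4 |= X86_CR4_VMXE;

	if (ioctl(vcpu, KVM_SET_SREGS, &sregs) < 0)
		perror("KVM_SET_SREGS");	/* rejected before the fix */
	else
		printf("CR4.VMXE accepted before KVM_SET_CPUID2\n");
	return 0;
}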
Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-3-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7ebf08db6747..d9a4526c730c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3123,18 +3123,13 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } } - if (cr4 & X86_CR4_VMXE) { - /* - * To use VMXON (and later other VMX instructions), a guest - * must first be able to turn on cr4.VMXE (see handle_vmon()). - * So basically the check on whether to allow nested VMX - * is here. We operate under the default treatment of SMM, - * so VMX cannot be enabled under SMM. Note, guest CPUID is - * intentionally ignored, it's handled by cr4_guest_rsvd_bits. - */ - if (!nested || is_smm(vcpu)) - return 1; - } + /* + * We operate under the default treatment of SMM, so VMX cannot be + * enabled under SMM. Note, whether or not VMXE is allowed at all is + * handled by kvm_valid_cr4(). + */ + if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) + return 1; if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) return 1; -- cgit v1.2.3 From 311a06593b9a3944a63ed176b95cb8d857f7c83b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:14 -0700 Subject: KVM: SVM: Drop VMXE check from svm_set_cr4() Drop svm_set_cr4()'s explicit check CR4.VMXE now that common x86 handles the check by incorporating VMXE into the CR4 reserved bits, via kvm_cpu_caps. SVM obviously does not set X86_FEATURE_VMX. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-4-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1e81cfebd491..e584da54738c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1687,9 +1687,6 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; - if (cr4 & X86_CR4_VMXE) - return 1; - if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) svm_flush_tlb(vcpu); -- cgit v1.2.3 From c2fe3cd4604ac87c587db05d41843d667dc43815 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:15 -0700 Subject: KVM: x86: Move vendor CR4 validity check to dedicated kvm_x86_ops hook Split out VMX's checks on CR4.VMXE to a dedicated hook, .is_valid_cr4(), and invoke the new hook from kvm_valid_cr4(). This fixes an issue where KVM_SET_SREGS would return success while failing to actually set CR4. Fixing the issue by explicitly checking kvm_x86_ops.set_cr4()'s return in __set_sregs() is not a viable option as KVM has already stuffed a variety of vCPU state. Note, kvm_valid_cr4() and is_valid_cr4() have different return types and inverted semantics. This will be remedied in a future patch. 
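The value of the split is easiest to see in miniature. The sketch below is not KVM code (the demo_* names are invented); it only shows the validate-then-commit shape that the new .is_valid_cr4() hook gives the common code, so a rejected CR4 can no longer leave other vCPU state already written.

#include <stdbool.h>
#include <stdio.h>

struct demo_regs {
	unsigned long cr0, cr4;
};

/* Pure predicate: may reject, never mutates state. */
static bool demo_is_valid_cr4(unsigned long cr4)
{
	return (cr4 & (1UL << 13)) == 0;	/* pretend bit 13 is reserved */
}

/* Setter: cannot fail by design, only called after validation. */
static void demo_set_cr4(struct demo_regs *r, unsigned long cr4)
{
	r->cr4 = cr4;
}

static int demo_set_sregs(struct demo_regs *r, unsigned long cr0, unsigned long cr4)
{
	if (!demo_is_valid_cr4(cr4))
		return -1;			/* reject before touching anything */

	r->cr0 = cr0;
	demo_set_cr4(r, cr4);
	return 0;
}

int main(void)
{
	struct demo_regs r = { 0 };

	if (demo_set_sregs(&r, 0x80000011UL, 1UL << 13))
		printf("rejected, state untouched: cr0=%#lx\n", r.cr0);
	return 0;
}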
Fixes: 5e1746d6205d ("KVM: nVMX: Allow setting the VMXE bit in CR4") Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-5-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/svm/svm.c | 9 +++++++-- arch/x86/kvm/svm/svm.h | 2 +- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/vmx.c | 31 ++++++++++++++++++------------- arch/x86/kvm/vmx/vmx.h | 2 +- arch/x86/kvm/x86.c | 6 ++++-- 7 files changed, 34 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 324ddd7fd0aa..5bb0a2bbd9c4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1115,7 +1115,8 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); - int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e584da54738c..8c858f80f399 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1682,7 +1682,12 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) update_cr0_intercept(svm); } -int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + return true; +} + +void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; @@ -1696,7 +1701,6 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); - return 0; } static void svm_set_segment(struct kvm_vcpu *vcpu, @@ -4212,6 +4216,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .set_cr0 = svm_set_cr0, + .is_valid_cr4 = svm_is_valid_cr4, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, .get_idt = svm_get_idt, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 1d853fe4c778..eb6fbafbe36c 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -358,7 +358,7 @@ void svm_vcpu_free_msrpm(u32 *msrpm); int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void svm_flush_tlb(struct kvm_vcpu *vcpu); void disable_nmi_singlestep(struct vcpu_svm *svm); bool svm_smi_blocked(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 89af692deb7e..efebd849b3c9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4814,7 +4814,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) /* * The Intel VMX Instruction Reference lists a bunch of bits that are * prerequisite to running VMXON, most notably cr4.VMXE must be set to - * 1 (see vmx_set_cr4() for when we allow the guest to set this). + * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this). 
* Otherwise, we should fail with #UD. But most faulting conditions * have already been checked by hardware, prior to the VM-exit for * VMXON. We do test guest cr4.VMXE because processor CR4 always has diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d9a4526c730c..3327964a1e2e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3095,7 +3095,23 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd, vmcs_writel(GUEST_CR3, guest_cr3); } -int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + /* + * We operate under the default treatment of SMM, so VMX cannot be + * enabled under SMM. Note, whether or not VMXE is allowed at all is + * handled by kvm_valid_cr4(). + */ + if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) + return false; + + if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) + return false; + + return true; +} + +void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { struct vcpu_vmx *vmx = to_vmx(vcpu); /* @@ -3123,17 +3139,6 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } } - /* - * We operate under the default treatment of SMM, so VMX cannot be - * enabled under SMM. Note, whether or not VMXE is allowed at all is - * handled by kvm_valid_cr4(). - */ - if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) - return 1; - - if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) - return 1; - vcpu->arch.cr4 = cr4; kvm_register_mark_available(vcpu, VCPU_EXREG_CR4); @@ -3164,7 +3169,6 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_writel(CR4_READ_SHADOW, cr4); vmcs_writel(GUEST_CR4, hw_cr4); - return 0; } void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) @@ -7612,6 +7616,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .set_cr0 = vmx_set_cr0, + .is_valid_cr4 = vmx_is_valid_cr4, .set_cr4 = vmx_set_cr4, .set_efer = vmx_set_efer, .get_idt = vmx_get_idt, diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f6f66e5c6510..9d3a557949ac 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -321,7 +321,7 @@ u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu); void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask); int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer); void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void set_cr4_guest_host_mask(struct vcpu_vmx *vmx); void ept_save_pdptrs(struct kvm_vcpu *vcpu); void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 078a39d489fe..8c8205cb57bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -972,6 +972,9 @@ int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) return -EINVAL; + if (!kvm_x86_ops.is_valid_cr4(vcpu, cr4)) + return -EINVAL; + return 0; } EXPORT_SYMBOL_GPL(kvm_valid_cr4); @@ -1006,8 +1009,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; } - if (kvm_x86_ops.set_cr4(vcpu, cr4)) - return 1; + kvm_x86_ops.set_cr4(vcpu, cr4); if (((cr4 ^ old_cr4) & mmu_role_bits) || (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) -- cgit v1.2.3 From ee69c92bac61f4379e97f40b259a1c1257e5987f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 
18:44:16 -0700 Subject: KVM: x86: Return bool instead of int for CR4 and SREGS validity checks Rework the common CR4 and SREGS checks to return a bool instead of an int, i.e. true/false instead of 0/-EINVAL, and add "is" to the name to clarify the polarity of the return value (which is effectively inverted by this change). No functional changed intended. Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-6-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 28 ++++++++++++---------------- arch/x86/kvm/x86.h | 2 +- 4 files changed, 15 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 9e4c226dbf7d..b0f37183e8f5 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -254,7 +254,7 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12) (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK)) return false; } - if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4)) + if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4)) return false; return nested_vmcb_check_controls(&vmcb12->control); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3327964a1e2e..39ff7600b0af 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3100,7 +3100,7 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) /* * We operate under the default treatment of SMM, so VMX cannot be * enabled under SMM. Note, whether or not VMXE is allowed at all is - * handled by kvm_valid_cr4(). + * handled by kvm_is_valid_cr4(). */ if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) return false; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c8205cb57bc..2db86702cac4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -964,20 +964,17 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { if (cr4 & cr4_reserved_bits) - return -EINVAL; + return false; if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) - return -EINVAL; - - if (!kvm_x86_ops.is_valid_cr4(vcpu, cr4)) - return -EINVAL; + return false; - return 0; + return kvm_x86_ops.is_valid_cr4(vcpu, cr4); } -EXPORT_SYMBOL_GPL(kvm_valid_cr4); +EXPORT_SYMBOL_GPL(kvm_is_valid_cr4); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -986,7 +983,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) X86_CR4_SMEP; unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE; - if (kvm_valid_cr4(vcpu, cr4)) + if (!kvm_is_valid_cr4(vcpu, cr4)) return 1; if (is_long_mode(vcpu)) { @@ -9535,7 +9532,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); -static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* @@ -9543,19 +9540,18 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) * 64-bit mode (though maybe in a 32-bit code segment). * CR4.PAE and EFER.LMA must be set. 
*/ - if (!(sregs->cr4 & X86_CR4_PAE) - || !(sregs->efer & EFER_LMA)) - return -EINVAL; + if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA)) + return false; } else { /* * Not in 64-bit mode: EFER.LMA is clear and the code * segment cannot be 64-bit. */ if (sregs->efer & EFER_LMA || sregs->cs.l) - return -EINVAL; + return false; } - return kvm_valid_cr4(vcpu, sregs->cr4); + return kvm_is_valid_cr4(vcpu, sregs->cr4); } static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) @@ -9567,7 +9563,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) struct desc_ptr dt; int ret = -EINVAL; - if (kvm_valid_sregs(vcpu, sregs)) + if (!kvm_is_valid_sregs(vcpu, sregs)) goto out; apic_base_msr.data = sregs->apic_base; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e7ca622a468f..764c967a1993 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -369,7 +369,7 @@ static inline bool kvm_dr6_valid(u64 data) void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); int kvm_spec_ctrl_test_value(u64 value); -int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu); int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, struct x86_exception *e); -- cgit v1.2.3 From 1c96dcceaeb3a99aaf0d548eef2223e0b02a7e40 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Nov 2020 11:20:49 -0500 Subject: KVM: x86: fix apic_accept_events vs check_nested_events vmx_apic_init_signal_blocked is buggy in that it returns true even in VMX non-root mode. In non-root mode, however, INITs are not latched, they just cause a vmexit. Previously, KVM was waiting for them to be processed when kvm_apic_accept_events and in the meanwhile it ate the SIPIs that the processor received. However, in order to implement the wait-for-SIPI activity state, KVM will have to process KVM_APIC_SIPI in vmx_check_nested_events, and it will not be possible anymore to disregard SIPIs in non-root mode as the code is currently doing. By calling kvm_x86_ops.nested_ops->check_events, we can force a vmexit (with the side-effect of latching INITs) before incorrectly injecting an INIT or SIPI in a guest, and therefore vmx_apic_init_signal_blocked can do the right thing. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 30 ++++++++++++++++++++++++++---- arch/x86/kvm/vmx/vmx.c | 2 +- 2 files changed, 27 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 105e7859d1f2..e3ee597ff540 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2843,14 +2843,35 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; u8 sipi_vector; + int r; unsigned long pe; - if (!lapic_in_kernel(vcpu) || !apic->pending_events) + if (!lapic_in_kernel(vcpu)) + return; + + /* + * Read pending events before calling the check_events + * callback. + */ + pe = smp_load_acquire(&apic->pending_events); + if (!pe) return; + if (is_guest_mode(vcpu)) { + r = kvm_x86_ops.nested_ops->check_events(vcpu); + if (r < 0) + return; + /* + * If an event has happened and caused a vmexit, + * we know INITs are latched and therefore + * we will not incorrectly deliver an APIC + * event instead of a vmexit. + */ + } + /* * INITs are latched while CPU is in specific states - * (SMM, VMX non-root mode, SVM with GIF=0). + * (SMM, VMX root mode, SVM with GIF=0). 
* Because a CPU cannot be in these states immediately * after it has processed an INIT signal (and thus in * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs @@ -2858,13 +2879,13 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) */ if (kvm_vcpu_latch_init(vcpu)) { WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); - if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) + if (test_bit(KVM_APIC_SIPI, &pe)) clear_bit(KVM_APIC_SIPI, &apic->pending_events); return; } - pe = xchg(&apic->pending_events, 0); if (test_bit(KVM_APIC_INIT, &pe)) { + clear_bit(KVM_APIC_INIT, &apic->pending_events); kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -2873,6 +2894,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) } if (test_bit(KVM_APIC_SIPI, &pe) && vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + clear_bit(KVM_APIC_SIPI, &apic->pending_events); /* evaluate pending_events before reading the vector */ smp_rmb(); sipi_vector = apic->sipi_vector; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 39ff7600b0af..fd70bdf63844 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7558,7 +7558,7 @@ static void enable_smi_window(struct kvm_vcpu *vcpu) static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) { - return to_vmx(vcpu)->nested.vmxon; + return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu); } static void vmx_migrate_timers(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From bf0cd88ce363a2de3684baaa48d3f194acdc516c Mon Sep 17 00:00:00 2001 From: Yadong Qi Date: Fri, 6 Nov 2020 14:51:22 +0800 Subject: KVM: x86: emulate wait-for-SIPI and SIPI-VMExit Background: We have a lightweight HV; it needs INIT-VMExit and SIPI-VMExit to wake up APs for guests since it does not monitor the Local APIC. But currently the virtual wait-for-SIPI (WFS) state is not supported in nVMX, so when running on top of KVM, the L1 HV cannot receive the INIT-VMExit and SIPI-VMExit, which means the L2 guest cannot wake up the APs. According to Intel SDM Chapter 25.2 Other Causes of VM Exits, SIPIs cause VM exits when a logical processor is in wait-for-SIPI state. In this patch: 1. introduce SIPI exit reason, 2. introduce wait-for-SIPI state for nVMX, 3. advertise wait-for-SIPI support to guest. When the L1 hypervisor is not monitoring the Local APIC, L0 needs to emulate INIT-VMExit and SIPI-VMExit to L1 to emulate INIT-SIPI-SIPI for L2. L2 LAPIC writes would be trapped by the L0 hypervisor (KVM); L0 should emulate the INIT/SIPI vmexit to the L1 hypervisor to set the proper state for L2's vCPU. Handling procedure: Source vCPU: L2 write LAPIC.ICR(INIT). L0 trap LAPIC.ICR write(INIT): inject a latched INIT event to target vCPU. Target vCPU: L0 emulate an INIT VMExit to L1 if in guest mode. L1 set guest VMCS, guest_activity_state=WAIT_SIPI, vmresume. L0 set vcpu.mp_state to INIT_RECEIVED if (vmcs12.guest_activity_state == WAIT_SIPI). Source vCPU: L2 write LAPIC.ICR(SIPI). L0 trap LAPIC.ICR write(SIPI): inject a latched SIPI event to target vCPU. Target vCPU: L0 emulate a SIPI VMExit to L1 if (vcpu.mp_state == INIT_RECEIVED). L1 set CS:IP, guest_activity_state=ACTIVE, vmresume. L0 resume to L2. L2 starts up.
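For context, the LAPIC.ICR writes that L0 traps in the flow above are the classic xAPIC INIT-SIPI-SIPI sequence issued inside the L2 guest. The sketch below is a hedged, guest-kernel-style illustration only: the default xAPIC MMIO base, the delay loops and the helper names are assumptions for the example, not code from this series.

#include <stdint.h>

#define XAPIC_BASE	0xfee00000UL	/* assumed default xAPIC base */
#define APIC_ICR_LO	0x300
#define APIC_ICR_HI	0x310

#define ICR_DM_INIT	(5U << 8)	/* delivery mode: INIT */
#define ICR_DM_STARTUP	(6U << 8)	/* delivery mode: Start-Up (SIPI) */
#define ICR_LEVEL_ASSERT (1U << 14)

static inline void apic_write(uint32_t reg, uint32_t val)
{
	*(volatile uint32_t *)(XAPIC_BASE + reg) = val;
}

static inline void spin_delay(unsigned long loops)
{
	while (loops--)
		__asm__ __volatile__("pause");
}

/*
 * Wake the AP whose APIC ID is @apic_id; @sipi_vector selects the
 * 4 KiB-aligned real-mode start page (vector 0x08 -> 0x8000).
 * Must run at ring 0 inside the guest.
 */
void wake_ap(uint8_t apic_id, uint8_t sipi_vector)
{
	apic_write(APIC_ICR_HI, (uint32_t)apic_id << 24);
	apic_write(APIC_ICR_LO, ICR_DM_INIT | ICR_LEVEL_ASSERT);
	spin_delay(1000000);			/* ~10ms in real code */

	apic_write(APIC_ICR_HI, (uint32_t)apic_id << 24);
	apic_write(APIC_ICR_LO, ICR_DM_STARTUP | sipi_vector);
	spin_delay(100000);			/* ~200us, then repeat */

	apic_write(APIC_ICR_HI, (uint32_t)apic_id << 24);
	apic_write(APIC_ICR_LO, ICR_DM_STARTUP | sipi_vector);
}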
Signed-off-by: Yadong Qi Message-Id: <20200922052343.84388-1-yadong.qi@intel.com> Signed-off-by: Paolo Bonzini Message-Id: <20201106065122.403183-1-yadong.qi@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 1 + arch/x86/include/uapi/asm/vmx.h | 2 ++ arch/x86/kvm/vmx/nested.c | 55 ++++++++++++++++++++++++++++++----------- 3 files changed, 44 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f8ba5289ecb0..38ca445a8429 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -113,6 +113,7 @@ #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100 #define VMX_MISC_ZERO_LEN_INS 0x40000000 #define VMX_MISC_MSR_LIST_MULTIPLIER 512 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index b8ff9e8ac0d5..ada955c5ebb6 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -32,6 +32,7 @@ #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 +#define EXIT_REASON_SIPI_SIGNAL 4 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -94,6 +95,7 @@ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ + { EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index efebd849b3c9..e2f26564a12d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2952,7 +2952,8 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) { if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && - vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)) + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT && + vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI)) return -EINVAL; return 0; @@ -3559,19 +3560,29 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) */ nested_cache_shadow_vmcs12(vcpu, vmcs12); - /* - * If we're entering a halted L2 vcpu and the L2 vcpu won't be - * awakened by event injection or by an NMI-window VM-exit or - * by an interrupt-window VM-exit, halt the vcpu. - */ - if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && - !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && - !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) && - !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) && - (vmcs12->guest_rflags & X86_EFLAGS_IF))) { + switch (vmcs12->guest_activity_state) { + case GUEST_ACTIVITY_HLT: + /* + * If we're entering a halted L2 vcpu and the L2 vcpu won't be + * awakened by event injection or by an NMI-window VM-exit or + * by an interrupt-window VM-exit, halt the vcpu. 
+ */ + if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && + !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) && + !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) && + (vmcs12->guest_rflags & X86_EFLAGS_IF))) { + vmx->nested.nested_run_pending = 0; + return kvm_vcpu_halt(vcpu); + } + break; + case GUEST_ACTIVITY_WAIT_SIPI: vmx->nested.nested_run_pending = 0; - return kvm_vcpu_halt(vcpu); + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + break; + default: + break; } + return 1; vmentry_failed: @@ -3797,7 +3808,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) return -EBUSY; nested_vmx_update_pending_dbg(vcpu); clear_bit(KVM_APIC_INIT, &apic->pending_events); - nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); + if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED) + nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); + return 0; + } + + if (lapic_in_kernel(vcpu) && + test_bit(KVM_APIC_SIPI, &apic->pending_events)) { + if (block_nested_events) + return -EBUSY; + + clear_bit(KVM_APIC_SIPI, &apic->pending_events); + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) + nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0, + apic->sipi_vector & 0xFFUL); return 0; } @@ -4036,6 +4060,8 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; + else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) + vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI; else vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; @@ -6483,7 +6509,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) msrs->misc_low |= MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | - VMX_MISC_ACTIVITY_HLT; + VMX_MISC_ACTIVITY_HLT | + VMX_MISC_ACTIVITY_WAIT_SIPI; msrs->misc_high = 0; /* -- cgit v1.2.3 From c21d54f0307ff42a346294899107b570b98c47b5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 29 Sep 2020 17:09:43 +0200 Subject: KVM: x86: hyper-v: allow KVM_GET_SUPPORTED_HV_CPUID as a system ioctl KVM_GET_SUPPORTED_HV_CPUID is a vCPU ioctl but its output is now independent from vCPU and in some cases VMMs may want to use it as a system ioctl instead. In particular, QEMU doesn CPU feature expansion before any vCPU gets created so KVM_GET_SUPPORTED_HV_CPUID can't be used. Convert KVM_GET_SUPPORTED_HV_CPUID to 'dual' system/vCPU ioctl with the same meaning. Signed-off-by: Vitaly Kuznetsov Message-Id: <20200929150944.1235688-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 16 ++++++++++----- arch/x86/kvm/hyperv.c | 6 +++--- arch/x86/kvm/hyperv.h | 4 ++-- arch/x86/kvm/vmx/evmcs.c | 3 +-- arch/x86/kvm/x86.c | 45 +++++++++++++++++++++++++----------------- include/uapi/linux/kvm.h | 3 ++- 6 files changed, 46 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e00a66d72372..81d54fe76a2d 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4455,9 +4455,9 @@ that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present. 
4.118 KVM_GET_SUPPORTED_HV_CPUID -------------------------------- -:Capability: KVM_CAP_HYPERV_CPUID +:Capability: KVM_CAP_HYPERV_CPUID (vcpu), KVM_CAP_SYS_HYPERV_CPUID (system) :Architectures: x86 -:Type: vcpu ioctl +:Type: system ioctl, vcpu ioctl :Parameters: struct kvm_cpuid2 (in/out) :Returns: 0 on success, -1 on error @@ -4502,9 +4502,6 @@ Currently, the following list of CPUID leaves are returned: - HYPERV_CPUID_SYNDBG_INTERFACE - HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES -HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was -enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS). - Userspace invokes KVM_GET_SUPPORTED_HV_CPUID by passing a kvm_cpuid2 structure with the 'nent' field indicating the number of entries in the variable-size array 'entries'. If the number of entries is too low to describe all Hyper-V @@ -4515,6 +4512,15 @@ number of valid entries in the 'entries' array, which is then filled. 'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved, userspace should not expect to get any particular value there. +Note, vcpu version of KVM_GET_SUPPORTED_HV_CPUID is currently deprecated. Unlike +system ioctl which exposes all supported feature bits unconditionally, vcpu +version has the following quirks: +- HYPERV_CPUID_NESTED_FEATURES leaf and HV_X64_ENLIGHTENED_VMCS_RECOMMENDED + feature bit are only exposed when Enlightened VMCS was previously enabled + on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS). +- HV_STIMER_DIRECT_MODE_AVAILABLE bit is only exposed with in-kernel LAPIC. + (presumes KVM_CREATE_IRQCHIP has already been called). + 4.119 KVM_ARM_VCPU_FINALIZE --------------------------- diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5c7c4060b45c..922c69dcca4d 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1951,8 +1951,8 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); } -int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) +int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { uint16_t evmcs_ver = 0; struct kvm_cpuid_entry2 cpuid_entries[] = { @@ -2037,7 +2037,7 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, * Direct Synthetic timers only make sense with in-kernel * LAPIC */ - if (lapic_in_kernel(vcpu)) + if (!vcpu || lapic_in_kernel(vcpu)) ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; break; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index e68c6c2e9649..6d7def2b0aad 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -126,7 +126,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, void kvm_hv_init_vm(struct kvm *kvm); void kvm_hv_destroy_vm(struct kvm *kvm); int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); -int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries); +int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries); #endif diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index f3199bb02f22..41f24661af04 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -326,7 +326,6 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa) uint16_t nested_get_evmcs_version(struct kvm_vcpu 
*vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); /* * vmcs_version represents the range of supported Enlightened VMCS * versions: lower 8 bits is the minimal version, higher 8 bits is the @@ -334,7 +333,7 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) * KVM_EVMCS_VERSION. */ if (kvm_cpu_cap_get(X86_FEATURE_VMX) && - vmx->nested.enlightened_vmcs_enabled) + (!vcpu || to_vmx(vcpu)->nested.enlightened_vmcs_enabled)) return (KVM_EVMCS_VERSION << 8) | 1; return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2db86702cac4..773cb52cb775 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3677,6 +3677,27 @@ static inline bool kvm_can_mwait_in_guest(void) boot_cpu_has(X86_FEATURE_ARAT); } +static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu, + struct kvm_cpuid2 __user *cpuid_arg) +{ + struct kvm_cpuid2 cpuid; + int r; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) + return r; + + r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries); + if (r) + return r; + + r = -EFAULT; + if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) + return r; + + return 0; +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r = 0; @@ -3713,6 +3734,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_TLBFLUSH: case KVM_CAP_HYPERV_SEND_IPI: case KVM_CAP_HYPERV_CPUID: + case KVM_CAP_SYS_HYPERV_CPUID: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -3898,6 +3920,9 @@ long kvm_arch_dev_ioctl(struct file *filp, case KVM_GET_MSRS: r = msr_io(NULL, argp, do_get_msr_feature, 1); break; + case KVM_GET_SUPPORTED_HV_CPUID: + r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp); + break; default: r = -EINVAL; break; @@ -4974,25 +4999,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } - case KVM_GET_SUPPORTED_HV_CPUID: { - struct kvm_cpuid2 __user *cpuid_arg = argp; - struct kvm_cpuid2 cpuid; - - r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) - goto out; - - r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid, - cpuid_arg->entries); - if (r) - goto out; - - r = -EFAULT; - if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) - goto out; - r = 0; + case KVM_GET_SUPPORTED_HV_CPUID: + r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp); break; - } default: r = -EINVAL; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ca41220b40b8..204afbe1240e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1053,6 +1053,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SYS_HYPERV_CPUID 191 #ifdef KVM_CAP_IRQ_ROUTING @@ -1511,7 +1512,7 @@ struct kvm_enc_region { /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) -/* Available with KVM_CAP_HYPERV_CPUID */ +/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) /* Available with KVM_CAP_ARM_SVE */ -- cgit v1.2.3 From ff5a983cbb3746d371de2cc95ea7dcfd982b4084 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:20:33 -0400 Subject: KVM: X86: Don't track dirty for KVM_SET_[TSS_ADDR|IDENTITY_MAP_ADDR] Originally, we have three code paths that can dirty a page without vcpu context for X86: - init_rmode_identity_map - init_rmode_tss - 
kvmgt_rw_gpa init_rmode_identity_map and init_rmode_tss will be setup on destination VM no matter what (and the guest cannot even see them), so it does not make sense to track them at all. To do this, allow __x86_set_memory_region() to return the userspace address that just allocated to the caller. Then in both of the functions we directly write to the userspace address instead of calling kvm_write_*() APIs. Another trivial change is that we don't need to explicitly clear the identity page table root in init_rmode_identity_map() because no matter what we'll write to the whole page with 4M huge page entries. Suggested-by: Paolo Bonzini Reviewed-by: Sean Christopherson Signed-off-by: Peter Xu Message-Id: <20201001012044.5151-4-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +- arch/x86/kvm/svm/avic.c | 9 +++-- arch/x86/kvm/vmx/vmx.c | 88 ++++++++++++++++++++--------------------- arch/x86/kvm/x86.c | 37 ++++++++++++++--- 4 files changed, 81 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5bb0a2bbd9c4..69e94aa716e9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1696,7 +1696,8 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); int kvm_is_in_guest(void); -int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); +void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, + u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 8c550999ace0..0ef84d57b72e 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -233,7 +233,8 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, */ static int avic_update_access_page(struct kvm *kvm, bool activate) { - int ret = 0; + void __user *ret; + int r = 0; mutex_lock(&kvm->slots_lock); /* @@ -249,13 +250,15 @@ static int avic_update_access_page(struct kvm *kvm, bool activate) APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, APIC_DEFAULT_PHYS_BASE, activate ? 
PAGE_SIZE : 0); - if (ret) + if (IS_ERR(ret)) { + r = PTR_ERR(ret); goto out; + } kvm->arch.apic_access_page_done = activate; out: mutex_unlock(&kvm->slots_lock); - return ret; + return r; } static int avic_init_backing_page(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fd70bdf63844..46b32aa43811 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3515,42 +3515,33 @@ bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu) return true; } -static int init_rmode_tss(struct kvm *kvm) +static int init_rmode_tss(struct kvm *kvm, void __user *ua) { - gfn_t fn; - u16 data = 0; - int idx, r; + const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); + u16 data; + int i; + + for (i = 0; i < 3; i++) { + if (__copy_to_user(ua + PAGE_SIZE * i, zero_page, PAGE_SIZE)) + return -EFAULT; + } - idx = srcu_read_lock(&kvm->srcu); - fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); - if (r < 0) - goto out; data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; - r = kvm_write_guest_page(kvm, fn++, &data, - TSS_IOPB_BASE_OFFSET, sizeof(u16)); - if (r < 0) - goto out; - r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); - if (r < 0) - goto out; - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); - if (r < 0) - goto out; + if (__copy_to_user(ua + TSS_IOPB_BASE_OFFSET, &data, sizeof(u16))) + return -EFAULT; + data = ~0; - r = kvm_write_guest_page(kvm, fn, &data, - RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, - sizeof(u8)); -out: - srcu_read_unlock(&kvm->srcu, idx); - return r; + if (__copy_to_user(ua + RMODE_TSS_SIZE - 1, &data, sizeof(u8))) + return -EFAULT; + + return 0; } static int init_rmode_identity_map(struct kvm *kvm) { struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); int i, r = 0; - kvm_pfn_t identity_map_pfn; + void __user *uaddr; u32 tmp; /* Protect kvm_vmx->ept_identity_pagetable_done. 
*/ @@ -3561,24 +3552,24 @@ static int init_rmode_identity_map(struct kvm *kvm) if (!kvm_vmx->ept_identity_map_addr) kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; - identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; - r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, - kvm_vmx->ept_identity_map_addr, PAGE_SIZE); - if (r < 0) + uaddr = __x86_set_memory_region(kvm, + IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, + kvm_vmx->ept_identity_map_addr, + PAGE_SIZE); + if (IS_ERR(uaddr)) { + r = PTR_ERR(uaddr); goto out; + } - r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); - if (r < 0) - goto out; /* Set up identity-mapping pagetable for EPT in real mode */ for (i = 0; i < PT32_ENT_PER_PAGE; i++) { tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); - r = kvm_write_guest_page(kvm, identity_map_pfn, - &tmp, i * sizeof(tmp), sizeof(tmp)); - if (r < 0) + if (__copy_to_user(uaddr + i * sizeof(tmp), &tmp, sizeof(tmp))) { + r = -EFAULT; goto out; + } } kvm_vmx->ept_identity_pagetable_done = true; @@ -3605,19 +3596,22 @@ static void seg_setup(int seg) static int alloc_apic_access_page(struct kvm *kvm) { struct page *page; - int r = 0; + void __user *hva; + int ret = 0; mutex_lock(&kvm->slots_lock); if (kvm->arch.apic_access_page_done) goto out; - r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, - APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); - if (r) + hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); + if (IS_ERR(hva)) { + ret = PTR_ERR(hva); goto out; + } page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (is_error_page(page)) { - r = -EFAULT; + ret = -EFAULT; goto out; } @@ -3629,7 +3623,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm->arch.apic_access_page_done = true; out: mutex_unlock(&kvm->slots_lock); - return r; + return ret; } int allocate_vpid(void) @@ -4638,7 +4632,7 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { - int ret; + void __user *ret; if (enable_unrestricted_guest) return 0; @@ -4648,10 +4642,12 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) PAGE_SIZE * 3); mutex_unlock(&kvm->slots_lock); - if (ret) - return ret; + if (IS_ERR(ret)) + return PTR_ERR(ret); + to_kvm_vmx(kvm)->tss_addr = addr; - return init_rmode_tss(kvm); + + return init_rmode_tss(kvm, ret); } static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 773cb52cb775..b4ac726526f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10354,7 +10354,32 @@ void kvm_arch_sync_events(struct kvm *kvm) kvm_free_pit(kvm); } -int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) +#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) + +/** + * __x86_set_memory_region: Setup KVM internal memory slot + * + * @kvm: the kvm pointer to the VM. + * @id: the slot ID to setup. + * @gpa: the GPA to install the slot (unused when @size == 0). + * @size: the size of the slot. Set to zero to uninstall a slot. + * + * This function helps to setup a KVM internal memory slot. Specify + * @size > 0 to install a new slot, while @size == 0 to uninstall a + * slot. 
The return code can be one of the following: + * + * HVA: on success (uninstall will return a bogus HVA) + * -errno: on error + * + * The caller should always use IS_ERR() to check the return value + * before use. Note, the KVM internal memory slots are guaranteed to + * remain valid and unchanged until the VM is destroyed, i.e., the + * GPA->HVA translation will not change. However, the HVA is a user + * address, i.e. its accessibility is not guaranteed, and must be + * accessed via __copy_{to,from}_user(). + */ +void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, + u32 size) { int i, r; unsigned long hva, old_npages; @@ -10363,12 +10388,12 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) /* Called with kvm->slots_lock held. */ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM)) - return -EINVAL; + return ERR_PTR_USR(-EINVAL); slot = id_to_memslot(slots, id); if (size) { if (slot && slot->npages) - return -EEXIST; + return ERR_PTR_USR(-EEXIST); /* * MAP_SHARED to prevent internal slot pages from being moved @@ -10377,7 +10402,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); if (IS_ERR((void *)hva)) - return PTR_ERR((void *)hva); + return (void __user *)hva; } else { if (!slot || !slot->npages) return 0; @@ -10396,13 +10421,13 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) m.memory_size = size; r = __kvm_set_memory_region(kvm, &m); if (r < 0) - return r; + return ERR_PTR_USR(r); } if (!size) vm_munmap(hva, old_npages * PAGE_SIZE); - return 0; + return (void __user *)hva; } EXPORT_SYMBOL_GPL(__x86_set_memory_region); -- cgit v1.2.3 From fb04a1eddb1a65b6588a021bdc132270d5ae48bb Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:22 -0400 Subject: KVM: X86: Implement ring-based dirty memory tracking This patch is heavily based on previous work from Lei Cao and Paolo Bonzini . [1] KVM currently uses large bitmaps to track dirty memory. These bitmaps are copied to userspace when userspace queries KVM for its dirty page information. The use of bitmaps is mostly sufficient for live migration, as large parts of memory are be dirtied from one log-dirty pass to another. However, in a checkpointing system, the number of dirty pages is small and in fact it is often bounded---the VM is paused when it has dirtied a pre-defined number of pages. Traversing a large, sparsely populated bitmap to find set bits is time-consuming, as is copying the bitmap to user-space. A similar issue will be there for live migration when the guest memory is huge while the page dirty procedure is trivial. In that case for each dirty sync we need to pull the whole dirty bitmap to userspace and analyse every bit even if it's mostly zeros. The preferred data structure for above scenarios is a dense list of guest frame numbers (GFN). This patch series stores the dirty list in kernel memory that can be memory mapped into userspace to allow speedy harvesting. This patch enables dirty ring for X86 only. However it should be easily extended to other archs as well. 
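A quick back-of-the-envelope comparison makes the motivation concrete. The numbers in the sketch below are invented (a 64 GiB guest checkpointing 4096 dirty pages per pass); the only value taken from this series is the 16-byte size of a kvm_dirty_gfn entry.

#include <stdio.h>

int main(void)
{
	unsigned long long guest_bytes = 64ULL << 30;	/* 64 GiB guest */
	unsigned long long page_size   = 4096;
	unsigned long long dirty_pages = 4096;		/* small checkpoint delta */

	unsigned long long total_pages  = guest_bytes / page_size;
	unsigned long long bitmap_bytes = total_pages / 8;	/* scanned in full each pass */
	unsigned long long ring_bytes   = dirty_pages * 16;	/* one entry per dirtied page */

	printf("bitmap scanned/copied per pass: %llu bytes\n", bitmap_bytes);
	printf("ring entries consumed per pass: %llu bytes\n", ring_bytes);
	return 0;
}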
[1] https://patchwork.kernel.org/patch/10471409/ Signed-off-by: Lei Cao Signed-off-by: Paolo Bonzini Signed-off-by: Peter Xu Message-Id: <20201001012222.5767-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 93 +++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 3 + arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/Makefile | 3 +- arch/x86/kvm/mmu/mmu.c | 8 ++ arch/x86/kvm/mmu/tdp_mmu.c | 2 +- arch/x86/kvm/vmx/vmx.c | 7 ++ arch/x86/kvm/x86.c | 9 ++ include/linux/kvm_dirty_ring.h | 103 +++++++++++++++++++++ include/linux/kvm_host.h | 13 +++ include/trace/events/kvm.h | 63 +++++++++++++ include/uapi/linux/kvm.h | 53 +++++++++++ virt/kvm/dirty_ring.c | 194 ++++++++++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 113 ++++++++++++++++++++++- 14 files changed, 662 insertions(+), 3 deletions(-) create mode 100644 include/linux/kvm_dirty_ring.h create mode 100644 virt/kvm/dirty_ring.c (limited to 'arch') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 81d54fe76a2d..e264ebc35e27 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -262,6 +262,18 @@ The KVM_RUN ioctl (cf.) communicates with userspace via a shared memory region. This ioctl returns the size of that region. See the KVM_RUN documentation for details. +Besides the size of the KVM_RUN communication region, other areas of +the VCPU file descriptor can be mmap-ed, including: + +- if KVM_CAP_COALESCED_MMIO is available, a page at + KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE; for historical reasons, + this page is included in the result of KVM_GET_VCPU_MMAP_SIZE. + KVM_CAP_COALESCED_MMIO is not documented yet. + +- if KVM_CAP_DIRTY_LOG_RING is available, a number of pages at + KVM_DIRTY_LOG_PAGE_OFFSET * PAGE_SIZE. For more information on + KVM_CAP_DIRTY_LOG_RING, see section 8.3. + 4.6 KVM_SET_MEMORY_REGION ------------------------- @@ -6396,3 +6408,84 @@ When enabled, KVM will disable paravirtual features provided to the guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf (0x40000001). Otherwise, a guest may use the paravirtual features regardless of what has actually been exposed through the CPUID leaf. + + +8.29 KVM_CAP_DIRTY_LOG_RING +--------------------------- + +:Architectures: x86 +:Parameters: args[0] - size of the dirty log ring + +KVM is capable of tracking dirty memory using ring buffers that are +mmaped into userspace; there is one dirty ring per vcpu. + +The dirty ring is available to userspace as an array of +``struct kvm_dirty_gfn``. Each dirty entry it's defined as:: + + struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; /* as_id | slot_id */ + __u64 offset; + }; + +The following values are defined for the flags field to define the +current state of the entry:: + + #define KVM_DIRTY_GFN_F_DIRTY BIT(0) + #define KVM_DIRTY_GFN_F_RESET BIT(1) + #define KVM_DIRTY_GFN_F_MASK 0x3 + +Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM +ioctl to enable this capability for the new guest and set the size of +the rings. Enabling the capability is only allowed before creating any +vCPU, and the size of the ring must be a power of two. The larger the +ring buffer, the less likely the ring is full and the VM is forced to +exit to userspace. The optimal size depends on the workload, but it is +recommended that it be at least 64 KiB (4096 entries). 
+ +Just like for dirty page bitmaps, the buffer tracks writes to +all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was +set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered +with the flag set, userspace can start harvesting dirty pages from the +ring buffer. + +An entry in the ring buffer can be unused (flag bits ``00``), +dirty (flag bits ``01``) or harvested (flag bits ``1X``). The +state machine for the entry is as follows:: + + dirtied harvested reset + 00 -----------> 01 -------------> 1X -------+ + ^ | + | | + +------------------------------------------+ + +To harvest the dirty pages, userspace accesses the mmap-ed ring buffer +to read the dirty GFNs. If the flags field has the DIRTY bit set (at this stage +the RESET bit must be cleared), then it means this GFN is a dirty GFN. +The userspace should harvest this GFN and mark the flags from state +``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set +to show that this GFN is harvested and waiting for a reset), and move +on to the next GFN. The userspace should continue to do this until the +flags of a GFN have the DIRTY bit cleared, meaning that it has harvested +all the dirty GFNs that were available. + +It's not necessary for userspace to harvest all the dirty GFNs at once. +However, it must collect the dirty GFNs in sequence, i.e., the userspace +program cannot skip one dirty GFN to collect the one next to it. + +After processing one or more entries in the ring buffer, userspace +calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about +it, so that the kernel will reprotect those collected GFNs. +Therefore, the ioctl must be called *before* reading the content of +the dirty pages. + +The dirty ring can get full. When it happens, the KVM_RUN of the +vcpu will return with exit reason KVM_EXIT_DIRTY_RING_FULL. + +The dirty ring interface has a major difference compared to the +KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from +userspace, it's still possible that the kernel has not yet flushed the +processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the +flushing is done by the KVM_GET_DIRTY_LOG ioctl). To force that flush, one +needs to kick the vcpu out of KVM_RUN using a signal. The resulting +vmexit ensures that all dirty GFNs are flushed to the dirty rings.
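Putting the protocol above into code, the following is a hedged userspace sketch of a single harvest pass. It assumes the VMM has already enabled KVM_CAP_DIRTY_LOG_RING on the VM and that the build headers carry the definitions added by this patch (struct kvm_dirty_gfn, the flag bits, KVM_DIRTY_LOG_PAGE_OFFSET and KVM_RESET_DIRTY_RINGS); memory barriers, error handling and a persistent mapping of the ring are omitted for brevity.

#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

static uint32_t harvest_ring(int vcpu_fd, int vm_fd, uint32_t ring_size,
			     uint32_t *next)
{
	long page_size = sysconf(_SC_PAGESIZE);
	struct kvm_dirty_gfn *ring;
	uint32_t harvested = 0;

	/* The ring sits behind the vCPU fd at KVM_DIRTY_LOG_PAGE_OFFSET. */
	ring = mmap(NULL, ring_size * sizeof(*ring), PROT_READ | PROT_WRITE,
		    MAP_SHARED, vcpu_fd, KVM_DIRTY_LOG_PAGE_OFFSET * page_size);

	for (;;) {
		struct kvm_dirty_gfn *e = &ring[*next % ring_size];

		if (!(e->flags & KVM_DIRTY_GFN_F_DIRTY))
			break;			/* nothing more to collect */

		printf("dirty: slot %u offset %llu\n",
		       e->slot, (unsigned long long)e->offset);

		e->flags |= KVM_DIRTY_GFN_F_RESET;	/* 01b -> 1Xb */
		(*next)++;
		harvested++;
	}

	/* Ask KVM to re-protect everything collected so far. */
	ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);

	munmap(ring, ring_size * sizeof(*ring));
	return harvested;
}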
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 69e94aa716e9..f002cdb13a0b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1232,6 +1232,7 @@ struct kvm_x86_ops { void (*enable_log_dirty_pt_masked)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t offset, unsigned long mask); + int (*cpu_dirty_log_size)(void); /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; @@ -1744,4 +1745,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define GET_SMSTATE(type, buf, offset) \ (*(type *)((buf) + (offset) - 0x7e00)) +int kvm_cpu_dirty_log_size(void); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba8..8e76d3701db3 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -12,6 +12,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define DE_VECTOR 0 #define DB_VECTOR 1 diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index b804444e16d4..4bd14ab01323 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -10,7 +10,8 @@ endif KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ - $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \ + $(KVM)/dirty_ring.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5bb1939b65d8..12e5cfe0995e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1289,6 +1289,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } +int kvm_cpu_dirty_log_size(void) +{ + if (kvm_x86_ops.cpu_dirty_log_size) + return kvm_x86_ops.cpu_dirty_log_size(); + + return 0; +} + bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn) { diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index ff28a5c6abd6..cffa51c6049e 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -185,7 +185,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, if ((!is_writable_pte(old_spte) || pfn_changed) && is_writable_pte(new_spte)) { slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn); - mark_page_dirty_in_slot(slot, gfn); + mark_page_dirty_in_slot(kvm, slot, gfn); } } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 46b32aa43811..2b6d538454a6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7583,6 +7583,11 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit) return supported & BIT(bit); } +static int vmx_cpu_dirty_log_size(void) +{ + return enable_pml ? 
PML_ENTITY_NUM : 0; +} + static struct kvm_x86_ops vmx_x86_ops __initdata = { .hardware_unsetup = hardware_unsetup, @@ -7712,6 +7717,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .migrate_timers = vmx_migrate_timers, .msr_filter_changed = vmx_msr_filter_changed, + .cpu_dirty_log_size = vmx_cpu_dirty_log_size, }; static __init int hardware_setup(void) @@ -7829,6 +7835,7 @@ static __init int hardware_setup(void) vmx_x86_ops.slot_disable_log_dirty = NULL; vmx_x86_ops.flush_log_dirty = NULL; vmx_x86_ops.enable_log_dirty_pt_masked = NULL; + vmx_x86_ops.cpu_dirty_log_size = NULL; } if (!cpu_has_vmx_preemption_timer()) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b4ac726526f8..6c704a597b7c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8754,6 +8754,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; + /* Forbid vmenter if vcpu dirty ring is soft-full */ + if (unlikely(vcpu->kvm->dirty_ring_size && + kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) { + vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL; + trace_kvm_dirty_ring_exit(vcpu); + r = 0; + goto out; + } + if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h new file mode 100644 index 000000000000..120e5e90fa1d --- /dev/null +++ b/include/linux/kvm_dirty_ring.h @@ -0,0 +1,103 @@ +#ifndef KVM_DIRTY_RING_H +#define KVM_DIRTY_RING_H + +#include + +/** + * kvm_dirty_ring: KVM internal dirty ring structure + * + * @dirty_index: free running counter that points to the next slot in + * dirty_ring->dirty_gfns, where a new dirty page should go + * @reset_index: free running counter that points to the next dirty page + * in dirty_ring->dirty_gfns for which dirty trap needs to + * be reenabled + * @size: size of the compact list, dirty_ring->dirty_gfns + * @soft_limit: when the number of dirty pages in the list reaches this + * limit, vcpu that owns this ring should exit to userspace + * to allow userspace to harvest all the dirty pages + * @dirty_gfns: the array to keep the dirty gfns + * @index: index of this dirty ring + */ +struct kvm_dirty_ring { + u32 dirty_index; + u32 reset_index; + u32 size; + u32 soft_limit; + struct kvm_dirty_gfn *dirty_gfns; + int index; +}; + +#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0) +/* + * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should + * not be included as well, so define these nop functions for the arch. 
+ */ +static inline u32 kvm_dirty_ring_get_rsvd_entries(void) +{ + return 0; +} + +static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, + int index, u32 size) +{ + return 0; +} + +static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm) +{ + return NULL; +} + +static inline int kvm_dirty_ring_reset(struct kvm *kvm, + struct kvm_dirty_ring *ring) +{ + return 0; +} + +static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, + u32 slot, u64 offset) +{ +} + +static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, + u32 offset) +{ + return NULL; +} + +static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) +{ +} + +static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) +{ + return true; +} + +#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +u32 kvm_dirty_ring_get_rsvd_entries(void); +int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); +struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm); + +/* + * called with kvm->slots_lock held, returns the number of + * processed pages. + */ +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring); + +/* + * returns =0: successfully pushed + * <0: unable to push, need to wait + */ +void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset); + +/* for use in vm_operations_struct */ +struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset); + +void kvm_dirty_ring_free(struct kvm_dirty_ring *ring); +bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring); + +#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +#endif /* KVM_DIRTY_RING_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca7c1459a8e3..864b156391c8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -34,6 +34,7 @@ #include #include +#include #ifndef KVM_MAX_VCPU_ID #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS @@ -319,6 +320,7 @@ struct kvm_vcpu { bool preempted; bool ready; struct kvm_vcpu_arch arch; + struct kvm_dirty_ring dirty_ring; }; static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) @@ -505,6 +507,7 @@ struct kvm { struct srcu_struct irq_srcu; pid_t userspace_pid; unsigned int max_halt_poll_ns; + u32 dirty_ring_size; }; #define kvm_err(fmt, ...) \ @@ -1477,4 +1480,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) } #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +/* + * This defines how many reserved entries we want to keep before we + * kick the vcpu to the userspace to avoid dirty ring full. This + * value can be tuned to higher if e.g. PML is enabled on the host. 
+ */ +#define KVM_DIRTY_RING_RSVD_ENTRIES 64 + +/* Max number of entries allowed for each kvm dirty ring */ +#define KVM_DIRTY_RING_MAX_ENTRIES 65536 + #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 26cfb0fa8e7e..49d7d0fe29f6 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -399,6 +399,69 @@ TRACE_EVENT(kvm_halt_poll_ns, #define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \ trace_kvm_halt_poll_ns(false, vcpu_id, new, old) +TRACE_EVENT(kvm_dirty_ring_push, + TP_PROTO(struct kvm_dirty_ring *ring, u32 slot, u64 offset), + TP_ARGS(ring, slot, offset), + + TP_STRUCT__entry( + __field(int, index) + __field(u32, dirty_index) + __field(u32, reset_index) + __field(u32, slot) + __field(u64, offset) + ), + + TP_fast_assign( + __entry->index = ring->index; + __entry->dirty_index = ring->dirty_index; + __entry->reset_index = ring->reset_index; + __entry->slot = slot; + __entry->offset = offset; + ), + + TP_printk("ring %d: dirty 0x%x reset 0x%x " + "slot %u offset 0x%llx (used %u)", + __entry->index, __entry->dirty_index, + __entry->reset_index, __entry->slot, __entry->offset, + __entry->dirty_index - __entry->reset_index) +); + +TRACE_EVENT(kvm_dirty_ring_reset, + TP_PROTO(struct kvm_dirty_ring *ring), + TP_ARGS(ring), + + TP_STRUCT__entry( + __field(int, index) + __field(u32, dirty_index) + __field(u32, reset_index) + ), + + TP_fast_assign( + __entry->index = ring->index; + __entry->dirty_index = ring->dirty_index; + __entry->reset_index = ring->reset_index; + ), + + TP_printk("ring %d: dirty 0x%x reset 0x%x (used %u)", + __entry->index, __entry->dirty_index, __entry->reset_index, + __entry->dirty_index - __entry->reset_index) +); + +TRACE_EVENT(kvm_dirty_ring_exit, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + ), + + TP_printk("vcpu %d", __entry->vcpu_id) +); + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 204afbe1240e..886802b8ffba 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -250,6 +250,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_ARM_NISV 28 #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 +#define KVM_EXIT_DIRTY_RING_FULL 31 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -1054,6 +1055,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 +#define KVM_CAP_DIRTY_LOG_RING 192 #ifdef KVM_CAP_IRQ_ROUTING @@ -1558,6 +1560,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_X86_MSR_FILTER */ #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) +/* Available with KVM_CAP_DIRTY_LOG_RING */ +#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1711,4 +1716,52 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +/* + * Arch needs to define the macro after implementing the dirty ring + * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the + * starting page offset of the dirty ring structures. 
+ */ +#ifndef KVM_DIRTY_LOG_PAGE_OFFSET +#define KVM_DIRTY_LOG_PAGE_OFFSET 0 +#endif + +/* + * KVM dirty GFN flags, defined as: + * + * |---------------+---------------+--------------| + * | bit 1 (reset) | bit 0 (dirty) | Status | + * |---------------+---------------+--------------| + * | 0 | 0 | Invalid GFN | + * | 0 | 1 | Dirty GFN | + * | 1 | X | GFN to reset | + * |---------------+---------------+--------------| + * + * Lifecycle of a dirty GFN goes like: + * + * dirtied harvested reset + * 00 -----------> 01 -------------> 1X -------+ + * ^ | + * | | + * +------------------------------------------+ + * + * The userspace program is only responsible for the 01->1X state + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +#define KVM_DIRTY_GFN_F_DIRTY BIT(0) +#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_MASK 0x3 + +/* + * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of + * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The + * size of the gfn buffer is decided by the first argument when + * enabling KVM_CAP_DIRTY_LOG_RING. + */ +struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; + __u64 offset; +}; + #endif /* __LINUX_KVM_H */ diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c new file mode 100644 index 000000000000..9d01299563ee --- /dev/null +++ b/virt/kvm/dirty_ring.c @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * KVM dirty ring implementation + * + * Copyright 2019 Red Hat, Inc. + */ +#include +#include +#include +#include +#include + +int __weak kvm_cpu_dirty_log_size(void) +{ + return 0; +} + +u32 kvm_dirty_ring_get_rsvd_entries(void) +{ + return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size(); +} + +static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring) +{ + return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index); +} + +bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) +{ + return kvm_dirty_ring_used(ring) >= ring->soft_limit; +} + +static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring) +{ + return kvm_dirty_ring_used(ring) >= ring->size; +} + +struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + + WARN_ON_ONCE(vcpu->kvm != kvm); + + return &vcpu->dirty_ring; +} + +static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask) +{ + struct kvm_memory_slot *memslot; + int as_id, id; + + as_id = slot >> 16; + id = (u16)slot; + + if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) + return; + + memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id); + + if (!memslot || (offset + __fls(mask)) >= memslot->npages) + return; + + spin_lock(&kvm->mmu_lock); + kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask); + spin_unlock(&kvm->mmu_lock); +} + +int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size) +{ + ring->dirty_gfns = vmalloc(size); + if (!ring->dirty_gfns) + return -ENOMEM; + memset(ring->dirty_gfns, 0, size); + + ring->size = size / sizeof(struct kvm_dirty_gfn); + ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries(); + ring->dirty_index = 0; + ring->reset_index = 0; + ring->index = index; + + return 0; +} + +static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn) +{ + gfn->flags = 0; +} + +static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn) +{ + gfn->flags = KVM_DIRTY_GFN_F_DIRTY; +} + +static inline 
bool kvm_dirty_gfn_invalid(struct kvm_dirty_gfn *gfn) +{ + return gfn->flags == 0; +} + +static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn) +{ + return gfn->flags & KVM_DIRTY_GFN_F_RESET; +} + +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring) +{ + u32 cur_slot, next_slot; + u64 cur_offset, next_offset; + unsigned long mask; + int count = 0; + struct kvm_dirty_gfn *entry; + bool first_round = true; + + /* This is only needed to make compilers happy */ + cur_slot = cur_offset = mask = 0; + + while (true) { + entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)]; + + if (!kvm_dirty_gfn_harvested(entry)) + break; + + next_slot = READ_ONCE(entry->slot); + next_offset = READ_ONCE(entry->offset); + + /* Update the flags to reflect that this GFN is reset */ + kvm_dirty_gfn_set_invalid(entry); + + ring->reset_index++; + count++; + /* + * Try to coalesce the reset operations when the guest is + * scanning pages in the same slot. + */ + if (!first_round && next_slot == cur_slot) { + s64 delta = next_offset - cur_offset; + + if (delta >= 0 && delta < BITS_PER_LONG) { + mask |= 1ull << delta; + continue; + } + + /* Backwards visit, careful about overflows! */ + if (delta > -BITS_PER_LONG && delta < 0 && + (mask << -delta >> -delta) == mask) { + cur_offset = next_offset; + mask = (mask << -delta) | 1; + continue; + } + } + kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask); + cur_slot = next_slot; + cur_offset = next_offset; + mask = 1; + first_round = false; + } + + kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask); + + trace_kvm_dirty_ring_reset(ring); + + return count; +} + +void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset) +{ + struct kvm_dirty_gfn *entry; + + /* It should never get full */ + WARN_ON_ONCE(kvm_dirty_ring_full(ring)); + + entry = &ring->dirty_gfns[ring->dirty_index & (ring->size - 1)]; + + entry->slot = slot; + entry->offset = offset; + /* + * Make sure the data is filled in before we publish this to + * the userspace program. There's no paired kernel-side reader. + */ + smp_wmb(); + kvm_dirty_gfn_set_dirtied(entry); + ring->dirty_index++; + trace_kvm_dirty_ring_push(ring, slot, offset); +} + +struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset) +{ + return vmalloc_to_page((void *)ring->dirty_gfns + offset * PAGE_SIZE); +} + +void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) +{ + vfree(ring->dirty_gfns); + ring->dirty_gfns = NULL; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68598fdba226..78ef414512bf 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -63,6 +63,8 @@ #define CREATE_TRACE_POINTS #include +#include + /* Worst case buffer size needed for holding an integer. 
*/ #define ITOA_MAX_LEN 12 @@ -415,6 +417,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) { + kvm_dirty_ring_free(&vcpu->dirty_ring); kvm_arch_vcpu_destroy(vcpu); /* @@ -2644,8 +2647,13 @@ void mark_page_dirty_in_slot(struct kvm *kvm, { if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; + u32 slot = (memslot->as_id << 16) | memslot->id; - set_bit_le(rel_gfn, memslot->dirty_bitmap); + if (kvm->dirty_ring_size) + kvm_dirty_ring_push(kvm_dirty_ring_get(kvm), + slot, rel_gfn); + else + set_bit_le(rel_gfn, memslot->dirty_bitmap); } } EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot); @@ -3005,6 +3013,17 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); +static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff) +{ +#if KVM_DIRTY_LOG_PAGE_OFFSET > 0 + return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) && + (pgoff < KVM_DIRTY_LOG_PAGE_OFFSET + + kvm->dirty_ring_size / PAGE_SIZE); +#else + return false; +#endif +} + static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf) { struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; @@ -3020,6 +3039,10 @@ static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf) else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); #endif + else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff)) + page = kvm_dirty_ring_get_page( + &vcpu->dirty_ring, + vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET); else return kvm_arch_vcpu_fault(vcpu, vmf); get_page(page); @@ -3033,6 +3056,14 @@ static const struct vm_operations_struct kvm_vcpu_vm_ops = { static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) { + struct kvm_vcpu *vcpu = file->private_data; + unsigned long pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) || + kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) && + ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED))) + return -EINVAL; + vma->vm_ops = &kvm_vcpu_vm_ops; return 0; } @@ -3126,6 +3157,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (r) goto vcpu_free_run_page; + if (kvm->dirty_ring_size) { + r = kvm_dirty_ring_alloc(&vcpu->dirty_ring, + id, kvm->dirty_ring_size); + if (r) + goto arch_vcpu_destroy; + } + mutex_lock(&kvm->lock); if (kvm_get_vcpu_by_id(kvm, id)) { r = -EEXIST; @@ -3159,6 +3197,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) unlock_vcpu_destroy: mutex_unlock(&kvm->lock); + kvm_dirty_ring_free(&vcpu->dirty_ring); +arch_vcpu_destroy: kvm_arch_vcpu_destroy(vcpu); vcpu_free_run_page: free_page((unsigned long)vcpu->run); @@ -3631,12 +3671,78 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #endif case KVM_CAP_NR_MEMSLOTS: return KVM_USER_MEM_SLOTS; + case KVM_CAP_DIRTY_LOG_RING: +#if KVM_DIRTY_LOG_PAGE_OFFSET > 0 + return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); +#else + return 0; +#endif default: break; } return kvm_vm_ioctl_check_extension(kvm, arg); } +static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size) +{ + int r; + + if (!KVM_DIRTY_LOG_PAGE_OFFSET) + return -EINVAL; + + /* the size should be power of 2 */ + if (!size || (size & (size - 1))) + return -EINVAL; + + /* Should be bigger to keep the reserved entries, or a page */ + if (size < kvm_dirty_ring_get_rsvd_entries() * + sizeof(struct kvm_dirty_gfn) || size < 
PAGE_SIZE) + return -EINVAL; + + if (size > KVM_DIRTY_RING_MAX_ENTRIES * + sizeof(struct kvm_dirty_gfn)) + return -E2BIG; + + /* We only allow it to set once */ + if (kvm->dirty_ring_size) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (kvm->created_vcpus) { + /* We don't allow to change this value after vcpu created */ + r = -EINVAL; + } else { + kvm->dirty_ring_size = size; + r = 0; + } + + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + int cleared = 0; + + if (!kvm->dirty_ring_size) + return -EINVAL; + + mutex_lock(&kvm->slots_lock); + + kvm_for_each_vcpu(i, vcpu, kvm) + cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring); + + mutex_unlock(&kvm->slots_lock); + + if (cleared) + kvm_flush_remote_tlbs(kvm); + + return cleared; +} + int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { @@ -3667,6 +3773,8 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, kvm->max_halt_poll_ns = cap->args[0]; return 0; } + case KVM_CAP_DIRTY_LOG_RING: + return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); default: return kvm_vm_ioctl_enable_cap(kvm, cap); } @@ -3851,6 +3959,9 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_CHECK_EXTENSION: r = kvm_vm_ioctl_check_extension_generic(kvm, arg); break; + case KVM_RESET_DIRTY_RINGS: + r = kvm_vm_ioctl_reset_dirty_pages(kvm); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } -- cgit v1.2.3 From 044c59c409b7fd753707dc437890e94d2b0bd819 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:26 -0400 Subject: KVM: Don't allocate dirty bitmap if dirty ring is enabled Because the kvm dirty ring and the kvm dirty log are used in an exclusive way, let's avoid creating the dirty_bitmap when the kvm dirty ring is enabled. In the meantime, since the dirty_bitmap will be conditionally created now, we can't use it as a sign of "whether this memory slot enabled dirty tracking". Change such users to check against the kvm memory slot flags instead. Note that there can still be cases where a kvm memory slot gets its dirty_bitmap allocated: _if_ the memory slots are created before the dirty rings are enabled, and with the dirty tracking capability enabled at the same time, they'll still have the dirty_bitmap. However, it should not hurt much (e.g., the bitmaps will always be freed if they are there), and real users normally won't trigger this because the dirty bit tracking flag is in most cases only applied to kvm slots shortly before migration starts, which is far later than kvm initialization (VM start). 
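For context, the ring interface added above is driven from userspace roughly as follows. This is a hedged sketch rather than code from the series: it assumes the uapi additions are visible in linux/kvm.h, vm_fd/vcpu_fd and RING_BYTES are placeholders, error handling is omitted, and a real consumer also needs a load-acquire of flags before trusting slot/offset.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdint.h>

#define RING_BYTES	(4096 * sizeof(struct kvm_dirty_gfn))	/* power of two, within the KVM_CHECK_EXTENSION limit */

static struct kvm_dirty_gfn *ring;
static uint32_t nents, next;		/* userspace keeps its own harvest cursor */

/* Must run before any vcpu is created (see the created_vcpus check above) */
static void dirty_ring_enable(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_DIRTY_LOG_RING,
		.args[0] = RING_BYTES,		/* ring size in bytes */
	};

	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	nents = RING_BYTES / sizeof(struct kvm_dirty_gfn);
}

/* After vcpu creation: map the ring from the vcpu fd (shared, non-exec) */
static void dirty_ring_map(int vcpu_fd)
{
	ring = mmap(NULL, RING_BYTES, PROT_READ | PROT_WRITE, MAP_SHARED,
		    vcpu_fd, (off_t)KVM_DIRTY_LOG_PAGE_OFFSET * getpagesize());
}

/* Harvest dirtied GFNs, flag them for reset, then let KVM re-protect them */
static void dirty_ring_harvest(int vm_fd)
{
	while (ring[next % nents].flags & KVM_DIRTY_GFN_F_DIRTY) {
		struct kvm_dirty_gfn *e = &ring[next % nents];

		/* e->slot / e->offset identify the dirtied page (e.g. for migration) */
		e->flags |= KVM_DIRTY_GFN_F_RESET;
		next++;
	}

	ioctl(vm_fd, KVM_RESET_DIRTY_RINGS);
}

The harvest loop implements the 01 -> 1X transition described in the uapi comment; the 1X -> 00 transition only happens once KVM_RESET_DIRTY_RINGS has been issued on the VM fd.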
Signed-off-by: Peter Xu Message-Id: <20201001012226.5868-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- include/linux/kvm_host.h | 5 +++++ virt/kvm/kvm_main.c | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 12e5cfe0995e..5dfe0ede0e81 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -820,7 +820,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return NULL; - if (no_dirty_log && slot->dirty_bitmap) + if (no_dirty_log && kvm_slot_dirty_track_enabled(slot)) return NULL; return slot; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 864b156391c8..f3b1013fb22c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -351,6 +351,11 @@ struct kvm_memory_slot { u16 as_id; }; +static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot) +{ + return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; +} + static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) { return ALIGN(memslot->npages, BITS_PER_LONG) / 8; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 110aa5cc0c93..3abcb2ce5b7d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1365,7 +1365,7 @@ int __kvm_set_memory_region(struct kvm *kvm, /* Allocate/free page dirty bitmap as needed */ if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) new.dirty_bitmap = NULL; - else if (!new.dirty_bitmap) { + else if (!new.dirty_bitmap && !kvm->dirty_ring_size) { r = kvm_alloc_dirty_bitmap(&new); if (r) return r; @@ -2657,7 +2657,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn) { - if (memslot && memslot->dirty_bitmap) { + if (memslot && kvm_slot_dirty_track_enabled(memslot)) { unsigned long rel_gfn = gfn - memslot->base_gfn; u32 slot = (memslot->as_id << 16) | memslot->id; -- cgit v1.2.3 From 2259c17f01887666220a35619c44c576aeed2a30 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 29 Oct 2020 10:06:48 -0700 Subject: kvm: x86: Sink cpuid update into vendor-specific set_cr4 functions On emulated VM-entry and VM-exit, update the CPUID bits that reflect CR4.OSXSAVE and CR4.PKE. This fixes a bug where the CPUID bits could continue to reflect L2 CR4 values after emulated VM-exit to L1. It also fixes a related bug where the CPUID bits could continue to reflect L1 CR4 values after emulated VM-entry to L2. The latter bug is mainly relevant to SVM, wherein CPUID is not a required intercept. However, it could also be relevant to VMX, because the code to conditionally update these CPUID bits assumes that the guest CPUID and the guest CR4 are always in sync. 
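The check being sunk into the vendor set_cr4 hooks is just a toggle test on the two CR4 bits that guest CPUID mirrors (CR4.OSXSAVE in CPUID.01H:ECX, CR4.PKE via CPUID.07H:ECX.OSPKE). A standalone sketch of that test, with the bit values written out (an illustration, not kernel code; cr4_cpuid_needs_update is a made-up name):

#include <stdbool.h>

#define X86_CR4_OSXSAVE	(1ul << 18)	/* reflected in CPUID.01H:ECX.OSXSAVE */
#define X86_CR4_PKE	(1ul << 22)	/* reflected in CPUID.07H:ECX.OSPKE */

/* True when a CR4 load flips a bit that guest CPUID has to re-reflect */
static bool cr4_cpuid_needs_update(unsigned long old_cr4, unsigned long new_cr4)
{
	/* XOR exposes the changed bits; the mask keeps only the CPUID-visible ones */
	return (old_cr4 ^ new_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE);
}

Evaluating this on every emulated CR4 load, including the loads performed on nested VM-entry and VM-exit, is what keeps the CPUID bits tracking whichever level's CR4 is currently active.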
Fixes: 8eb3f87d903168 ("KVM: nVMX: fix guest CR4 loading when emulating L2 to L1 exit") Fixes: 2acf923e38fb6a ("KVM: VMX: Enable XSAVE/XRSTOR for guest") Fixes: b9baba86148904 ("KVM, pkeys: expose CPUID/CR4 to guest") Reported-by: Abhiroop Dabral Signed-off-by: Jim Mattson Reviewed-by: Ricardo Koller Reviewed-by: Peter Shier Cc: Haozhong Zhang Cc: Dexuan Cui Cc: Huaitong Han Message-Id: <20201029170648.483210-1-jmattson@google.com> --- arch/x86/kvm/cpuid.c | 1 + arch/x86/kvm/svm/svm.c | 3 +++ arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 8 -------- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f87b5dfbaba4..5d352cc204ce 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -146,6 +146,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) MSR_IA32_MISC_ENABLE_MWAIT); } } +EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime); static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8c858f80f399..253809216cc7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1701,6 +1701,9 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); + + if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + kvm_update_cpuid_runtime(vcpu); } static void svm_set_segment(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2b6d538454a6..c3441e7e5a87 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3113,6 +3113,7 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { + unsigned long old_cr4 = vcpu->arch.cr4; struct vcpu_vmx *vmx = to_vmx(vcpu); /* * Pass through host's Machine Check Enable value to hw_cr4, which @@ -3169,6 +3170,9 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_writel(CR4_READ_SHADOW, cr4); vmcs_writel(GUEST_CR4, hw_cr4); + + if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + kvm_update_cpuid_runtime(vcpu); } void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c704a597b7c..a3fdc16cfd6f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1012,9 +1012,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); - if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) - kvm_update_cpuid_runtime(vcpu); - return 0; } EXPORT_SYMBOL_GPL(kvm_set_cr4); @@ -9576,7 +9573,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; - int cpuid_update_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; int ret = -EINVAL; @@ -9611,11 +9607,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; - cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & - (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops.set_cr4(vcpu, sregs->cr4); - if (cpuid_update_needed) - kvm_update_cpuid_runtime(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); if (is_pae_paging(vcpu)) { -- cgit v1.2.3 From 8934c8454064757efd8d3fb0a729db7eb2d0e5f5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:38 +0000 
Subject: KVM: arm64: Remove redundant Spectre-v2 code from kvm_map_vector() '__kvm_bp_vect_base' is only used when dealing with the hardened vectors so remove the redundant assignments in kvm_map_vectors(). Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-2-will@kernel.org --- arch/arm64/kvm/arm.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5750ec34960e..b43b637ded14 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1306,11 +1306,6 @@ static int kvm_map_vectors(void) * !SV2 + HEL2 -> allocate one vector slot and use exec mapping * SV2 + HEL2 -> use hardened vectors and use exec mapping */ - if (cpus_have_const_cap(ARM64_SPECTRE_V2)) { - __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs); - __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); - } - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); unsigned long size = __BP_HARDEN_HYP_VECS_SZ; -- cgit v1.2.3 From de5bcdb48498abeb019ae075d139850c52661627 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:39 +0000 Subject: KVM: arm64: Tidy up kvm_map_vector() The bulk of the work in kvm_map_vector() is conditional on the ARM64_HARDEN_EL2_VECTORS capability, so return early if that is not set and make the code a bit easier to read. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-3-will@kernel.org --- arch/arm64/kvm/arm.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b43b637ded14..476bc613d0e6 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1297,6 +1297,8 @@ static unsigned long nvhe_percpu_order(void) static int kvm_map_vectors(void) { + int slot; + /* * SV2 = ARM64_SPECTRE_V2 * HEL2 = ARM64_HARDEN_EL2_VECTORS @@ -1306,22 +1308,20 @@ static int kvm_map_vectors(void) * !SV2 + HEL2 -> allocate one vector slot and use exec mapping * SV2 + HEL2 -> use hardened vectors and use exec mapping */ - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); - unsigned long size = __BP_HARDEN_HYP_VECS_SZ; + if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) + return 0; - /* - * Always allocate a spare vector slot, as we don't - * know yet which CPUs have a BP hardening slot that - * we can reuse. - */ - __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); - return create_hyp_exec_mappings(vect_pa, size, - &__kvm_bp_vect_base); - } + /* + * Always allocate a spare vector slot, as we don't know yet which CPUs + * have a BP hardening slot that we can reuse. 
+ */ + slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); + __kvm_harden_el2_vector_slot = slot; - return 0; + return create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), + __BP_HARDEN_HYP_VECS_SZ, + &__kvm_bp_vect_base); } static void cpu_init_hyp_mode(void) -- cgit v1.2.3 From 042c76a9502bf281befc0ae2793ef1de55b65544 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:40 +0000 Subject: KVM: arm64: Move kvm_get_hyp_vector() out of header file kvm_get_hyp_vector() has only one caller, so move it out of kvm_mmu.h and inline it into a new function, cpu_set_hyp_vector(), for setting the vector. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-4-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 43 ------------------------------------- arch/arm64/kvm/arm.c | 46 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 331394306cce..23182e7d9413 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -208,52 +208,9 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -/* - * EL2 vectors can be mapped and rerouted in a number of ways, - * depending on the kernel configuration and CPU present: - * - * - If the CPU is affected by Spectre-v2, the hardening sequence is - * placed in one of the vector slots, which is executed before jumping - * to the real vectors. - * - * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot - * containing the hardening sequence is mapped next to the idmap page, - * and executed before jumping to the real vectors. - * - * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an - * empty slot is selected, mapped next to the idmap page, and - * executed before jumping to the real vectors. - * - * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with - * VHE, as we don't have hypervisor-specific mappings. If the system - * is VHE and yet selects this capability, it will be ignored. - */ extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; -static inline void *kvm_get_hyp_vector(void) -{ - struct bp_hardening_data *data = arm64_get_bp_hardening_data(); - void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); - int slot = -1; - - if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { - vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); - slot = data->hyp_vectors_slot; - } - - if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { - vect = __kvm_bp_vect_base; - if (slot == -1) - slot = __kvm_harden_el2_vector_slot; - } - - if (slot != -1) - vect += slot * SZ_2K; - - return vect; -} - #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 476bc613d0e6..c63c0b3c9b17 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1375,13 +1375,55 @@ static void cpu_hyp_reset(void) __hyp_reset_vectors(); } +/* + * EL2 vectors can be mapped and rerouted in a number of ways, + * depending on the kernel configuration and CPU present: + * + * - If the CPU is affected by Spectre-v2, the hardening sequence is + * placed in one of the vector slots, which is executed before jumping + * to the real vectors. 
+ * + * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot + * containing the hardening sequence is mapped next to the idmap page, + * and executed before jumping to the real vectors. + * + * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an + * empty slot is selected, mapped next to the idmap page, and + * executed before jumping to the real vectors. + * + * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with + * VHE, as we don't have hypervisor-specific mappings. If the system + * is VHE and yet selects this capability, it will be ignored. + */ +static void cpu_set_hyp_vector(void) +{ + struct bp_hardening_data *data = arm64_get_bp_hardening_data(); + void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); + int slot = -1; + + if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { + vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); + slot = data->hyp_vectors_slot; + } + + if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { + vect = __kvm_bp_vect_base; + if (slot == -1) + slot = __kvm_harden_el2_vector_slot; + } + + if (slot != -1) + vect += slot * SZ_2K; + + *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vect; +} + static void cpu_hyp_reinit(void) { kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); cpu_hyp_reset(); - - *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)kvm_get_hyp_vector(); + cpu_set_hyp_vector(); if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); -- cgit v1.2.3 From 07cf8aa922db7747cd6e100d2e3f7ca839c7a419 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:41 +0000 Subject: KVM: arm64: Make BP hardening globals static instead Branch predictor hardening of the hyp vectors is partially driven by a couple of global variables ('__kvm_bp_vect_base' and '__kvm_harden_el2_vector_slot'). However, these are only used within a single compilation unit, so internalise them there instead. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-5-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 3 --- arch/arm64/kvm/arm.c | 8 ++++++++ arch/arm64/kvm/va_layout.c | 3 --- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 23182e7d9413..db721be0df62 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -208,9 +208,6 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -extern void *__kvm_bp_vect_base; -extern int __kvm_harden_el2_vector_slot; - #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c63c0b3c9b17..3262c16f0449 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -51,6 +51,14 @@ DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +/* Hypervisor VA of the indirect vector trampoline page */ +static void *__kvm_bp_vect_base; +/* + * Slot in the hyp vector page for use by the indirect vector trampoline + * when mitigation against Spectre-v2 is not required. 
+ */ +static int __kvm_harden_el2_vector_slot; + /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index e0404bcab019..d1195c288c9f 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -131,9 +131,6 @@ void __init kvm_update_va_mask(struct alt_instr *alt, } } -void *__kvm_bp_vect_base; -int __kvm_harden_el2_vector_slot; - void kvm_patch_vector_branch(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { -- cgit v1.2.3 From 6279017e807708a07db5edace462713a93625da3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:42 +0000 Subject: KVM: arm64: Move BP hardening helpers into spectre.h The BP hardening helpers are an integral part of the Spectre-v2 mitigation, so move them into asm/spectre.h and inline the arm64_get_bp_hardening_data() function at the same time. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-6-will@kernel.org --- arch/arm64/include/asm/mmu.h | 29 ----------------------------- arch/arm64/include/asm/spectre.h | 30 ++++++++++++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/hyp-entry.S | 1 + 4 files changed, 32 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index b2e91c187e2a..75beffe2ee8a 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -12,9 +12,6 @@ #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) #define TTBR_ASID_MASK (UL(0xffff) << 48) -#define BP_HARDEN_EL2_SLOTS 4 -#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K) - #ifndef __ASSEMBLY__ #include @@ -41,32 +38,6 @@ static inline bool arm64_kernel_unmapped_at_el0(void) return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } -typedef void (*bp_hardening_cb_t)(void); - -struct bp_hardening_data { - int hyp_vectors_slot; - bp_hardening_cb_t fn; -}; - -DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - -static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) -{ - return this_cpu_ptr(&bp_hardening_data); -} - -static inline void arm64_apply_bp_hardening(void) -{ - struct bp_hardening_data *d; - - if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) - return; - - d = arm64_get_bp_hardening_data(); - if (d->fn) - d->fn(); -} - extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index fcdfbce302bd..d22f8b7d9c50 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -9,7 +9,15 @@ #ifndef __ASM_SPECTRE_H #define __ASM_SPECTRE_H +#define BP_HARDEN_EL2_SLOTS 4 +#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K) + +#ifndef __ASSEMBLY__ + +#include + #include +#include /* Watch out, ordering is important here. 
*/ enum mitigation_state { @@ -20,6 +28,27 @@ enum mitigation_state { struct task_struct; +typedef void (*bp_hardening_cb_t)(void); + +struct bp_hardening_data { + int hyp_vectors_slot; + bp_hardening_cb_t fn; +}; + +DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +static inline void arm64_apply_bp_hardening(void) +{ + struct bp_hardening_data *d; + + if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) + return; + + d = this_cpu_ptr(&bp_hardening_data); + if (d->fn) + d->fn(); +} + enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); @@ -29,4 +58,5 @@ bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); void spectre_v4_enable_task_mitigation(struct task_struct *tsk); +#endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3262c16f0449..044c5fc81f90 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1405,7 +1405,7 @@ static void cpu_hyp_reset(void) */ static void cpu_set_hyp_vector(void) { - struct bp_hardening_data *data = arm64_get_bp_hardening_data(); + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); int slot = -1; diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 0a5b36eb54b3..874eacdabc64 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -13,6 +13,7 @@ #include #include #include +#include .macro save_caller_saved_regs_vect /* x0 and x1 were saved in the vector entry */ -- cgit v1.2.3 From da592e68a5a333b81111bd6336838764732f723e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:43 +0000 Subject: KVM: arm64: Re-jig logic when patching hardened hyp vectors The hardened hyp vectors are not used on systems running with VHE or CPUs without the ARM64_HARDEN_EL2_VECTORS capability. Re-jig the checking logic slightly in kvm_patch_vector_branch() so that it's a bit clearer what we're looking for. This is purely cosmetic. 
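The "purely cosmetic" claim can be checked exhaustively. In the sketch below, cap stands in for cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS) and vhe for has_vhe(); the asserts hold for all four combinations, so the early return and the warning fire in exactly the same cases before and after the patch.

#include <assert.h>
#include <stdbool.h>

/* Old form: if (vhe || !cap) { WARN_ON_ONCE(cap); return; } */
static bool old_returns(bool cap, bool vhe) { return vhe || !cap; }
static bool old_warns(bool cap, bool vhe)   { return (vhe || !cap) && cap; }

/* New form: if (!cap || WARN_ON_ONCE(vhe)) return;  (WARN only evaluated when cap is set) */
static bool new_returns(bool cap, bool vhe) { return !cap || vhe; }
static bool new_warns(bool cap, bool vhe)   { return cap && vhe; }

int main(void)
{
	for (int cap = 0; cap < 2; cap++) {
		for (int vhe = 0; vhe < 2; vhe++) {
			assert(old_returns(cap, vhe) == new_returns(cap, vhe));
			assert(old_warns(cap, vhe)   == new_warns(cap, vhe));
		}
	}
	return 0;
}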
Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-7-will@kernel.org --- arch/arm64/kvm/va_layout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index d1195c288c9f..760db2c84b9b 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -139,8 +139,8 @@ void kvm_patch_vector_branch(struct alt_instr *alt, BUG_ON(nr_inst != 5); - if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)); + if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS) || + WARN_ON_ONCE(has_vhe())) { return; } -- cgit v1.2.3 From b881cdce77b48bd488f268041f32951bab89bb0f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:44 +0000 Subject: KVM: arm64: Allocate hyp vectors statically The EL2 vectors installed when a guest is running point at one of the following configurations for a given CPU: - Straight at __kvm_hyp_vector - A trampoline containing an SMC sequence to mitigate Spectre-v2 and then a direct branch to __kvm_hyp_vector - A dynamically-allocated trampoline which has an indirect branch to __kvm_hyp_vector - A dynamically-allocated trampoline containing an SMC sequence to mitigate Spectre-v2 and then an indirect branch to __kvm_hyp_vector The indirect branches mean that VA randomization at EL2 isn't trivially bypassable using Spectre-v3a (where the vector base is readable by the guest). Rather than populate these vectors dynamically, configure everything statically and use an enumerated type to identify the vector "slot" corresponding to one of the configurations above. This not only simplifies the code, but also makes it much easier to implement at EL2 later on. 
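The slot numbering is what lets the two mitigations compose by simple addition later in the series: Spectre-v2 selects the SMC flavour, and the indirect (exec-mapped) variants sit exactly HYP_VECTOR_INDIRECT entries further on. A minimal sketch of that arithmetic, assuming the enum ordering added to spectre.h (pick_slot is an illustrative helper, not a function from the patches):

enum arm64_hyp_spectre_vector {
	HYP_VECTOR_DIRECT,		/* 0: branch straight to __kvm_hyp_vector */
	HYP_VECTOR_SPECTRE_DIRECT,	/* 1: SMC sequence, then direct branch */
	HYP_VECTOR_INDIRECT,		/* 2: exec-mapped trampoline, indirect branch */
	HYP_VECTOR_SPECTRE_INDIRECT,	/* 3: SMC sequence, then indirect branch */
};

static enum arm64_hyp_spectre_vector pick_slot(int spectre_v2, int harden_el2_vectors)
{
	enum arm64_hyp_spectre_vector slot = HYP_VECTOR_DIRECT;

	if (spectre_v2)			/* done by install_bp_hardening_cb() */
		slot = HYP_VECTOR_SPECTRE_DIRECT;

	if (harden_el2_vectors)		/* done by the EL2 vector hardening cpu_enable hook */
		slot += HYP_VECTOR_INDIRECT;	/* 1 + 2 == HYP_VECTOR_SPECTRE_INDIRECT */

	return slot;	/* indexes hyp_spectre_vector_selector[]; each vector slot spans SZ_2K */
}

With the table of per-slot hypervisor VAs filled in at init time, cpu_set_hyp_vector() reduces to a single lookup of the precomputed address for the chosen slot.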
Signed-off-by: Will Deacon [maz: fixed double call to kvm_init_vector_slots() on nVHE] Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-8-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 5 --- arch/arm64/include/asm/spectre.h | 36 ++++++++++++++++- arch/arm64/kernel/cpu_errata.c | 2 + arch/arm64/kernel/proton-pack.c | 63 ++++++----------------------- arch/arm64/kvm/arm.c | 86 ++++++++++++++++------------------------ arch/arm64/kvm/hyp/Makefile | 2 +- arch/arm64/kvm/hyp/hyp-entry.S | 72 ++++++++++++++++++--------------- arch/arm64/kvm/hyp/smccc_wa.S | 32 --------------- arch/arm64/kvm/va_layout.c | 11 +---- 9 files changed, 126 insertions(+), 183 deletions(-) delete mode 100644 arch/arm64/kvm/hyp/smccc_wa.S (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 54387ccd1ab2..94b7c9a99576 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -34,8 +34,6 @@ */ #define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE) -#define __SMCCC_WORKAROUND_1_SMC_SZ 36 - #define KVM_HOST_SMCCC_ID(id) \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_64, \ @@ -175,7 +173,6 @@ extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); -extern atomic_t arm64_el2_vector_last_slot; DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) @@ -198,8 +195,6 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; - /* * Obtain the PC-relative address of a kernel symbol * s: symbol diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index d22f8b7d9c50..fa86b8f655b7 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -28,11 +28,41 @@ enum mitigation_state { struct task_struct; +/* + * Note: the order of this enum corresponds to __bp_harden_hyp_vecs and + * we rely on having the direct vectors first. + */ +enum arm64_hyp_spectre_vector { + /* + * Take exceptions directly to __kvm_hyp_vector. This must be + * 0 so that it used by default when mitigations are not needed. + */ + HYP_VECTOR_DIRECT, + + /* + * Bounce via a slot in the hypervisor text mapping of + * __bp_harden_hyp_vecs, which contains an SMC call. + */ + HYP_VECTOR_SPECTRE_DIRECT, + + /* + * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs + * next to the idmap page. + */ + HYP_VECTOR_INDIRECT, + + /* + * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs + * next to the idmap page, which contains an SMC call. 
+ */ + HYP_VECTOR_SPECTRE_INDIRECT, +}; + typedef void (*bp_hardening_cb_t)(void); struct bp_hardening_data { - int hyp_vectors_slot; - bp_hardening_cb_t fn; + enum arm64_hyp_spectre_vector slot; + bp_hardening_cb_t fn; }; DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); @@ -53,6 +83,8 @@ enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +void cpu_el2_vector_harden_enable(const struct arm64_cpu_capabilities *__unused); + enum mitigation_state arm64_get_spectre_v4_state(void); bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 61314fd70f13..7a040abaedea 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -459,9 +459,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #ifdef CONFIG_RANDOMIZE_BASE { + /* Must come after the Spectre-v2 entry */ .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, ERRATA_MIDR_RANGE_LIST(ca57_a72), + .cpu_enable = cpu_el2_vector_harden_enable, }, #endif { diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index c18eb7d41274..a4ba94129750 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -26,6 +26,7 @@ #include #include +#include /* * We try to ensure that the mitigation state can never change as the result of @@ -169,72 +170,26 @@ bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope) return true; } -DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - enum mitigation_state arm64_get_spectre_v2_state(void) { return spectre_v2_state; } -#ifdef CONFIG_KVM -#include -#include - -atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); - -static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K); - int i; - - for (i = 0; i < SZ_2K; i += 0x80) - memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); - - __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); -} +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); static void install_bp_hardening_cb(bp_hardening_cb_t fn) { - static DEFINE_RAW_SPINLOCK(bp_lock); - int cpu, slot = -1; - const char *hyp_vecs_start = __smccc_workaround_1_smc; - const char *hyp_vecs_end = __smccc_workaround_1_smc + - __SMCCC_WORKAROUND_1_SMC_SZ; + __this_cpu_write(bp_hardening_data.fn, fn); /* * Vinz Clortho takes the hyp_vecs start/end "keys" at * the door when we're a guest. Skip the hyp-vectors work. 
*/ - if (!is_hyp_mode_available()) { - __this_cpu_write(bp_hardening_data.fn, fn); + if (!is_hyp_mode_available()) return; - } - raw_spin_lock(&bp_lock); - for_each_possible_cpu(cpu) { - if (per_cpu(bp_hardening_data.fn, cpu) == fn) { - slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); - break; - } - } - - if (slot == -1) { - slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); - __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); - } - - __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); - __this_cpu_write(bp_hardening_data.fn, fn); - raw_spin_unlock(&bp_lock); + __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } -#else -static void install_bp_hardening_cb(bp_hardening_cb_t fn) -{ - __this_cpu_write(bp_hardening_data.fn, fn); -} -#endif /* CONFIG_KVM */ static void call_smc_arch_workaround_1(void) { @@ -315,6 +270,14 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) update_mitigation_state(&spectre_v2_state, state); } +void cpu_el2_vector_harden_enable(const struct arm64_cpu_capabilities *__unused) +{ + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + + if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS)) + data->slot += HYP_VECTOR_INDIRECT; +} + /* * Spectre v4. * diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 044c5fc81f90..5e6fe5eef3ec 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -51,14 +51,6 @@ DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; -/* Hypervisor VA of the indirect vector trampoline page */ -static void *__kvm_bp_vect_base; -/* - * Slot in the hyp vector page for use by the indirect vector trampoline - * when mitigation against Spectre-v2 is not required. - */ -static int __kvm_harden_el2_vector_slot; - /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; @@ -1303,33 +1295,38 @@ static unsigned long nvhe_percpu_order(void) return size ? get_order(size) : 0; } -static int kvm_map_vectors(void) +/* A lookup table holding the hypervisor VA for each vector slot */ +static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; + +static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) { - int slot; + hyp_spectre_vector_selector[slot] = base + (slot * SZ_2K); +} + +static int kvm_init_vector_slots(void) +{ + int err; + void *base; + + base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); + kvm_init_vector_slot(base, HYP_VECTOR_DIRECT); + + base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - /* - * SV2 = ARM64_SPECTRE_V2 - * HEL2 = ARM64_HARDEN_EL2_VECTORS - * - * !SV2 + !HEL2 -> use direct vectors - * SV2 + !HEL2 -> use hardened vectors in place - * !SV2 + HEL2 -> allocate one vector slot and use exec mapping - * SV2 + HEL2 -> use hardened vectors and use exec mapping - */ if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) return 0; - /* - * Always allocate a spare vector slot, as we don't know yet which CPUs - * have a BP hardening slot that we can reuse. 
- */ - slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); - __kvm_harden_el2_vector_slot = slot; + if (!has_vhe()) { + err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), + __BP_HARDEN_HYP_VECS_SZ, &base); + if (err) + return err; + } - return create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), - __BP_HARDEN_HYP_VECS_SZ, - &__kvm_bp_vect_base); + kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT); + return 0; } static void cpu_init_hyp_mode(void) @@ -1406,24 +1403,9 @@ static void cpu_hyp_reset(void) static void cpu_set_hyp_vector(void) { struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); - void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); - int slot = -1; - - if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { - vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); - slot = data->hyp_vectors_slot; - } - - if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { - vect = __kvm_bp_vect_base; - if (slot == -1) - slot = __kvm_harden_el2_vector_slot; - } - - if (slot != -1) - vect += slot * SZ_2K; + void *vector = hyp_spectre_vector_selector[data->slot]; - *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vect; + *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; } static void cpu_hyp_reinit(void) @@ -1661,12 +1643,6 @@ static int init_hyp_mode(void) goto out_err; } - err = kvm_map_vectors(); - if (err) { - kvm_err("Cannot map vectors\n"); - goto out_err; - } - /* * Map the Hyp stack pages */ @@ -1810,6 +1786,12 @@ int kvm_arch_init(void *opaque) goto out_err; } + err = kvm_init_vector_slots(); + if (err) { + kvm_err("Cannot initialise vector slots\n"); + goto out_err; + } + err = init_subsystems(); if (err) goto out_hyp; diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 4a81eddabcd8..687598e41b21 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o smccc_wa.o +obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 874eacdabc64..d0a3660c7256 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -188,52 +188,62 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) -.macro hyp_ventry - .align 7 +.macro spectrev2_smccc_wa1_smc + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +.endm + +.macro hyp_ventry indirect, spectrev2 + .align 7 1: esb - .rept 26 - nop - .endr -/* - * The default sequence is to directly branch to the KVM vectors, - * using the computed offset. This applies for VHE as well as - * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble. - * - * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced - * with: - * - * stp x0, x1, [sp, #-16]! - * movz x0, #(addr & 0xffff) - * movk x0, #((addr >> 16) & 0xffff), lsl #16 - * movk x0, #((addr >> 32) & 0xffff), lsl #32 - * br x0 - * - * Where: - * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. - * See kvm_patch_vector_branch for details. 
- */ -alternative_cb kvm_patch_vector_branch + .if \spectrev2 != 0 + spectrev2_smccc_wa1_smc + .else stp x0, x1, [sp, #-16]! - b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) + .endif + .if \indirect != 0 + alternative_cb kvm_patch_vector_branch + /* + * For ARM64_HARDEN_EL2_VECTORS configurations, these NOPs get replaced + * with: + * + * movz x0, #(addr & 0xffff) + * movk x0, #((addr >> 16) & 0xffff), lsl #16 + * movk x0, #((addr >> 32) & 0xffff), lsl #32 + * br x0 + * + * Where: + * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. + * See kvm_patch_vector_branch for details. + */ nop nop nop -alternative_cb_end + nop + alternative_cb_end + .endif + b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) .endm -.macro generate_vectors +.macro generate_vectors indirect, spectrev2 0: .rept 16 - hyp_ventry + hyp_ventry \indirect, \spectrev2 .endr .org 0b + SZ_2K // Safety measure .endm .align 11 SYM_CODE_START(__bp_harden_hyp_vecs) - .rept BP_HARDEN_EL2_SLOTS - generate_vectors - .endr + generate_vectors indirect = 0, spectrev2 = 0 // HYP_VECTOR_DIRECT + generate_vectors indirect = 0, spectrev2 = 1 // HYP_VECTOR_SPECTRE_DIRECT + generate_vectors indirect = 1, spectrev2 = 0 // HYP_VECTOR_INDIRECT + generate_vectors indirect = 1, spectrev2 = 1 // HYP_VECTOR_SPECTRE_INDIRECT 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S deleted file mode 100644 index b0441dbdf68b..000000000000 --- a/arch/arm64/kvm/hyp/smccc_wa.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015-2018 - ARM Ltd - * Author: Marc Zyngier - */ - -#include -#include - -#include -#include - - /* - * This is not executed directly and is instead copied into the vectors - * by install_bp_hardening_cb(). - */ - .data - .pushsection .rodata - .global __smccc_workaround_1_smc -SYM_DATA_START(__smccc_workaround_1_smc) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ - .org 1b -SYM_DATA_END(__smccc_workaround_1_smc) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 760db2c84b9b..cc8e8756600f 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -137,7 +137,7 @@ void kvm_patch_vector_branch(struct alt_instr *alt, u64 addr; u32 insn; - BUG_ON(nr_inst != 5); + BUG_ON(nr_inst != 4); if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS) || WARN_ON_ONCE(has_vhe())) { @@ -160,15 +160,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt, */ addr += KVM_VECTOR_PREAMBLE; - /* stp x0, x1, [sp, #-16]! */ - insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0, - AARCH64_INSN_REG_1, - AARCH64_INSN_REG_SP, - -16, - AARCH64_INSN_VARIANT_64BIT, - AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX); - *updptr++ = cpu_to_le32(insn); - /* movz x0, #(addr & 0xffff) */ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, (u16)addr, -- cgit v1.2.3 From c4792b6dbc5070fe67f4cdcfdad39416333acbe0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:45 +0000 Subject: arm64: spectre: Rename ARM64_HARDEN_EL2_VECTORS to ARM64_SPECTRE_V3A Since ARM64_HARDEN_EL2_VECTORS is really a mitigation for Spectre-v3a, rename it accordingly for consistency with the v2 and v4 mitigation. 
Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-9-will@kernel.org --- Documentation/arm64/memory.rst | 2 +- arch/arm64/include/asm/cpucaps.h | 2 +- arch/arm64/include/asm/spectre.h | 2 +- arch/arm64/kernel/cpu_errata.c | 6 +++--- arch/arm64/kernel/proton-pack.c | 13 ++++++++++--- arch/arm64/kvm/arm.c | 8 ++++---- arch/arm64/kvm/hyp/hyp-entry.S | 3 +-- arch/arm64/kvm/va_layout.c | 4 +--- 8 files changed, 22 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index cf03b3290800..75df7fb30a7b 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -100,7 +100,7 @@ hypervisor maps kernel pages in EL2 at a fixed (and potentially random) offset from the linear mapping. See the kern_hyp_va macro and kvm_update_va_mask function for more details. MMIO devices such as GICv2 gets mapped next to the HYP idmap page, as do vectors when -ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs. +ARM64_SPECTRE_V3A is enabled for particular CPUs. When using KVM with the Virtualization Host Extensions, no additional mappings are created, since the host kernel runs directly in EL2. diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index e7d98997c09c..162539d4c8cd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -21,7 +21,7 @@ #define ARM64_HAS_VIRT_HOST_EXTN 11 #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 -#define ARM64_HARDEN_EL2_VECTORS 14 +#define ARM64_SPECTRE_V3A 14 #define ARM64_HAS_CNP 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index fa86b8f655b7..b4df683ed800 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -83,7 +83,7 @@ enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); -void cpu_el2_vector_harden_enable(const struct arm64_cpu_capabilities *__unused); +void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused); enum mitigation_state arm64_get_spectre_v4_state(void); bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 7a040abaedea..949d5615a47e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -460,10 +460,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_RANDOMIZE_BASE { /* Must come after the Spectre-v2 entry */ - .desc = "EL2 vector hardening", - .capability = ARM64_HARDEN_EL2_VECTORS, + .desc = "Spectre-v3a", + .capability = ARM64_SPECTRE_V3A, ERRATA_MIDR_RANGE_LIST(ca57_a72), - .cpu_enable = cpu_el2_vector_harden_enable, + .cpu_enable = spectre_v3a_enable_mitigation, }, #endif { diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index a4ba94129750..cf9f8b885aea 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as + * Handle detection, reporting and mitigation of Spectre v1, v2, v3a and v4, as * detailed at: * * 
https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability @@ -270,11 +270,18 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) update_mitigation_state(&spectre_v2_state, state); } -void cpu_el2_vector_harden_enable(const struct arm64_cpu_capabilities *__unused) +/* + * Spectre-v3a. + * + * Phew, there's not an awful lot to do here! We just instruct EL2 to use + * an indirect trampoline for the hyp vectors so that guests can't read + * VBAR_EL2 to defeat randomisation of the hypervisor VA layout. + */ +void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused) { struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); - if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS)) + if (this_cpu_has_cap(ARM64_SPECTRE_V3A)) data->slot += HYP_VECTOR_INDIRECT; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5e6fe5eef3ec..4cc29300f533 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1314,7 +1314,7 @@ static int kvm_init_vector_slots(void) base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) return 0; if (!has_vhe()) { @@ -1388,15 +1388,15 @@ static void cpu_hyp_reset(void) * placed in one of the vector slots, which is executed before jumping * to the real vectors. * - * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot + * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot * containing the hardening sequence is mapped next to the idmap page, * and executed before jumping to the real vectors. * - * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an + * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an * empty slot is selected, mapped next to the idmap page, and * executed before jumping to the real vectors. * - * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with + * Note that ARM64_SPECTRE_V3A is somewhat incompatible with * VHE, as we don't have hypervisor-specific mappings. If the system * is VHE and yet selects this capability, it will be ignored. 
*/ diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index d0a3660c7256..e3249e2dda09 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -209,8 +209,7 @@ SYM_CODE_END(__kvm_hyp_vector) .if \indirect != 0 alternative_cb kvm_patch_vector_branch /* - * For ARM64_HARDEN_EL2_VECTORS configurations, these NOPs get replaced - * with: + * For ARM64_SPECTRE_V3A configurations, these NOPs get replaced with: * * movz x0, #(addr & 0xffff) * movk x0, #((addr >> 16) & 0xffff), lsl #16 diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index cc8e8756600f..02362fa27be4 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -139,10 +139,8 @@ void kvm_patch_vector_branch(struct alt_instr *alt, BUG_ON(nr_inst != 4); - if (!cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS) || - WARN_ON_ONCE(has_vhe())) { + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A) || WARN_ON_ONCE(has_vhe())) return; - } /* * Compute HYP VA by using the same computation as kern_hyp_va() -- cgit v1.2.3 From cd1f56b930e857c170d8a04f0f989bfb8a1b5ac1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:46 +0000 Subject: arm64: spectre: Consolidate spectre-v3a detection The spectre-v3a mitigation is split between cpu_errata.c and spectre.c, with the former handling detection of the problem and the latter handling enabling of the workaround. Move the detection logic alongside the enabling logic, like we do for the other spectre mitigations. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201113113847.21619-10-will@kernel.org --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/cpu_errata.c | 13 ++----------- arch/arm64/kernel/proton-pack.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index b4df683ed800..12a4eb5e4e6b 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -83,6 +83,7 @@ enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +bool has_spectre_v3a(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused); enum mitigation_state arm64_get_spectre_v4_state(void); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 949d5615a47e..0709c827f2b3 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -196,16 +196,6 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, return is_midr_in_range(midr, &range) && has_dic; } -#ifdef CONFIG_RANDOMIZE_BASE - -static const struct midr_range ca57_a72[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - {}, -}; - -#endif - #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 @@ -462,7 +452,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Must come after the Spectre-v2 entry */ .desc = "Spectre-v3a", .capability = ARM64_SPECTRE_V3A, - ERRATA_MIDR_RANGE_LIST(ca57_a72), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_spectre_v3a, .cpu_enable = spectre_v3a_enable_mitigation, }, #endif diff 
--git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index cf9f8b885aea..d89be9882998 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -277,6 +277,18 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) * an indirect trampoline for the hyp vectors so that guests can't read * VBAR_EL2 to defeat randomisation of the hypervisor VA layout. */ +bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope) +{ + static const struct midr_range spectre_v3a_unsafe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, + }; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list); +} + void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused) { struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); -- cgit v1.2.3 From 4f6a36fed71dfe51df0ae9a282dc87c76d629bff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Nov 2020 11:38:47 +0000 Subject: KVM: arm64: Remove redundant hyp vectors entry The hyp vectors entry corresponding to HYP_VECTOR_DIRECT (i.e. when neither Spectre-v2 nor Spectre-v3a are present) is unused, as we can simply dispatch straight to __kvm_hyp_vector in this case. Remove the redundant vector, and massage the logic for resolving a slot to a vectors entry. Reported-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201113113847.21619-11-will@kernel.org --- arch/arm64/include/asm/spectre.h | 2 +- arch/arm64/kvm/arm.c | 9 ++++++++- arch/arm64/kvm/hyp/hyp-entry.S | 1 - 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 12a4eb5e4e6b..4e6d90a4fbe0 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -10,7 +10,7 @@ #define __ASM_SPECTRE_H #define BP_HARDEN_EL2_SLOTS 4 -#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K) +#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4cc29300f533..9af9652ab9a3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1298,9 +1298,16 @@ static unsigned long nvhe_percpu_order(void) /* A lookup table holding the hypervisor VA for each vector slot */ static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; +static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot) +{ + return slot - (slot != HYP_VECTOR_DIRECT); +} + static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) { - hyp_spectre_vector_selector[slot] = base + (slot * SZ_2K); + int idx = __kvm_vector_slot2idx(slot); + + hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K); } static int kvm_init_vector_slots(void) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index e3249e2dda09..d179056e1af8 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -239,7 +239,6 @@ SYM_CODE_END(__kvm_hyp_vector) .align 11 SYM_CODE_START(__bp_harden_hyp_vecs) - generate_vectors indirect = 0, spectrev2 = 0 // HYP_VECTOR_DIRECT generate_vectors indirect = 0, spectrev2 = 1 // HYP_VECTOR_SPECTRE_DIRECT generate_vectors indirect = 1, spectrev2 = 0 // HYP_VECTOR_INDIRECT generate_vectors indirect = 1, spectrev2 = 1 // HYP_VECTOR_SPECTRE_INDIRECT -- cgit v1.2.3 From 
789f52c071a0fdaa15ed119912fedd840458e25f Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 6 Nov 2020 16:39:23 +0800 Subject: x86/kvm: remove unused macro HV_CLOCK_SIZE This macro is useless, and could cause gcc warning: arch/x86/kernel/kvmclock.c:47:0: warning: macro "HV_CLOCK_SIZE" is not used [-Wunused-macros] Let's remove it. Signed-off-by: Alex Shi Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Vitaly Kuznetsov Cc: Wanpeng Li Cc: Jim Mattson Cc: Joerg Roedel Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Message-Id: <1604651963-10067-1-git-send-email-alex.shi@linux.alibaba.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvmclock.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 34b18f6eeb2c..aa593743acf6 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -44,7 +44,6 @@ static int __init parse_no_kvmclock_vsyscall(char *arg) early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); /* Aligned to page sizes to match whats mapped via vsyscalls to userspace */ -#define HV_CLOCK_SIZE (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS) #define HVC_BOOT_ARRAY_SIZE \ (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) -- cgit v1.2.3 From 7e8e6eed75e290526d5c98d023e88b141e2c93ec Mon Sep 17 00:00:00 2001 From: Cathy Avery Date: Sun, 11 Oct 2020 14:48:17 -0400 Subject: KVM: SVM: Move asid to vcpu_svm KVM does not have separate ASIDs for L1 and L2; either the nested hypervisor and nested guests share a single ASID, or on older processor the ASID is used only to implement TLB flushing. Either way, ASIDs are handled at the VM level. In preparation for having different VMCBs passed to VMLOAD/VMRUN/VMSAVE for L1 and L2, store the current ASID to struct vcpu_svm and only move it to the VMCB in svm_vcpu_run. This way, TLB flushes can be applied no matter which VMCB will be active during the next svm_vcpu_run. 
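In practice the deferred update is a short check on the entry path; the svm_vcpu_run() hunk below adds:

	if (unlikely(svm->asid != svm->vmcb->control.asid)) {
		svm->vmcb->control.asid = svm->asid;
		vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
	}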
Signed-off-by: Cathy Avery Message-Id: <20201011184818.3609-2-cavery@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 10 +++++++--- arch/x86/kvm/svm/svm.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 253809216cc7..3b53a7ead04b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1215,6 +1215,7 @@ static void init_vmcb(struct vcpu_svm *svm) save->cr4 = 0; } svm->asid_generation = 0; + svm->asid = 0; svm->nested.vmcb12_gpa = 0; svm->vcpu.arch.hflags = 0; @@ -1755,12 +1756,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) ++sd->asid_generation; sd->next_asid = sd->min_asid; svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; + vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } svm->asid_generation = sd->asid_generation; - svm->vmcb->control.asid = sd->next_asid++; - - vmcb_mark_dirty(svm->vmcb, VMCB_ASID); + svm->asid = sd->next_asid++; } static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) @@ -3570,6 +3570,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) sync_lapic_to_cr8(vcpu); + if (unlikely(svm->asid != svm->vmcb->control.asid)) { + svm->vmcb->control.asid = svm->asid; + vmcb_mark_dirty(svm->vmcb, VMCB_ASID); + } svm->vmcb->save.cr2 = vcpu->arch.cr2; /* diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index eb6fbafbe36c..fdff76eb6ceb 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -106,6 +106,7 @@ struct vcpu_svm { struct vmcb *vmcb; unsigned long vmcb_pa; struct svm_cpu_data *svm_data; + u32 asid; uint64_t asid_generation; uint64_t sysenter_esp; uint64_t sysenter_eip; -- cgit v1.2.3 From dc924b062488a0376aae41d3e0a27dc99f852a5e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 15 Nov 2020 09:44:18 -0500 Subject: KVM: SVM: check CR4 changes against vcpu->arch Similarly to what vmx/vmx.c does, use vcpu->arch.cr4 to check if CR4 bits PGE, PKE and OSXSAVE have changed. When switching between VMCB01 and VMCB02, CPUID has to be adjusted every time if CR4.PKE or CR4.OSXSAVE change; without this patch, instead, CR4 would be checked against the previous value for L2 on vmentry, and against the previous value for L1 on vmexit, and CPUID would not be updated. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3b53a7ead04b..6dc337b9c231 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1691,7 +1691,7 @@ static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; - unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; + unsigned long old_cr4 = vcpu->arch.cr4; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) svm_flush_tlb(vcpu); -- cgit v1.2.3 From 33dd3574f5fef57c2c6caccf98925d63aa2a8d09 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 10:59:43 -0700 Subject: kvm: x86/mmu: Add existing trace points to TDP MMU The TDP MMU was initially implemented without some of the usual tracepoints found in mmu.c. Correct this discrepancy by adding the missing trace points to the TDP MMU. Tested: ran the demand paging selftest on an Intel Skylake machine with all the trace points used by the TDP MMU enabled and observed them firing with expected values. 
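Each hook follows the pattern already used in mmu.c; for instance, shadow-page allocation now records the new page, as in the alloc_tdp_mmu_page() hunk below:

	sp->gfn = gfn;
	sp->tdp_mmu_page = true;

	trace_kvm_mmu_get_page(sp, true);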
This patch can be viewed in Gerrit at: https://linux-review.googlesource.com/c/virt/kvm/kvm/+/3812 Signed-off-by: Ben Gardon Message-Id: <20201027175944.1183301-1-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index cffa51c6049e..d6354f379554 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -7,6 +7,8 @@ #include "tdp_mmu.h" #include "spte.h" +#include + #ifdef CONFIG_X86_64 static bool __read_mostly tdp_mmu_enabled = false; module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644); @@ -108,6 +110,8 @@ static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn, sp->gfn = gfn; sp->tdp_mmu_page = true; + trace_kvm_mmu_get_page(sp, true); + return sp; } @@ -278,6 +282,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, pt = spte_to_child_pt(old_spte, level); sp = sptep_to_sp(pt); + trace_kvm_mmu_prepare_zap_page(sp); + list_del(&sp->link); if (sp->lpage_disallowed) @@ -480,11 +486,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, if (unlikely(is_noslot_pfn(pfn))) { new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte); - } else + } else { make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn, pfn, iter->old_spte, prefault, true, map_writable, !shadow_accessed_mask, &new_spte); + trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep); + } if (new_spte == iter->old_spte) ret = RET_PF_SPURIOUS; @@ -698,6 +706,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot, tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte); young = 1; + + trace_kvm_age_page(iter.gfn, iter.level, slot, young); } return young; -- cgit v1.2.3 From b9a98c3437e353b269ebf3567acc5c3dc757c7a5 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 10:59:44 -0700 Subject: kvm: x86/mmu: Add TDP MMU SPTE changed trace point Add an extremely verbose trace point to the TDP MMU to log all SPTE changes, regardless of callstack / motivation. This is useful when a complete picture of the paging structure is needed or a change cannot be explained with the other, existing trace points. Tested: ran the demand paging selftest on an Intel Skylake machine with all the trace points used by the TDP MMU enabled and observed them firing with expected values. This patch can be viewed in Gerrit at: https://linux-review.googlesource.com/c/virt/kvm/kvm/+/3813 Signed-off-by: Ben Gardon Message-Id: <20201027175944.1183301-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmutrace.h | 29 +++++++++++++++++++++++++++++ arch/x86/kvm/mmu/tdp_mmu.c | 2 ++ 2 files changed, 31 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h index 213699b27b44..e798489b56b5 100644 --- a/arch/x86/kvm/mmu/mmutrace.h +++ b/arch/x86/kvm/mmu/mmutrace.h @@ -381,6 +381,35 @@ TRACE_EVENT( ) ); +TRACE_EVENT( + kvm_tdp_mmu_spte_changed, + TP_PROTO(int as_id, gfn_t gfn, int level, u64 old_spte, u64 new_spte), + TP_ARGS(as_id, gfn, level, old_spte, new_spte), + + TP_STRUCT__entry( + __field(u64, gfn) + __field(u64, old_spte) + __field(u64, new_spte) + /* Level cannot be larger than 5 on x86, so it fits in a u8. */ + __field(u8, level) + /* as_id can only be 0 or 1 x86, so it fits in a u8. 
*/ + __field(u8, as_id) + ), + + TP_fast_assign( + __entry->gfn = gfn; + __entry->old_spte = old_spte; + __entry->new_spte = new_spte; + __entry->level = level; + __entry->as_id = as_id; + ), + + TP_printk("as id %d gfn %llx level %d old_spte %llx new_spte %llx", + __entry->as_id, __entry->gfn, __entry->level, + __entry->old_spte, __entry->new_spte + ) +); + #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d6354f379554..75db27fda8f3 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -248,6 +248,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, if (old_spte == new_spte) return; + trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); + /* * The only times a SPTE should be changed from a non-present to * non-present state is when an MMIO entry is installed/modified/ -- cgit v1.2.3 From 68b824e428c5fb5c3dc5ef80b1543e767534b58e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 24 Oct 2020 16:33:38 +0100 Subject: KVM: arm64: Patch kimage_voffset instead of loading the EL1 value Directly using the kimage_voffset variable is fine for now, but will become more problematic as we start distrusting EL1. Instead, patch the kimage_voffset into the HYP text, ensuring we don't have to load an untrusted value later on. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_mmu.h | 22 ++++++++++++++++++++++ arch/arm64/kernel/image-vars.h | 4 +--- arch/arm64/kvm/hyp/nvhe/host.S | 5 +---- arch/arm64/kvm/va_layout.c | 6 ++++++ 4 files changed, 30 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 608c3a83e740..5633c5a27aad 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -72,6 +72,28 @@ alternative_cb kvm_update_va_mask alternative_cb_end .endm +/* + * Convert a kernel image address to a PA + * reg: kernel address to be converted in place + * tmp: temporary register + * + * The actual code generation takes place in kvm_get_kimage_voffset, and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_get_kimage_voffset uses the + * specific registers encoded in the instructions). + */ +.macro kimg_pa reg, tmp +alternative_cb kvm_get_kimage_voffset + movz \tmp, #0 + movk \tmp, #0, lsl #16 + movk \tmp, #0, lsl #32 + movk \tmp, #0, lsl #48 +alternative_cb_end + + /* reg = __pa(reg) */ + sub \reg, \reg, \tmp +.endm + #else #include diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index e8c194f8de88..4b32588918d9 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -65,13 +65,11 @@ __efistub__ctype = _ctype; KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset); +KVM_NVHE_ALIAS(kvm_get_kimage_voffset); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); -/* Kernel constant needed to compute idmap addresses. */ -KVM_NVHE_ALIAS(kimage_voffset); - /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). 
*/ KVM_NVHE_ALIAS(__hyp_panic_string); KVM_NVHE_ALIAS(panic); diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index ed27f06a31ba..4e207c1c5126 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -115,10 +115,7 @@ SYM_FUNC_END(__hyp_do_panic) * Preserve x0-x4, which may contain stub parameters. */ ldr x5, =__kvm_handle_stub_hvc - ldr_l x6, kimage_voffset - - /* x5 = __pa(x5) */ - sub x5, x5, x6 + kimg_pa x5, x6 br x5 .L__vect_end\@: .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 1d00d2cb93fd..d61117805de0 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -251,3 +251,9 @@ void kvm_update_kimg_phys_offset(struct alt_instr *alt, { generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst); } + +void kvm_get_kimage_voffset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q(kimage_voffset, origptr, updptr, nr_inst); +} -- cgit v1.2.3 From 29052f1b92f2bcb0419c544459f4c919bfb20898 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Oct 2020 21:52:54 +0100 Subject: KVM: arm64: Simplify __kvm_enable_ssbs() Move the setting of SSBS directly into the HVC handler, using the C helpers rather than the inline asssembly code. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 2 -- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +++++- arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 11 ----------- 4 files changed, 6 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 54387ccd1ab2..a542c422a036 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -189,8 +189,6 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); -extern void __kvm_enable_ssbs(void); - extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 174817ba119c..153faa15d490 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -461,6 +461,7 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) +#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index c0543b2e760e..82df7fc24760 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -58,7 +58,11 @@ static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt) { - __kvm_enable_ssbs(); + u64 tmp; + + tmp = read_sysreg_el2(SYS_SCTLR); + tmp |= SCTLR_ELx_DSSBS; + write_sysreg_el2(tmp, SYS_SCTLR); } static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c index 88a25fc8fcd3..29305022bc04 100644 --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -33,14 +33,3 @@ void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) __sysreg_restore_user_state(ctxt); __sysreg_restore_el2_return_state(ctxt); } - 
-void __kvm_enable_ssbs(void) -{ - u64 tmp; - - asm volatile( - "mrs %0, sctlr_el2\n" - "orr %0, %0, %1\n" - "msr sctlr_el2, %0" - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); -} -- cgit v1.2.3 From 83fa381f66ccb025f9e182d0b2ef9cd53c1f33ab Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 24 Oct 2020 10:56:55 +0100 Subject: KVM: arm64: Avoid repetitive stack access on host EL1 to EL2 exception Registers x0/x1 get repeatedly pushed and popped during a host HVC call. Instead, leave the registers on the stack, trading a store instruction on the fast path for an add on the slow path. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/host.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 4e207c1c5126..fe2740b224cf 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -13,8 +13,6 @@ .text SYM_FUNC_START(__host_exit) - stp x0, x1, [sp, #-16]! - get_host_ctxt x0, x1 /* Store the host regs x2 and x3 */ @@ -99,13 +97,15 @@ SYM_FUNC_END(__hyp_do_panic) mrs x0, esr_el2 lsr x0, x0, #ESR_ELx_EC_SHIFT cmp x0, #ESR_ELx_EC_HVC64 - ldp x0, x1, [sp], #16 b.ne __host_exit + ldp x0, x1, [sp] // Don't fixup the stack yet + /* Check for a stub HVC call */ cmp x0, #HVC_STUB_HCALL_NR b.hs __host_exit + add sp, sp, #16 /* * Compute the idmap address of __kvm_handle_stub_hvc and * jump there. Since we use kimage_voffset, do not use the -- cgit v1.2.3 From 14bda7a927336055d7c0deb1483f9cdb687c2080 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Nov 2020 16:39:44 +0000 Subject: KVM: arm64: Add kvm_vcpu_has_pmu() helper There are a number of places where we check for the KVM_ARM_VCPU_PMU_V3 feature. Wrap this check into a new kvm_vcpu_has_pmu(), and use it at the existing locations. No functional change.
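The helper itself is a one-line wrapper around the feature bit, as the kvm_host.h hunk below shows:

	#define kvm_vcpu_has_pmu(vcpu)				\
		(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))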
Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/pmu-emul.c | 8 +++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 709f892f7a14..8c681d621a82 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -731,4 +731,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_vcpu_has_pmu(vcpu) \ + (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 2ed5ef8f274b..e7e3b4629864 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -913,8 +913,7 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - if (!kvm_arm_support_pmu_v3() || - !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (!kvm_arm_support_pmu_v3() || !kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (vcpu->arch.pmu.created) @@ -1015,7 +1014,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (!kvm_arm_pmu_irq_initialized(vcpu)) @@ -1035,8 +1034,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_PMU_V3_IRQ: case KVM_ARM_VCPU_PMU_V3_INIT: case KVM_ARM_VCPU_PMU_V3_FILTER: - if (kvm_arm_support_pmu_v3() && - test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (kvm_arm_support_pmu_v3() && kvm_vcpu_has_pmu(vcpu)) return 0; } -- cgit v1.2.3 From 9bbfa4b565379eeb2fb8fdbcc9979549ae0e48d9 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 26 Nov 2020 14:49:16 +0000 Subject: KVM: arm64: Refuse to run VCPU if PMU is not initialized When enabling the PMU in kvm_arm_pmu_v3_enable(), KVM returns early if the PMU flag created is false and skips any other checks. Because PMU emulation is gated only on the VCPU feature being set, this makes it possible for userspace to get away with setting the VCPU feature but not doing any initialization for the PMU. Fix it by returning an error when trying to run the VCPU if the PMU hasn't been initialized correctly. The PMU is marked as created only if the interrupt ID has been set when using an in-kernel irqchip. This means the same check in kvm_arm_pmu_v3_enable() is redundant, remove it. 
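With this change, the top of kvm_arm_pmu_v3_enable() (see the pmu-emul.c hunk below) reads:

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;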
Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201126144916.164075-1-alexandru.elisei@arm.com --- arch/arm64/kvm/pmu-emul.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index e7e3b4629864..c640d16d1bbd 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -825,9 +825,12 @@ bool kvm_arm_support_pmu_v3(void) int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { - if (!vcpu->arch.pmu.created) + if (!kvm_vcpu_has_pmu(vcpu)) return 0; + if (!vcpu->arch.pmu.created) + return -EINVAL; + /* * A valid interrupt configuration for the PMU is either to have a * properly configured interrupt number and using an in-kernel @@ -835,9 +838,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) */ if (irqchip_in_kernel(vcpu->kvm)) { int irq = vcpu->arch.pmu.irq_num; - if (!kvm_arm_pmu_irq_initialized(vcpu)) - return -EINVAL; - /* * If we are using an in-kernel vgic, at this point we know * the vgic will be initialized, so we can check the PMU irq -- cgit v1.2.3 From 04355e41a60338206d6498fe463a86131d5ca06b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Nov 2020 18:00:30 +0000 Subject: KVM: arm64: Set ID_AA64DFR0_EL1.PMUVer to 0 when no PMU support We always expose the HW view of PMU in ID_AA64FDR0_EL1.PMUver, even when the PMU feature is disabled, while the architecture says that FEAT_PMUv3 not being implemented should result in this field being zero. Let's follow the architecture's guidance. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d2e1d745f067..6629cfde2838 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1070,10 +1070,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | (0xfUL << ID_AA64ISAR1_GPI_SHIFT)); } else if (id == SYS_ID_AA64DFR0_EL1) { + u64 cap = 0; + /* Limit guests to PMUv3 for ARMv8.1 */ + if (kvm_vcpu_has_pmu(vcpu)) + cap = ID_AA64DFR0_PMUVER_8_1; + val = cpuid_feature_cap_perfmon_field(val, ID_AA64DFR0_PMUVER_SHIFT, - ID_AA64DFR0_PMUVER_8_1); + cap); } else if (id == SYS_ID_DFR0_EL1) { /* Limit guests to PMUv3 for ARMv8.1 */ val = cpuid_feature_cap_perfmon_field(val, -- cgit v1.2.3 From 77da43039ab5cfc9631159fd87fe38d4c34cdaf5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Nov 2020 18:13:27 +0000 Subject: KVM: arm64: Refuse illegal KVM_ARM_VCPU_PMU_V3 at reset time We accept to configure a PMU when a vcpu is created, even if the HW (or the host) doesn't support it. This results in failures when attributes get set, which is a bit odd as we should have failed the vcpu creation the first place. Move the check to the point where we check the vcpu feature set, and fail early if we cannot support a PMU. This further simplifies the attribute handling. 
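The feature-set check added to kvm_reset_vcpu() (see the reset.c hunk below) is simply:

	if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
		ret = -EINVAL;
		goto out;
	}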
Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 4 ++-- arch/arm64/kvm/reset.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index c640d16d1bbd..812495e915e4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -913,7 +913,7 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - if (!kvm_arm_support_pmu_v3() || !kvm_vcpu_has_pmu(vcpu)) + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (vcpu->arch.pmu.created) @@ -1034,7 +1034,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_PMU_V3_IRQ: case KVM_ARM_VCPU_PMU_V3_INIT: case KVM_ARM_VCPU_PMU_V3_FILTER: - if (kvm_arm_support_pmu_v3() && kvm_vcpu_has_pmu(vcpu)) + if (kvm_vcpu_has_pmu(vcpu)) return 0; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 74ce92a4988c..3e772ea4e066 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -285,6 +285,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) pstate = VCPU_RESET_PSTATE_EL1; } + if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) { + ret = -EINVAL; + goto out; + } break; } -- cgit v1.2.3 From b0737e999ec0af007b10ac0b7db97932394a248f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Nov 2020 18:49:28 +0000 Subject: KVM: arm64: Inject UNDEF on PMU access when no PMU configured The ARMv8 architecture says that in the absence of FEAT_PMUv3, all the PMU-related registers generate an UNDEF. Let's make sure that all our PMU handlers catch this case by hooking into check_pmu_access_disabled(), and add checks in a couple of other places. Note that we still cannot deliver an exception into the guest as the offending cases are already caught by the RAZ/WI handling. But this puts us one step closer to architectural compliance.
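After this patch, check_pmu_access_disabled() folds the feature test into the existing logic (see the sys_regs.c hunk below):

	u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0);
	bool enabled = kvm_vcpu_has_pmu(vcpu);

	enabled &= (reg & flags) || vcpu_mode_priv(vcpu);
	if (!enabled)
		kvm_inject_undefined(vcpu);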
Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6629cfde2838..b098d667bb42 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -609,8 +609,9 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0); - bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); + bool enabled = kvm_vcpu_has_pmu(vcpu); + enabled &= (reg & flags) || vcpu_mode_priv(vcpu); if (!enabled) kvm_inject_undefined(vcpu); @@ -857,10 +858,8 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_arm_pmu_v3_ready(vcpu)) return trap_raz_wi(vcpu, p, r); - if (!vcpu_mode_priv(vcpu)) { - kvm_inject_undefined(vcpu); + if (check_pmu_access_disabled(vcpu, 0)) return false; - } if (p->is_write) { u64 val = p->regval & mask; @@ -928,6 +927,11 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_arm_pmu_v3_ready(vcpu)) return trap_raz_wi(vcpu, p, r); + if (!kvm_vcpu_has_pmu(vcpu)) { + kvm_inject_undefined(vcpu); + return false; + } + if (p->is_write) { if (!vcpu_mode_priv(vcpu)) { kvm_inject_undefined(vcpu); -- cgit v1.2.3 From f975ccb08d6530e58bac660c7a938f98bae5a651 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Nov 2020 14:12:53 +0000 Subject: KVM: arm64: Remove PMU RAZ/WI handling There is no RAZ/WI handling allowed for the PMU registers in the ARMv8 architecture. Nobody can remember how we came to the conclusion that we could do this, but the ARMv8 ARM is pretty clear that we cannot. Remove the RAZ/WI handling of the PMU system registers when the PMU is not configured.
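Concretely, each PMU accessor loses the same two-line preamble, repeated throughout the sys_regs.c hunk below:

	if (!kvm_arm_pmu_v3_ready(vcpu))
		return trap_raz_wi(vcpu, p, r);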
Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b098d667bb42..3bd4cc40536b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -643,9 +643,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 val; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -672,9 +669,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_event_counter_el0_disabled(vcpu)) return false; @@ -693,9 +687,6 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 pmceid; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - BUG_ON(p->is_write); if (pmu_access_el0_disabled(vcpu)) @@ -728,9 +719,6 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, { u64 idx; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (r->CRn == 9 && r->CRm == 13) { if (r->Op2 == 2) { /* PMXEVCNTR_EL0 */ @@ -784,9 +772,6 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 idx, reg; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -824,9 +809,6 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 val, mask; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -855,9 +837,6 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask = kvm_pmu_valid_counter_mask(vcpu); - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (check_pmu_access_disabled(vcpu, 0)) return false; @@ -882,9 +861,6 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask = kvm_pmu_valid_counter_mask(vcpu); - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -907,9 +883,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (!p->is_write) return read_from_write_only(vcpu, p, r); @@ -924,9 +897,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (!kvm_vcpu_has_pmu(vcpu)) { kvm_inject_undefined(vcpu); return false; -- cgit v1.2.3 From a3da93580202ac9075d4e96f73c8435b9d7262c1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Nov 2020 18:50:06 +0000 Subject: KVM: arm64: Remove dead PMU sysreg decoding code The handling of traps in access_pmu_evcntr() has a couple of omminous "else return false;" statements that don't make any sense: the decoding tree coverse all the registers that trap to this handler, and returning false implies that we change PC, which we don't. Get rid of what is evidently dead code. 
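Rather than returning false from unreachable branches, the counter index is primed to an invalid value and checked once after decoding (decoding body elided here; see the access_pmu_evcntr() hunk below):

	u64 idx = ~0UL;

	/* CRn/CRm/Op2 decoding sets idx for every trapped encoding */

	/* Catch any decoding mistake */
	WARN_ON(idx == ~0UL);

	if (!pmu_counter_idx_valid(vcpu, idx))
		return false;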
Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3bd4cc40536b..dd7a73468286 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -717,7 +717,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 idx; + u64 idx = ~0UL; if (r->CRn == 9 && r->CRm == 13) { if (r->Op2 == 2) { @@ -733,8 +733,6 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, return false; idx = ARMV8_PMU_CYCLE_IDX; - } else { - return false; } } else if (r->CRn == 0 && r->CRm == 9) { /* PMCCNTR */ @@ -748,10 +746,11 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, return false; idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); - } else { - return false; } + /* Catch any decoding mistake */ + WARN_ON(idx == ~0UL); + if (!pmu_counter_idx_valid(vcpu, idx)) return false; -- cgit v1.2.3 From 46acf89de499b2db07e120c62a796e8a0efbad8d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Nov 2020 16:41:40 +0000 Subject: KVM: arm64: Gate kvm_pmu_update_state() on the PMU feature We currently gate the update of the PMU state on the PMU being "ready". The "ready" state is only set to true when the first vcpu run is successful, and if it isn't, we never reach the update code. So the "ready" state is never the right thing to check for, and it should instead be the presence of the PMU feature, which makes a bit more sense. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 812495e915e4..5ad900c609ee 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -384,7 +384,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = &vcpu->arch.pmu; bool overflow; - if (!kvm_arm_pmu_v3_ready(vcpu)) + if (!kvm_vcpu_has_pmu(vcpu)) return; overflow = !!kvm_pmu_overflow_status(vcpu); -- cgit v1.2.3 From 7521c3a9e63041602d531e36c07a340f188dc1fa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Nov 2020 16:42:08 +0000 Subject: KVM: arm64: Get rid of the PMU ready state The PMU ready state has no user left. Goodbye. 
Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 1 - include/kvm/arm_pmu.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 5ad900c609ee..398f6df1bbe4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -851,7 +851,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) } kvm_pmu_vcpu_reset(vcpu); - vcpu->arch.pmu.ready = true; return 0; } diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 1d94acd0bc85..fc85f50fa0e9 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -24,13 +24,11 @@ struct kvm_pmu { int irq_num; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS); - bool ready; bool created; bool irq_level; struct irq_work overflow_work; }; -#define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); @@ -61,7 +59,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); struct kvm_pmu { }; -#define kvm_arm_pmu_v3_ready(v) (false) #define kvm_arm_pmu_irq_initialized(v) (false) static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) -- cgit v1.2.3 From 8cce12b3c82717df72afb955ce74c769b0eb2b4f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 27 Nov 2020 12:46:36 -0500 Subject: KVM: nSVM: set fixed bits by hand SVM generally ignores fixed-1 bits. Set them manually so that we do not end up by mistake without those bits set in struct kvm_vcpu; it is part of userspace API that KVM always returns value with the bits set. 
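The fixed-1 bits are OR-ed in as the vmcb12 state is copied, as in the nested_prepare_vmcb_save() hunk below (unrelated lines elided):

	kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);

	svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
	svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM;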
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b0f37183e8f5..b0b667456b2e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -381,7 +381,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.ds = vmcb12->save.ds; svm->vmcb->save.gdtr = vmcb12->save.gdtr; svm->vmcb->save.idtr = vmcb12->save.idtr; - kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags); + kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); svm_set_efer(&svm->vcpu, vmcb12->save.efer); svm_set_cr0(&svm->vcpu, vmcb12->save.cr0); svm_set_cr4(&svm->vcpu, vmcb12->save.cr4); @@ -394,8 +394,8 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.rax = vmcb12->save.rax; svm->vmcb->save.rsp = vmcb12->save.rsp; svm->vmcb->save.rip = vmcb12->save.rip; - svm->vmcb->save.dr7 = vmcb12->save.dr7; - svm->vcpu.arch.dr6 = vmcb12->save.dr6; + svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1; + svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM; svm->vmcb->save.cpl = vmcb12->save.cpl; } @@ -660,13 +660,14 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; kvm_set_rflags(&svm->vcpu, hsave->save.rflags); + kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); kvm_rax_write(&svm->vcpu, hsave->save.rax); kvm_rsp_write(&svm->vcpu, hsave->save.rsp); kvm_rip_write(&svm->vcpu, hsave->save.rip); - svm->vmcb->save.dr7 = 0; + svm->vmcb->save.dr7 = DR7_FIXED_1; svm->vmcb->save.cpl = 0; svm->vmcb->control.exit_int_info = 0; -- cgit v1.2.3 From 8d14797b53f044fda3ed42b5b6357c7622b8af58 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Nov 2020 19:44:00 +0000 Subject: KVM: arm64: Move 'struct kvm_arch_memory_slot' out of uapi/ 'struct kvm_arch_memory_slot' isn't part of the user ABI, so move it out of the uapi/ headers in case we start using it in future and accidentally back ourselves into a corner. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201118194402.2892-2-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/include/uapi/asm/kvm.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 781d029b8aa8..32db7196504f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -89,6 +89,9 @@ struct kvm_s2_mmu { struct kvm *kvm; }; +struct kvm_arch_memory_slot { +}; + struct kvm_arch { struct kvm_s2_mmu mmu; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 1c17c3a24411..24223adae150 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -156,9 +156,6 @@ struct kvm_sync_regs { __u64 device_irq_level; }; -struct kvm_arch_memory_slot { -}; - /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. 
-- cgit v1.2.3 From 36fb4cd55f626dff0f6e76bed14707fa00147b7f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Nov 2020 19:44:01 +0000 Subject: KVM: arm64: Remove kvm_arch_vm_ioctl_check_extension() kvm_arch_vm_ioctl_check_extension() is only called from kvm_vm_ioctl_check_extension(), so we can inline it and remove the extra function. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Link: https://lore.kernel.org/r/20201118194402.2892-3-will@kernel.org --- arch/arm64/include/asm/cpufeature.h | 5 ++++ arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/arm.c | 31 ++++++++++++++++++++-- arch/arm64/kvm/reset.c | 52 ------------------------------------- 4 files changed, 34 insertions(+), 55 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 97244d4feca9..04ab88db4b43 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -697,6 +697,11 @@ static inline bool system_supports_generic_auth(void) cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH); } +static inline bool system_has_full_ptr_auth(void) +{ + return system_supports_address_auth() && system_supports_generic_auth(); +} + static __always_inline bool system_uses_irq_prio_masking(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 32db7196504f..6691043d8930 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -58,7 +58,6 @@ int kvm_arm_init_sve(void); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_vmid { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5750ec34960e..97a9332f12cc 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -182,6 +182,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: case KVM_CAP_ARM_NISV_TO_USER: case KVM_CAP_ARM_INJECT_EXT_DABT: + case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_VCPU_ATTRIBUTES: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: @@ -213,10 +215,35 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_STEAL_TIME: r = kvm_arm_pvtime_supported(); break; - default: - r = kvm_arch_vm_ioctl_check_extension(kvm, ext); + case KVM_CAP_ARM_EL1_32BIT: + r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); + break; + case KVM_CAP_GUEST_DEBUG_HW_BPS: + r = get_num_brps(); + break; + case KVM_CAP_GUEST_DEBUG_HW_WPS: + r = get_num_wrps(); + break; + case KVM_CAP_ARM_PMU_V3: + r = kvm_arm_support_pmu_v3(); + break; + case KVM_CAP_ARM_INJECT_SERROR_ESR: + r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); + break; + case KVM_CAP_ARM_VM_IPA_SIZE: + r = get_kvm_ipa_limit(); break; + case KVM_CAP_ARM_SVE: + r = system_supports_sve(); + break; + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + case KVM_CAP_ARM_PTRAUTH_GENERIC: + r = system_has_full_ptr_auth(); + break; + default: + r = 0; } + return r; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f32490229a4c..c7985f4607a9 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -42,58 +42,6 @@ static u32 kvm_ipa_limit; #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ PSR_AA32_I_BIT | PSR_AA32_F_BIT) -static bool 
system_has_full_ptr_auth(void) -{ - return system_supports_address_auth() && system_supports_generic_auth(); -} - -/** - * kvm_arch_vm_ioctl_check_extension - * - * We currently assume that the number of HW registers is uniform - * across all CPUs (see cpuinfo_sanity_check). - */ -int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) -{ - int r; - - switch (ext) { - case KVM_CAP_ARM_EL1_32BIT: - r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); - break; - case KVM_CAP_GUEST_DEBUG_HW_BPS: - r = get_num_brps(); - break; - case KVM_CAP_GUEST_DEBUG_HW_WPS: - r = get_num_wrps(); - break; - case KVM_CAP_ARM_PMU_V3: - r = kvm_arm_support_pmu_v3(); - break; - case KVM_CAP_ARM_INJECT_SERROR_ESR: - r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); - break; - case KVM_CAP_SET_GUEST_DEBUG: - case KVM_CAP_VCPU_ATTRIBUTES: - r = 1; - break; - case KVM_CAP_ARM_VM_IPA_SIZE: - r = kvm_ipa_limit; - break; - case KVM_CAP_ARM_SVE: - r = system_supports_sve(); - break; - case KVM_CAP_ARM_PTRAUTH_ADDRESS: - case KVM_CAP_ARM_PTRAUTH_GENERIC: - r = system_has_full_ptr_auth(); - break; - default: - r = 0; - } - - return r; -} - unsigned int kvm_sve_max_vl; int kvm_arm_init_sve(void) -- cgit v1.2.3 From bf118a5cb7e6d17e7ec9492e4dc676e7e7b69d01 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Nov 2020 19:44:02 +0000 Subject: KVM: arm64: Remove unused __extended_idmap_trampoline() prototype __extended_idmap_trampoline() was removed a long time ago by 3421e9d88d7a ("arm64: KVM: Simplify HYP init/teardown") so remove the unused function prototype. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Link: https://lore.kernel.org/r/20201118194402.2892-4-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 6691043d8930..724c74cfacdd 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -58,7 +58,6 @@ int kvm_arm_init_sve(void); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); -void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_vmid { /* The VMID generation used for the virt. memory system */ -- cgit v1.2.3 From c73a44161776f6e60d933717f3b34084b0a0eba0 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 26 Nov 2020 14:46:40 +0100 Subject: KVM: arm64: CSSELR_EL1 max is 13 Not counting TnD, which KVM doesn't currently consider, CSSELR_EL1 can have a maximum value of 0b1101 (13), which corresponds to an instruction cache at level 7. With CSSELR_MAX set to 12 we can only select up to cache level 6. Change it to 14. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201126134641.35231-2-drjones@redhat.com --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c1fac9836af1..ef453f7827fa 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -169,7 +169,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static u32 cache_levels; /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ -#define CSSELR_MAX 12 +#define CSSELR_MAX 14 /* Which cache CCSIDR represents depends on CSSELR value. 
*/ static u32 get_ccsidr(u32 csselr) -- cgit v1.2.3 From 7f43c2014fa03bb8718569ae628acf2089683bff Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 26 Nov 2020 17:25:30 +0000 Subject: arm64: Make the Meltdown mitigation state available Our Meltdown mitigation state isn't exposed outside of cpufeature.c, contrary to the rest of the Spectre mitigation state. As we are going to use it in KVM, expose an arm64_get_meltdown_state() helper which returns the same possible values as arm64_get_spectre_v?_state(). Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/spectre.h | 2 ++ arch/arm64/kernel/cpufeature.c | 20 +++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index fcdfbce302bd..52e788981f4a 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -29,4 +29,6 @@ bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); void spectre_v4_enable_task_mitigation(struct task_struct *tsk); +enum mitigation_state arm64_get_meltdown_state(void); + #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6f36c4f62f69..280b10762f6b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2846,14 +2846,28 @@ static int __init enable_mrs_emulation(void) core_initcall(enable_mrs_emulation); +enum mitigation_state arm64_get_meltdown_state(void) +{ + if (__meltdown_safe) + return SPECTRE_UNAFFECTED; + + if (arm64_kernel_unmapped_at_el0()) + return SPECTRE_MITIGATED; + + return SPECTRE_VULNERABLE; +} + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { - if (__meltdown_safe) + switch (arm64_get_meltdown_state()) { + case SPECTRE_UNAFFECTED: return sprintf(buf, "Not affected\n"); - if (arm64_kernel_unmapped_at_el0()) + case SPECTRE_MITIGATED: return sprintf(buf, "Mitigation: PTI\n"); - return sprintf(buf, "Vulnerable\n"); + default: + return sprintf(buf, "Vulnerable\n"); + } } -- cgit v1.2.3 From 57e3cebd022fbc035dcf190ac789fd2ffc747f5b Mon Sep 17 00:00:00 2001 From: Shenming Lu Date: Sat, 28 Nov 2020 22:18:57 +0800 Subject: KVM: arm64: Delay the polling of the GICR_VPENDBASER.Dirty bit In order to reduce the impact of the VPT parsing happening on the GIC, we can split the vcpu residency in two phases: - programming GICR_VPENDBASER: this still happens in vcpu_load() - checking for the VPT parsing to be complete: this can happen on vcpu entry (in kvm_vgic_flush_hwstate()) This allows the GIC and the CPU to work in parallel, removing some of the entry overhead.
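As a rough illustration of this two-phase split (and not of the KVM/GIC code in the patch below), the standalone C sketch that follows kicks off a slow "parse" asynchronously in a load phase and only polls for completion in a separate entry phase, so that other work can overlap with it; every name here (hw_vpt_parser, vpt_ready and so on) is invented for the example.

/* Illustrative only: the two-phase residency pattern, in userspace C. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool vpt_ready;

static void *hw_vpt_parser(void *unused)
{
    (void)unused;
    usleep(1000);                     /* stand-in for the GIC parsing the VPT */
    atomic_store(&vpt_ready, true);
    return NULL;
}

static void vcpu_load_phase(pthread_t *hw)
{
    /* Phase 1: "program GICR_VPENDBASER" and let the parse run asynchronously. */
    atomic_store(&vpt_ready, false);
    pthread_create(hw, NULL, hw_vpt_parser, NULL);
}

static void vcpu_entry_phase(void)
{
    /* Phase 2: only now wait for the parse to be complete. */
    while (!atomic_load(&vpt_ready))
        ;
    puts("VPT parse complete, entering guest");
}

int main(void)
{
    pthread_t hw;

    vcpu_load_phase(&hw);
    /* ...the rest of the load/flush work overlaps with the parse here... */
    vcpu_entry_phase();
    pthread_join(hw, NULL);
    return 0;
}

(Build with "cc -pthread"; the point is only that the completion check is deferred to the entry path.)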
Suggested-by: Marc Zyngier Signed-off-by: Shenming Lu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201128141857.983-3-lushenming@huawei.com --- arch/arm64/kvm/vgic/vgic-v4.c | 12 ++++++++++++ arch/arm64/kvm/vgic/vgic.c | 3 +++ drivers/irqchip/irq-gic-v3-its.c | 12 ++++++++---- drivers/irqchip/irq-gic-v4.c | 19 +++++++++++++++++++ include/kvm/arm_vgic.h | 1 + include/linux/irqchip/arm-gic-v4.h | 4 ++++ 6 files changed, 47 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index b5fa73c9fd35..66508b03094f 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -353,6 +353,18 @@ int vgic_v4_load(struct kvm_vcpu *vcpu) return err; } +void vgic_v4_commit(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + /* + * No need to wait for the vPE to be ready across a shallow guest + * exit, as only a vcpu_put will invalidate it. + */ + if (!vpe->ready) + its_commit_vpe(vpe); +} + static struct vgic_its *vgic_get_its(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *irq_entry) { diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index c3643b7f101b..1c597c9885fa 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); + + if (vgic_supports_direct_msis(vcpu->kvm)) + vgic_v4_commit(vcpu); } void kvm_vgic_load(struct kvm_vcpu *vcpu) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 0fec31931e11..7db602434ac5 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe *vpe) val |= vpe->idai ? 
GICR_VPENDBASER_IDAI : 0; val |= GICR_VPENDBASER_Valid; gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - its_wait_vpt_parse_complete(); } static void its_vpe_deschedule(struct its_vpe *vpe) @@ -3891,6 +3889,10 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) its_vpe_deschedule(vpe); return 0; + case COMMIT_VPE: + its_wait_vpt_parse_complete(); + return 0; + case INVALL_VPE: its_vpe_invall(vpe); return 0; @@ -4052,8 +4054,6 @@ static void its_vpe_4_1_schedule(struct its_vpe *vpe, val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - its_wait_vpt_parse_complete(); } static void its_vpe_4_1_deschedule(struct its_vpe *vpe, @@ -4128,6 +4128,10 @@ static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) its_vpe_4_1_deschedule(vpe, info); return 0; + case COMMIT_VPE: + its_wait_vpt_parse_complete(); + return 0; + case INVALL_VPE: its_vpe_4_1_invall(vpe); return 0; diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 0c18714ae13e..5d1dc9915272 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -232,6 +232,8 @@ int its_make_vpe_non_resident(struct its_vpe *vpe, bool db) if (!ret) vpe->resident = false; + vpe->ready = false; + return ret; } @@ -258,6 +260,23 @@ int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en) return ret; } +int its_commit_vpe(struct its_vpe *vpe) +{ + struct its_cmd_info info = { + .cmd_type = COMMIT_VPE, + }; + int ret; + + WARN_ON(preemptible()); + + ret = its_send_vpe_cmd(vpe, &info); + if (!ret) + vpe->ready = true; + + return ret; +} + + int its_invall_vpe(struct its_vpe *vpe) { struct its_cmd_info info = { diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index a8d8fdcd3723..3d74f1060bd1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -402,6 +402,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, struct kvm_kernel_irq_routing_entry *irq_entry); int vgic_v4_load(struct kvm_vcpu *vcpu); +void vgic_v4_commit(struct kvm_vcpu *vcpu); int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db); #endif /* __KVM_ARM_VGIC_H */ diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 6976b8331b60..943c3411ca10 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -39,6 +39,8 @@ struct its_vpe { irq_hw_number_t vpe_db_lpi; /* VPE resident */ bool resident; + /* VPT parse complete */ + bool ready; union { /* GICv4.0 implementations */ struct { @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { PROP_UPDATE_AND_INV_VLPI, SCHEDULE_VPE, DESCHEDULE_VPE, + COMMIT_VPE, INVALL_VPE, PROP_UPDATE_VSGI, }; @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); +int its_commit_vpe(struct its_vpe *vpe); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); -- cgit v1.2.3 From 4f1df628d4ec22b04f67e068e6d02538d3dd557b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 26 Nov 2020 17:27:13 +0000 Subject: KVM: arm64: Advertise ID_AA64PFR0_EL1.CSV3=1 if the CPUs are Meltdown-safe Cores that predate the introduction of ID_AA64PFR0_EL1.CSV3 to the ARMv8 architecture have this field set to 0, even of some of them are not affected 
by the vulnerability. The kernel maintains a list of unaffected cores (A53, A55 and a few others) so that it doesn't impose an expensive mitigation uncessarily. As we do for CSV2, let's expose the CSV3 property to guests that run on HW that is effectively not vulnerable. This can be reset to zero by writing to the ID register from userspace, ensuring that VMs can be migrated despite the new property being set. Reported-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 6 ++++-- arch/arm64/kvm/sys_regs.c | 16 +++++++++++++--- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0cd9f0f75c13..147347028a20 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -120,6 +120,7 @@ struct kvm_arch { unsigned int pmuver; u8 pfr0_csv2; + u8 pfr0_csv3; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c0ffb019ca8b..dc3fa6a0f9e5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -102,7 +102,7 @@ static int kvm_arm_default_max_vcpus(void) return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; } -static void set_default_csv2(struct kvm *kvm) +static void set_default_spectre(struct kvm *kvm) { /* * The default is to expose CSV2 == 1 if the HW isn't affected. @@ -114,6 +114,8 @@ static void set_default_csv2(struct kvm *kvm) */ if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) kvm->arch.pfr0_csv2 = 1; + if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) + kvm->arch.pfr0_csv3 = 1; } /** @@ -141,7 +143,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); - set_default_csv2(kvm); + set_default_spectre(kvm); return ret; out_free_stage2_pgd: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c1fac9836af1..6f653c0f60ec 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1122,6 +1122,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT); val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT); val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT); + val &= ~(0xfUL << ID_AA64PFR0_CSV3_SHIFT); + val |= ((u64)vcpu->kvm->arch.pfr0_csv3 << ID_AA64PFR0_CSV3_SHIFT); } else if (id == SYS_ID_AA64PFR1_EL1) { val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT); } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { @@ -1209,9 +1211,9 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, void __user *uaddr) { const u64 id = sys_reg_to_index(rd); + u8 csv2, csv3; int err; u64 val; - u8 csv2; err = reg_from_user(&val, uaddr, id); if (err) @@ -1227,13 +1229,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED)) return -EINVAL; - /* We can only differ with CSV2, and anything else is an error */ + /* Same thing for CSV3 */ + csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT); + if (csv3 > 1 || + (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED)) + return -EINVAL; + + /* We can only differ with CSV[23], and anything else is an error */ val ^= read_id_reg(vcpu, rd, false); - val &= ~(0xFUL << ID_AA64PFR0_CSV2_SHIFT); + val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) | + (0xFUL << ID_AA64PFR0_CSV3_SHIFT)); if (val) return 
-EINVAL; vcpu->kvm->arch.pfr0_csv2 = csv2; + vcpu->kvm->arch.pfr0_csv3 = csv3 ; return 0; } -- cgit v1.2.3 From dee734a7de9169018b8108208587d3ff1fdfff18 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 30 Nov 2020 09:39:59 -0500 Subject: KVM: x86: adjust SEV for commit 7e8e6eed75e Since the ASID is now stored in svm->asid, pre_sev_run should also place it there and not directly in the VMCB control area. Reported-by: Ashish Kalra Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c0b14106258a..3418bb18dae7 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1187,7 +1187,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ - svm->vmcb->control.asid = asid; + svm->asid = asid; /* * Flush guest TLB: -- cgit v1.2.3 From 652d0b701d136ede6bc8a977b3abbe2d420226b9 Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Mon, 17 Aug 2020 19:07:28 +0800 Subject: KVM: arm64: Use kvm_write_guest_lock when init stolen time There is a lock version kvm_write_guest. Use it to simplify code. Signed-off-by: Keqian Zhu Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20200817110728.12196-3-zhukeqian1@huawei.com --- arch/arm64/kvm/pvtime.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 920ac43077ad..78a09f7a6637 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -53,7 +53,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) struct pvclock_vcpu_stolen_time init_values = {}; struct kvm *kvm = vcpu->kvm; u64 base = vcpu->arch.steal.base; - int idx; if (base == GPA_INVALID) return base; @@ -63,10 +62,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) * the feature enabled. */ vcpu->arch.steal.last_steal = current->sched_info.run_delay; - - idx = srcu_read_lock(&kvm->srcu); - kvm_write_guest(kvm, base, &init_values, sizeof(init_values)); - srcu_read_unlock(&kvm->srcu, idx); + kvm_write_guest_lock(kvm, base, &init_values, sizeof(init_values)); return base; } -- cgit v1.2.3 From d8b369c4e31430a4746571bcae45a98933827232 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:40:57 +0000 Subject: KVM: arm64: Add kvm-arm.mode early kernel parameter Add an early parameter that allows users to select the mode of operation for KVM/arm64. For now, the only supported value is "protected". By passing this flag users opt into the hypervisor placing additional restrictions on the host kernel. These allow the hypervisor to spawn guests whose state is kept private from the host. Restrictions will include stage-2 address translation to prevent host from accessing guest memory, filtering its SMC calls, etc. Without this parameter, the default behaviour remains selecting VHE/nVHE based on hardware support and CONFIG_ARM64_VHE. 
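To make the accepted grammar concrete: booting with kvm-arm.mode=protected on the kernel command line selects the new mode, and anything else keeps the default VHE/nVHE selection. The standalone C sketch below mirrors only that parsing rule; it is not the kernel implementation (which follows in the diff), and -1 merely stands in for -EINVAL.

#include <stdio.h>
#include <string.h>

enum kvm_mode { KVM_MODE_DEFAULT, KVM_MODE_PROTECTED };

/* Accept exactly "protected"; reject a missing or unknown argument. */
static int parse_kvm_arm_mode(const char *arg, enum kvm_mode *mode)
{
    if (!arg)
        return -1;
    if (strcmp(arg, "protected") == 0) {
        *mode = KVM_MODE_PROTECTED;
        return 0;
    }
    return -1;
}

int main(void)
{
    enum kvm_mode mode = KVM_MODE_DEFAULT;
    int ret = parse_kvm_arm_mode("protected", &mode);

    printf("\"protected\": ret=%d mode=%d\n", ret, mode);
    printf("\"bogus\":     ret=%d\n", parse_kvm_arm_mode("bogus", &mode));
    return 0;
}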
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-2-dbrazdil@google.com --- Documentation/admin-guide/kernel-parameters.txt | 10 ++++++++++ arch/arm64/include/asm/kvm_host.h | 9 +++++++++ arch/arm64/kvm/arm.c | 16 ++++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 526d65d8573a..ee9f13776388 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2259,6 +2259,16 @@ for all guests. Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. + kvm-arm.mode= + [KVM,ARM] Select one of KVM/arm64's modes of operation. + + protected: nVHE-based mode with support for guests whose + state is kept private from the host. + Not valid if the kernel is running in EL2. + + Defaults to VHE/nVHE based on hardware support and + the value of CONFIG_ARM64_VHE. + kvm-arm.vgic_v3_group0_trap= [KVM,ARM] Trap guest accesses to GICv3 group-0 system registers diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 781d029b8aa8..820954ad2477 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -50,6 +50,15 @@ #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) +/* + * Mode of operation configurable with kvm-arm.mode early param. + * See Documentation/admin-guide/kernel-parameters.txt for more information. + */ +enum kvm_mode { + KVM_MODE_DEFAULT, + KVM_MODE_PROTECTED, +}; + DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5750ec34960e..5db90680742c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -46,6 +46,8 @@ __asm__(".arch_extension virt"); #endif +static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); @@ -1790,6 +1792,20 @@ void kvm_arch_exit(void) kvm_perf_teardown(); } +static int __init early_kvm_mode_cfg(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "protected") == 0) { + kvm_mode = KVM_MODE_PROTECTED; + return 0; + } + + return -EINVAL; +} +early_param("kvm-arm.mode", early_kvm_mode_cfg); + static int arm_init(void) { int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); -- cgit v1.2.3 From 3eb681fba2bf8b67b65ce92d0ebfd7cbfc263da9 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:40:58 +0000 Subject: KVM: arm64: Add ARM64_KVM_PROTECTED_MODE CPU capability Expose the boolean value whether the system is running with KVM in protected mode (nVHE + kernel param). CPU capability was selected over a global variable to allow use in alternatives. 
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-3-dbrazdil@google.com --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/include/asm/virt.h | 8 ++++++++ arch/arm64/kernel/cpufeature.c | 22 ++++++++++++++++++++++ arch/arm64/kvm/arm.c | 9 ++++++++- 5 files changed, 41 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index a7242ef2a2cd..350c98103d45 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -65,7 +65,8 @@ #define ARM64_MTE 57 #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_LDAPR 59 +#define ARM64_KVM_PROTECTED_MODE 60 -#define ARM64_NCAPS 60 +#define ARM64_NCAPS 61 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 820954ad2477..7119d38e3c56 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -58,6 +58,7 @@ enum kvm_mode { KVM_MODE_DEFAULT, KVM_MODE_PROTECTED, }; +enum kvm_mode kvm_get_mode(void); DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 6069be50baf9..eb81dcc220b6 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -97,6 +97,14 @@ static __always_inline bool has_vhe(void) return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); } +static __always_inline bool is_protected_kvm_enabled(void) +{ + if (is_vhe_hyp_code()) + return false; + else + return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE); +} + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cffcb0198bae..0c029adf72d3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include @@ -1703,6 +1704,21 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_MTE */ +#ifdef CONFIG_KVM +static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) +{ + if (kvm_get_mode() != KVM_MODE_PROTECTED) + return false; + + if (is_kernel_in_hyp_mode()) { + pr_warn("Protected KVM not available with VHE\n"); + return false; + } + + return true; +} +#endif /* CONFIG_KVM */ + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -1794,6 +1810,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL1_SHIFT, .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT, }, + { + .desc = "Protected KVM", + .capability = ARM64_KVM_PROTECTED_MODE, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = is_kvm_protected_mode, + }, #endif { .desc = "Kernel page table isolation (KPTI)", diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5db90680742c..745e1b4bcbe0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1771,7 +1771,9 @@ int kvm_arch_init(void *opaque) if (err) goto out_hyp; - if (in_hyp_mode) + if (is_protected_kvm_enabled()) + kvm_info("Protected nVHE mode initialized successfully\n"); + else if (in_hyp_mode) kvm_info("VHE mode initialized successfully\n"); else kvm_info("Hyp mode initialized successfully\n"); @@ -1806,6 +1808,11 @@ static int __init early_kvm_mode_cfg(char *arg) } early_param("kvm-arm.mode", 
early_kvm_mode_cfg); +enum kvm_mode kvm_get_mode(void) +{ + return kvm_mode; +} + static int arm_init(void) { int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); -- cgit v1.2.3 From c1f45f4eb6fd8704f72d5ed64184121e9fe129a0 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:03 +0000 Subject: arm64: Make cpu_logical_map() take unsigned int CPU index should never be negative. Change the signature of (set_)cpu_logical_map to take an unsigned int. This still works even if the users treat the CPU index as an int, and will allow the hypervisor's implementation to check that the index is valid with a single upper-bound check. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-8-dbrazdil@google.com --- arch/arm64/include/asm/smp.h | 4 ++-- arch/arm64/kernel/setup.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 2e7f529ec5a6..bcb01ca15325 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -46,9 +46,9 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); * Logical CPU mapping. */ extern u64 __cpu_logical_map[NR_CPUS]; -extern u64 cpu_logical_map(int cpu); +extern u64 cpu_logical_map(unsigned int cpu); -static inline void set_cpu_logical_map(int cpu, u64 hwid) +static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid) { __cpu_logical_map[cpu] = hwid; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 133257ffd859..2f2973bc67c7 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -276,7 +276,7 @@ arch_initcall(reserve_memblock_reserved_regions); u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; -u64 cpu_logical_map(int cpu) +u64 cpu_logical_map(unsigned int cpu) { return __cpu_logical_map[cpu]; } -- cgit v1.2.3 From 78869f0f0552d032c7e32724c4abb2715e8f974a Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:04 +0000 Subject: arm64: Extract parts of el2_setup into a macro When a CPU is booted in EL2, the kernel checks for VHE support and initializes the CPU core accordingly. For nVHE it also installs the stub vectors and drops down to EL1. Once KVM gains the ability to boot cores without going through the kernel entry point, it will need to initialize the CPU the same way. Extract the relevant bits of el2_setup into an init_el2_state macro with an argument specifying whether to initialize for VHE or nVHE. The following ifdefs are removed: * CONFIG_ARM_GIC_V3 - always selected on arm64 * CONFIG_COMPAT - hstr_el2 can be set even without 32-bit support No functional change intended. Size of el2_setup increased by 148 bytes due to duplication. 
Signed-off-by: David Brazdil [maz: reworked to fit the new PSTATE initial setup code] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-9-dbrazdil@google.com --- arch/arm64/include/asm/el2_setup.h | 181 +++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/head.S | 138 ++++------------------------ 2 files changed, 199 insertions(+), 120 deletions(-) create mode 100644 arch/arm64/include/asm/el2_setup.h (limited to 'arch') diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h new file mode 100644 index 000000000000..a7f5a1bbc8ac --- /dev/null +++ b/arch/arm64/include/asm/el2_setup.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + */ + +#ifndef __ARM_KVM_INIT_H__ +#define __ARM_KVM_INIT_H__ + +#ifndef __ASSEMBLY__ +#error Assembly-only header +#endif + +#include +#include +#include +#include + +.macro __init_el2_sctlr + mov_q x0, INIT_SCTLR_EL2_MMU_OFF + msr sctlr_el2, x0 + isb +.endm + +/* + * Allow Non-secure EL1 and EL0 to access physical timer and counter. + * This is not necessary for VHE, since the host kernel runs in EL2, + * and EL0 accesses are configured in the later stage of boot process. + * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout + * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined + * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 + * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in + * EL2. + */ +.macro __init_el2_timers mode +.ifeqs "\mode", "nvhe" + mrs x0, cnthctl_el2 + orr x0, x0, #3 // Enable EL1 physical timers + msr cnthctl_el2, x0 +.endif + msr cntvoff_el2, xzr // Clear virtual offset +.endm + +.macro __init_el2_debug mode + mrs x1, id_aa64dfr0_el1 + sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 + cmp x0, #1 + b.lt 1f // Skip if no PMU present + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to +1: + csel x2, xzr, x0, lt // all PMU counters from EL1 + + /* Statistical profiling */ + ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 + cbz x0, 3f // Skip if SPE not present + +.ifeqs "\mode", "nvhe" + mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2, + and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT) + cbnz x0, 2f // then permit sampling of physical + mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ + 1 << SYS_PMSCR_EL2_PA_SHIFT) + msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter +2: + mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + orr x2, x2, x0 // If we don't have VHE, then + // use EL1&0 translation. 
+.else + orr x2, x2, #MDCR_EL2_TPMS // For VHE, use EL2 translation + // and disable access from EL1 +.endif + +3: + msr mdcr_el2, x2 // Configure debug traps +.endm + +/* LORegions */ +.macro __init_el2_lor + mrs x1, id_aa64mmfr1_el1 + ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 + cbz x0, 1f + msr_s SYS_LORC_EL1, xzr +1: +.endm + +/* Stage-2 translation */ +.macro __init_el2_stage2 + msr vttbr_el2, xzr +.endm + +/* GICv3 system register access */ +.macro __init_el2_gicv3 + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 + cbz x0, 1f + + mrs_s x0, SYS_ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr_s SYS_ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, + tbz x0, #0, 1f // and check that it sticks + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults +1: +.endm + +.macro __init_el2_hstr + msr hstr_el2, xzr // Disable CP15 traps to EL2 +.endm + +/* Virtual CPU ID registers */ +.macro __init_el2_nvhe_idregs + mrs x0, midr_el1 + mrs x1, mpidr_el1 + msr vpidr_el2, x0 + msr vmpidr_el2, x1 +.endm + +/* Coprocessor traps */ +.macro __init_el2_nvhe_cptr + mov x0, #0x33ff + msr cptr_el2, x0 // Disable copro. traps to EL2 +.endm + +/* SVE register access */ +.macro __init_el2_nvhe_sve + mrs x1, id_aa64pfr0_el1 + ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 + cbz x1, 1f + + bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps + msr cptr_el2, x0 // Disable copro. traps to EL2 + isb + mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector + msr_s SYS_ZCR_EL2, x1 // length for EL1. +1: +.endm + +.macro __init_el2_nvhe_prepare_eret + mov x0, #INIT_PSTATE_EL1 + msr spsr_el2, x0 +.endm + +/** + * Initialize EL2 registers to sane values. This should be called early on all + * cores that were booted in EL2. + * + * Regs: x0, x1 and x2 are clobbered. + */ +.macro init_el2_state mode +.ifnes "\mode", "vhe" +.ifnes "\mode", "nvhe" +.error "Invalid 'mode' argument" +.endif +.endif + + __init_el2_sctlr + __init_el2_timers \mode + __init_el2_debug \mode + __init_el2_lor + __init_el2_stage2 + __init_el2_gicv3 + __init_el2_hstr + + /* + * When VHE is not in use, early init of EL2 needs to be done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ +.ifeqs "\mode", "nvhe" + __init_el2_nvhe_idregs + __init_el2_nvhe_cptr + __init_el2_nvhe_sve + __init_el2_nvhe_prepare_eret +.endif +.endm + +#endif /* __ARM_KVM_INIT_H__ */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0b145bca1b0e..7eba3a1e84b0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -11,7 +11,6 @@ #include #include -#include #include #include @@ -21,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -508,155 +508,53 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) eret SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) - mov_q x0, INIT_SCTLR_EL2_MMU_OFF - msr sctlr_el2, x0 - #ifdef CONFIG_ARM64_VHE /* - * Check for VHE being present. For the rest of the EL2 setup, - * x2 being non-zero indicates that we do have VHE, and that the - * kernel is intended to run at EL2. + * Check for VHE being present. x2 being non-zero indicates that we + * do have VHE, and that the kernel is intended to run at EL2. 
*/ mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 -#else - mov x2, xzr + cbz x2, init_el2_nvhe #endif - - /* Hyp configuration. */ - mov_q x0, HCR_HOST_NVHE_FLAGS - cbz x2, set_hcr + /* + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ mov_q x0, HCR_HOST_VHE_FLAGS -set_hcr: msr hcr_el2, x0 isb - /* - * Allow Non-secure EL1 and EL0 to access physical timer and counter. - * This is not necessary for VHE, since the host kernel runs in EL2, - * and EL0 accesses are configured in the later stage of boot process. - * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout - * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined - * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 - * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in - * EL2. - */ - cbnz x2, 1f - mrs x0, cnthctl_el2 - orr x0, x0, #3 // Enable EL1 physical timers - msr cnthctl_el2, x0 -1: - msr cntvoff_el2, xzr // Clear virtual offset - -#ifdef CONFIG_ARM_GIC_V3 - /* GICv3 system register access */ - mrs x0, id_aa64pfr0_el1 - ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 - cbz x0, 3f - - mrs_s x0, SYS_ICC_SRE_EL2 - orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 - orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr_s SYS_ICC_SRE_EL2, x0 - isb // Make sure SRE is now set - mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 3f // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults - -3: -#endif - - /* Populate ID registers. */ - mrs x0, midr_el1 - mrs x1, mpidr_el1 - msr vpidr_el2, x0 - msr vmpidr_el2, x1 - -#ifdef CONFIG_COMPAT - msr hstr_el2, xzr // Disable CP15 traps to EL2 -#endif - - /* EL2 debug */ - mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp x0, #1 - b.lt 4f // Skip if no PMU present - mrs x0, pmcr_el0 // Disable debug access traps - ubfx x0, x0, #11, #5 // to EL2 and allow access to -4: - csel x3, xzr, x0, lt // all PMU counters from EL1 - - /* Statistical profiling */ - ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 - cbz x0, 7f // Skip if SPE not present - cbnz x2, 6f // VHE? - mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2, - and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT) - cbnz x4, 5f // then permit sampling of physical - mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ - 1 << SYS_PMSCR_EL2_PA_SHIFT) - msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter -5: - mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) - orr x3, x3, x1 // If we don't have VHE, then - b 7f // use EL1&0 translation. -6: // For VHE, use EL2 translation - orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1 -7: - msr mdcr_el2, x3 // Configure debug traps - - /* LORegions */ - mrs x1, id_aa64mmfr1_el1 - ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 - cbz x0, 1f - msr_s SYS_LORC_EL1, xzr -1: - - /* Stage-2 translation */ - msr vttbr_el2, xzr - - cbz x2, install_el2_stub + init_el2_state vhe isb + mov_q x0, INIT_PSTATE_EL2 msr spsr_el2, x0 msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 eret -SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL) +SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL) /* * When VHE is not in use, early init of EL2 and EL1 needs to be * done here. 
- * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. */ mov_q x0, INIT_SCTLR_EL1_MMU_OFF msr sctlr_el1, x0 - /* Coprocessor traps. */ - mov x0, #0x33ff - msr cptr_el2, x0 // Disable copro. traps to EL2 - - /* SVE register access */ - mrs x1, id_aa64pfr0_el1 - ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 - cbz x1, 7f - - bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps - msr cptr_el2, x0 // Disable copro. traps to EL2 + mov_q x0, HCR_HOST_NVHE_FLAGS + msr hcr_el2, x0 isb - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector - msr_s SYS_ZCR_EL2, x1 // length for EL1. + + init_el2_state nvhe /* Hypervisor stub */ -7: adr_l x0, __hyp_stub_vectors + adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 - isb - mov x0, #INIT_PSTATE_EL1 - msr spsr_el2, x0 + msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 eret -- cgit v1.2.3 From 5be1d6226d35800393579340f35b8b0d7b2a3177 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:05 +0000 Subject: KVM: arm64: Remove vector_ptr param of hyp-init KVM precomputes the hyp VA of __kvm_hyp_host_vector, essentially a constant (minus ASLR), before passing it to __kvm_hyp_init. Now that we have alternatives for converting kimg VA to hyp VA, replace this with computing the constant inside __kvm_hyp_init, thus removing the need for an argument. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-10-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 2 -- arch/arm64/include/asm/kvm_mmu.h | 24 ++++++++++++++++++++++++ arch/arm64/kvm/arm.c | 4 +--- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 9 ++++++--- 4 files changed, 31 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a542c422a036..4698ea1fbb37 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -165,10 +165,8 @@ struct kvm_vcpu; struct kvm_s2_mmu; DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); -DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector); DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) -#define __kvm_hyp_host_vector CHOOSE_NVHE_SYM(__kvm_hyp_host_vector) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 5633c5a27aad..ab6f96df20df 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -94,6 +94,30 @@ alternative_cb_end sub \reg, \reg, \tmp .endm +/* + * Convert a kernel image address to a hyp VA + * reg: kernel address to be converted in place + * tmp: temporary register + * + * The actual code generation takes place in kvm_get_kimage_voffset, and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_update_kimg_phys_offset uses the + * specific registers encoded in the instructions). 
+ */ +.macro kimg_hyp_va reg, tmp +alternative_cb kvm_update_kimg_phys_offset + movz \tmp, #0 + movk \tmp, #0, lsl #16 + movk \tmp, #0, lsl #32 + movk \tmp, #0, lsl #48 +alternative_cb_end + + sub \reg, \reg, \tmp + mov_q \tmp, PAGE_OFFSET + orr \reg, \reg, \tmp + kern_hyp_va \reg +.endm + #else #include diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 745e1b4bcbe0..1f86d7d0ecf2 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1335,7 +1335,6 @@ static void cpu_init_hyp_mode(void) { phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; - unsigned long vector_ptr; unsigned long tpidr_el2; struct arm_smccc_res res; @@ -1353,7 +1352,6 @@ static void cpu_init_hyp_mode(void) pgd_ptr = kvm_mmu_get_httbr(); hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr); - vector_ptr = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_host_vector)); /* * Call initialization code, and switch to the full blown HYP code. @@ -1363,7 +1361,7 @@ static void cpu_init_hyp_mode(void) */ BUG_ON(!system_capabilities_finalized()); arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), - pgd_ptr, tpidr_el2, hyp_stack_ptr, vector_ptr, &res); + pgd_ptr, tpidr_el2, hyp_stack_ptr, &res); WARN_ON(res.a0 != SMCCC_RET_SUCCESS); /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index b11a9d7db677..931a8c38f085 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -50,7 +50,6 @@ __invalid: * x1: HYP pgd * x2: per-CPU offset * x3: HYP stack - * x4: HYP vectors */ __do_hyp_init: /* Check for a stub HVC call */ @@ -134,9 +133,13 @@ alternative_else_nop_endif msr sctlr_el2, x0 isb - /* Set the stack and new vectors */ + /* Set the stack */ mov sp, x3 - msr vbar_el2, x4 + + /* Set the host vector */ + ldr x0, =__kvm_hyp_host_vector + kimg_hyp_va x0, x1 + msr vbar_el2, x0 /* Hello, World! */ mov x0, #SMCCC_RET_SUCCESS -- cgit v1.2.3 From 63fec24351e827021137a15b307bd1e64772b7fe Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:06 +0000 Subject: KVM: arm64: Move hyp-init params to a per-CPU struct Once we start initializing KVM on newly booted cores before the rest of the kernel, parameters to __do_hyp_init will need to be provided by EL2 rather than EL1. At that point it will not be possible to pass its three arguments directly because PSCI_CPU_ON only supports one context argument. Refactor __do_hyp_init to accept its parameters in a struct. This prepares the code for KVM booting cores as well as removes any limits on the number of __do_hyp_init arguments. 
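The core idea is simple: pack every parameter into a struct and hand the callee a single pointer, because the boot interface (PSCI CPU_ON) can only carry one context argument. The plain C sketch below shows that shape; the field names echo the ones this patch introduces, but this is an illustration, not the kernel code.

#include <stdint.h>
#include <stdio.h>

struct nvhe_init_params {
    unsigned long tpidr_el2;
    unsigned long stack_hyp_va;
    uint64_t      pgd_pa;
};

/* Stand-in for the single-argument entry point (__do_hyp_init in the patch). */
static void hyp_init(const struct nvhe_init_params *params)
{
    printf("tpidr=%#lx stack=%#lx pgd=%#llx\n",
           params->tpidr_el2, params->stack_hyp_va,
           (unsigned long long)params->pgd_pa);
}

int main(void)
{
    struct nvhe_init_params p = {
        .tpidr_el2    = 0x1000,
        .stack_hyp_va = 0x2000,
        .pgd_pa       = 0x40000000,
    };

    hyp_init(&p);   /* one pointer instead of several scalar arguments */
    return 0;
}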
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-11-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 6 ++++++ arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kernel/asm-offsets.c | 3 +++ arch/arm64/kvm/arm.c | 23 +++++++++++++---------- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 16 +++++++--------- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 ++ 6 files changed, 32 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 4698ea1fbb37..cb750c525aee 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -150,6 +150,12 @@ extern void *__vhe_undefined_symbol; #endif +struct kvm_nvhe_init_params { + unsigned long tpidr_el2; + unsigned long stack_hyp_va; + phys_addr_t pgd_pa; +}; + /* Translate a kernel address @ptr into its equivalent linear mapping */ #define kvm_ksym_ref(ptr) \ ({ \ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 6b664de5ec1f..cb25c15e3d8d 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -14,6 +14,7 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); DECLARE_PER_CPU(unsigned long, kvm_hyp_vector); +DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); #define read_sysreg_elx(r,nvh,vh) \ ({ \ @@ -98,4 +99,3 @@ void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); #endif #endif /* __ARM64_KVM_HYP_H__ */ - diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 679b19b8a7ff..52771a46f600 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -109,6 +109,9 @@ int main(void) DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); + DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2)); + DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va)); + DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1f86d7d0ecf2..0b823e448917 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -52,6 +52,7 @@ DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); @@ -1333,9 +1334,7 @@ static int kvm_map_vectors(void) static void cpu_init_hyp_mode(void) { - phys_addr_t pgd_ptr; - unsigned long hyp_stack_ptr; - unsigned long tpidr_el2; + struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params); struct arm_smccc_res res; /* Switch from the HYP stub to our own HYP init vector */ @@ -1346,12 +1345,17 @@ static void cpu_init_hyp_mode(void) * kernel's mapping to the linear mapping, and store it in tpidr_el2 * so that we can use adr_l to access per-cpu variables in EL2. 
*/ - tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); + params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - + (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); - pgd_ptr = kvm_mmu_get_httbr(); - hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; - hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr); + params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE); + params->pgd_pa = kvm_mmu_get_httbr(); + + /* + * Flush the init params from the data cache because the struct will + * be read while the MMU is off. + */ + kvm_flush_dcache_to_poc(params, sizeof(*params)); /* * Call initialization code, and switch to the full blown HYP code. @@ -1360,8 +1364,7 @@ static void cpu_init_hyp_mode(void) * cpus_have_const_cap() wrapper. */ BUG_ON(!system_capabilities_finalized()); - arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), - pgd_ptr, tpidr_el2, hyp_stack_ptr, &res); + arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res); WARN_ON(res.a0 != SMCCC_RET_SUCCESS); /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 931a8c38f085..e712e317337c 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -47,9 +47,7 @@ __invalid: /* * x0: SMCCC function ID - * x1: HYP pgd - * x2: per-CPU offset - * x3: HYP stack + * x1: struct kvm_nvhe_init_params PA */ __do_hyp_init: /* Check for a stub HVC call */ @@ -70,10 +68,13 @@ __do_hyp_init: mov x0, #SMCCC_RET_NOT_SUPPORTED eret -1: - /* Set tpidr_el2 for use by HYP to free a register */ - msr tpidr_el2, x2 +1: ldr x0, [x1, #NVHE_INIT_TPIDR_EL2] + msr tpidr_el2, x0 + ldr x0, [x1, #NVHE_INIT_STACK_HYP_VA] + mov sp, x0 + + ldr x1, [x1, #NVHE_INIT_PGD_PA] phys_to_ttbr x0, x1 alternative_if ARM64_HAS_CNP orr x0, x0, #TTBR_CNP_BIT @@ -133,9 +134,6 @@ alternative_else_nop_endif msr sctlr_el2, x0 isb - /* Set the stack */ - mov sp, x3 - /* Set the host vector */ ldr x0, =__kvm_hyp_host_vector kimg_hyp_va x0, x1 diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 82df7fc24760..a4f1cac714d7 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -16,6 +16,8 @@ #define DECLARE_REG(type, name, ctxt, reg) \ type name = (type)cpu_reg(ctxt, (reg)) +DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); + static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); -- cgit v1.2.3 From d3e1086c64528ee0b955326b4c0e947cde3b6923 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:07 +0000 Subject: KVM: arm64: Init MAIR/TCR_EL2 from params struct MAIR_EL2 and TCR_EL2 are currently initialized from their _EL1 values. This will not work once KVM starts intercepting PSCI ON/SUSPEND SMCs and initializing EL2 state before EL1 state. Obtain the EL1 values during KVM init and store them in the init params struct. The struct will stay in memory and can be used when booting new cores. Take the opportunity to move copying the T0SZ value from idmap_t0sz in KVM init rather than in .hyp.idmap.text. This avoids the need for the idmap_t0sz symbol alias. 
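The T0SZ handling above boils down to splicing one 6-bit field (TCR bits [5:0]) from the ID-map configuration into the value that will later be written to TCR_EL2. A small, self-contained C sketch of that bit manipulation, using example values only:

#include <stdint.h>
#include <stdio.h>

#define TCR_T0SZ_SHIFT 0
#define TCR_T0SZ_MASK  (0x3fULL << TCR_T0SZ_SHIFT)

static uint64_t splice_t0sz(uint64_t tcr, uint64_t idmap_t0sz)
{
    tcr &= ~TCR_T0SZ_MASK;                          /* drop the old T0SZ */
    tcr |= (idmap_t0sz & 0x3f) << TCR_T0SZ_SHIFT;   /* insert the ID-map one */
    return tcr;
}

int main(void)
{
    /* e.g. replace a 48-bit range (T0SZ=16) with an extended 52-bit one (T0SZ=12) */
    uint64_t tcr = splice_t0sz(0x10, 12);

    printf("T0SZ now %llu\n", (unsigned long long)(tcr & TCR_T0SZ_MASK));
    return 0;
}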
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-12-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 2 ++ arch/arm64/kernel/asm-offsets.c | 2 ++ arch/arm64/kernel/image-vars.h | 3 --- arch/arm64/kvm/arm.c | 22 ++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/hyp-init.S | 38 ++++++++------------------------------ 5 files changed, 34 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index cb750c525aee..4e2661d1ae2b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -151,6 +151,8 @@ extern void *__vhe_undefined_symbol; #endif struct kvm_nvhe_init_params { + unsigned long mair_el2; + unsigned long tcr_el2; unsigned long tpidr_el2; unsigned long stack_hyp_va; phys_addr_t pgd_pa; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 52771a46f600..5e82488f1b82 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -109,6 +109,8 @@ int main(void) DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); + DEFINE(NVHE_INIT_MAIR_EL2, offsetof(struct kvm_nvhe_init_params, mair_el2)); + DEFINE(NVHE_INIT_TCR_EL2, offsetof(struct kvm_nvhe_init_params, tcr_el2)); DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2)); DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va)); DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa)); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 4b32588918d9..08e69faedf6c 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -77,9 +77,6 @@ KVM_NVHE_ALIAS(panic); /* Vectors installed by hyp-init on reset HVC. */ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */ -KVM_NVHE_ALIAS(idmap_t0sz); - /* Kernel symbol used by icache_is_vpipt(). */ KVM_NVHE_ALIAS(__icache_flags); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 0b823e448917..d9961f0b767e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1336,6 +1336,7 @@ static void cpu_init_hyp_mode(void) { struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params); struct arm_smccc_res res; + unsigned long tcr; /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); @@ -1348,6 +1349,27 @@ static void cpu_init_hyp_mode(void) params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); + params->mair_el2 = read_sysreg(mair_el1); + + /* + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, at EL2, there is only one TTBR register, and we can't switch + * between translation tables *and* update TCR_EL2.T0SZ at the same + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. + * + * So use the same T0SZ value we use for the ID map. 
+ */ + tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1; + tcr &= ~TCR_T0SZ_MASK; + tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; + params->tcr_el2 = tcr; + params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE); params->pgd_pa = kvm_mmu_get_httbr(); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index e712e317337c..712f57289357 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -71,48 +71,26 @@ __do_hyp_init: 1: ldr x0, [x1, #NVHE_INIT_TPIDR_EL2] msr tpidr_el2, x0 + ldr x0, [x1, #NVHE_INIT_MAIR_EL2] + msr mair_el2, x0 + ldr x0, [x1, #NVHE_INIT_STACK_HYP_VA] mov sp, x0 - ldr x1, [x1, #NVHE_INIT_PGD_PA] - phys_to_ttbr x0, x1 + ldr x0, [x1, #NVHE_INIT_PGD_PA] + phys_to_ttbr x2, x0 alternative_if ARM64_HAS_CNP - orr x0, x0, #TTBR_CNP_BIT + orr x2, x2, #TTBR_CNP_BIT alternative_else_nop_endif - msr ttbr0_el2, x0 - - mrs x0, tcr_el1 - mov_q x1, TCR_EL2_MASK - and x0, x0, x1 - mov x1, #TCR_EL2_RES1 - orr x0, x0, x1 - - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. - */ - ldr_l x1, idmap_t0sz - bfi x0, x1, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + msr ttbr0_el2, x2 /* * Set the PS bits in TCR_EL2. */ + ldr x0, [x1, #NVHE_INIT_TCR_EL2] tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 - msr tcr_el2, x0 - mrs x0, mair_el1 - msr mair_el2, x0 isb /* Invalidate the stale TLBs from Bootloader */ -- cgit v1.2.3 From 2d7bf218ca739554bf7277ab0dbfa5399d01f7c6 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:08 +0000 Subject: KVM: arm64: Add .hyp.data..ro_after_init ELF section Add rules for renaming the .data..ro_after_init ELF section in KVM nVHE object files to .hyp.data..ro_after_init, linking it into the kernel and mapping it in hyp at runtime. The section is RW to the host, then mapped RO in hyp. The expectation is that the host populates the variables in the section and they are never changed by hyp afterwards. 
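The mechanism behind such a section is an ordinary section attribute plus linker-script placement and mapping rules. The userspace C sketch below shows only the attribute part, with an example section name, and is not the kernel build setup.

#include <stdio.h>

/* Example section name; the kernel patch uses .hyp.data..ro_after_init. */
static unsigned long boot_param __attribute__((section(".data.ro_after_init")));

int main(void)
{
    boot_param = 42;    /* the "host" writes it once during init */
    printf("boot_param = %lu\n", boot_param);
    /* in the kernel, hyp would map this section read-only from here on */
    return 0;
}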
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-13-dbrazdil@google.com --- arch/arm64/include/asm/sections.h | 1 + arch/arm64/kernel/vmlinux.lds.S | 10 ++++++++++ arch/arm64/kvm/arm.c | 8 ++++++++ arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 1 + 4 files changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 3994169985ef..8ff579361731 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[]; extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; +extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[]; extern char __idmap_text_start[], __idmap_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __inittext_begin[], __inittext_end[]; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index d6cdcf4aa6a5..43af13968dfd 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -30,6 +30,13 @@ jiffies = jiffies_64; *(__kvm_ex_table) \ __stop___kvm_ex_table = .; +#define HYPERVISOR_DATA_SECTIONS \ + HYP_SECTION_NAME(.data..ro_after_init) : { \ + __hyp_data_ro_after_init_start = .; \ + *(HYP_SECTION_NAME(.data..ro_after_init)) \ + __hyp_data_ro_after_init_end = .; \ + } + #define HYPERVISOR_PERCPU_SECTION \ . = ALIGN(PAGE_SIZE); \ HYP_SECTION_NAME(.data..percpu) : { \ @@ -37,6 +44,7 @@ jiffies = jiffies_64; } #else /* CONFIG_KVM */ #define HYPERVISOR_EXTABLE +#define HYPERVISOR_DATA_SECTIONS #define HYPERVISOR_PERCPU_SECTION #endif @@ -234,6 +242,8 @@ SECTIONS _sdata = .; RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) + HYPERVISOR_DATA_SECTIONS + /* * Data written with the MMU off but read with the MMU on requires * cache lines to be invalidated, discarding up to a Cache Writeback diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d9961f0b767e..ef519e482616 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1627,6 +1627,14 @@ static int init_hyp_mode(void) goto out_err; } + err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start), + kvm_ksym_ref(__hyp_data_ro_after_init_end), + PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map .hyp.data..ro_after_init section\n"); + goto out_err; + } + err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); if (err) { diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index bb2d986ff696..5d76ff2ba63e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -16,4 +16,5 @@ SECTIONS { HYP_SECTION_NAME(.data..percpu) : { PERCPU_INPUT(L1_CACHE_BYTES) } + HYP_SECTION(.data..ro_after_init) } -- cgit v1.2.3 From 687413d34d4aa72103de3e545f431f480dd21d7f Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:09 +0000 Subject: KVM: arm64: Support per_cpu_ptr in nVHE hyp code When compiling with __KVM_NVHE_HYPERVISOR__, redefine per_cpu_offset() to __hyp_per_cpu_offset() which looks up the base of the nVHE per-CPU region of the given cpu and computes its offset from the .hyp.data..percpu section. This enables use of per_cpu_ptr() helpers in nVHE hyp code. Until now only this_cpu_ptr() was supported by setting TPIDR_EL2. 
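The arithmetic being enabled here is: pointer to CPU n's copy = base of CPU n's per-CPU region + (address of the variable - start of the per-CPU template). A self-contained C sketch of that lookup, with invented names:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

/* The "template" the linker would emit once (think .data..percpu). */
struct percpu_vars {
    long counter;
    long other;
};

static struct percpu_vars percpu_template;
static struct percpu_vars percpu_copies[NR_CPUS];   /* one copy per CPU */

static void *my_per_cpu_ptr(void *var, unsigned int cpu)
{
    uintptr_t offset = (uintptr_t)var - (uintptr_t)&percpu_template;

    return (char *)&percpu_copies[cpu] + offset;
}

int main(void)
{
    for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++)
        *(long *)my_per_cpu_ptr(&percpu_template.counter, cpu) = cpu * 10;

    printf("cpu2 counter = %ld\n", percpu_copies[2].counter);
    return 0;
}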
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-14-dbrazdil@google.com --- arch/arm64/include/asm/percpu.h | 6 ++++++ arch/arm64/kernel/image-vars.h | 3 +++ arch/arm64/kvm/hyp/nvhe/Makefile | 3 ++- arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 24 ++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/hyp-smp.c (limited to 'arch') diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 1599e17379d8..8f1661603b78 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -239,6 +239,12 @@ PERCPU_RET_OP(add, add, ldadd) #define this_cpu_cmpxchg_8(pcp, o, n) \ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) +#ifdef __KVM_NVHE_HYPERVISOR__ +extern unsigned long __hyp_per_cpu_offset(unsigned int cpu); +#define __per_cpu_offset +#define per_cpu_offset(cpu) __hyp_per_cpu_offset((cpu)) +#endif + #include /* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 08e69faedf6c..39289d75118d 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -99,6 +99,9 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities); KVM_NVHE_ALIAS(__start___kvm_ex_table); KVM_NVHE_ALIAS(__stop___kvm_ex_table); +/* Array containing bases of nVHE per-CPU memory regions. */ +KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index ddde15fe85f2..2d842e009a40 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,7 +6,8 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ + hyp-main.o hyp-smp.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c new file mode 100644 index 000000000000..7b0363b4857f --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - Google LLC + * Author: David Brazdil + */ + +#include +#include +#include + +unsigned long __hyp_per_cpu_offset(unsigned int cpu) +{ + unsigned long *cpu_base_array; + unsigned long this_cpu_base; + unsigned long elf_base; + + if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base)) + hyp_panic(); + + cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base); + this_cpu_base = kern_hyp_va(cpu_base_array[cpu]); + elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start); + return this_cpu_base - elf_base; +} -- cgit v1.2.3 From 94f5e8a4642aedb19ca73f534372d7ed65e1c84e Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:10 +0000 Subject: KVM: arm64: Create nVHE copy of cpu_logical_map When KVM starts validating host's PSCI requests, it will need to map MPIDR back to the CPU ID. To this end, copy cpu_logical_map into nVHE hyp memory when KVM is initialized. Only copy the information for CPUs that are online at the point of KVM initialization so that KVM rejects CPUs whose features were not checked against the finalized capabilities. 
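The reverse direction mentioned above (MPIDR back to logical CPU ID) is a bounded search over the copied table, with unknown values rejected so that CPUs left out of the copy cannot boot. A hedged, standalone C sketch of such a lookup follows; it is not the kernel code, which arrives in a later patch.

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS      8
#define INVALID_HWID (~0ULL)

/* Only the CPUs online at init were copied; the rest stay invalid. */
static uint64_t cpu_logical_map[NR_CPUS] = {
    0x0000, 0x0001, 0x0100, 0x0101,
    INVALID_HWID, INVALID_HWID, INVALID_HWID, INVALID_HWID,
};

static int mpidr_to_cpu(uint64_t mpidr)
{
    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        if (cpu_logical_map[cpu] == mpidr)
            return cpu;
    return -1;    /* unknown, or offline at init: reject */
}

int main(void)
{
    printf("MPIDR 0x101 -> CPU %d\n", mpidr_to_cpu(0x101));
    printf("MPIDR 0x200 -> CPU %d\n", mpidr_to_cpu(0x200));
    return 0;
}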
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-15-dbrazdil@google.com --- arch/arm64/kvm/arm.c | 19 +++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 16 ++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ef519e482616..bcd6e5d3c89a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -64,6 +64,8 @@ static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); +extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -1506,6 +1508,20 @@ static inline void hyp_cpu_pm_exit(void) } #endif +static void init_cpu_logical_map(void) +{ + unsigned int cpu; + + /* + * Copy the MPIDR <-> logical CPU ID mapping to hyp. + * Only copy the set of online CPUs whose features have been chacked + * against the finalized system capabilities. The hypervisor will not + * allow any other CPUs from the `possible` set to boot. + */ + for_each_online_cpu(cpu) + kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu); +} + static int init_common_resources(void) { return kvm_set_ipa_limit(); @@ -1684,6 +1700,9 @@ static int init_hyp_mode(void) } } + if (is_protected_kvm_enabled()) + init_cpu_logical_map(); + return 0; out_err: diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 7b0363b4857f..cbab0c6246e2 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -8,6 +8,22 @@ #include #include +/* + * nVHE copy of data structures tracking available CPU cores. + * Only entries for CPUs that were online at KVM init are populated. + * Other CPUs should not be allowed to boot because their features were + * not checked against the finalized system capabilities. + */ +u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; + +u64 cpu_logical_map(unsigned int cpu) +{ + if (cpu >= ARRAY_SIZE(__cpu_logical_map)) + hyp_panic(); + + return __cpu_logical_map[cpu]; +} + unsigned long __hyp_per_cpu_offset(unsigned int cpu) { unsigned long *cpu_base_array; -- cgit v1.2.3 From a805e1fb30990e29b3174c39bf39015065e5dc19 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:11 +0000 Subject: KVM: arm64: Add SMC handler in nVHE EL2 Add handler of host SMCs in KVM nVHE trap handler. Forward all SMCs to EL3 and propagate the result back to EL1. This is done in preparation for validating host SMCs in KVM protected mode. The implementation assumes that firmware uses SMCCC v1.2 or older. That means x0-x17 can be used both for arguments and results, other GPRs are preserved. 
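A dedicated assembly forwarder is needed because the generic arm_smccc_1_1_smc() helper only passes x0-x7 in and returns x0-x3, while SMCCC v1.2 allows x0-x17 for both arguments and results; the full set is therefore shuttled through the host's kvm_cpu_context. From the host's point of view the detour is invisible. For example (host-side sketch, assuming SMC trapping has been enabled by a later patch in this series):

	struct arm_smccc_res res;

	/* Traps to EL2, is forwarded to EL3, and returns as if run directly. */
	arm_smccc_smc(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
	/* res.a0 now holds whatever EL3 returned. */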
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-16-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/host.S | 38 ++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 35 ++++++++++++++++++++++++++++++++--- 2 files changed, 70 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index fe2740b224cf..2b56f0bdf874 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -180,3 +180,41 @@ SYM_CODE_START(__kvm_hyp_host_vector) invalid_host_el1_vect // FIQ 32-bit EL1 invalid_host_el1_vect // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_host_vector) + +/* + * Forward SMC with arguments in struct kvm_cpu_context, and + * store the result into the same struct. Assumes SMCCC 1.2 or older. + * + * x0: struct kvm_cpu_context* + */ +SYM_CODE_START(__kvm_hyp_host_forward_smc) + /* + * Use x18 to keep the pointer to the host context because + * x18 is callee-saved in SMCCC but not in AAPCS64. + */ + mov x18, x0 + + ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] + ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] + + smc #0 + + stp x0, x1, [x18, #CPU_XREG_OFFSET(0)] + stp x2, x3, [x18, #CPU_XREG_OFFSET(2)] + stp x4, x5, [x18, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x18, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x18, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x18, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x18, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x18, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x18, #CPU_XREG_OFFSET(16)] + + ret +SYM_CODE_END(__kvm_hyp_host_forward_smc) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a4f1cac714d7..f25680ede080 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -18,6 +18,8 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); +void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); + static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); @@ -152,12 +154,39 @@ inval: cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED; } +static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) +{ + __kvm_hyp_host_forward_smc(host_ctxt); +} + +static void skip_host_instruction(void) +{ + write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); +} + +static void handle_host_smc(struct kvm_cpu_context *host_ctxt) +{ + default_host_smc_handler(host_ctxt); + + /* + * Unlike HVC, the return address of an SMC is the instruction's PC. + * Move the return address past the instruction. 
+ */ + skip_host_instruction(); +} + void handle_trap(struct kvm_cpu_context *host_ctxt) { u64 esr = read_sysreg_el2(SYS_ESR); - if (unlikely(ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64)) + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_HVC64: + handle_host_hcall(host_ctxt); + break; + case ESR_ELx_EC_SMC64: + handle_host_smc(host_ctxt); + break; + default: hyp_panic(); - - handle_host_hcall(host_ctxt); + } } -- cgit v1.2.3 From eeeee7193df015074c8302381356e8e617a5e2b0 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:12 +0000 Subject: KVM: arm64: Bootstrap PSCI SMC handler in nVHE EL2 Add a handler of PSCI SMCs in nVHE hyp code. The handler is initialized with the version used by the host's PSCI driver and the function IDs it was configured with. If the SMC function ID matches one of the configured PSCI calls (for v0.1) or falls into the PSCI function ID range (for v0.2+), the SMC is handled by the PSCI handler. For now, all SMCs return PSCI_RET_NOT_SUPPORTED. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-17-dbrazdil@google.com --- arch/arm64/include/asm/kvm_hyp.h | 2 + arch/arm64/kvm/arm.c | 25 ++++++- arch/arm64/kvm/hyp/include/nvhe/trap_handler.h | 18 +++++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 10 ++- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 100 +++++++++++++++++++++++++ 6 files changed, 151 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/nvhe/trap_handler.h create mode 100644 arch/arm64/kvm/hyp/nvhe/psci-relay.c (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index cb25c15e3d8d..c0450828378b 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -93,6 +93,8 @@ void deactivate_traps_vhe_put(void); u64 __guest_enter(struct kvm_vcpu *vcpu); +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); + void __noreturn hyp_panic(void); #ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bcd6e5d3c89a..aa40bef09dfc 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -65,6 +66,8 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; +extern u32 kvm_nvhe_sym(kvm_host_psci_version); +extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { @@ -1522,6 +1525,22 @@ static void init_cpu_logical_map(void) kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu); } +static bool init_psci_relay(void) +{ + /* + * If PSCI has not been initialized, protected KVM cannot install + * itself on newly booted CPUs. 
+ */ + if (!psci_ops.get_version) { + kvm_err("Cannot initialize protected mode without PSCI\n"); + return false; + } + + kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version(); + kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids(); + return true; +} + static int init_common_resources(void) { return kvm_set_ipa_limit(); @@ -1700,9 +1719,13 @@ static int init_hyp_mode(void) } } - if (is_protected_kvm_enabled()) + if (is_protected_kvm_enabled()) { init_cpu_logical_map(); + if (!init_psci_relay()) + goto out_err; + } + return 0; out_err: diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h new file mode 100644 index 000000000000..1e6d995968a1 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trap handler helpers. + * + * Copyright (C) 2020 - Google LLC + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_NVHE_TRAP_HANDLER_H__ +#define __ARM64_KVM_NVHE_TRAP_HANDLER_H__ + +#include + +#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] +#define DECLARE_REG(type, name, ctxt, reg) \ + type name = (type)cpu_reg(ctxt, (reg)) + +#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 2d842e009a40..bf62c8e42ab2 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -7,7 +7,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ - hyp-main.o hyp-smp.o + hyp-main.o hyp-smp.o psci-relay.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index f25680ede080..bde658d51404 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -12,9 +12,7 @@ #include #include -#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] -#define DECLARE_REG(type, name, ctxt, reg) \ - type name = (type)cpu_reg(ctxt, (reg)) +#include DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); @@ -166,7 +164,11 @@ static void skip_host_instruction(void) static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { - default_host_smc_handler(host_ctxt); + bool handled; + + handled = kvm_host_psci_handler(host_ctxt); + if (!handled) + default_host_smc_handler(host_ctxt); /* * Unlike HVC, the return address of an SMC is the instruction's PC. diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c new file mode 100644 index 000000000000..61375d4571c2 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - Google LLC + * Author: David Brazdil + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Config options set by the host. 
*/ +__ro_after_init u32 kvm_host_psci_version; +__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; + +static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, func_id, host_ctxt, 0); + + return func_id; +} + +static bool is_psci_0_1_call(u64 func_id) +{ + return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) || + (func_id == kvm_host_psci_0_1_function_ids.cpu_on) || + (func_id == kvm_host_psci_0_1_function_ids.cpu_off) || + (func_id == kvm_host_psci_0_1_function_ids.migrate); +} + +static bool is_psci_0_2_call(u64 func_id) +{ + /* SMCCC reserves IDs 0x00-1F with the given 32/64-bit base for PSCI. */ + return (PSCI_0_2_FN(0) <= func_id && func_id <= PSCI_0_2_FN(31)) || + (PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31)); +} + +static bool is_psci_call(u64 func_id) +{ + switch (kvm_host_psci_version) { + case PSCI_VERSION(0, 1): + return is_psci_0_1_call(func_id); + default: + return is_psci_0_2_call(func_id); + } +} + +static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + return PSCI_RET_NOT_SUPPORTED; +} + +static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + switch (func_id) { + default: + return PSCI_RET_NOT_SUPPORTED; + } +} + +static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + switch (func_id) { + default: + return psci_0_2_handler(func_id, host_ctxt); + } +} + +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) +{ + u64 func_id = get_psci_func_id(host_ctxt); + unsigned long ret; + + if (!is_psci_call(func_id)) + return false; + + switch (kvm_host_psci_version) { + case PSCI_VERSION(0, 1): + ret = psci_0_1_handler(func_id, host_ctxt); + break; + case PSCI_VERSION(0, 2): + ret = psci_0_2_handler(func_id, host_ctxt); + break; + default: + ret = psci_1_0_handler(func_id, host_ctxt); + break; + } + + cpu_reg(host_ctxt, 0) = ret; + cpu_reg(host_ctxt, 1) = 0; + cpu_reg(host_ctxt, 2) = 0; + cpu_reg(host_ctxt, 3) = 0; + return true; +} -- cgit v1.2.3 From d084ecc5c72811e7231838f7c128bfcc7f8d2889 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:13 +0000 Subject: KVM: arm64: Add offset for hyp VA <-> PA conversion Add a host-initialized constant to KVM nVHE hyp code for converting between EL2 linear map virtual addresses and physical addresses. Also add `__hyp_pa` macro that performs the conversion. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-18-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 +++ arch/arm64/kvm/va_layout.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 61375d4571c2..70b42f433449 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -18,6 +18,9 @@ /* Config options set by the host. 
*/ __ro_after_init u32 kvm_host_psci_version; __ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; +__ro_after_init s64 hyp_physvirt_offset; + +#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset) static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) { diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index d61117805de0..16aa8615c279 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -23,6 +23,30 @@ static u8 tag_lsb; static u64 tag_val; static u64 va_mask; +/* + * Compute HYP VA by using the same computation as kern_hyp_va(). + */ +static u64 __early_kern_hyp_va(u64 addr) +{ + addr &= va_mask; + addr |= tag_val << tag_lsb; + return addr; +} + +/* + * Store a hyp VA <-> PA offset into a hyp-owned variable. + */ +static void init_hyp_physvirt_offset(void) +{ + extern s64 kvm_nvhe_sym(hyp_physvirt_offset); + u64 kern_va, hyp_va; + + /* Compute the offset from the hyp VA and PA of a random symbol. */ + kern_va = (u64)kvm_ksym_ref(__hyp_text_start); + hyp_va = __early_kern_hyp_va(kern_va); + CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va; +} + /* * We want to generate a hyp VA with the following format (with V == * vabits_actual): @@ -54,6 +78,8 @@ __init void kvm_compute_layout(void) tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } tag_val >>= tag_lsb; + + init_hyp_physvirt_offset(); } static u32 compute_instruction(int n, u32 rd, u32 rn) @@ -151,9 +177,7 @@ void kvm_patch_vector_branch(struct alt_instr *alt, /* * Compute HYP VA by using the same computation as kern_hyp_va() */ - addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector); - addr &= va_mask; - addr |= tag_val << tag_lsb; + addr = __early_kern_hyp_va((u64)kvm_ksym_ref(__kvm_hyp_vector)); /* Use PC[10:7] to branch to the same vector in KVM */ addr |= ((u64)origptr & GENMASK_ULL(10, 7)); -- cgit v1.2.3 From 1fd12b7e4d0082a9f373e26ab11fc94bcc307d33 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:14 +0000 Subject: KVM: arm64: Forward safe PSCI SMCs coming from host Forward the following PSCI SMCs issued by host to EL3 as they do not require the hypervisor's intervention. This assumes that EL3 correctly implements the PSCI specification. Only function IDs implemented in Linux are included. Where both 32-bit and 64-bit variants exist, it is assumed that the host will always use the 64-bit variant. 
* SMCs that only return information about the system * PSCI_VERSION - PSCI version implemented by EL3 * PSCI_FEATURES - optional features supported by EL3 * AFFINITY_INFO - power state of core/cluster * MIGRATE_INFO_TYPE - whether Trusted OS can be migrated * MIGRATE_INFO_UP_CPU - resident core of Trusted OS * operations which do not affect the hypervisor * MIGRATE - migrate Trusted OS to a different core * SET_SUSPEND_MODE - toggle OS-initiated mode * system shutdown/reset * SYSTEM_OFF * SYSTEM_RESET * SYSTEM_RESET2 Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-19-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 42 +++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 70b42f433449..5ad56a875ffa 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -54,14 +54,50 @@ static bool is_psci_call(u64 func_id) } } +static unsigned long psci_call(unsigned long fn, unsigned long arg0, + unsigned long arg1, unsigned long arg2) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(fn, arg0, arg1, arg2, &res); + return res.a0; +} + +static unsigned long psci_forward(struct kvm_cpu_context *host_ctxt) +{ + return psci_call(cpu_reg(host_ctxt, 0), cpu_reg(host_ctxt, 1), + cpu_reg(host_ctxt, 2), cpu_reg(host_ctxt, 3)); +} + +static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *host_ctxt) +{ + psci_forward(host_ctxt); + hyp_panic(); /* unreachable */ +} + static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { - return PSCI_RET_NOT_SUPPORTED; + if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) || + (func_id == kvm_host_psci_0_1_function_ids.migrate)) + return psci_forward(host_ctxt); + else + return PSCI_RET_NOT_SUPPORTED; } static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { switch (func_id) { + case PSCI_0_2_FN_PSCI_VERSION: + case PSCI_0_2_FN_CPU_OFF: + case PSCI_0_2_FN64_AFFINITY_INFO: + case PSCI_0_2_FN64_MIGRATE: + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + return psci_forward(host_ctxt); + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + psci_forward_noreturn(host_ctxt); + unreachable(); default: return PSCI_RET_NOT_SUPPORTED; } @@ -70,6 +106,10 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { switch (func_id) { + case PSCI_1_0_FN_PSCI_FEATURES: + case PSCI_1_0_FN_SET_SUSPEND_MODE: + case PSCI_1_1_FN64_SYSTEM_RESET2: + return psci_forward(host_ctxt); default: return psci_0_2_handler(func_id, host_ctxt); } -- cgit v1.2.3 From f74e1e2128b7681f0d9c2a66dc4480e7d7196b49 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:15 +0000 Subject: KVM: arm64: Extract __do_hyp_init into a helper function In preparation for adding a CPU entry point in nVHE hyp code, extract most of __do_hyp_init hypervisor initialization code into a common helper function. This will be invoked by the entry point to install KVM on the newly booted CPU. 
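The helper consumes the per-CPU struct kvm_nvhe_init_params passed in x0. Judging by the NVHE_INIT_* offsets used in the diff below, it carries roughly the following state (a sketch; field order and exact types are not guaranteed):

	struct kvm_nvhe_init_params {
		unsigned long mair_el2;		/* memory attributes */
		unsigned long tcr_el2;		/* PS bits recomputed on use */
		unsigned long tpidr_el2;	/* per-CPU base for hyp */
		unsigned long stack_hyp_va;	/* initial SP */
		phys_addr_t pgd_pa;		/* hyp stage-1 page-table root */
	};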
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-20-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 47 ++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 712f57289357..b0856b006bc0 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -68,17 +68,36 @@ __do_hyp_init: mov x0, #SMCCC_RET_NOT_SUPPORTED eret -1: ldr x0, [x1, #NVHE_INIT_TPIDR_EL2] - msr tpidr_el2, x0 +1: mov x0, x1 + mov x4, lr + bl ___kvm_hyp_init + mov lr, x4 - ldr x0, [x1, #NVHE_INIT_MAIR_EL2] - msr mair_el2, x0 + /* Hello, World! */ + mov x0, #SMCCC_RET_SUCCESS + eret +SYM_CODE_END(__kvm_hyp_init) + +/* + * Initialize the hypervisor in EL2. + * + * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers + * and leave x4 for the caller. + * + * x0: struct kvm_nvhe_init_params PA + */ +SYM_CODE_START_LOCAL(___kvm_hyp_init) + ldr x1, [x0, #NVHE_INIT_TPIDR_EL2] + msr tpidr_el2, x1 + + ldr x1, [x0, #NVHE_INIT_STACK_HYP_VA] + mov sp, x1 - ldr x0, [x1, #NVHE_INIT_STACK_HYP_VA] - mov sp, x0 + ldr x1, [x0, #NVHE_INIT_MAIR_EL2] + msr mair_el2, x1 - ldr x0, [x1, #NVHE_INIT_PGD_PA] - phys_to_ttbr x2, x0 + ldr x1, [x0, #NVHE_INIT_PGD_PA] + phys_to_ttbr x2, x1 alternative_if ARM64_HAS_CNP orr x2, x2, #TTBR_CNP_BIT alternative_else_nop_endif @@ -87,9 +106,9 @@ alternative_else_nop_endif /* * Set the PS bits in TCR_EL2. */ - ldr x0, [x1, #NVHE_INIT_TCR_EL2] - tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 - msr tcr_el2, x0 + ldr x1, [x0, #NVHE_INIT_TCR_EL2] + tcr_compute_pa_size x1, #TCR_EL2_PS_SHIFT, x2, x3 + msr tcr_el2, x1 isb @@ -117,10 +136,8 @@ alternative_else_nop_endif kimg_hyp_va x0, x1 msr vbar_el2, x0 - /* Hello, World! */ - mov x0, #SMCCC_RET_SUCCESS - eret -SYM_CODE_END(__kvm_hyp_init) + ret +SYM_CODE_END(___kvm_hyp_init) SYM_CODE_START(__kvm_handle_stub_hvc) cmp x0, #HVC_SOFT_RESTART -- cgit v1.2.3 From 04e05f057a04275cb68c8053b29c5642ae0bad4f Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:16 +0000 Subject: KVM: arm64: Add function to enter host from KVM nVHE hyp code All nVHE hyp code is currently executed as handlers of host's HVCs. This will change as nVHE starts intercepting host's PSCI CPU_ON SMCs. The newly booted CPU will need to initialize EL2 state and then enter the host. Add __host_enter function that branches into the existing host state-restoring code after the trap handler would have returned. 
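At the C level the expected use is for a newly booted CPU's handler to populate the host context and then drop into the host, never returning. A sketch (enter_host() is a hypothetical name; struct psci_boot_args and the body mirror kvm_host_psci_cpu_entry() from the later CPU_ON patch in this series):

	static void __noreturn enter_host(struct kvm_cpu_context *host_ctxt,
					  struct psci_boot_args *boot_args)
	{
		cpu_reg(host_ctxt, 0) = boot_args->r0;		/* host's x0 on entry */
		write_sysreg_el2(boot_args->pc, SYS_ELR);	/* host resumes here */
		__host_enter(host_ctxt);			/* never returns */
	}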
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-21-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/host.S | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 2b56f0bdf874..a820dfdc9c25 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -39,6 +39,7 @@ SYM_FUNC_START(__host_exit) bl handle_trap /* Restore host regs x0-x17 */ +__host_enter_restore_full: ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] @@ -61,6 +62,14 @@ __host_enter_without_restoring: sb SYM_FUNC_END(__host_exit) +/* + * void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); + */ +SYM_FUNC_START(__host_enter) + mov x29, x0 + b __host_enter_restore_full +SYM_FUNC_END(__host_enter) + /* * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); */ -- cgit v1.2.3 From cdf367192766ad11a03e8d5098556be43b8eb6b0 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:17 +0000 Subject: KVM: arm64: Intercept host's CPU_ON SMCs Add a handler of the CPU_ON PSCI call from host. When invoked, it looks up the logical CPU ID corresponding to the provided MPIDR and populates the state struct of the target CPU with the provided x0, pc. It then calls CPU_ON itself, with an entry point in hyp that initializes EL2 state before returning ERET to the provided PC in EL1. There is a simple atomic lock around the boot args struct. If it is already locked, CPU_ON will return PENDING_ON error code. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-22-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 48 +++++++++++++++ arch/arm64/kvm/hyp/nvhe/psci-relay.c | 115 +++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index b0856b006bc0..d07e75f8242e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -139,6 +140,53 @@ alternative_else_nop_endif ret SYM_CODE_END(___kvm_hyp_init) +/* + * PSCI CPU_ON entry point + * + * x0: struct kvm_nvhe_init_params PA + */ +SYM_CODE_START(kvm_hyp_cpu_entry) + mov x1, #1 // is_cpu_on = true + b __kvm_hyp_init_cpu +SYM_CODE_END(kvm_hyp_cpu_entry) + +/* + * Common code for CPU entry points. Initializes EL2 state and + * installs the hypervisor before handing over to a C handler. + * + * x0: struct kvm_nvhe_init_params PA + * x1: bool is_cpu_on + */ +SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) + mov x28, x0 // Stash arguments + mov x29, x1 + + /* Check that the core was booted in EL2. */ + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.eq 2f + + /* The core booted in EL1. KVM cannot be initialized on it. */ +1: wfe + wfi + b 1b + +2: msr SPsel, #1 // We want to use SP_EL{1,2} + + /* Initialize EL2 CPU state to sane values. */ + init_el2_state nvhe // Clobbers x0..x2 + + /* Enable MMU, set vectors and stack. */ + mov x0, x28 + bl ___kvm_hyp_init // Clobbers x0..x3 + + /* Leave idmap. 
*/ + mov x0, x29 + ldr x1, =kvm_host_psci_cpu_entry + kimg_hyp_va x1, x2 + br x1 +SYM_CODE_END(__kvm_hyp_init_cpu) + SYM_CODE_START(__kvm_handle_stub_hvc) cmp x0, #HVC_SOFT_RESTART b.ne 1f diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 5ad56a875ffa..637e22ed71fc 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -9,12 +9,17 @@ #include #include #include +#include #include #include #include #include +void kvm_hyp_cpu_entry(unsigned long r0); + +void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); + /* Config options set by the host. */ __ro_after_init u32 kvm_host_psci_version; __ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; @@ -22,6 +27,24 @@ __ro_after_init s64 hyp_physvirt_offset; #define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset) +#define INVALID_CPU_ID UINT_MAX + +struct psci_boot_args { + atomic_t lock; + unsigned long pc; + unsigned long r0; +}; + +#define PSCI_BOOT_ARGS_UNLOCKED 0 +#define PSCI_BOOT_ARGS_LOCKED 1 + +#define PSCI_BOOT_ARGS_INIT \ + ((struct psci_boot_args){ \ + .lock = ATOMIC_INIT(PSCI_BOOT_ARGS_UNLOCKED), \ + }) + +static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT; + static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(u64, func_id, host_ctxt, 0); @@ -75,11 +98,101 @@ static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *ho hyp_panic(); /* unreachable */ } +static unsigned int find_cpu_id(u64 mpidr) +{ + unsigned int i; + + /* Reject invalid MPIDRs */ + if (mpidr & ~MPIDR_HWID_BITMASK) + return INVALID_CPU_ID; + + for (i = 0; i < NR_CPUS; i++) { + if (cpu_logical_map(i) == mpidr) + return i; + } + + return INVALID_CPU_ID; +} + +static __always_inline bool try_acquire_boot_args(struct psci_boot_args *args) +{ + return atomic_cmpxchg_acquire(&args->lock, + PSCI_BOOT_ARGS_UNLOCKED, + PSCI_BOOT_ARGS_LOCKED) == + PSCI_BOOT_ARGS_UNLOCKED; +} + +static __always_inline void release_boot_args(struct psci_boot_args *args) +{ + atomic_set_release(&args->lock, PSCI_BOOT_ARGS_UNLOCKED); +} + +static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, mpidr, host_ctxt, 1); + DECLARE_REG(unsigned long, pc, host_ctxt, 2); + DECLARE_REG(unsigned long, r0, host_ctxt, 3); + + unsigned int cpu_id; + struct psci_boot_args *boot_args; + struct kvm_nvhe_init_params *init_params; + int ret; + + /* + * Find the logical CPU ID for the given MPIDR. The search set is + * the set of CPUs that were online at the point of KVM initialization. + * Booting other CPUs is rejected because their cpufeatures were not + * checked against the finalized capabilities. This could be relaxed + * by doing the feature checks in hyp. + */ + cpu_id = find_cpu_id(mpidr); + if (cpu_id == INVALID_CPU_ID) + return PSCI_RET_INVALID_PARAMS; + + boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id); + init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id); + + /* Check if the target CPU is already being booted. */ + if (!try_acquire_boot_args(boot_args)) + return PSCI_RET_ALREADY_ON; + + boot_args->pc = pc; + boot_args->r0 = r0; + wmb(); + + ret = psci_call(func_id, mpidr, + __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)), + __hyp_pa(init_params)); + + /* If successful, the lock will be released by the target CPU. 
*/ + if (ret != PSCI_RET_SUCCESS) + release_boot_args(boot_args); + + return ret; +} + +asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) +{ + struct psci_boot_args *boot_args; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt; + boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args)); + + cpu_reg(host_ctxt, 0) = boot_args->r0; + write_sysreg_el2(boot_args->pc, SYS_ELR); + release_boot_args(boot_args); + + __host_enter(host_ctxt); +} + static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) || (func_id == kvm_host_psci_0_1_function_ids.migrate)) return psci_forward(host_ctxt); + else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on) + return psci_cpu_on(func_id, host_ctxt); else return PSCI_RET_NOT_SUPPORTED; } @@ -98,6 +211,8 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ case PSCI_0_2_FN_SYSTEM_RESET: psci_forward_noreturn(host_ctxt); unreachable(); + case PSCI_0_2_FN64_CPU_ON: + return psci_cpu_on(func_id, host_ctxt); default: return PSCI_RET_NOT_SUPPORTED; } -- cgit v1.2.3 From abf16336dd22d018cd2577f0789b01ed705484d7 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:18 +0000 Subject: KVM: arm64: Intercept host's CPU_SUSPEND PSCI SMCs Add a handler of CPU_SUSPEND host PSCI SMCs. The SMC can either enter a sleep state indistinguishable from a WFI or a deeper sleep state that behaves like a CPU_OFF+CPU_ON except that the core is still considered online while asleep. The handler saves r0,pc of the host and makes the same call to EL3 with the hyp CPU entry point. It either returns back to the handler and then back to the host, or wakes up into the entry point and initializes EL2 state before dropping back to EL1. No EL2 state needs to be saved/restored for this purpose. CPU_ON and CPU_SUSPEND are both implemented using struct psci_boot_args to store the state upon powerup, with each CPU having separate structs for CPU_ON and CPU_SUSPEND so that CPU_SUSPEND can operate locklessly and so that a CPU_ON call targeting a CPU cannot interfere with a concurrent CPU_SUSPEND call on that CPU. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-23-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 10 ++++++++ arch/arm64/kvm/hyp/nvhe/psci-relay.c | 44 ++++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index d07e75f8242e..0853f62b052b 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -150,6 +150,16 @@ SYM_CODE_START(kvm_hyp_cpu_entry) b __kvm_hyp_init_cpu SYM_CODE_END(kvm_hyp_cpu_entry) +/* + * PSCI CPU_SUSPEND entry point + * + * x0: struct kvm_nvhe_init_params PA + */ +SYM_CODE_START(kvm_hyp_cpu_resume) + mov x1, #0 // is_cpu_on = false + b __kvm_hyp_init_cpu +SYM_CODE_END(kvm_hyp_cpu_resume) + /* * Common code for CPU entry points. Initializes EL2 state and * installs the hypervisor before handing over to a C handler. 
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 637e22ed71fc..688cf7f40d42 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -17,6 +17,7 @@ #include void kvm_hyp_cpu_entry(unsigned long r0); +void kvm_hyp_cpu_resume(unsigned long r0); void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); @@ -44,6 +45,7 @@ struct psci_boot_args { }) static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT; +static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT; static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) { @@ -171,17 +173,51 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt) return ret; } +static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, power_state, host_ctxt, 1); + DECLARE_REG(unsigned long, pc, host_ctxt, 2); + DECLARE_REG(unsigned long, r0, host_ctxt, 3); + + struct psci_boot_args *boot_args; + struct kvm_nvhe_init_params *init_params; + + boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); + init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params)); + + /* + * No need to acquire a lock before writing to boot_args because a core + * can only suspend itself. Racy CPU_ON calls use a separate struct. + */ + boot_args->pc = pc; + boot_args->r0 = r0; + + /* + * Will either return if shallow sleep state, or wake up into the entry + * point if it is a deep sleep state. + */ + return psci_call(func_id, power_state, + __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)), + __hyp_pa(init_params)); +} + asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) { struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt; - boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args)); + + if (is_cpu_on) + boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args)); + else + boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); cpu_reg(host_ctxt, 0) = boot_args->r0; write_sysreg_el2(boot_args->pc, SYS_ELR); - release_boot_args(boot_args); + + if (is_cpu_on) + release_boot_args(boot_args); __host_enter(host_ctxt); } @@ -193,6 +229,8 @@ static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ return psci_forward(host_ctxt); else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on) return psci_cpu_on(func_id, host_ctxt); + else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) + return psci_cpu_suspend(func_id, host_ctxt); else return PSCI_RET_NOT_SUPPORTED; } @@ -211,6 +249,8 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ case PSCI_0_2_FN_SYSTEM_RESET: psci_forward_noreturn(host_ctxt); unreachable(); + case PSCI_0_2_FN64_CPU_SUSPEND: + return psci_cpu_suspend(func_id, host_ctxt); case PSCI_0_2_FN64_CPU_ON: return psci_cpu_on(func_id, host_ctxt); default: -- cgit v1.2.3 From d945f8d9ec4ab5b062ce9696761ca3a21de1e64d Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:19 +0000 Subject: KVM: arm64: Intercept host's SYSTEM_SUSPEND PSCI SMCs Add a handler of SYSTEM_SUSPEND host PSCI SMCs. The semantics are equivalent to CPU_SUSPEND, typically called on the last online CPU. Reuse the same entry point and boot args struct as CPU_SUSPEND. 
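The only real difference from the CPU_SUSPEND path is the PSCI argument layout: CPU_SUSPEND takes a power_state parameter first, SYSTEM_SUSPEND does not, so the resume entry point moves up one argument slot. Both call sites side by side (taken from this and the preceding patch):

	/* CPU_SUSPEND(power_state, entry_point, context_id) */
	psci_call(func_id, power_state,
		  __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
		  __hyp_pa(init_params));

	/* SYSTEM_SUSPEND(entry_point, context_id) */
	psci_call(func_id,
		  __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
		  __hyp_pa(init_params), 0);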
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-24-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 2 +- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 0853f62b052b..a2e251547625 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -151,7 +151,7 @@ SYM_CODE_START(kvm_hyp_cpu_entry) SYM_CODE_END(kvm_hyp_cpu_entry) /* - * PSCI CPU_SUSPEND entry point + * PSCI CPU_SUSPEND / SYSTEM_SUSPEND entry point * * x0: struct kvm_nvhe_init_params PA */ diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 688cf7f40d42..08dc9de69314 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -201,6 +201,30 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) __hyp_pa(init_params)); } +static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, pc, host_ctxt, 1); + DECLARE_REG(unsigned long, r0, host_ctxt, 2); + + struct psci_boot_args *boot_args; + struct kvm_nvhe_init_params *init_params; + + boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); + init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params)); + + /* + * No need to acquire a lock before writing to boot_args because a core + * can only suspend itself. Racy CPU_ON calls use a separate struct. + */ + boot_args->pc = pc; + boot_args->r0 = r0; + + /* Will only return on error. */ + return psci_call(func_id, + __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)), + __hyp_pa(init_params), 0); +} + asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) { struct psci_boot_args *boot_args; @@ -265,6 +289,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ case PSCI_1_0_FN_SET_SUSPEND_MODE: case PSCI_1_1_FN64_SYSTEM_RESET2: return psci_forward(host_ctxt); + case PSCI_1_0_FN64_SYSTEM_SUSPEND: + return psci_system_suspend(func_id, host_ctxt); default: return psci_0_2_handler(func_id, host_ctxt); } -- cgit v1.2.3 From fa8c3d65538aa11bb117cbf872400d5caa7f340b Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:20 +0000 Subject: KVM: arm64: Keep nVHE EL2 vector installed KVM by default keeps the stub vector installed and installs the nVHE vector only briefly for init and later on demand. Change this policy to install the vector at init and then never uninstall it if the kernel was given the protected KVM command line parameter. 
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-25-dbrazdil@google.com --- arch/arm64/kvm/arm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index aa40bef09dfc..7d162a296141 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1453,7 +1453,8 @@ static void _kvm_arch_hardware_disable(void *discard) void kvm_arch_hardware_disable(void) { - _kvm_arch_hardware_disable(NULL); + if (!is_protected_kvm_enabled()) + _kvm_arch_hardware_disable(NULL); } #ifdef CONFIG_CPU_PM @@ -1496,11 +1497,13 @@ static struct notifier_block hyp_init_cpu_pm_nb = { static void __init hyp_cpu_pm_init(void) { - cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); + if (!is_protected_kvm_enabled()) + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } static void __init hyp_cpu_pm_exit(void) { - cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); + if (!is_protected_kvm_enabled()) + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); } #else static inline void hyp_cpu_pm_init(void) @@ -1588,7 +1591,8 @@ static int init_subsystems(void) kvm_coproc_table_init(); out: - on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); + if (err || !is_protected_kvm_enabled()) + on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); return err; } -- cgit v1.2.3 From b93c17c4185bf6b50f2f0b332afb4abe8b766a7a Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:21 +0000 Subject: KVM: arm64: Trap host SMCs in protected mode While protected KVM is installed, start trapping all host SMCs. For now these are simply forwarded to EL3, except PSCI CPU_ON/CPU_SUSPEND/SYSTEM_SUSPEND which are intercepted and the hypervisor installed on newly booted cores. Create new constant HCR_HOST_NVHE_PROTECTED_FLAGS with the new set of HCR flags to use while the nVHE vector is installed when the kernel was booted with the protected flag enabled. Switch back to the default HCR flags when switching back to the stub vector. 
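Setting HCR_EL2.TSC is what makes an SMC executed by the host at EL1 trap to EL2 (reported as ESR_ELx_EC_SMC64) instead of going straight to EL3, so the handle_host_smc() path added earlier in this series starts seeing traffic. The new flag set is just the existing host flags plus that one bit (from the diff below):

	/* TSC: trap SMC instructions executed at EL1 to EL2 */
	#define HCR_HOST_NVHE_PROTECTED_FLAGS	(HCR_HOST_NVHE_FLAGS | HCR_TSC)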
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-26-dbrazdil@google.com --- arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-init.S | 10 ++++++++++ arch/arm64/kvm/hyp/nvhe/switch.c | 5 ++++- 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 64ce29378467..4e90c2debf70 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -80,6 +80,7 @@ HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) +#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index a2e251547625..31b060a44045 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -88,6 +88,11 @@ SYM_CODE_END(__kvm_hyp_init) * x0: struct kvm_nvhe_init_params PA */ SYM_CODE_START_LOCAL(___kvm_hyp_init) +alternative_if ARM64_KVM_PROTECTED_MODE + mov_q x1, HCR_HOST_NVHE_PROTECTED_FLAGS + msr hcr_el2, x1 +alternative_else_nop_endif + ldr x1, [x0, #NVHE_INIT_TPIDR_EL2] msr tpidr_el2, x1 @@ -230,6 +235,11 @@ reset: msr sctlr_el2, x5 isb +alternative_if ARM64_KVM_PROTECTED_MODE + mov_q x5, HCR_HOST_NVHE_FLAGS + msr hcr_el2, x5 +alternative_else_nop_endif + /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8ae8160bc93a..e1f8e0797144 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -96,7 +96,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; write_sysreg(mdcr_el2, mdcr_el2); - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); + if (is_protected_kvm_enabled()) + write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2); + else + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); write_sysreg(__kvm_hyp_host_vector, vbar_el2); } -- cgit v1.2.3 From f19f6644a5433cfae8a068445b149bc2247c1445 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:22 +0000 Subject: KVM: arm64: Fix EL2 mode availability checks With protected nVHE hyp code interception host's PSCI SMCs, the host starts seeing new CPUs boot in EL1 instead of EL2. The kernel logic that keeps track of the boot mode needs to be adjusted. Add a static key enabled if KVM protected mode initialization is successful. When the key is enabled, is_hyp_mode_available continues to report `true` because its users either treat it as a check whether KVM will be / was initialized, or whether stub HVCs can be made (eg. hibernate). is_hyp_mode_mismatched is changed to report `false` when the key is enabled. That's because all cores' modes matched at the point of KVM init and KVM will not allow cores not present at init to boot. That said, the function is never used after KVM is initialized. 
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201202184122.26046-27-dbrazdil@google.com --- arch/arm64/include/asm/virt.h | 18 ++++++++++++++++++ arch/arm64/kvm/arm.c | 9 ++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index eb81dcc220b6..ee6a48df89d9 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -65,9 +65,19 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); void __hyp_reset_vectors(void); +DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { + /* + * If KVM protected mode is initialized, all CPUs must have been booted + * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. + */ + if (IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized)) + return true; + return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2); } @@ -75,6 +85,14 @@ static inline bool is_hyp_mode_available(void) /* Check if the bootloader has booted CPUs in different modes */ static inline bool is_hyp_mode_mismatched(void) { + /* + * If KVM protected mode is initialized, all CPUs must have been booted + * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. + */ + if (IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized)) + return false; + return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7d162a296141..fadcc94931f9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -48,6 +48,7 @@ __asm__(".arch_extension virt"); #endif static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; +DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); @@ -1848,12 +1849,14 @@ int kvm_arch_init(void *opaque) if (err) goto out_hyp; - if (is_protected_kvm_enabled()) + if (is_protected_kvm_enabled()) { + static_branch_enable(&kvm_protected_mode_initialized); kvm_info("Protected nVHE mode initialized successfully\n"); - else if (in_hyp_mode) + } else if (in_hyp_mode) { kvm_info("VHE mode initialized successfully\n"); - else + } else { kvm_info("Hyp mode initialized successfully\n"); + } return 0; -- cgit v1.2.3 From 0cc519f85a527e1c5ad5a7f182105fe614e9ff80 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Dec 2020 19:51:49 +0000 Subject: KVM: arm64: Fix nVHE boot on VHE systems Conflict resolution gone astray results in the kernel not booting on VHE-capable HW when VHE support is disabled. Thankfully spotted by David. 
Reported-by: David Brazdil Signed-off-by: Marc Zyngier --- arch/arm64/kernel/head.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7eba3a1e84b0..957683029438 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -515,8 +515,11 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) */ mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 - cbz x2, init_el2_nvhe +#else + mov x2, xzr #endif + cbz x2, init_el2_nvhe + /* * When VHE _is_ in use, EL1 will not be used in the host and * requires no configuration, and all non-hyp-specific EL2 setup -- cgit v1.2.3 From f57ad63a835c6f1fe646ea985e78a79eb206a5b3 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Dec 2020 16:33:19 +0200 Subject: KVM: x86: ignore SIPIs that are received while not in wait-for-sipi state In commit 1c96dcceaeb3 ("KVM: x86: fix apic_accept_events vs check_nested_events"), we accidentally started latching SIPIs that are received while the cpu is not waiting for them. This causes vCPUs to never enter a halted state.
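Per the MP startup protocol, a SIPI that arrives while the AP is not in the wait-for-SIPI state must simply be discarded. The shape of the fix (simplified from the diff below; barriers and the mp_state update are omitted) is to always consume the pending bit but only deliver the vector when the vCPU is actually waiting:

	if (test_bit(KVM_APIC_SIPI, &pe)) {
		clear_bit(KVM_APIC_SIPI, &apic->pending_events);
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
			kvm_vcpu_deliver_sipi_vector(vcpu, apic->sipi_vector);
		/* otherwise: drop the SIPI rather than leaving it latched */
	}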
Signed-off-by: Christian Borntraeger Acked-by: Janosch Frank Acked-by: Cornelia Huck --- arch/s390/kvm/guestdbg.c | 8 ++++---- arch/s390/kvm/intercept.c | 2 +- arch/s390/kvm/interrupt.c | 10 +++++----- arch/s390/kvm/kvm-s390.c | 20 ++++++++++---------- arch/s390/kvm/priv.c | 4 ++-- arch/s390/kvm/pv.c | 6 +++--- arch/s390/kvm/vsie.c | 4 ++-- 7 files changed, 27 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 394a5f53805b..3765c4223bf9 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -184,7 +184,7 @@ static int __import_wp_info(struct kvm_vcpu *vcpu, if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE) return -EINVAL; - wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL); + wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL_ACCOUNT); if (!wp_info->old_data) return -ENOMEM; /* try to backup the original value */ @@ -234,7 +234,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, if (nr_wp > 0) { wp_info = kmalloc_array(nr_wp, sizeof(*wp_info), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!wp_info) { ret = -ENOMEM; goto error; @@ -243,7 +243,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, if (nr_bp > 0) { bp_info = kmalloc_array(nr_bp, sizeof(*bp_info), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!bp_info) { ret = -ENOMEM; goto error; @@ -349,7 +349,7 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu) if (!wp_info || !wp_info->old_data || wp_info->len <= 0) continue; - temp = kmalloc(wp_info->len, GFP_KERNEL); + temp = kmalloc(wp_info->len, GFP_KERNEL_ACCOUNT); if (!temp) continue; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index e7a7c499a73f..72b25b7cc6ae 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -398,7 +398,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - sctns = (void *)get_zeroed_page(GFP_KERNEL); + sctns = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!sctns) return -ENOMEM; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2f177298c663..e3183bd05910 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1792,7 +1792,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, goto out; } gisa_out: - tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL); + tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (tmp_inti) { tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0); tmp_inti->io.io_int_word = isc_to_int_word(isc); @@ -2015,7 +2015,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti; int rc; - inti = kzalloc(sizeof(*inti), GFP_KERNEL); + inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (!inti) return -ENOMEM; @@ -2414,7 +2414,7 @@ static int enqueue_floating_irq(struct kvm_device *dev, return -EINVAL; while (len >= sizeof(struct kvm_s390_irq)) { - inti = kzalloc(sizeof(*inti), GFP_KERNEL); + inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (!inti) return -ENOMEM; @@ -2462,7 +2462,7 @@ static int register_io_adapter(struct kvm_device *dev, if (dev->kvm->arch.adapters[adapter_info.id] != NULL) return -EINVAL; - adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL_ACCOUNT); if (!adapter) return -ENOMEM; @@ -3290,7 +3290,7 @@ int kvm_s390_gib_init(u8 nisc) goto out; } - gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + gib = 
(struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); if (!gib) { rc = -ENOMEM; goto out; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6b74b92c1a58..19804c388d61 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1254,7 +1254,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) ret = -EBUSY; goto out; } - proc = kzalloc(sizeof(*proc), GFP_KERNEL); + proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); if (!proc) { ret = -ENOMEM; goto out; @@ -1416,7 +1416,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_vm_cpu_processor *proc; int ret = 0; - proc = kzalloc(sizeof(*proc), GFP_KERNEL); + proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); if (!proc) { ret = -ENOMEM; goto out; @@ -1444,7 +1444,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_vm_cpu_machine *mach; int ret = 0; - mach = kzalloc(sizeof(*mach), GFP_KERNEL); + mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT); if (!mach) { ret = -ENOMEM; goto out; @@ -1812,7 +1812,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); if (!keys) return -ENOMEM; @@ -1857,7 +1857,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); if (!keys) return -ENOMEM; @@ -2625,7 +2625,7 @@ static void sca_dispose(struct kvm *kvm) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - gfp_t alloc_flags = GFP_KERNEL; + gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; int i, rc; char debug_name[16]; static unsigned long sca_offset; @@ -2670,7 +2670,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); kvm->arch.sie_page2 = - (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); if (!kvm->arch.sie_page2) goto out_err; @@ -2900,7 +2900,7 @@ static int sca_switch_to_extended(struct kvm *kvm) if (kvm->arch.use_esca) return 0; - new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!new_sca) return -ENOMEM; @@ -3133,7 +3133,7 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) { - vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); + vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!vcpu->arch.sie_block->cbrlo) return -ENOMEM; return 0; @@ -3243,7 +3243,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) int rc; BUILD_BUG_ON(sizeof(struct sie_page) != 4096); - sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); + sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!sie_page) return -ENOMEM; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index cd74989ce0b0..9928f785c677 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -879,7 +879,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) switch (fc) { case 1: /* same handling for 1 and 2 
*/ case 2: - mem = get_zeroed_page(GFP_KERNEL); + mem = get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!mem) goto out_no_data; if (stsi((void *) mem, fc, sel1, sel2)) @@ -888,7 +888,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) case 3: if (sel1 != 2 || sel2 != 2) goto out_no_data; - mem = get_zeroed_page(GFP_KERNEL); + mem = get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!mem) goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index eb99e2f95ebe..373b654c84bd 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -60,7 +60,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) if (kvm_s390_pv_cpu_get_handle(vcpu)) return -EINVAL; - vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, + vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(uv_info.guest_cpu_stor_len)); if (!vcpu->arch.pv.stor_base) return -ENOMEM; @@ -72,7 +72,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; /* Alloc Secure Instruction Data Area Designation */ - vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO); + vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!vcpu->arch.sie_block->sidad) { free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); @@ -120,7 +120,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) struct kvm_memory_slot *memslot; kvm->arch.pv.stor_var = NULL; - kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base)); + kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base)); if (!kvm->arch.pv.stor_base) return -ENOMEM; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4f3cbf6003a9..c5d0a58b2c29 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1234,7 +1234,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_lock(&kvm->arch.vsie.mutex); if (kvm->arch.vsie.page_count < nr_vcpus) { - page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA); + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); if (!page) { mutex_unlock(&kvm->arch.vsie.mutex); return ERR_PTR(-ENOMEM); @@ -1336,7 +1336,7 @@ out_put: void kvm_s390_vsie_init(struct kvm *kvm) { mutex_init(&kvm->arch.vsie.mutex); - INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL); + INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT); } /* Destroy the vsie data structures. To be called when a vm is destroyed. */ -- cgit v1.2.3 From 0cd2a787cffb5750ba2e7b5de39a6f3d1dfc17e9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 9 Nov 2020 13:14:35 +0100 Subject: s390/gmap: make gmap memcg aware gmap allocations can be attributed to a process. Signed-off-by: Christian Borntraeger Acked-by: Heiko Carstens Acked-by: Janosch Frank Acked-by: Cornelia Huck --- arch/s390/mm/gmap.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cfb0017f33a7..0160ac97a27d 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2,7 +2,7 @@ /* * KVM guest address space mapping code * - * Copyright IBM Corp. 2007, 2016, 2018 + * Copyright IBM Corp. 
2007, 2020 * Author(s): Martin Schwidefsky * David Hildenbrand * Janosch Frank @@ -56,19 +56,19 @@ static struct gmap *gmap_alloc(unsigned long limit) atype = _ASCE_TYPE_REGION1; etype = _REGION1_ENTRY_EMPTY; } - gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); if (!gmap) goto out; INIT_LIST_HEAD(&gmap->crst_list); INIT_LIST_HEAD(&gmap->children); INIT_LIST_HEAD(&gmap->pt_list); - INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); - INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); - INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); + INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); spin_lock_init(&gmap->guest_table_lock); spin_lock_init(&gmap->shadow_lock); refcount_set(&gmap->ref_count, 1); - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) goto out_free; page->index = 0; @@ -309,7 +309,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); @@ -594,7 +594,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) return -EFAULT; /* Link gmap segment table entry location to page table. */ - rc = radix_tree_preload(GFP_KERNEL); + rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); if (rc) return rc; ptl = pmd_lock(mm, pmd); @@ -1218,11 +1218,11 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, vmaddr = __gmap_translate(parent, paddr); if (IS_ERR_VALUE(vmaddr)) return vmaddr; - rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); + rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT); if (!rmap) return -ENOMEM; rmap->raddr = raddr; - rc = radix_tree_preload(GFP_KERNEL); + rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); if (rc) { kfree(rmap); return rc; @@ -1741,7 +1741,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r2t & _REGION_ENTRY_ORIGIN; @@ -1825,7 +1825,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r3t & _REGION_ENTRY_ORIGIN; @@ -1909,7 +1909,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); /* Allocate a shadow segment table */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = sgt & _REGION_ENTRY_ORIGIN; @@ -2116,7 +2116,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) parent = sg->parent; prot = (pte_val(pte) & _PAGE_PROTECT) ? 
PROT_READ : PROT_WRITE; - rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); + rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT); if (!rmap) return -ENOMEM; rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE; @@ -2128,7 +2128,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) rc = vmaddr; break; } - rc = radix_tree_preload(GFP_KERNEL); + rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); if (rc) break; rc = -EAGAIN; -- cgit v1.2.3 From 50a05be484cb70d9dfb55fa5a6ed57eab193901f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 25 Nov 2020 10:06:58 +0100 Subject: KVM: s390: track synchronous pfault events in kvm_stat Right now we do count pfault (pseudo page faults aka async page faults start and completion events). What we do not count is, if an async page fault would have been possible by the host, but it was disabled by the guest (e.g. interrupts off, pfault disabled, secure execution....). Let us count those as well in the pfault_sync counter. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20201125090658.38463-1-borntraeger@de.ibm.com --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/kvm-s390.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 463c24e26000..74f9a036bab2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -459,6 +459,7 @@ struct kvm_vcpu_stat { u64 diagnose_308; u64 diagnose_500; u64 diagnose_other; + u64 pfault_sync; }; #define PGM_OPERATION 0x01 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 19804c388d61..065f94f22fd3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -60,6 +60,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("userspace_handled", exit_userspace), VCPU_STAT("exit_null", exit_null), + VCPU_STAT("pfault_sync", pfault_sync), VCPU_STAT("exit_validity", exit_validity), VCPU_STAT("exit_stop_request", exit_stop_request), VCPU_STAT("exit_external_request", exit_external_request), @@ -4111,6 +4112,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) current->thread.gmap_pfault = 0; if (kvm_arch_setup_async_pf(vcpu)) return 0; + vcpu->stat.pfault_sync++; return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } return vcpu_post_run_fault_in_sie(vcpu); -- cgit v1.2.3 From 6c44221b05236cc65d76cb5dc2463f738edff39d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 29 Oct 2020 15:04:57 +0100 Subject: KVM/VMX: Use TEST %REG,%REG instead of CMP $0,%REG in vmenter.S Saves one byte in __vmx_vcpu_run for the same functionality. Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Uros Bizjak Message-Id: <20201029140457.126965-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmenter.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 90ad7a6246e3..e85aa5faa22d 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -132,7 +132,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - cmpb $0, %bl + testb %bl, %bl /* Load guest registers. Don't clobber flags. 
*/ mov VCPU_RCX(%_ASM_AX), %_ASM_CX -- cgit v1.2.3 From e1b35da5e624f8b09d2e98845c2e4c84b179d9a4 Mon Sep 17 00:00:00 2001 From: Kyung Min Park Date: Mon, 7 Dec 2020 19:34:40 -0800 Subject: x86: Enumerate AVX512 FP16 CPUID feature flag Enumerate AVX512 Half-precision floating point (FP16) CPUID feature flag. Compared with using FP32, using FP16 cut the number of bits required for storage in half, reducing the exponent from 8 bits to 5, and the mantissa from 23 bits to 10. Using FP16 also enables developers to train and run inference on deep learning models fast when all precision or magnitude (FP32) is not needed. A processor supports AVX512 FP16 if CPUID.(EAX=7,ECX=0):EDX[bit 23] is present. The AVX512 FP16 requires AVX512BW feature be implemented since the instructions for manipulating 32bit masks are associated with AVX512BW. The only in-kernel usage of this is kvm passthrough. The CPU feature flag is shown as "avx512_fp16" in /proc/cpuinfo. Signed-off-by: Kyung Min Park Acked-by: Dave Hansen Reviewed-by: Tony Luck Message-Id: <20201208033441.28207-2-kyung.min.park@intel.com> Acked-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/cpuid-deps.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dad350d42ecf..b9dc6a56d360 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -374,6 +374,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index d502241995a3..42af31b64c2c 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, { X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA }, {} -- cgit v1.2.3 From 2224fc9efb2d6593fbfb57287e39ba4958b188ba Mon Sep 17 00:00:00 2001 From: Cathy Zhang Date: Mon, 7 Dec 2020 19:34:41 -0800 Subject: KVM: x86: Expose AVX512_FP16 for supported CPUID AVX512_FP16 is supported by Intel processors, like Sapphire Rapids. It could gain better performance for it's faster compared to FP32 if the precision or magnitude requirements are met. It's availability is indicated by CPUID.(EAX=7,ECX=0):EDX[bit 23]. Expose it in KVM supported CPUID, then guest could make use of it; no new registers are used, only new instructions. 
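For reference, the bit position named above, CPUID.(EAX=7,ECX=0):EDX[bit 23], can be checked from userspace with a short stand-alone program. This is only an illustrative sketch, assuming GCC/Clang's <cpuid.h> helpers; it is not part of the patch:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 7, subleaf 0: structured extended feature flags. */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;	/* leaf 7 not supported at all */

	/* EDX bit 23 is the AVX512 FP16 bit enumerated by this patch. */
	printf("avx512_fp16: %s\n", (edx & (1u << 23)) ? "yes" : "no");
	return 0;
}

On kernels carrying this change the same information also shows up as the "avx512_fp16" flag in /proc/cpuinfo.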
Signed-off-by: Cathy Zhang Signed-off-by: Kyung Min Park Acked-by: Dave Hansen Reviewed-by: Tony Luck Message-Id: <20201208033441.28207-3-kyung.min.park@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 5d352cc204ce..a22a3108b5f0 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -419,7 +419,7 @@ void kvm_set_cpu_caps(void) F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | - F(SERIALIZE) | F(TSXLDTRK) + F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */ -- cgit v1.2.3 From 39485ed95d6b83b62fa75c06c2c4d33992e0d971 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Dec 2020 09:40:15 -0500 Subject: KVM: x86: reinstate vendor-agnostic check on SPEC_CTRL cpuid bits Until commit e7c587da1252 ("x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP"), KVM was testing both Intel and AMD CPUID bits before allowing the guest to write MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD. Testing only Intel bits on VMX processors, or only AMD bits on SVM processors, fails if the guests are created with the "opposite" vendor as the host. While at it, also tweak the host CPU check to use the vendor-agnostic feature bit X86_FEATURE_IBPB, since we only care about the availability of the MSR on the host here and not about specific CPUID bits. Fixes: e7c587da1252 ("x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP") Cc: stable@vger.kernel.org Reported-by: Denis V. Lunev Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.h | 14 ++++++++++++++ arch/x86/kvm/svm/svm.c | 14 ++++---------- arch/x86/kvm/vmx/vmx.c | 8 ++++---- 3 files changed, 22 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index f7a6e8f83783..dc921d76e42e 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -264,6 +264,20 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu) return x86_stepping(best->eax); } +static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu) +{ + return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)); +} + +static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu) +{ + return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || + guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)); +} + static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu) { return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6dc337b9c231..0e52fac4f5ae 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2545,10 +2545,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; msr_info->data = svm->spec_ctrl; @@ -2632,10 +2629,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - 
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; if (kvm_spec_ctrl_test_value(data)) @@ -2660,12 +2654,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) + !guest_has_pred_cmd_msr(vcpu)) return 1; if (data & ~PRED_CMD_IBPB) return 1; - if (!boot_cpu_has(X86_FEATURE_AMD_IBPB)) + if (!boot_cpu_has(X86_FEATURE_IBPB)) return 1; if (!data) break; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c3441e7e5a87..4b854a197e44 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1826,7 +1826,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; msr_info->data = to_vmx(vcpu)->spec_ctrl; @@ -2028,7 +2028,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; if (kvm_spec_ctrl_test_value(data)) @@ -2063,12 +2063,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) goto find_uret_msr; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_has_pred_cmd_msr(vcpu)) return 1; if (data & ~PRED_CMD_IBPB) return 1; - if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (!boot_cpu_has(X86_FEATURE_IBPB)) return 1; if (!data) break; -- cgit v1.2.3 From 3f1a18b9fa1c294802d2750d1ef6a1221b10b76b Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 29 Oct 2020 14:56:00 +0100 Subject: KVM/VMX/SVM: Move kvm_machine_check function to x86.h Move kvm_machine_check to x86.h to avoid two exact copies of the same function in kvm.c and svm.c. Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Uros Bizjak Message-Id: <20201029135600.122392-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 20 -------------------- arch/x86/kvm/vmx/vmx.c | 20 -------------------- arch/x86/kvm/x86.h | 20 ++++++++++++++++++++ 3 files changed, 20 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0e52fac4f5ae..544b6e362cf7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -1933,25 +1932,6 @@ static bool is_erratum_383(void) return true; } -/* - * Trigger machine check on the host. We assume all the MSRs are already set up - * by the CPU and that we still run on the same CPU as the MCE occurred on. - * We pass a fake environment to the machine check handler because we want - * the guest to be always treated like user space, no matter what context - * it used internally. 
- */ -static void kvm_machine_check(void) -{ -#if defined(CONFIG_X86_MCE) - struct pt_regs regs = { - .cs = 3, /* Fake ring 3 no matter what the guest ran on */ - .flags = X86_EFLAGS_IF, - }; - - do_machine_check(®s); -#endif -} - static void svm_handle_mce(struct vcpu_svm *svm) { if (is_erratum_383()) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4b854a197e44..849be2a9f260 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -4716,25 +4715,6 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, return 1; } -/* - * Trigger machine check on the host. We assume all the MSRs are already set up - * by the CPU and that we still run on the same CPU as the MCE occurred on. - * We pass a fake environment to the machine check handler because we want - * the guest to be always treated like user space, no matter what context - * it used internally. - */ -static void kvm_machine_check(void) -{ -#if defined(CONFIG_X86_MCE) - struct pt_regs regs = { - .cs = 3, /* Fake ring 3 no matter what the guest ran on */ - .flags = X86_EFLAGS_IF, - }; - - do_machine_check(®s); -#endif -} - static int handle_machine_check(struct kvm_vcpu *vcpu) { /* handled by vmx_vcpu_run() */ diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 764c967a1993..bf812c89c2e3 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -3,6 +3,7 @@ #define ARCH_X86_KVM_X86_H #include +#include #include #include "kvm_cache_regs.h" #include "kvm_emulate.h" @@ -366,6 +367,25 @@ static inline bool kvm_dr6_valid(u64 data) return !(data >> 32); } +/* + * Trigger machine check on the host. We assume all the MSRs are already set up + * by the CPU and that we still run on the same CPU as the MCE occurred on. + * We pass a fake environment to the machine check handler because we want + * the guest to be always treated like user space, no matter what context + * it used internally. + */ +static inline void kvm_machine_check(void) +{ +#if defined(CONFIG_X86_MCE) + struct pt_regs regs = { + .cs = 3, /* Fake ring 3 no matter what the guest ran on */ + .flags = X86_EFLAGS_IF, + }; + + do_machine_check(®s); +#endif +} + void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); int kvm_spec_ctrl_test_value(u64 value); -- cgit v1.2.3 From 69372cf01290b9587d2cee8fbe161d75d55c3adc Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:36 -0600 Subject: x86/cpu: Add VM page flush MSR availablility as a CPUID feature On systems that do not have hardware enforced cache coherency between encrypted and unencrypted mappings of the same physical page, the hypervisor can use the VM page flush MSR (0xc001011e) to flush the cache contents of an SEV guest page. When a small number of pages are being flushed, this can be used in place of issuing a WBINVD across all CPUs. CPUID 0x8000001f_eax[2] is used to determine if the VM page flush MSR is available. Add a CPUID feature to indicate it is supported and define the MSR. 
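As a rough illustration (not part of the patch), the enumeration bit described above, CPUID 0x8000001f EAX[2], can be probed from userspace in the same way; the sketch below assumes GCC/Clang's <cpuid.h>:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Extended leaf 0x8000001f: AMD memory encryption capabilities. */
	if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
		return 1;	/* leaf not implemented on this CPU */

	/* EAX bit 2 advertises the VM page flush MSR (0xc001011e). */
	printf("vm_page_flush: %s\n", (eax & (1u << 2)) ? "yes" : "no");
	return 0;
}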
Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b9dc6a56d360..9f9e9511f7cd 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -237,6 +237,7 @@ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ #define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 972a34d93505..abfc9b0fbd8d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -470,6 +470,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 866c9a9bcdee..236924930bf0 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -44,6 +44,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, { X86_FEATURE_SEV_ES, CPUID_EAX, 3, 0x8000001f, 0 }, { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 }, + { X86_FEATURE_VM_PAGE_FLUSH, CPUID_EAX, 2, 0x8000001f, 0 }, { 0, 0, 0, 0, 0 } }; -- cgit v1.2.3 From 9d4747d02376aeb8de38afa25430de79129c5799 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:37 -0600 Subject: KVM: SVM: Remove the call to sev_platform_status() during setup When both KVM support and the CCP driver are built into the kernel instead of as modules, KVM initialization can happen before CCP initialization. As a result, sev_platform_status() will return a failure when it is called from sev_hardware_setup(), when this isn't really an error condition. Since sev_platform_status() doesn't need to be called at this time anyway, remove the invocation from sev_hardware_setup(). 
Signed-off-by: Tom Lendacky Message-Id: <618380488358b56af558f2682203786f09a49483.1607620209.git.thomas.lendacky@amd.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3418bb18dae7..7166aec97ffb 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1127,9 +1127,6 @@ void sev_vm_destroy(struct kvm *kvm) int __init sev_hardware_setup(void) { - struct sev_user_data_status *status; - int rc; - /* Maximum number of encrypted guests supported simultaneously */ max_sev_asid = cpuid_ecx(0x8000001F); @@ -1148,26 +1145,9 @@ int __init sev_hardware_setup(void) if (!sev_reclaim_asid_bitmap) return 1; - status = kmalloc(sizeof(*status), GFP_KERNEL); - if (!status) - return 1; - - /* - * Check SEV platform status. - * - * PLATFORM_STATUS can be called in any state, if we failed to query - * the PLATFORM status then either PSP firmware does not support SEV - * feature or SEV firmware is dead. - */ - rc = sev_platform_status(status, NULL); - if (rc) - goto err; - pr_info("SEV supported\n"); -err: - kfree(status); - return rc; + return 0; } void sev_hardware_teardown(void) -- cgit v1.2.3 From 916391a2d1dc225bfb68624352b1495ec529444e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:38 -0600 Subject: KVM: SVM: Add support for SEV-ES capability in KVM Add support to KVM for determining if a system is capable of supporting SEV-ES as well as determining if a guest is an SEV-ES guest. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 3 ++- arch/x86/kvm/svm/sev.c | 47 ++++++++++++++++++++++++++++++++++++++--------- arch/x86/kvm/svm/svm.c | 20 ++++++++++---------- arch/x86/kvm/svm/svm.h | 39 +++++++++++++++++++++++++++------------ 4 files changed, 77 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index f92dfd8ef10d..7ac592664c52 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -100,7 +100,8 @@ config KVM_AMD_SEV depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) help - Provides support for launching Encrypted VMs on AMD processors. + Provides support for launching Encrypted VMs (SEV) and Encrypted VMs + with Encrypted State (SEV-ES) on AMD processors. config KVM_MMU_AUDIT bool "Audit KVM MMU" diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 7166aec97ffb..a2b01cbd0511 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -932,7 +932,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) struct kvm_sev_cmd sev_cmd; int r; - if (!svm_sev_enabled()) + if (!svm_sev_enabled() || !sev) return -ENOTTY; if (!argp) @@ -1125,29 +1125,58 @@ void sev_vm_destroy(struct kvm *kvm) sev_asid_free(sev->asid); } -int __init sev_hardware_setup(void) +void __init sev_hardware_setup(void) { + unsigned int eax, ebx, ecx, edx; + bool sev_es_supported = false; + bool sev_supported = false; + + /* Does the CPU support SEV? 
*/ + if (!boot_cpu_has(X86_FEATURE_SEV)) + goto out; + + /* Retrieve SEV CPUID information */ + cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); + /* Maximum number of encrypted guests supported simultaneously */ - max_sev_asid = cpuid_ecx(0x8000001F); + max_sev_asid = ecx; if (!svm_sev_enabled()) - return 1; + goto out; /* Minimum ASID value that should be used for SEV guest */ - min_sev_asid = cpuid_edx(0x8000001F); + min_sev_asid = edx; /* Initialize SEV ASID bitmaps */ sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_asid_bitmap) - return 1; + goto out; sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) - return 1; + goto out; - pr_info("SEV supported\n"); + pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1); + sev_supported = true; - return 0; + /* SEV-ES support requested? */ + if (!sev_es) + goto out; + + /* Does the CPU support SEV-ES? */ + if (!boot_cpu_has(X86_FEATURE_SEV_ES)) + goto out; + + /* Has the system been allocated ASIDs for SEV-ES? */ + if (min_sev_asid == 1) + goto out; + + pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1); + sev_es_supported = true; + +out: + sev = sev_supported; + sev_es = sev_es_supported; } void sev_hardware_teardown(void) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 544b6e362cf7..8cb9474b6a03 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -186,9 +186,13 @@ static int vgif = true; module_param(vgif, int, 0444); /* enable/disable SEV support */ -static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); +int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +/* enable/disable SEV-ES support */ +int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); +module_param(sev_es, int, 0444); + static bool __read_mostly dump_invalid_vmcb = 0; module_param(dump_invalid_vmcb, bool, 0644); @@ -958,15 +962,11 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); } - if (sev) { - if (boot_cpu_has(X86_FEATURE_SEV) && - IS_ENABLED(CONFIG_KVM_AMD_SEV)) { - r = sev_hardware_setup(); - if (r) - sev = false; - } else { - sev = false; - } + if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) { + sev_hardware_setup(); + } else { + sev = false; + sev_es = false; } svm_adjust_mmio_mask(); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index fdff76eb6ceb..ef0f0dfabc69 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -61,6 +61,7 @@ enum { struct kvm_sev_info { bool active; /* SEV enabled guest */ + bool es_active; /* SEV-ES enabled guest */ unsigned int asid; /* ASID used for this guest */ unsigned int handle; /* SEV firmware handle */ int fd; /* SEV device fd */ @@ -194,6 +195,28 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) return container_of(kvm, struct kvm_svm, kvm); } +static inline bool sev_guest(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_AMD_SEV + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + + return sev->active; +#else + return false; +#endif +} + +static inline bool sev_es_guest(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_AMD_SEV + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + + return sev_guest(kvm) && sev->es_active; +#else + return false; +#endif +} + static inline void vmcb_mark_all_dirty(struct vmcb *vmcb) { vmcb->control.clean = 0; @@ -352,6 +375,9 @@ static inline bool gif_set(struct vcpu_svm *svm) #define MSR_CR3_LONG_MBZ_MASK 0xfff0000000000000U #define MSR_INVALID 0xffffffffU 
+extern int sev; +extern int sev_es; + u32 svm_msrpm_offset(u32 msr); u32 *svm_vcpu_alloc_msrpm(void); void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm); @@ -473,17 +499,6 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); extern unsigned int max_sev_asid; -static inline bool sev_guest(struct kvm *kvm) -{ -#ifdef CONFIG_KVM_AMD_SEV - struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; - - return sev->active; -#else - return false; -#endif -} - static inline bool svm_sev_enabled(void) { return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0; @@ -496,7 +511,7 @@ int svm_register_enc_region(struct kvm *kvm, int svm_unregister_enc_region(struct kvm *kvm, struct kvm_enc_region *range); void pre_sev_run(struct vcpu_svm *svm, int cpu); -int __init sev_hardware_setup(void); +void __init sev_hardware_setup(void); void sev_hardware_teardown(void); #endif -- cgit v1.2.3 From 0f60bde15ee11d03b6143f567cf840d30bf1b588 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:39 -0600 Subject: KVM: SVM: Add GHCB accessor functions for retrieving fields Update the GHCB accessor functions to add functions for retrieve GHCB fields by name. Update existing code to use the new accessor functions. Signed-off-by: Tom Lendacky Message-Id: <664172c53a5fb4959914e1a45d88e805649af0ad.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 10 ++++++++++ arch/x86/kernel/cpu/vmware.c | 12 ++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 71d630bb5e08..1edf24f51b53 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -379,6 +379,16 @@ struct vmcb { (unsigned long *)&ghcb->save.valid_bitmap); \ } \ \ + static inline u64 ghcb_get_##field(struct ghcb *ghcb) \ + { \ + return ghcb->save.field; \ + } \ + \ + static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \ + { \ + return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \ + } \ + \ static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \ { \ __set_bit(GHCB_BITMAP_IDX(field), \ diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 924571fe5864..c6ede3b3d302 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -501,12 +501,12 @@ static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs) ghcb_rbp_is_valid(ghcb))) return false; - regs->bx = ghcb->save.rbx; - regs->cx = ghcb->save.rcx; - regs->dx = ghcb->save.rdx; - regs->si = ghcb->save.rsi; - regs->di = ghcb->save.rdi; - regs->bp = ghcb->save.rbp; + regs->bx = ghcb_get_rbx(ghcb); + regs->cx = ghcb_get_rcx(ghcb); + regs->dx = ghcb_get_rdx(ghcb); + regs->si = ghcb_get_rsi(ghcb); + regs->di = ghcb_get_rdi(ghcb); + regs->bp = ghcb_get_rbp(ghcb); return true; } -- cgit v1.2.3 From add5e2f045414523aa6dc29d69b21e8f82e5ffb8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:40 -0600 Subject: KVM: SVM: Add support for the SEV-ES VMSA Allocate a page during vCPU creation to be used as the encrypted VM save area (VMSA) for the SEV-ES guest. Provide a flag in the kvm_vcpu_arch structure that indicates whether the guest state is protected. When freeing a VMSA page that has been encrypted, the cache contents must be flushed using the MSR_AMD64_VM_PAGE_FLUSH before freeing the page. 
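The flush described above is done page by page, with each write to MSR_AMD64_VM_PAGE_FLUSH carrying the page-aligned virtual address OR'ed with the guest ASID (see sev_flush_guest_memory() in the diff below). A rough userspace model of that address arithmetic, using made-up address and ASID values, is:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096ULL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	uint64_t va    = 0x7f0000001000ULL;	/* example VMSA address */
	uint64_t len   = PAGE_SIZE;		/* the VMSA is one page */
	uint64_t asid  = 5;			/* example SEV ASID */
	uint64_t start = va & PAGE_MASK;
	uint64_t stop  = PAGE_ALIGN(va + len);

	/* One MSR write per page covering [va, va + len). */
	for (uint64_t p = start; p < stop; p += PAGE_SIZE)
		printf("wrmsr(MSR_AMD64_VM_PAGE_FLUSH, 0x%016llx)\n",
		       (unsigned long long)(p | asid));
	return 0;
}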
[ i386 build warnings ] Reported-by: kernel test robot Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++ arch/x86/kvm/svm/sev.c | 67 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 24 +++++++++++++-- arch/x86/kvm/svm/svm.h | 5 +++ 4 files changed, 97 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f002cdb13a0b..8cf6b0493d49 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -805,6 +805,9 @@ struct kvm_vcpu_arch { */ bool enforce; } pv_cpuid; + + /* Protected Guests */ + bool guest_state_protected; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a2b01cbd0511..501adb43ece3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "x86.h" #include "svm.h" @@ -1190,6 +1191,72 @@ void sev_hardware_teardown(void) sev_flush_asids(); } +/* + * Pages used by hardware to hold guest encrypted state must be flushed before + * returning them to the system. + */ +static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va, + unsigned long len) +{ + /* + * If hardware enforced cache coherency for encrypted mappings of the + * same physical page is supported, nothing to do. + */ + if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) + return; + + /* + * If the VM Page Flush MSR is supported, use it to flush the page + * (using the page virtual address and the guest ASID). + */ + if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) { + struct kvm_sev_info *sev; + unsigned long va_start; + u64 start, stop; + + /* Align start and stop to page boundaries. */ + va_start = (unsigned long)va; + start = (u64)va_start & PAGE_MASK; + stop = PAGE_ALIGN((u64)va_start + len); + + if (start < stop) { + sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + + while (start < stop) { + wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, + start | sev->asid); + + start += PAGE_SIZE; + } + + return; + } + + WARN(1, "Address overflow, using WBINVD\n"); + } + + /* + * Hardware should always have one of the above features, + * but if not, use WBINVD and issue a warning. + */ + WARN_ONCE(1, "Using WBINVD to flush guest memory\n"); + wbinvd_on_all_cpus(); +} + +void sev_free_vcpu(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm; + + if (!sev_es_guest(vcpu->kvm)) + return; + + svm = to_svm(vcpu); + + if (vcpu->arch.guest_state_protected) + sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE); + __free_page(virt_to_page(svm->vmsa)); +} + void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu(svm_data, cpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8cb9474b6a03..801e0a641258 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1288,6 +1288,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm; struct page *vmcb_page; + struct page *vmsa_page = NULL; int err; BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0); @@ -1298,9 +1299,19 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) if (!vmcb_page) goto out; + if (sev_es_guest(svm->vcpu.kvm)) { + /* + * SEV-ES guests require a separate VMSA page used to contain + * the encrypted register state of the guest. 
+ */ + vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!vmsa_page) + goto error_free_vmcb_page; + } + err = avic_init_vcpu(svm); if (err) - goto error_free_vmcb_page; + goto error_free_vmsa_page; /* We initialize this flag to true to make sure that the is_running * bit would be set the first time the vcpu is loaded. @@ -1310,12 +1321,16 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->msrpm = svm_vcpu_alloc_msrpm(); if (!svm->msrpm) - goto error_free_vmcb_page; + goto error_free_vmsa_page; svm_vcpu_init_msrpm(vcpu, svm->msrpm); svm->vmcb = page_address(vmcb_page); svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT); + + if (vmsa_page) + svm->vmsa = page_address(vmsa_page); + svm->asid_generation = 0; init_vmcb(svm); @@ -1324,6 +1339,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) return 0; +error_free_vmsa_page: + if (vmsa_page) + __free_page(vmsa_page); error_free_vmcb_page: __free_page(vmcb_page); out: @@ -1351,6 +1369,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) svm_free_nested(svm); + sev_free_vcpu(vcpu); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index ef0f0dfabc69..f96a0a66ca35 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -168,6 +168,10 @@ struct vcpu_svm { DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS); DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS); } shadow_msr_intercept; + + /* SEV-ES support */ + struct vmcb_save_area *vmsa; + struct ghcb *ghcb; }; struct svm_cpu_data { @@ -513,5 +517,6 @@ int svm_unregister_enc_region(struct kvm *kvm, void pre_sev_run(struct vcpu_svm *svm, int cpu); void __init sev_hardware_setup(void); void sev_hardware_teardown(void); +void sev_free_vcpu(struct kvm_vcpu *vcpu); #endif -- cgit v1.2.3 From 1c04d8c986567c27c56c05205dceadc92efb14ff Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:41 -0600 Subject: KVM: x86: Mark GPRs dirty when written When performing VMGEXIT processing for an SEV-ES guest, register values will be synced between KVM and the GHCB. Prepare for detecting when a GPR has been updated (marked dirty) in order to determine whether to sync the register to the GHCB. 
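The regs_avail/regs_dirty pair manipulated by the accessors in the diff below is simply a per-vCPU pair of bitmaps; a minimal stand-alone sketch of the pattern (the names here are local to the example, not KVM's) looks like:

#include <stdbool.h>
#include <stdio.h>

enum reg { REG_RAX, REG_RBX, REG_RCX, NR_REGS };

struct vcpu_regs {
	unsigned long regs[NR_REGS];
	unsigned long avail;	/* value cached from hardware */
	unsigned long dirty;	/* value modified, must be written back */
};

static void reg_write(struct vcpu_regs *v, enum reg r, unsigned long val)
{
	v->regs[r] = val;
	v->avail |= 1UL << r;	/* a dirty register is also available */
	v->dirty |= 1UL << r;
}

static bool reg_is_dirty(const struct vcpu_regs *v, enum reg r)
{
	return v->dirty & (1UL << r);
}

int main(void)
{
	struct vcpu_regs v = { 0 };

	reg_write(&v, REG_RBX, 0x1234);
	printf("rbx dirty: %d, rcx dirty: %d\n",
	       reg_is_dirty(&v, REG_RBX), reg_is_dirty(&v, REG_RCX));
	return 0;
}

Only registers whose dirty bit is set would then need to be synced back to the GHCB on a VMGEXIT.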
Signed-off-by: Tom Lendacky Message-Id: <7ca2a1cdb61456f2fe9c64193e34d601e395c133.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_cache_regs.h | 51 ++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index a889563ad02d..f15bc16de07c 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -9,6 +9,31 @@ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE) +static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + +static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ { \ @@ -18,6 +43,7 @@ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ unsigned long val) \ { \ vcpu->arch.regs[VCPU_REGS_##uname] = val; \ + kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname); \ } BUILD_KVM_GPR_ACCESSORS(rax, RAX) BUILD_KVM_GPR_ACCESSORS(rbx, RBX) @@ -37,31 +63,6 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14) BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif -static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); -} - -static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); -} - -static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); -} - -static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); -} - static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) -- cgit v1.2.3 From 9caec4bf1d0126fa5f2fcd21852958bccd2a4c18 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Dec 2020 07:59:15 -0500 Subject: KVM: x86: remove bogus #GP injection There is no need to inject a #GP from kvm_mtrr_set_msr, kvm_emulate_wrmsr will handle it. 
Reviewed-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mtrr.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 7f0059aa30e1..f472fdb6ae7e 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -84,12 +84,8 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) } else /* MTRR mask */ mask |= 0x7ff; - if (data & mask) { - kvm_inject_gp(vcpu, 0); - return false; - } - return true; + return (data & mask) == 0; } EXPORT_SYMBOL_GPL(kvm_mtrr_valid); -- cgit v1.2.3 From 8b474427cbeea05850fb32da65cc95eebcbad089 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Dec 2020 07:44:46 -0500 Subject: KVM: x86: use kvm_complete_insn_gp in emulating RDMSR/WRMSR Simplify the four functions that handle {kernel,user} {rd,wr}msr, there is still some repetition between the two instances of rdmsr but the whole business of calling kvm_inject_gp and kvm_skip_emulated_instruction can be unified nicely. Because complete_emulated_wrmsr now becomes essentially a call to kvm_complete_insn_gp, remove complete_emulated_msr. Reviewed-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 44 ++++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a3fdc16cfd6f..5c5a6aa8696d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1634,27 +1634,20 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) } EXPORT_SYMBOL_GPL(kvm_set_msr); -static int complete_emulated_msr(struct kvm_vcpu *vcpu, bool is_read) +static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu) { - if (vcpu->run->msr.error) { - kvm_inject_gp(vcpu, 0); - return 1; - } else if (is_read) { + int err = vcpu->run->msr.error; + if (!err) { kvm_rax_write(vcpu, (u32)vcpu->run->msr.data); kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32); } - return kvm_skip_emulated_instruction(vcpu); -} - -static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu) -{ - return complete_emulated_msr(vcpu, true); + return kvm_complete_insn_gp(vcpu, err); } static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu) { - return complete_emulated_msr(vcpu, false); + return kvm_complete_insn_gp(vcpu, vcpu->run->msr.error); } static u64 kvm_msr_reason(int r) @@ -1717,18 +1710,16 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) return 0; } - /* MSR read failed? Inject a #GP */ - if (r) { + if (!r) { + trace_kvm_msr_read(ecx, data); + + kvm_rax_write(vcpu, data & -1u); + kvm_rdx_write(vcpu, (data >> 32) & -1u); + } else { trace_kvm_msr_read_ex(ecx); - kvm_inject_gp(vcpu, 0); - return 1; } - trace_kvm_msr_read(ecx, data); - - kvm_rax_write(vcpu, data & -1u); - kvm_rdx_write(vcpu, (data >> 32) & -1u); - return kvm_skip_emulated_instruction(vcpu); + return kvm_complete_insn_gp(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); @@ -1749,15 +1740,12 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) if (r < 0) return r; - /* MSR write failed? 
Inject a #GP */ - if (r > 0) { + if (!r) + trace_kvm_msr_write(ecx, data); + else trace_kvm_msr_write_ex(ecx, data); - kvm_inject_gp(vcpu, 0); - return 1; - } - trace_kvm_msr_write(ecx, data); - return kvm_skip_emulated_instruction(vcpu); + return kvm_complete_insn_gp(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); -- cgit v1.2.3 From f9a4d621761a2c7db686cc47772a0688d389f2d7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Dec 2020 10:26:51 -0500 Subject: KVM: x86: introduce complete_emulated_msr callback This will be used by SEV-ES to inject MSR failure via the GHCB. Reviewed-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/svm.c | 1 + arch/x86/kvm/vmx/vmx.c | 1 + arch/x86/kvm/x86.c | 8 ++++---- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8cf6b0493d49..18aa15e6fadd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1285,6 +1285,7 @@ struct kvm_x86_ops { void (*migrate_timers)(struct kvm_vcpu *vcpu); void (*msr_filter_changed)(struct kvm_vcpu *vcpu); + int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 801e0a641258..4067d511be08 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4306,6 +4306,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .apic_init_signal_blocked = svm_apic_init_signal_blocked, .msr_filter_changed = svm_msr_filter_changed, + .complete_emulated_msr = kvm_complete_insn_gp, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 849be2a9f260..55fa51c0cd9d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7701,6 +7701,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .migrate_timers = vmx_migrate_timers, .msr_filter_changed = vmx_msr_filter_changed, + .complete_emulated_msr = kvm_complete_insn_gp, .cpu_dirty_log_size = vmx_cpu_dirty_log_size, }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5c5a6aa8696d..75f10aee23b5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1642,12 +1642,12 @@ static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu) kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32); } - return kvm_complete_insn_gp(vcpu, err); + return kvm_x86_ops.complete_emulated_msr(vcpu, err); } static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu) { - return kvm_complete_insn_gp(vcpu, vcpu->run->msr.error); + return kvm_x86_ops.complete_emulated_msr(vcpu, vcpu->run->msr.error); } static u64 kvm_msr_reason(int r) @@ -1719,7 +1719,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) trace_kvm_msr_read_ex(ecx); } - return kvm_complete_insn_gp(vcpu, r); + return kvm_x86_ops.complete_emulated_msr(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); @@ -1745,7 +1745,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) else trace_kvm_msr_write_ex(ecx, data); - return kvm_complete_insn_gp(vcpu, r); + return kvm_x86_ops.complete_emulated_msr(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); -- cgit v1.2.3 From f1c6366e304328de301be362eca905a3503ff33b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 14 Dec 2020 10:29:50 -0500 Subject: KVM: SVM: Add required changes to support intercepts under SEV-ES When a guest is running under SEV-ES, the hypervisor cannot access the guest register state. 
There are numerous places in the KVM code where certain registers are accessed that are not allowed to be accessed (e.g. RIP, CR0, etc). Add checks to prevent register accesses and add intercept update support at various points within the KVM code. Also, when handling a VMGEXIT, exceptions are passed back through the GHCB. Since the RDMSR/WRMSR intercepts (may) inject a #GP on error, update the SVM intercepts to handle this for SEV-ES guests. Signed-off-by: Tom Lendacky [Redo MSR part using the .complete_emulated_msr callback. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 3 +- arch/x86/kvm/svm/svm.c | 83 ++++++++++++++++++++++++++++++++++++++++------ arch/x86/kvm/x86.c | 11 ++++-- 3 files changed, 84 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 1edf24f51b53..bce28482d63d 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -178,7 +178,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define LBR_CTL_ENABLE_MASK BIT_ULL(0) #define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) -#define SVM_INTERRUPT_SHADOW_MASK 1 +#define SVM_INTERRUPT_SHADOW_MASK BIT_ULL(0) +#define SVM_GUEST_INTERRUPT_MASK BIT_ULL(1) #define SVM_IOIO_STR_SHIFT 2 #define SVM_IOIO_REP_SHIFT 3 diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4067d511be08..db81fb131033 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "trace.h" @@ -339,6 +340,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * SEV-ES does not expose the next RIP. The RIP update is controlled by + * the type of exit and the #VC handler in the guest. + */ + if (sev_es_guest(vcpu->kvm)) + goto done; + if (nrips && svm->vmcb->control.next_rip != 0) { WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; @@ -350,6 +358,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) } else { kvm_rip_write(vcpu, svm->next_rip); } + +done: svm_set_interrupt_shadow(vcpu, 0); return 1; @@ -1651,9 +1661,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) static void update_cr0_intercept(struct vcpu_svm *svm) { - ulong gcr0 = svm->vcpu.arch.cr0; - u64 *hcr0 = &svm->vmcb->save.cr0; + ulong gcr0; + u64 *hcr0; + + /* + * SEV-ES guests must always keep the CR intercepts cleared. CR + * tracking is done using the CR write traps. 
+ */ + if (sev_es_guest(svm->vcpu.kvm)) + return; + gcr0 = svm->vcpu.arch.cr0; + hcr0 = &svm->vmcb->save.cr0; *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | (gcr0 & SVM_CR0_SELECTIVE_MASK); @@ -1673,7 +1692,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) struct vcpu_svm *svm = to_svm(vcpu); #ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME) { + if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->arch.efer |= EFER_LMA; svm->vmcb->save.efer |= EFER_LMA | EFER_LME; @@ -2583,6 +2602,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 0; } +static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (!sev_es_guest(svm->vcpu.kvm) || !err) + return kvm_complete_insn_gp(&svm->vcpu, err); + + ghcb_set_sw_exit_info_1(svm->ghcb, 1); + ghcb_set_sw_exit_info_2(svm->ghcb, + X86_TRAP_GP | + SVM_EVTINJ_TYPE_EXEPT | + SVM_EVTINJ_VALID); + return 1; +} + static int rdmsr_interception(struct vcpu_svm *svm) { return kvm_emulate_rdmsr(&svm->vcpu); @@ -2801,7 +2834,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm) static int pause_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - bool in_kernel = (svm_get_cpl(vcpu) == 0); + bool in_kernel; + + /* + * CPL is not made available for an SEV-ES guest, therefore + * vcpu->arch.preempted_in_kernel can never be true. Just + * set in_kernel to false as well. + */ + in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0; if (!kvm_pause_in_guest(vcpu->kvm)) grow_ple_window(vcpu); @@ -3064,10 +3104,13 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE)) - vcpu->arch.cr0 = svm->vmcb->save.cr0; - if (npt_enabled) - vcpu->arch.cr3 = svm->vmcb->save.cr3; + /* SEV-ES guests must use the CR write traps to track CR registers. */ + if (!sev_es_guest(vcpu->kvm)) { + if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE)) + vcpu->arch.cr0 = svm->vmcb->save.cr0; + if (npt_enabled) + vcpu->arch.cr3 = svm->vmcb->save.cr3; + } if (is_guest_mode(vcpu)) { int vmexit; @@ -3179,6 +3222,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * SEV-ES guests must always keep the CR intercepts cleared. CR + * tracking is done using the CR write traps. + */ + if (sev_es_guest(vcpu->kvm)) + return; + if (nested_svm_virtualize_tpr(vcpu)) return; @@ -3250,7 +3300,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; - if (is_guest_mode(vcpu)) { + if (sev_es_guest(svm->vcpu.kvm)) { + /* + * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask + * bit to determine the state of the IF flag. + */ + if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK)) + return true; + } else if (is_guest_mode(vcpu)) { /* As long as interrupts are being delivered... */ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF) @@ -3432,6 +3489,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) svm->vcpu.arch.nmi_injected = true; break; case SVM_EXITINTINFO_TYPE_EXEPT: + /* + * Never re-inject a #VC exception. + */ + if (vector == X86_TRAP_VC) + break; + /* * In case of software exceptions, do not reinject the vector, * but re-execute the instruction instead. 
Rewind RIP first @@ -4306,7 +4369,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .apic_init_signal_blocked = svm_apic_init_signal_blocked, .msr_filter_changed = svm_msr_filter_changed, - .complete_emulated_msr = kvm_complete_insn_gp, + .complete_emulated_msr = svm_complete_emulated_msr, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 75f10aee23b5..9685d056d808 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4006,7 +4006,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; - if (vcpu->preempted) + if (vcpu->preempted && !vcpu->arch.guest_state_protected) vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu); /* @@ -8149,7 +8149,14 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; - kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + /* + * if_flag is obsolete and useless, so do not bother + * setting it for SEV-ES guests. Userspace can just + * use kvm_run->ready_for_interrupt_injection. + */ + kvm_run->if_flag = !vcpu->arch.guest_state_protected + && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); -- cgit v1.2.3 From 8d4846b9b15045598d760470789716fb08b9b317 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:43 -0600 Subject: KVM: SVM: Prevent debugging under SEV-ES Since the guest register state of an SEV-ES guest is encrypted, debugging is not supported. Update the code to prevent guest debugging when the guest has protected state. Additionally, an SEV-ES guest must only and always intercept DR7 reads and writes. Update set_dr_intercepts() and clr_dr_intercepts() to account for this. 
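
Editor's note, not part of the patch: the user-visible effect is that KVM_SET_GUEST_DEBUG is expected to fail once guest state is protected. A minimal userspace sketch (vcpu_fd is assumed to come from the usual KVM_CREATE_VM/KVM_CREATE_VCPU sequence for an already-launched SEV-ES guest):

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Illustrative only: vcpu_fd refers to a vCPU of a launched SEV-ES
 * guest, i.e. one whose register state is already encrypted. */
static void try_enable_guest_debug(int vcpu_fd)
{
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	};

	/* With this change the ioctl should fail with EINVAL, since the
	 * encrypted register state cannot be inspected or single-stepped. */
	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
		perror("KVM_SET_GUEST_DEBUG");
}
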
Signed-off-by: Tom Lendacky Message-Id: <8db966fa2f9803d6454ce773863025d0e2e7f3cc.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 9 +++++++++ arch/x86/kvm/svm/svm.h | 37 +++++++++++++++++++++++-------------- arch/x86/kvm/x86.c | 3 +++ 3 files changed, 35 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index db81fb131033..bec427acab20 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1805,6 +1805,9 @@ static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) { struct vmcb *vmcb = svm->vmcb; + if (svm->vcpu.arch.guest_state_protected) + return; + if (unlikely(value != vmcb->save.dr6)) { vmcb->save.dr6 = value; vmcb_mark_dirty(vmcb, VMCB_DR); @@ -1815,6 +1818,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (vcpu->arch.guest_state_protected) + return; + get_debugreg(vcpu->arch.db[0], 0); get_debugreg(vcpu->arch.db[1], 1); get_debugreg(vcpu->arch.db[2], 2); @@ -1833,6 +1839,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) { struct vcpu_svm *svm = to_svm(vcpu); + if (vcpu->arch.guest_state_protected) + return; + svm->vmcb->save.dr7 = value; vmcb_mark_dirty(svm->vmcb, VMCB_DR); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index f96a0a66ca35..abfe53d6b3dc 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -272,21 +272,24 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm) { struct vmcb *vmcb = get_host_vmcb(svm); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); + if (!sev_es_guest(svm->vcpu.kvm)) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); + } + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); recalc_intercepts(svm); @@ -298,6 +301,12 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm) vmcb->control.intercepts[INTERCEPT_DR] = 0; + /* DR7 
access must remain intercepted for an SEV-ES guest */ + if (sev_es_guest(svm->vcpu.kvm)) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + } + recalc_intercepts(svm); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9685d056d808..d88e334b19e7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9664,6 +9664,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, unsigned long rflags; int i, r; + if (vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { -- cgit v1.2.3 From bc624d9f1bbbfd6ae7057437cd3fcfef17066399 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:44 -0600 Subject: KVM: SVM: Do not allow instruction emulation under SEV-ES When a guest is running as an SEV-ES guest, it is not possible to emulate instructions. Add support to prevent instruction emulation. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bec427acab20..cade703d6edb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4157,6 +4157,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i bool smep, smap, is_user; unsigned long cr4; + /* + * When the guest is an SEV-ES guest, emulation is not possible. + */ + if (sev_es_guest(vcpu->kvm)) + return false; + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. * -- cgit v1.2.3 From 8164a5ffe4c65291efecc03a590c978fd14c240f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:45 -0600 Subject: KVM: SVM: Cannot re-initialize the VMCB after shutdown with SEV-ES When a SHUTDOWN VMEXIT is encountered, normally the VMCB is re-initialized so that the guest can be re-launched. But when a guest is running as an SEV-ES guest, the VMSA cannot be re-initialized because it has been encrypted. For now, just return -EINVAL to prevent a possible attempt at a guest reset. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index cade703d6edb..19d1c9707fb7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2010,6 +2010,13 @@ static int shutdown_interception(struct vcpu_svm *svm) { struct kvm_run *kvm_run = svm->vcpu.run; + /* + * The VM save area has already been encrypted so it + * cannot be reinitialized - just terminate. + */ + if (sev_es_guest(svm->vcpu.kvm)) + return -EINVAL; + /* * VMCB is undefined after a SHUTDOWN intercept * so reinitialize it. -- cgit v1.2.3 From e9093fd49285ff7b5e4d3f8b528f5b43445c5f5d Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:46 -0600 Subject: KVM: SVM: Prepare for SEV-ES exit handling in the sev.c file This is a pre-patch to consolidate some exit handling code into callable functions. Follow-on patches for SEV-ES exit handling will then be able to use them from the sev.c file. 
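
Editor's note: the value of the split is that the table dispatch and the invalid-exit reporting become callable from more than one place (the VMGEXIT path added later reuses svm_invoke_exit_handler()). A stand-alone sketch of the pattern, with purely illustrative handler names and exit codes:

#include <stdio.h>

typedef int (*exit_handler_t)(void);

static int handle_cpuid(void) { puts("cpuid"); return 1; }
static int handle_hlt(void)   { puts("hlt");   return 1; }

static exit_handler_t handlers[] = {
	[0x72] = handle_cpuid,
	[0x78] = handle_hlt,
};

/* Validation and error reporting live in one place... */
static int invoke_exit_handler(unsigned long code)
{
	if (code >= sizeof(handlers) / sizeof(handlers[0]) || !handlers[code]) {
		fprintf(stderr, "unexpected exit reason 0x%lx\n", code);
		return -1;	/* caller reports an internal error */
	}
	return handlers[code]();
}

/* ...so any number of entry points can share them. */
int main(void)
{
	invoke_exit_handler(0x78);	/* dispatched */
	invoke_exit_handler(0x123);	/* reported, not dispatched */
	return 0;
}
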
Signed-off-by: Tom Lendacky Message-Id: <5b8b0ffca8137f3e1e257f83df9f5c881c8a96a3.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 64 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 19d1c9707fb7..06ea34d61924 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3097,6 +3097,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) "excp_to:", save->last_excp_to); } +static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) +{ + if (exit_code < ARRAY_SIZE(svm_exit_handlers) && + svm_exit_handlers[exit_code]) + return 0; + + vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code); + dump_vmcb(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = exit_code; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + + return -EINVAL; +} + +static int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code) +{ + if (svm_handle_invalid_exit(&svm->vcpu, exit_code)) + return 0; + +#ifdef CONFIG_RETPOLINE + if (exit_code == SVM_EXIT_MSR) + return msr_interception(svm); + else if (exit_code == SVM_EXIT_VINTR) + return interrupt_window_interception(svm); + else if (exit_code == SVM_EXIT_INTR) + return intr_interception(svm); + else if (exit_code == SVM_EXIT_HLT) + return halt_interception(svm); + else if (exit_code == SVM_EXIT_NPF) + return npf_interception(svm); +#endif + return svm_exit_handlers[exit_code](svm); +} + static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code) { @@ -3163,32 +3200,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; - if (exit_code >= ARRAY_SIZE(svm_exit_handlers) - || !svm_exit_handlers[exit_code]) { - vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code); - dump_vmcb(vcpu); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 2; - vcpu->run->internal.data[0] = exit_code; - vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; - return 0; - } - -#ifdef CONFIG_RETPOLINE - if (exit_code == SVM_EXIT_MSR) - return msr_interception(svm); - else if (exit_code == SVM_EXIT_VINTR) - return interrupt_window_interception(svm); - else if (exit_code == SVM_EXIT_INTR) - return intr_interception(svm); - else if (exit_code == SVM_EXIT_HLT) - return halt_interception(svm); - else if (exit_code == SVM_EXIT_NPF) - return npf_interception(svm); -#endif - return svm_exit_handlers[exit_code](svm); + return svm_invoke_exit_handler(svm, exit_code); } static void reload_tss(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 291bd20d5d88814a73d43b55b9428feab2f28094 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:47 -0600 Subject: KVM: SVM: Add initial support for a VMGEXIT VMEXIT SEV-ES adds a new VMEXIT reason code, VMGEXIT. Initial support for a VMGEXIT includes mapping the GHCB based on the guest GPA, which is obtained from a new VMCB field, and then validating the required inputs for the VMGEXIT exit reason. 
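
For context, the guest side of this exchange (not part of this patch; the helper below is only an approximation of the guest's #VC handling) fills in the GHCB, publishes its GPA through the GHCB MSR, and issues VMGEXIT. That GPA is what shows up in the new VMCB field handled here:

/* Guest-side sketch only; helper names are approximate. */
static void guest_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code,
			       u64 exit_info_1, u64 exit_info_2)
{
	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	/* The GHCB GPA written here is what KVM reads back from
	 * vmcb->control.ghcb_gpa and maps before validation. */
	wrmsrl(MSR_AMD64_SEV_ES_GHCB, __pa(ghcb));
	VMGEXIT();	/* rep; vmmcall */
}
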
Since many of the VMGEXIT exit reasons correspond to existing VMEXIT reasons, the information from the GHCB is copied into the VMCB control exit code areas and KVM register areas. The standard exit handlers are invoked, similar to standard VMEXIT processing. Before restarting the vCPU, the GHCB is updated with any registers that have been updated by the hypervisor. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 2 +- arch/x86/include/uapi/asm/svm.h | 7 ++ arch/x86/kvm/svm/sev.c | 272 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 6 +- arch/x86/kvm/svm/svm.h | 8 ++ 5 files changed, 292 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index bce28482d63d..caa8628f5fba 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -130,7 +130,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 exit_int_info_err; u64 nested_ctl; u64 avic_vapic_bar; - u8 reserved_4[8]; + u64 ghcb_gpa; u32 event_inj; u32 event_inj_err; u64 nested_cr3; diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index f1d8307454e0..09f723945425 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -81,6 +81,7 @@ #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +#define SVM_EXIT_VMGEXIT 0x403 /* SEV-ES software-defined VMGEXIT events */ #define SVM_VMGEXIT_MMIO_READ 0x80000001 @@ -187,6 +188,12 @@ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ + { SVM_EXIT_VMGEXIT, "vmgexit" }, \ + { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \ + { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \ + { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ + { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ + { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 501adb43ece3..0244f4f244b4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -18,6 +18,7 @@ #include "x86.h" #include "svm.h" +#include "cpuid.h" static int sev_flush_asids(void); static DECLARE_RWSEM(sev_deactivate_lock); @@ -1257,11 +1258,226 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) __free_page(virt_to_page(svm->vmsa)); } +static void dump_ghcb(struct vcpu_svm *svm) +{ + struct ghcb *ghcb = svm->ghcb; + unsigned int nbits; + + /* Re-use the dump_invalid_vmcb module parameter */ + if (!dump_invalid_vmcb) { + pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); + return; + } + + nbits = sizeof(ghcb->save.valid_bitmap) * 8; + + pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa); + pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", + ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb)); + pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", + ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb)); + pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", + ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb)); + pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch", + ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb)); + pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap); +} + +static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = 
&svm->vcpu; + struct ghcb *ghcb = svm->ghcb; + + /* + * The GHCB protocol so far allows for the following data + * to be returned: + * GPRs RAX, RBX, RCX, RDX + * + * Copy their values to the GHCB if they are dirty. + */ + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX)) + ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX)) + ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX)) + ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX)) + ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); +} + +static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + struct kvm_vcpu *vcpu = &svm->vcpu; + struct ghcb *ghcb = svm->ghcb; + u64 exit_code; + + /* + * The GHCB protocol so far allows for the following data + * to be supplied: + * GPRs RAX, RBX, RCX, RDX + * XCR0 + * CPL + * + * VMMCALL allows the guest to provide extra registers. KVM also + * expects RSI for hypercalls, so include that, too. + * + * Copy their values to the appropriate location if supplied. + */ + memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); + + vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb); + + svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb); + + if (ghcb_xcr0_is_valid(ghcb)) { + vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); + kvm_update_cpuid_runtime(vcpu); + } + + /* Copy the GHCB exit information into the VMCB fields */ + exit_code = ghcb_get_sw_exit_code(ghcb); + control->exit_code = lower_32_bits(exit_code); + control->exit_code_hi = upper_32_bits(exit_code); + control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb); + control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb); + + /* Clear the valid entries fields */ + memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); +} + +static int sev_es_validate_vmgexit(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu; + struct ghcb *ghcb; + u64 exit_code = 0; + + ghcb = svm->ghcb; + + /* Only GHCB Usage code 0 is supported */ + if (ghcb->ghcb_usage) + goto vmgexit_err; + + /* + * Retrieve the exit code now even though is may not be marked valid + * as it could help with debugging. 
+ */ + exit_code = ghcb_get_sw_exit_code(ghcb); + + if (!ghcb_sw_exit_code_is_valid(ghcb) || + !ghcb_sw_exit_info_1_is_valid(ghcb) || + !ghcb_sw_exit_info_2_is_valid(ghcb)) + goto vmgexit_err; + + switch (ghcb_get_sw_exit_code(ghcb)) { + case SVM_EXIT_READ_DR7: + break; + case SVM_EXIT_WRITE_DR7: + if (!ghcb_rax_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_RDTSC: + break; + case SVM_EXIT_RDPMC: + if (!ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_CPUID: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + if (ghcb_get_rax(ghcb) == 0xd) + if (!ghcb_xcr0_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_INVD: + break; + case SVM_EXIT_IOIO: + if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) + if (!ghcb_rax_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_MSR: + if (!ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + if (ghcb_get_sw_exit_info_1(ghcb)) { + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rdx_is_valid(ghcb)) + goto vmgexit_err; + } + break; + case SVM_EXIT_VMMCALL: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_cpl_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_RDTSCP: + break; + case SVM_EXIT_WBINVD: + break; + case SVM_EXIT_MONITOR: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rcx_is_valid(ghcb) || + !ghcb_rdx_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_MWAIT: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_VMGEXIT_UNSUPPORTED_EVENT: + break; + default: + goto vmgexit_err; + } + + return 0; + +vmgexit_err: + vcpu = &svm->vcpu; + + if (ghcb->ghcb_usage) { + vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", + ghcb->ghcb_usage); + } else { + vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n", + exit_code); + dump_ghcb(svm); + } + + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = exit_code; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + + return -EINVAL; +} + +static void pre_sev_es_run(struct vcpu_svm *svm) +{ + if (!svm->ghcb) + return; + + sev_es_sync_to_ghcb(svm); + + kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true); + svm->ghcb = NULL; +} + void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int asid = sev_get_asid(svm->vcpu.kvm); + /* Perform any SEV-ES pre-run actions */ + pre_sev_es_run(svm); + /* Assign the asid allocated with this SEV guest */ svm->asid = asid; @@ -1279,3 +1495,59 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } + +static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) +{ + return -EINVAL; +} + +int sev_handle_vmgexit(struct vcpu_svm *svm) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + u64 ghcb_gpa, exit_code; + struct ghcb *ghcb; + int ret; + + /* Validate the GHCB */ + ghcb_gpa = control->ghcb_gpa; + if (ghcb_gpa & GHCB_MSR_INFO_MASK) + return sev_handle_vmgexit_msr_protocol(svm); + + if (!ghcb_gpa) { + vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n"); + return -EINVAL; + } + + if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) { + /* Unable to map GHCB from guest */ + vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n", + ghcb_gpa); + return -EINVAL; + } + + svm->ghcb = svm->ghcb_map.hva; + 
ghcb = svm->ghcb_map.hva; + + exit_code = ghcb_get_sw_exit_code(ghcb); + + ret = sev_es_validate_vmgexit(svm); + if (ret) + return ret; + + sev_es_sync_from_ghcb(svm); + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + + ret = -EINVAL; + switch (exit_code) { + case SVM_VMGEXIT_UNSUPPORTED_EVENT: + vcpu_unimpl(&svm->vcpu, + "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", + control->exit_info_1, control->exit_info_2); + break; + default: + ret = svm_invoke_exit_handler(svm, exit_code); + } + + return ret; +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 06ea34d61924..310de05d2479 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -194,7 +194,7 @@ module_param(sev, int, 0444); int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev_es, int, 0444); -static bool __read_mostly dump_invalid_vmcb = 0; +bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); static u8 rsm_ins_bytes[] = "\x0f\xaa"; @@ -2977,6 +2977,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, + [SVM_EXIT_VMGEXIT] = sev_handle_vmgexit, }; static void dump_vmcb(struct kvm_vcpu *vcpu) @@ -3018,6 +3019,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar); + pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa); pr_err("%-20s%08x\n", "event_inj:", control->event_inj); pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext); @@ -3114,7 +3116,7 @@ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) return -EINVAL; } -static int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code) +int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code) { if (svm_handle_invalid_exit(&svm->vcpu, exit_code)) return 0; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index abfe53d6b3dc..89bcb26977e5 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -17,6 +17,7 @@ #include #include +#include #include @@ -172,6 +173,7 @@ struct vcpu_svm { /* SEV-ES support */ struct vmcb_save_area *vmsa; struct ghcb *ghcb; + struct kvm_host_map ghcb_map; }; struct svm_cpu_data { @@ -390,6 +392,7 @@ static inline bool gif_set(struct vcpu_svm *svm) extern int sev; extern int sev_es; +extern bool dump_invalid_vmcb; u32 svm_msrpm_offset(u32 msr); u32 *svm_vcpu_alloc_msrpm(void); @@ -405,6 +408,7 @@ bool svm_smi_blocked(struct kvm_vcpu *vcpu); bool svm_nmi_blocked(struct kvm_vcpu *vcpu); bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); void svm_set_gif(struct vcpu_svm *svm, bool value); +int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code); /* nested.c */ @@ -510,6 +514,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ +#define GHCB_MSR_INFO_POS 0 +#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) + extern unsigned int max_sev_asid; static inline bool svm_sev_enabled(void) @@ -527,5 +534,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu); void __init sev_hardware_setup(void); void sev_hardware_teardown(void); void sev_free_vcpu(struct kvm_vcpu *vcpu); +int sev_handle_vmgexit(struct vcpu_svm *svm); #endif -- 
cgit v1.2.3 From 1edc14599e06fdf23dcf7516f73f09091853eb9a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:49 -0600 Subject: KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x002 The GHCB specification defines a GHCB MSR protocol using the lower 12-bits of the GHCB MSR (in the hypervisor this corresponds to the GHCB GPA field in the VMCB). Function 0x002 is a request to set the GHCB MSR value to the SEV INFO as per the specification via the VMCB GHCB GPA field. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 26 +++++++++++++++++++++++++- arch/x86/kvm/svm/svm.h | 17 +++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0244f4f244b4..2246e4f3e4f3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -20,6 +20,7 @@ #include "svm.h" #include "cpuid.h" +static u8 sev_enc_bit; static int sev_flush_asids(void); static DECLARE_RWSEM(sev_deactivate_lock); static DEFINE_MUTEX(sev_bitmap_lock); @@ -1140,6 +1141,9 @@ void __init sev_hardware_setup(void) /* Retrieve SEV CPUID information */ cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); + /* Set encryption bit location for SEV-ES guests */ + sev_enc_bit = ebx & 0x3f; + /* Maximum number of encrypted guests supported simultaneously */ max_sev_asid = ecx; @@ -1496,9 +1500,29 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } +static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) +{ + svm->vmcb->control.ghcb_gpa = value; +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { - return -EINVAL; + struct vmcb_control_area *control = &svm->vmcb->control; + u64 ghcb_info; + + ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; + + switch (ghcb_info) { + case GHCB_MSR_SEV_INFO_REQ: + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + GHCB_VERSION_MIN, + sev_enc_bit)); + break; + default: + return -EINVAL; + } + + return 1; } int sev_handle_vmgexit(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 89bcb26977e5..546f8d05e81e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -514,9 +514,26 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ +#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MIN 1ULL + #define GHCB_MSR_INFO_POS 0 #define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 +#define GHCB_MSR_VER_MAX_POS 48 +#define GHCB_MSR_VER_MAX_MASK 0xffff +#define GHCB_MSR_VER_MIN_POS 32 +#define GHCB_MSR_VER_MIN_MASK 0xffff +#define GHCB_MSR_CBIT_POS 24 +#define GHCB_MSR_CBIT_MASK 0xff +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ + (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ + (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ + GHCB_MSR_SEV_INFO_RESP) + extern unsigned int max_sev_asid; static inline bool svm_sev_enabled(void) -- cgit v1.2.3 From d36946679ef6a6fb32b655265602c174feb0ce5e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:50 -0600 Subject: KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x004 The GHCB specification defines a GHCB MSR protocol using the lower 12-bits of the GHCB MSR (in the hypervisor this corresponds to the GHCB GPA field in the VMCB). Function 0x004 is a request for CPUID information. 
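
To make the encoding described here (and continued below) concrete, the request and response pack into the GHCB MSR as in this sketch, using the GHCB_MSR_CPUID_* layout added to svm.h further down; the helper names are illustrative only:

static u64 example_cpuid_req(u32 fn, u32 reg)
{
	/* CPUID function in bits 63:32, requested result register in
	 * bits 31:30, MSR protocol function 0x004 in the low 12 bits. */
	return ((u64)fn << GHCB_MSR_CPUID_FUNC_POS) |
	       (((u64)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) |
	       GHCB_MSR_CPUID_REQ;
}

static u64 example_cpuid_resp(u64 reg_value)
{
	/* Result register value in bits 63:32, response code 0x005 below. */
	return ((reg_value & GHCB_MSR_CPUID_VALUE_MASK) << GHCB_MSR_CPUID_VALUE_POS) |
	       GHCB_MSR_CPUID_RESP;
}
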
Only a single CPUID result register can be sent per invocation, so the protocol defines the register that is requested. The GHCB MSR value is set to the CPUID register value as per the specification via the VMCB GHCB GPA field. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/svm/svm.h | 9 ++++++++ 2 files changed, 63 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 2246e4f3e4f3..fbb80b582843 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1500,6 +1500,18 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } +static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, + unsigned int pos) +{ + svm->vmcb->control.ghcb_gpa &= ~(mask << pos); + svm->vmcb->control.ghcb_gpa |= (value & mask) << pos; +} + +static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos) +{ + return (svm->vmcb->control.ghcb_gpa >> pos) & mask; +} + static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) { svm->vmcb->control.ghcb_gpa = value; @@ -1508,7 +1520,9 @@ static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; + struct kvm_vcpu *vcpu = &svm->vcpu; u64 ghcb_info; + int ret = 1; ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; @@ -1518,11 +1532,49 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) GHCB_VERSION_MIN, sev_enc_bit)); break; + case GHCB_MSR_CPUID_REQ: { + u64 cpuid_fn, cpuid_reg, cpuid_value; + + cpuid_fn = get_ghcb_msr_bits(svm, + GHCB_MSR_CPUID_FUNC_MASK, + GHCB_MSR_CPUID_FUNC_POS); + + /* Initialize the registers needed by the CPUID intercept */ + vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn; + vcpu->arch.regs[VCPU_REGS_RCX] = 0; + + ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID); + if (!ret) { + ret = -EINVAL; + break; + } + + cpuid_reg = get_ghcb_msr_bits(svm, + GHCB_MSR_CPUID_REG_MASK, + GHCB_MSR_CPUID_REG_POS); + if (cpuid_reg == 0) + cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX]; + else if (cpuid_reg == 1) + cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX]; + else if (cpuid_reg == 2) + cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX]; + else + cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX]; + + set_ghcb_msr_bits(svm, cpuid_value, + GHCB_MSR_CPUID_VALUE_MASK, + GHCB_MSR_CPUID_VALUE_POS); + + set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP, + GHCB_MSR_INFO_MASK, + GHCB_MSR_INFO_POS); + break; + } default: - return -EINVAL; + ret = -EINVAL; } - return 1; + return ret; } int sev_handle_vmgexit(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 546f8d05e81e..9dd8429f2b27 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -534,6 +534,15 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ GHCB_MSR_SEV_INFO_RESP) +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 + extern unsigned int max_sev_asid; static inline bool svm_sev_enabled(void) -- cgit v1.2.3 From e1d71116b64a54c3948d2692d4338e643408c411 Mon Sep 17 00:00:00 2001 From: Tom Lendacky 
Date: Thu, 10 Dec 2020 11:09:51 -0600 Subject: KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x100 The GHCB specification defines a GHCB MSR protocol using the lower 12-bits of the GHCB MSR (in the hypervisor this corresponds to the GHCB GPA field in the VMCB). Function 0x100 is a request for termination of the guest. The guest has encountered some situation for which it has requested to be terminated. The GHCB MSR value contains the reason for the request. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 13 +++++++++++++ arch/x86/kvm/svm/svm.h | 6 ++++++ 2 files changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index fbb80b582843..db123562e7d5 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1570,6 +1570,19 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) GHCB_MSR_INFO_POS); break; } + case GHCB_MSR_TERM_REQ: { + u64 reason_set, reason_code; + + reason_set = get_ghcb_msr_bits(svm, + GHCB_MSR_TERM_REASON_SET_MASK, + GHCB_MSR_TERM_REASON_SET_POS); + reason_code = get_ghcb_msr_bits(svm, + GHCB_MSR_TERM_REASON_MASK, + GHCB_MSR_TERM_REASON_POS); + pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", + reason_set, reason_code); + fallthrough; + } default: ret = -EINVAL; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9dd8429f2b27..fc69bc2e0cad 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -543,6 +543,12 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); #define GHCB_MSR_CPUID_REG_POS 30 #define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff + extern unsigned int max_sev_asid; static inline bool svm_sev_enabled(void) -- cgit v1.2.3 From d523ab6ba2753bd41b4447ae48024182cb4da94f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:48 -0600 Subject: KVM: SVM: Create trace events for VMGEXIT processing Add trace events for entry to and exit from VMGEXIT processing. The vCPU id and the exit reason will be common for the trace events. The exit info fields will represent the input and output values for the entry and exit events, respectively. 
Signed-off-by: Tom Lendacky Message-Id: <25357dca49a38372e8f483753fb0c1c2a70a6898.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 6 ++++++ arch/x86/kvm/trace.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 2 ++ 3 files changed, 61 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index db123562e7d5..089951cbe28e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -15,10 +15,12 @@ #include #include #include +#include #include "x86.h" #include "svm.h" #include "cpuid.h" +#include "trace.h" static u8 sev_enc_bit; static int sev_flush_asids(void); @@ -1468,6 +1470,8 @@ static void pre_sev_es_run(struct vcpu_svm *svm) if (!svm->ghcb) return; + trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb); + sev_es_sync_to_ghcb(svm); kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true); @@ -1617,6 +1621,8 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) svm->ghcb = svm->ghcb_map.hva; ghcb = svm->ghcb_map.hva; + trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb); + exit_code = ghcb_get_sw_exit_code(ghcb); ret = sev_es_validate_vmgexit(svm); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index aef960f90f26..7da931a511c9 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1578,6 +1578,59 @@ TRACE_EVENT(kvm_hv_syndbg_get_msr, __entry->vcpu_id, __entry->vp_index, __entry->msr, __entry->data) ); + +/* + * Tracepoint for the start of VMGEXIT processing + */ +TRACE_EVENT(kvm_vmgexit_enter, + TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb), + TP_ARGS(vcpu_id, ghcb), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(u64, exit_reason) + __field(u64, info1) + __field(u64, info2) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->exit_reason = ghcb->save.sw_exit_code; + __entry->info1 = ghcb->save.sw_exit_info_1; + __entry->info2 = ghcb->save.sw_exit_info_2; + ), + + TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx", + __entry->vcpu_id, __entry->exit_reason, + __entry->info1, __entry->info2) +); + +/* + * Tracepoint for the end of VMGEXIT processing + */ +TRACE_EVENT(kvm_vmgexit_exit, + TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb), + TP_ARGS(vcpu_id, ghcb), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(u64, exit_reason) + __field(u64, info1) + __field(u64, info2) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->exit_reason = ghcb->save.sw_exit_code; + __entry->info1 = ghcb->save.sw_exit_info_1; + __entry->info2 = ghcb->save.sw_exit_info_2; + ), + + TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx", + __entry->vcpu_id, __entry->exit_reason, + __entry->info1, __entry->info2) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d88e334b19e7..10704bd8c38a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11314,3 +11314,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); -- cgit v1.2.3 From 59e38b58de283f76c533a2da416abf93bfd9ea41 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:52 -0600 Subject: KVM: SVM: Create trace events for VMGEXIT MSR protocol processing Add trace events for entry 
to and exit from VMGEXIT MSR protocol processing. The vCPU will be common for the trace events. The MSR protocol processing is guided by the GHCB GPA in the VMCB, so the GHCB GPA will represent the input and output values for the entry and exit events, respectively. Additionally, the exit event will contain the return code for the event. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 6 ++++++ arch/x86/kvm/trace.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 2 ++ 3 files changed, 52 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 089951cbe28e..2a36efadf9a3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1530,6 +1530,9 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; + trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id, + control->ghcb_gpa); + switch (ghcb_info) { case GHCB_MSR_SEV_INFO_REQ: set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, @@ -1591,6 +1594,9 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) ret = -EINVAL; } + trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id, + control->ghcb_gpa, ret); + return ret; } diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 7da931a511c9..2de30c20bc26 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1631,6 +1631,50 @@ TRACE_EVENT(kvm_vmgexit_exit, __entry->info1, __entry->info2) ); +/* + * Tracepoint for the start of VMGEXIT MSR procotol processing + */ +TRACE_EVENT(kvm_vmgexit_msr_protocol_enter, + TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa), + TP_ARGS(vcpu_id, ghcb_gpa), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(u64, ghcb_gpa) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->ghcb_gpa = ghcb_gpa; + ), + + TP_printk("vcpu %u, ghcb_gpa %016llx", + __entry->vcpu_id, __entry->ghcb_gpa) +); + +/* + * Tracepoint for the end of VMGEXIT MSR procotol processing + */ +TRACE_EVENT(kvm_vmgexit_msr_protocol_exit, + TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa, int result), + TP_ARGS(vcpu_id, ghcb_gpa, result), + + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(u64, ghcb_gpa) + __field(int, result) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->ghcb_gpa = ghcb_gpa; + __entry->result = result; + ), + + TP_printk("vcpu %u, ghcb_gpa %016llx, result %d", + __entry->vcpu_id, __entry->ghcb_gpa, __entry->result) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 10704bd8c38a..02c6fb166fc3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11316,3 +11316,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); -- cgit v1.2.3 From 8f423a80d299a5b3964b8af005d1aab4e5e9106a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:53 -0600 Subject: KVM: SVM: Support MMIO for an SEV-ES guest For an SEV-ES guest, MMIO is performed to a shared (un-encrypted) page so that both the hypervisor and guest can read or write to it and each see the contents. 
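
For orientation, the guest side of an MMIO write through the GHCB looks roughly like the sketch below (not part of this patch; helper names are approximate, and the VMGEXIT exit codes involved are described next): the data is staged in the shared buffer, sw_scratch points at it, and exit_info_1/exit_info_2 carry the MMIO GPA and length that the new handler passes on.

static void guest_mmio_write8(struct ghcb *ghcb, u64 mmio_gpa, u64 value)
{
	/* Stage the data in the shared (un-encrypted) buffer and tell
	 * the hypervisor where it is via the scratch pointer. */
	memcpy(ghcb->shared_buffer, &value, sizeof(value));
	ghcb_set_sw_scratch(ghcb, __pa(ghcb->shared_buffer));

	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_MMIO_WRITE);
	ghcb_set_sw_exit_info_1(ghcb, mmio_gpa);	/* target GPA      */
	ghcb_set_sw_exit_info_2(ghcb, sizeof(value));	/* length in bytes */

	/* ...followed by the GHCB MSR write and VMGEXIT as in the
	 * earlier guest-side sketch. */
}
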
The GHCB specification provides software-defined VMGEXIT exit codes to indicate a request for an MMIO read or an MMIO write. Add support to recognize the MMIO requests and invoke SEV-ES specific routines that can complete the MMIO operation. These routines use common KVM support to complete the MMIO operation. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.h | 6 +++ arch/x86/kvm/x86.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.h | 5 ++ 4 files changed, 258 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 2a36efadf9a3..dc8ccc14241d 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1262,6 +1262,9 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) if (vcpu->arch.guest_state_protected) sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE); __free_page(virt_to_page(svm->vmsa)); + + if (svm->ghcb_sa_free) + kfree(svm->ghcb_sa); } static void dump_ghcb(struct vcpu_svm *svm) @@ -1436,6 +1439,11 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) !ghcb_rcx_is_valid(ghcb)) goto vmgexit_err; break; + case SVM_VMGEXIT_MMIO_READ: + case SVM_VMGEXIT_MMIO_WRITE: + if (!ghcb_sw_scratch_is_valid(ghcb)) + goto vmgexit_err; + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; default: @@ -1470,6 +1478,24 @@ static void pre_sev_es_run(struct vcpu_svm *svm) if (!svm->ghcb) return; + if (svm->ghcb_sa_free) { + /* + * The scratch area lives outside the GHCB, so there is a + * buffer that, depending on the operation performed, may + * need to be synced, then freed. + */ + if (svm->ghcb_sa_sync) { + kvm_write_guest(svm->vcpu.kvm, + ghcb_get_sw_scratch(svm->ghcb), + svm->ghcb_sa, svm->ghcb_sa_len); + svm->ghcb_sa_sync = false; + } + + kfree(svm->ghcb_sa); + svm->ghcb_sa = NULL; + svm->ghcb_sa_free = false; + } + trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb); sev_es_sync_to_ghcb(svm); @@ -1504,6 +1530,86 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } +#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) +static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + struct ghcb *ghcb = svm->ghcb; + u64 ghcb_scratch_beg, ghcb_scratch_end; + u64 scratch_gpa_beg, scratch_gpa_end; + void *scratch_va; + + scratch_gpa_beg = ghcb_get_sw_scratch(ghcb); + if (!scratch_gpa_beg) { + pr_err("vmgexit: scratch gpa not provided\n"); + return false; + } + + scratch_gpa_end = scratch_gpa_beg + len; + if (scratch_gpa_end < scratch_gpa_beg) { + pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", + len, scratch_gpa_beg); + return false; + } + + if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { + /* Scratch area begins within GHCB */ + ghcb_scratch_beg = control->ghcb_gpa + + offsetof(struct ghcb, shared_buffer); + ghcb_scratch_end = control->ghcb_gpa + + offsetof(struct ghcb, reserved_1); + + /* + * If the scratch area begins within the GHCB, it must be + * completely contained in the GHCB shared buffer area. 
+ */ + if (scratch_gpa_beg < ghcb_scratch_beg || + scratch_gpa_end > ghcb_scratch_end) { + pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", + scratch_gpa_beg, scratch_gpa_end); + return false; + } + + scratch_va = (void *)svm->ghcb; + scratch_va += (scratch_gpa_beg - control->ghcb_gpa); + } else { + /* + * The guest memory must be read into a kernel buffer, so + * limit the size + */ + if (len > GHCB_SCRATCH_AREA_LIMIT) { + pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", + len, GHCB_SCRATCH_AREA_LIMIT); + return false; + } + scratch_va = kzalloc(len, GFP_KERNEL); + if (!scratch_va) + return false; + + if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { + /* Unable to copy scratch area from guest */ + pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); + + kfree(scratch_va); + return false; + } + + /* + * The scratch area is outside the GHCB. The operation will + * dictate whether the buffer needs to be synced before running + * the vCPU next time (i.e. a read was requested so the data + * must be written back to the guest memory). + */ + svm->ghcb_sa_sync = sync; + svm->ghcb_sa_free = true; + } + + svm->ghcb_sa = scratch_va; + svm->ghcb_sa_len = len; + + return true; +} + static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, unsigned int pos) { @@ -1641,6 +1747,24 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) ret = -EINVAL; switch (exit_code) { + case SVM_VMGEXIT_MMIO_READ: + if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) + break; + + ret = kvm_sev_es_mmio_read(&svm->vcpu, + control->exit_info_1, + control->exit_info_2, + svm->ghcb_sa); + break; + case SVM_VMGEXIT_MMIO_WRITE: + if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) + break; + + ret = kvm_sev_es_mmio_write(&svm->vcpu, + control->exit_info_1, + control->exit_info_2, + svm->ghcb_sa); + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(&svm->vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index fc69bc2e0cad..9019ad6a8138 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -174,6 +174,12 @@ struct vcpu_svm { struct vmcb_save_area *vmsa; struct ghcb *ghcb; struct kvm_host_map ghcb_map; + + /* SEV-ES scratch area support */ + void *ghcb_sa; + u64 ghcb_sa_len; + bool ghcb_sa_sync; + bool ghcb_sa_free; }; struct svm_cpu_data { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02c6fb166fc3..fd4c47b7c71c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11292,6 +11292,129 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_handle_invpcid); +static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_mmio_fragment *frag; + unsigned int len; + + BUG_ON(!vcpu->mmio_needed); + + /* Complete previous fragment */ + frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment]; + len = min(8u, frag->len); + if (!vcpu->mmio_is_write) + memcpy(frag->data, run->mmio.data, len); + + if (frag->len <= 8) { + /* Switch to the next fragment. */ + frag++; + vcpu->mmio_cur_fragment++; + } else { + /* Go forward to the next mmio piece. 
*/ + frag->data += len; + frag->gpa += len; + frag->len -= len; + } + + if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) { + vcpu->mmio_needed = 0; + + // VMG change, at this point, we're always done + // RIP has already been advanced + return 1; + } + + // More MMIO is needed + run->mmio.phys_addr = frag->gpa; + run->mmio.len = min(8u, frag->len); + run->mmio.is_write = vcpu->mmio_is_write; + if (run->mmio.is_write) + memcpy(run->mmio.data, frag->data, min(8u, frag->len)); + run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} + +int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, + void *data) +{ + int handled; + struct kvm_mmio_fragment *frag; + + if (!data) + return -EINVAL; + + handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data); + if (handled == bytes) + return 1; + + bytes -= handled; + gpa += handled; + data += handled; + + /*TODO: Check if need to increment number of frags */ + frag = vcpu->mmio_fragments; + vcpu->mmio_nr_fragments = 1; + frag->len = bytes; + frag->gpa = gpa; + frag->data = data; + + vcpu->mmio_needed = 1; + vcpu->mmio_cur_fragment = 0; + + vcpu->run->mmio.phys_addr = gpa; + vcpu->run->mmio.len = min(8u, frag->len); + vcpu->run->mmio.is_write = 1; + memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len)); + vcpu->run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write); + +int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, + void *data) +{ + int handled; + struct kvm_mmio_fragment *frag; + + if (!data) + return -EINVAL; + + handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data); + if (handled == bytes) + return 1; + + bytes -= handled; + gpa += handled; + data += handled; + + /*TODO: Check if need to increment number of frags */ + frag = vcpu->mmio_fragments; + vcpu->mmio_nr_fragments = 1; + frag->len = bytes; + frag->gpa = gpa; + frag->data = data; + + vcpu->mmio_needed = 1; + vcpu->mmio_cur_fragment = 0; + + vcpu->run->mmio.phys_addr = gpa; + vcpu->run->mmio.len = min(8u, frag->len); + vcpu->run->mmio.is_write = 0; + vcpu->run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index bf812c89c2e3..046709f16abe 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -427,4 +427,9 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); __reserved_bits; \ }) +int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, + void *dst); +int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, + void *dst); + #endif -- cgit v1.2.3 From 7ed9abfe8e9f62384f9b11c9fca19e551dbec5bd Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:54 -0600 Subject: KVM: SVM: Support string IO operations for an SEV-ES guest For an SEV-ES guest, string-based port IO is performed to a shared (un-encrypted) page so that both the hypervisor and guest can read or write to it and each see the contents. For string-based port IO operations, invoke SEV-ES specific routines that can complete the operation using common KVM port IO support. 
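
For a byte-wide OUTS, where the rep count and the byte length coincide, the guest-side staging looks roughly like this sketch (not part of this patch; helper names are approximate, the operand-size bit is assumed to follow the IOIO exit-info layout, and count is assumed to fit in the shared buffer):

static void guest_rep_outsb(struct ghcb *ghcb, u16 port, const void *buf, u32 count)
{
	u64 exit_info_1 = ((u64)port << 16) |	/* port number        */
			  SVM_IOIO_STR_MASK  |	/* string operation   */
			  (1ULL << 4);		/* 8-bit operand size */

	/* The bytes travel through the shared buffer because the
	 * hypervisor cannot read them out of encrypted guest memory. */
	memcpy(ghcb->shared_buffer, buf, count);
	ghcb_set_sw_scratch(ghcb, __pa(ghcb->shared_buffer));

	ghcb_set_sw_exit_code(ghcb, SVM_EXIT_IOIO);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, count);

	/* ...GHCB MSR write and VMGEXIT as in the earlier sketches. */
}
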
Signed-off-by: Tom Lendacky Message-Id: <9d61daf0ffda496703717218f415cdc8fd487100.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/sev.c | 18 ++++++++++++-- arch/x86/kvm/svm/svm.c | 11 ++++++--- arch/x86/kvm/svm/svm.h | 1 + arch/x86/kvm/x86.c | 54 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.h | 3 +++ 6 files changed, 83 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 18aa15e6fadd..1c8c59d4a572 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -614,6 +614,7 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; + void *guest_ins_data; u8 event_exit_inst_len; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index dc8ccc14241d..154ac7601d02 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1406,9 +1406,14 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) case SVM_EXIT_INVD: break; case SVM_EXIT_IOIO: - if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) - if (!ghcb_rax_is_valid(ghcb)) + if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) { + if (!ghcb_sw_scratch_is_valid(ghcb)) goto vmgexit_err; + } else { + if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) + if (!ghcb_rax_is_valid(ghcb)) + goto vmgexit_err; + } break; case SVM_EXIT_MSR: if (!ghcb_rcx_is_valid(ghcb)) @@ -1776,3 +1781,12 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) return ret; } + +int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) +{ + if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2)) + return -EINVAL; + + return kvm_sev_es_string_io(&svm->vcpu, size, port, + svm->ghcb_sa, svm->ghcb_sa_len, in); +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 310de05d2479..18a46847bffc 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2038,11 +2038,16 @@ static int io_interception(struct vcpu_svm *svm) ++svm->vcpu.stat.io_exits; string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; - if (string) - return kvm_emulate_instruction(vcpu, 0); - port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; + + if (string) { + if (sev_es_guest(vcpu->kvm)) + return sev_es_string_io(svm, size, port, in); + else + return kvm_emulate_instruction(vcpu, 0); + } + svm->next_rip = svm->vmcb->control.exit_info_2; return kvm_fast_pio(&svm->vcpu, size, port, in); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9019ad6a8138..b3f03dede6ac 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -573,5 +573,6 @@ void __init sev_hardware_setup(void); void sev_hardware_teardown(void); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct vcpu_svm *svm); +int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fd4c47b7c71c..536399fdbc4b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10783,6 +10783,10 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) { + /* Can't read the RIP when guest state is protected, just return 0 */ + if (vcpu->arch.guest_state_protected) + return 0; + if (is_64_bit_mode(vcpu)) return kvm_rip_read(vcpu); return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + @@ -11415,6 +11419,56 @@ int 
kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, } EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); +static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) +{ + memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data, + vcpu->arch.pio.count * vcpu->arch.pio.size); + vcpu->arch.pio.count = 0; + + return 1; +} + +static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count) +{ + int ret; + + ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port, + data, count); + if (ret) + return ret; + + vcpu->arch.pio.count = 0; + + return 0; +} + +static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count) +{ + int ret; + + ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port, + data, count); + if (ret) { + vcpu->arch.pio.count = 0; + } else { + vcpu->arch.guest_ins_data = data; + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; + } + + return 0; +} + +int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count, + int in) +{ + return in ? kvm_sev_es_ins(vcpu, size, port, data, count) + : kvm_sev_es_outs(vcpu, size, port, data, count); +} +EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 046709f16abe..fe7f3df91b2c 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -431,5 +431,8 @@ int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, void *dst); int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes, void *dst); +int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count, + int in); #endif -- cgit v1.2.3 From 2985afbcdbb1957a8d31992cebbc4e49d2ad8a77 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:55 -0600 Subject: KVM: SVM: Add support for EFER write traps for an SEV-ES guest For SEV-ES guests, the interception of EFER write access is not recommended. EFER interception occurs prior to EFER being modified and the hypervisor is unable to modify EFER itself because the register is located in the encrypted register state. SEV-ES support introduces a new EFER write trap. This trap provides intercept support of an EFER write after it has been modified. The new EFER value is provided in the VMCB EXITINFO1 field, allowing the hypervisor to track the setting of the guest EFER. Add support to track the value of the guest EFER value using the EFER write trap so that the hypervisor understands the guest operating mode. 
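
The key difference from the classic EFER write intercept is that the new value is simply delivered in EXITINFO1, so the handler only has to record it. A reduced sketch of the value the trap handler works with (illustrative only; the real handling is in efer_trap() in the diff below):

static u64 example_efer_from_trap(struct vmcb_control_area *control)
{
	/* The guest has already written EFER; EXITINFO1 carries the new
	 * value. EFER_SVME is stripped before handing it to the common
	 * MSR code, for the reason explained in efer_trap() below. */
	return control->exit_info_1 & ~EFER_SVME;
}
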
Signed-off-by: Tom Lendacky Message-Id: <8993149352a3a87cd0625b3b61bfd31ab28977e1.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/svm.h | 2 ++ arch/x86/kvm/svm/svm.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 09f723945425..6e3f92e17655 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -77,6 +77,7 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_EFER_WRITE_TRAP 0x08f #define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 @@ -184,6 +185,7 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 18a46847bffc..983b993c949b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2503,6 +2503,25 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +static int efer_trap(struct vcpu_svm *svm) +{ + struct msr_data msr_info; + int ret; + + /* + * Clear the EFER_SVME bit from EFER. The SVM code always sets this + * bit in svm_set_efer(), but __kvm_valid_efer() checks it against + * whether the guest has X86_FEATURE_SVM - this avoids a failure if + * the guest doesn't have X86_FEATURE_SVM. + */ + msr_info.host_initiated = false; + msr_info.index = MSR_EFER; + msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME; + ret = kvm_set_msr_common(&svm->vcpu, &msr_info); + + return kvm_complete_insn_gp(&svm->vcpu, ret); +} + static int svm_get_msr_feature(struct kvm_msr_entry *msr) { msr->data = 0; @@ -2977,6 +2996,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_RDPRU] = rdpru_interception, + [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap, [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, -- cgit v1.2.3 From f27ad38aac23263c40fe26c0188182c129a8f8dd Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:56 -0600 Subject: KVM: SVM: Add support for CR0 write traps for an SEV-ES guest For SEV-ES guests, the interception of control register write access is not recommended. Control register interception occurs prior to the control register being modified and the hypervisor is unable to modify the control register itself because the register is located in the encrypted register state. SEV-ES support introduces new control register write traps. These traps provide intercept support of a control register write after the control register has been modified. The new control register value is provided in the VMCB EXITINFO1 field, allowing the hypervisor to track the setting of the guest control registers. Add support to track the value of the guest CR0 register using the control register write trap so that the hypervisor understands the guest operating mode. 
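Because the new CR write-trap exit codes are allocated contiguously (0x090-0x09f, as added to the uapi header below), the handler can derive the CR number by simple subtraction. A standalone sketch of that decode, assuming only the exit-code values introduced by this series:

#include <stdio.h>

#define SVM_EXIT_CR0_WRITE_TRAP     0x090
#define SVM_EXIT_CR15_WRITE_TRAP    0x09f

/* Map a CR write-trap exit code back to the CR number it refers to. */
static int cr_from_trap_exit_code(unsigned int exit_code)
{
    if (exit_code < SVM_EXIT_CR0_WRITE_TRAP ||
        exit_code > SVM_EXIT_CR15_WRITE_TRAP)
        return -1;  /* not a CR write trap */

    return exit_code - SVM_EXIT_CR0_WRITE_TRAP;
}

int main(void)
{
    printf("exit code 0x090 -> CR%d\n", cr_from_trap_exit_code(0x090));
    printf("exit code 0x094 -> CR%d\n", cr_from_trap_exit_code(0x094));
    return 0;
}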
Signed-off-by: Tom Lendacky Message-Id: <182c9baf99df7e40ad9617ff90b84542705ef0d7.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/uapi/asm/svm.h | 17 +++++++++++++++++ arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 33 ++++++++++++++++++++------------- 4 files changed, 64 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1c8c59d4a572..f04d4c6f28f0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1477,6 +1477,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); +void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 6e3f92e17655..14b0d97b50e2 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -78,6 +78,22 @@ #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e #define SVM_EXIT_EFER_WRITE_TRAP 0x08f +#define SVM_EXIT_CR0_WRITE_TRAP 0x090 +#define SVM_EXIT_CR1_WRITE_TRAP 0x091 +#define SVM_EXIT_CR2_WRITE_TRAP 0x092 +#define SVM_EXIT_CR3_WRITE_TRAP 0x093 +#define SVM_EXIT_CR4_WRITE_TRAP 0x094 +#define SVM_EXIT_CR5_WRITE_TRAP 0x095 +#define SVM_EXIT_CR6_WRITE_TRAP 0x096 +#define SVM_EXIT_CR7_WRITE_TRAP 0x097 +#define SVM_EXIT_CR8_WRITE_TRAP 0x098 +#define SVM_EXIT_CR9_WRITE_TRAP 0x099 +#define SVM_EXIT_CR10_WRITE_TRAP 0x09a +#define SVM_EXIT_CR11_WRITE_TRAP 0x09b +#define SVM_EXIT_CR12_WRITE_TRAP 0x09c +#define SVM_EXIT_CR13_WRITE_TRAP 0x09d +#define SVM_EXIT_CR14_WRITE_TRAP 0x09e +#define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 @@ -186,6 +202,7 @@ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ + { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 983b993c949b..ddcb7390bb0e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2450,6 +2450,31 @@ static int cr_interception(struct vcpu_svm *svm) return kvm_complete_insn_gp(&svm->vcpu, err); } +static int cr_trap(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + unsigned long old_value, new_value; + unsigned int cr; + + new_value = (unsigned long)svm->vmcb->control.exit_info_1; + + cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP; + switch (cr) { + case 0: + old_value = kvm_read_cr0(vcpu); + svm_set_cr0(vcpu, new_value); + + kvm_post_set_cr0(vcpu, old_value, new_value); + break; + default: + WARN(1, "unhandled CR%d write trap", cr); + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + return kvm_complete_insn_gp(vcpu, 0); +} + static int dr_interception(struct vcpu_svm *svm) { int reg, dr; @@ -2997,6 +3022,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_RDPRU] = rdpru_interception, 
[SVM_EXIT_EFER_WRITE_TRAP] = efer_trap, + [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap, [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 536399fdbc4b..efa70e30d23f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -804,11 +804,29 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(pdptrs_changed); +void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0) +{ + unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; + + if ((cr0 ^ old_cr0) & X86_CR0_PG) { + kvm_clear_async_pf_completion_queue(vcpu); + kvm_async_pf_hash_reset(vcpu); + } + + if ((cr0 ^ old_cr0) & update_bits) + kvm_mmu_reset_context(vcpu); + + if (((cr0 ^ old_cr0) & X86_CR0_CD) && + kvm_arch_has_noncoherent_dma(vcpu->kvm) && + !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); +} +EXPORT_SYMBOL_GPL(kvm_post_set_cr0); + int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { unsigned long old_cr0 = kvm_read_cr0(vcpu); unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG; - unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; cr0 |= X86_CR0_ET; @@ -847,18 +865,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_x86_ops.set_cr0(vcpu, cr0); - if ((cr0 ^ old_cr0) & X86_CR0_PG) { - kvm_clear_async_pf_completion_queue(vcpu); - kvm_async_pf_hash_reset(vcpu); - } - - if ((cr0 ^ old_cr0) & update_bits) - kvm_mmu_reset_context(vcpu); - - if (((cr0 ^ old_cr0) & X86_CR0_CD) && - kvm_arch_has_noncoherent_dma(vcpu->kvm) && - !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) - kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); + kvm_post_set_cr0(vcpu, old_cr0, cr0); return 0; } -- cgit v1.2.3 From 5b51cb13160ae0ba10645bd0a84e7847677fb6a0 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:57 -0600 Subject: KVM: SVM: Add support for CR4 write traps for an SEV-ES guest For SEV-ES guests, the interception of control register write access is not recommended. Control register interception occurs prior to the control register being modified and the hypervisor is unable to modify the control register itself because the register is located in the encrypted register state. SEV-ES guests introduce new control register write traps. These traps provide intercept support of a control register write after the control register has been modified. The new control register value is provided in the VMCB EXITINFO1 field, allowing the hypervisor to track the setting of the guest control registers. Add support to track the value of the guest CR4 register using the control register write trap so that the hypervisor understands the guest operating mode. 
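The factored-out post-set CR4 handling below reduces to one predicate: reset the MMU context when a paging-role bit changes or when PCIDE is cleared. A standalone sketch of that check, using the architectural CR4 bit positions:

#include <stdbool.h>
#include <stdio.h>

#define X86_CR4_PSE     (1UL << 4)
#define X86_CR4_PAE     (1UL << 5)
#define X86_CR4_PGE     (1UL << 7)
#define X86_CR4_PCIDE   (1UL << 17)
#define X86_CR4_SMEP    (1UL << 20)
#define X86_CR4_SMAP    (1UL << 21)
#define X86_CR4_PKE     (1UL << 22)

/* Reset the MMU when a paging-role bit changes or PCIDE is cleared. */
static bool cr4_needs_mmu_reset(unsigned long old_cr4, unsigned long cr4)
{
    unsigned long mmu_role_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
                                  X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;

    return ((cr4 ^ old_cr4) & mmu_role_bits) ||
           (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE));
}

int main(void)
{
    unsigned long old = X86_CR4_PAE | X86_CR4_PCIDE;

    printf("clear PCIDE -> reset? %d\n",
           cr4_needs_mmu_reset(old, old & ~X86_CR4_PCIDE));
    printf("unchanged   -> reset? %d\n", cr4_needs_mmu_reset(old, old));
    return 0;
}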
Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kvm/svm/svm.c | 7 +++++++ arch/x86/kvm/x86.c | 16 ++++++++++++---- 4 files changed, 21 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f04d4c6f28f0..8ae099b48f00 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1478,6 +1478,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); +void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 14b0d97b50e2..c4152689ea93 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -203,6 +203,7 @@ { SVM_EXIT_XSETBV, "xsetbv" }, \ { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ + { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ddcb7390bb0e..4b3d935a1325 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2466,6 +2466,12 @@ static int cr_trap(struct vcpu_svm *svm) kvm_post_set_cr0(vcpu, old_value, new_value); break; + case 4: + old_value = kvm_read_cr4(vcpu); + svm_set_cr4(vcpu, new_value); + + kvm_post_set_cr4(vcpu, old_value, new_value); + break; default: WARN(1, "unhandled CR%d write trap", cr); kvm_queue_exception(vcpu, UD_VECTOR); @@ -3023,6 +3029,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_RDPRU] = rdpru_interception, [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap, [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap, + [SVM_EXIT_CR4_WRITE_TRAP] = cr_trap, [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index efa70e30d23f..c3686233508b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -983,12 +983,22 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } EXPORT_SYMBOL_GPL(kvm_is_valid_cr4); +void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4) +{ + unsigned long mmu_role_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; + + if (((cr4 ^ old_cr4) & mmu_role_bits) || + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) + kvm_mmu_reset_context(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_post_set_cr4); + int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP; - unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE; if (!kvm_is_valid_cr4(vcpu, cr4)) return 1; @@ -1015,9 +1025,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) kvm_x86_ops.set_cr4(vcpu, cr4); - if (((cr4 ^ old_cr4) & mmu_role_bits) || - (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) - 
kvm_mmu_reset_context(vcpu); + kvm_post_set_cr4(vcpu, old_cr4, cr4); return 0; } -- cgit v1.2.3 From d1949b93c60504b338c89cf8b3873c0d11feb7ed Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:58 -0600 Subject: KVM: SVM: Add support for CR8 write traps for an SEV-ES guest For SEV-ES guests, the interception of control register write access is not recommended. Control register interception occurs prior to the control register being modified and the hypervisor is unable to modify the control register itself because the register is located in the encrypted register state. SEV-ES guests introduce new control register write traps. These traps provide intercept support of a control register write after the control register has been modified. The new control register value is provided in the VMCB EXITINFO1 field, allowing the hypervisor to track the setting of the guest control registers. Add support to track the value of the guest CR8 register using the control register write trap so that the hypervisor understands the guest operating mode. Signed-off-by: Tom Lendacky Message-Id: <5a01033f4c8b3106ca9374b7cadf8e33da852df1.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kvm/svm/svm.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index c4152689ea93..554f75fe013c 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -204,6 +204,7 @@ { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ + { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4b3d935a1325..0f4b49639955 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2455,6 +2455,7 @@ static int cr_trap(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; unsigned long old_value, new_value; unsigned int cr; + int ret = 0; new_value = (unsigned long)svm->vmcb->control.exit_info_1; @@ -2472,13 +2473,16 @@ static int cr_trap(struct vcpu_svm *svm) kvm_post_set_cr4(vcpu, old_value, new_value); break; + case 8: + ret = kvm_set_cr8(&svm->vcpu, new_value); + break; default: WARN(1, "unhandled CR%d write trap", cr); kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - return kvm_complete_insn_gp(vcpu, 0); + return kvm_complete_insn_gp(vcpu, ret); } static int dr_interception(struct vcpu_svm *svm) @@ -3030,6 +3034,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap, [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap, [SVM_EXIT_CR4_WRITE_TRAP] = cr_trap, + [SVM_EXIT_CR8_WRITE_TRAP] = cr_trap, [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, -- cgit v1.2.3 From 5265713a073754605108b3aba17619a0bbbae3c4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:59 -0600 Subject: KVM: x86: Update __get_sregs() / __set_sregs() to support SEV-ES Since many of the registers used by the SEV-ES are encrypted and cannot be read or written, adjust the __get_sregs() / __set_sregs() to take into account whether the VMSA/guest state is encrypted. 
For __get_sregs(), return the actual value that is in use by the guest for all registers being tracked using the write trap support. For __set_sregs(), skip setting of all guest registers values. Signed-off-by: Tom Lendacky Message-Id: <23051868db76400a9b07a2020525483a1e62dbcf.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c3686233508b..86947e757dce 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9439,6 +9439,9 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct desc_ptr dt; + if (vcpu->arch.guest_state_protected) + goto skip_protected_regs; + kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -9456,9 +9459,11 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->gdt.limit = dt.size; sregs->gdt.base = dt.address; - sregs->cr0 = kvm_read_cr0(vcpu); sregs->cr2 = vcpu->arch.cr2; sregs->cr3 = kvm_read_cr3(vcpu); + +skip_protected_regs: + sregs->cr0 = kvm_read_cr0(vcpu); sregs->cr4 = kvm_read_cr4(vcpu); sregs->cr8 = kvm_get_cr8(vcpu); sregs->efer = vcpu->arch.efer; @@ -9595,6 +9600,9 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) if (kvm_set_apic_base(vcpu, &apic_base_msr)) goto out; + if (vcpu->arch.guest_state_protected) + goto skip_protected_regs; + dt.size = sregs->idt.limit; dt.address = sregs->idt.base; kvm_x86_ops.set_idt(vcpu, &dt); @@ -9629,14 +9637,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); - max_bits = KVM_NR_INTERRUPTS; - pending_vec = find_first_bit( - (const unsigned long *)sregs->interrupt_bitmap, max_bits); - if (pending_vec < max_bits) { - kvm_queue_interrupt(vcpu, pending_vec, false); - pr_debug("Set back pending irq %d\n", pending_vec); - } - kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -9655,6 +9655,15 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) !is_protmode(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; +skip_protected_regs: + max_bits = KVM_NR_INTERRUPTS; + pending_vec = find_first_bit( + (const unsigned long *)sregs->interrupt_bitmap, max_bits); + if (pending_vec < max_bits) { + kvm_queue_interrupt(vcpu, pending_vec, false); + pr_debug("Set back pending irq %d\n", pending_vec); + } + kvm_make_request(KVM_REQ_EVENT, vcpu); ret = 0; -- cgit v1.2.3 From 5719455fbd952a69ebc860d47bb0287e9198fe12 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:00 -0600 Subject: KVM: SVM: Do not report support for SMM for an SEV-ES guest SEV-ES guests do not currently support SMM. Update the has_emulated_msr() kvm_x86_ops function to take a struct kvm parameter so that the capability can be reported at a VM level. Since this op is also called during KVM initialization and before a struct kvm instance is available, comments will be added to each implementation of has_emulated_msr() to indicate the kvm parameter can be null. 
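A standalone sketch of the resulting convention, with sev_es_guest() stubbed out and the MSR index used only for illustration: the per-VM SMM check applies only once a VM context exists, so the possibly-NULL kvm pointer is tested before it is dereferenced.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct kvm { bool sev_es_active; };

static bool sev_es_guest(const struct kvm *kvm)
{
    return kvm->sev_es_active;
}

#define MSR_IA32_SMBASE 0x9e    /* index shown for illustration only */

static bool has_emulated_msr(const struct kvm *kvm, unsigned int index)
{
    if (index == MSR_IA32_SMBASE) {
        /* SEV-ES guests do not support SMM, so report false per VM. */
        if (kvm && sev_es_guest(kvm))
            return false;
    }
    return true;
}

int main(void)
{
    struct kvm vm = { .sev_es_active = true };

    printf("module init (no VM): %d\n", has_emulated_msr(NULL, MSR_IA32_SMBASE));
    printf("SEV-ES VM:           %d\n", has_emulated_msr(&vm, MSR_IA32_SMBASE));
    return 0;
}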
Signed-off-by: Tom Lendacky Message-Id: <75de5138e33b945d2fb17f81ae507bda381808e3.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm/svm.c | 11 ++++++++++- arch/x86/kvm/vmx/vmx.c | 6 +++++- arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8ae099b48f00..2b6f168436c8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1092,7 +1092,7 @@ struct kvm_x86_ops { void (*hardware_disable)(void); void (*hardware_unsetup)(void); bool (*cpu_has_accelerated_tpr)(void); - bool (*has_emulated_msr)(u32 index); + bool (*has_emulated_msr)(struct kvm *kvm, u32 index); void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); unsigned int vm_size; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0f4b49639955..6064a6035dbb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3878,12 +3878,21 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_emulated_msr(u32 index) +/* + * The kvm parameter can be NULL (module initialization, or invocation before + * VM creation). Be sure to check the kvm parameter before using it. + */ +static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: return false; + case MSR_IA32_SMBASE: + /* SEV-ES guests do not support SMM, so report false */ + if (kvm && sev_es_guest(kvm)) + return false; + break; default: break; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 55fa51c0cd9d..75c9c6a0a3a4 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6379,7 +6379,11 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) handle_exception_nmi_irqoff(vmx); } -static bool vmx_has_emulated_msr(u32 index) +/* + * The kvm parameter can be NULL (module initialization, or invocation before + * VM creation). Be sure to check the kvm parameter before using it. + */ +static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_SMBASE: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 86947e757dce..ba8e6a9b1a77 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3783,7 +3783,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE); + r = kvm_x86_ops.has_emulated_msr(kvm, MSR_IA32_SMBASE); break; case KVM_CAP_VAPIC: r = !kvm_x86_ops.cpu_has_accelerated_tpr(); @@ -5782,7 +5782,7 @@ static void kvm_init_msr_list(void) } for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) { - if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i])) + if (!kvm_x86_ops.has_emulated_msr(NULL, emulated_msrs_all[i])) continue; emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i]; -- cgit v1.2.3 From ed02b213098a90c2a415a0da18f05841f8cf0a81 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:01 -0600 Subject: KVM: SVM: Guest FPU state save/restore not needed for SEV-ES guest The guest FPU state is automatically restored on VMRUN and saved on VMEXIT by the hardware, so there is no reason to do this in KVM. Eliminate the allocation of the guest_fpu save area and key off that to skip operations related to the guest FPU state. 
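A standalone sketch of the pattern applied throughout the patch, with the types trimmed down: once the guest FPU area has been freed for an SEV-ES vCPU, every FPU save/restore path keys off the NULL pointer and becomes a no-op.

#include <stdio.h>
#include <stdlib.h>

struct guest_fpu { unsigned char state[64]; };

struct vcpu {
    struct guest_fpu *guest_fpu;    /* NULL => state is hardware-managed */
};

static void load_guest_fpu(const struct vcpu *vcpu)
{
    if (!vcpu->guest_fpu) {
        printf("protected state: skip FPU restore\n");
        return;
    }
    printf("restore guest FPU registers\n");
}

int main(void)
{
    struct vcpu plain  = { .guest_fpu = malloc(sizeof(struct guest_fpu)) };
    struct vcpu sev_es = { .guest_fpu = NULL };

    load_guest_fpu(&plain);
    load_guest_fpu(&sev_es);
    free(plain.guest_fpu);
    return 0;
}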
Signed-off-by: Tom Lendacky Message-Id: <173e429b4d0d962c6a443c4553ffdaf31b7665a4.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm/svm.c | 8 ++++++ arch/x86/kvm/x86.c | 56 +++++++++++++++++++++++++++++++++-------- 3 files changed, 56 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2b6f168436c8..39707e72b062 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1477,6 +1477,8 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); +void kvm_free_guest_fpu(struct kvm_vcpu *vcpu); + void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6064a6035dbb..f1c32cdc9d49 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1317,6 +1317,14 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!vmsa_page) goto error_free_vmcb_page; + + /* + * SEV-ES guests maintain an encrypted version of their FPU + * state which is restored and saved on VMRUN and VMEXIT. + * Free the fpu structure to prevent KVM from attempting to + * access the FPU state. + */ + kvm_free_guest_fpu(vcpu); } err = avic_init_vcpu(svm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ba8e6a9b1a77..eea0e9fed62a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4503,6 +4503,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { + if (!vcpu->arch.guest_fpu) + return; + if (boot_cpu_has(X86_FEATURE_XSAVE)) { memset(guest_xsave, 0, sizeof(struct kvm_xsave)); fill_xsave((u8 *) guest_xsave->region, vcpu); @@ -4520,9 +4523,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - u64 xstate_bv = - *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; - u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; + u64 xstate_bv; + u32 mxcsr; + + if (!vcpu->arch.guest_fpu) + return 0; + + xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; + mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; if (boot_cpu_has(X86_FEATURE_XSAVE)) { /* @@ -9245,9 +9253,14 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) kvm_save_current_fpu(vcpu->arch.user_fpu); - /* PKRU is separately restored in kvm_x86_ops.run. */ - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, - ~XFEATURE_MASK_PKRU); + /* + * Guests with protected state can't have it set by the hypervisor, + * so skip trying to set it. + */ + if (vcpu->arch.guest_fpu) + /* PKRU is separately restored in kvm_x86_ops.run. 
*/ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, + ~XFEATURE_MASK_PKRU); fpregs_mark_activate(); fpregs_unlock(); @@ -9260,7 +9273,12 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - kvm_save_current_fpu(vcpu->arch.guest_fpu); + /* + * Guests with protected state can't have it read by the hypervisor, + * so skip trying to save it. + */ + if (vcpu->arch.guest_fpu) + kvm_save_current_fpu(vcpu->arch.guest_fpu); copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); @@ -9770,6 +9788,9 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { struct fxregs_state *fxsave; + if (!vcpu->arch.guest_fpu) + return 0; + vcpu_load(vcpu); fxsave = &vcpu->arch.guest_fpu->state.fxsave; @@ -9790,6 +9811,9 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { struct fxregs_state *fxsave; + if (!vcpu->arch.guest_fpu) + return 0; + vcpu_load(vcpu); fxsave = &vcpu->arch.guest_fpu->state.fxsave; @@ -9848,6 +9872,9 @@ static int sync_regs(struct kvm_vcpu *vcpu) static void fx_init(struct kvm_vcpu *vcpu) { + if (!vcpu->arch.guest_fpu) + return; + fpstate_init(&vcpu->arch.guest_fpu->state); if (boot_cpu_has(X86_FEATURE_XSAVES)) vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv = @@ -9861,6 +9888,15 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } +void kvm_free_guest_fpu(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.guest_fpu) { + kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + vcpu->arch.guest_fpu = NULL; + } +} +EXPORT_SYMBOL_GPL(kvm_free_guest_fpu); + int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) @@ -9956,7 +9992,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return 0; free_guest_fpu: - kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + kvm_free_guest_fpu(vcpu); free_user_fpu: kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); free_emulate_ctxt: @@ -10010,7 +10046,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); - kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + kvm_free_guest_fpu(vcpu); kvm_hv_vcpu_uninit(vcpu); kvm_pmu_destroy(vcpu); @@ -10058,7 +10094,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_async_pf_hash_reset(vcpu); vcpu->arch.apf.halted = false; - if (kvm_mpx_supported()) { + if (vcpu->arch.guest_fpu && kvm_mpx_supported()) { void *mpx_state_buffer; /* -- cgit v1.2.3 From 4444dfe4050b79964d7bb9b86a99e2bb21a972b0 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 14 Dec 2020 11:16:03 -0500 Subject: KVM: SVM: Add NMI support for an SEV-ES guest The GHCB specification defines how NMIs are to be handled for an SEV-ES guest. To detect the completion of an NMI the hypervisor must not intercept the IRET instruction (because a #VC while running the NMI will issue an IRET) and, instead, must receive an NMI Complete exit event from the guest. Update the KVM support for detecting the completion of NMIs in the guest to follow the GHCB specification. When an SEV-ES guest is active, the IRET instruction will no longer be intercepted. Now, when the NMI Complete exit event is received, the iret_interception() function will be called to simulate the completion of the NMI. 
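A standalone sketch of that control flow (illustrative, not the KVM code): a conventional guest signals NMI completion through an intercepted IRET, while an SEV-ES guest never has IRET intercepted (a #VC taken inside the guest's NMI handler would itself IRET) and instead reports an NMI Complete exit, with both paths ending in the same unmasking step.

#include <stdbool.h>
#include <stdio.h>

struct vcpu {
    bool sev_es;
    bool nmi_masked;
    bool iret_intercepted;
};

static void inject_nmi(struct vcpu *v)
{
    v->nmi_masked = true;
    /* IRET must not be intercepted for SEV-ES guests. */
    if (!v->sev_es)
        v->iret_intercepted = true;
}

/* Reached via the IRET exit or the NMI Complete VMGEXIT event. */
static void nmi_complete(struct vcpu *v)
{
    v->nmi_masked = false;
    v->iret_intercepted = false;
}

int main(void)
{
    struct vcpu v = { .sev_es = true };

    inject_nmi(&v);
    printf("after inject: masked=%d iret intercepted=%d\n",
           v.nmi_masked, v.iret_intercepted);
    nmi_complete(&v);
    printf("after NMI Complete: masked=%d\n", v.nmi_masked);
    return 0;
}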
Signed-off-by: Tom Lendacky Message-Id: <5ea3dd69b8d4396cefdc9048ebc1ab7caa70a847.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 4 ++++ arch/x86/kvm/svm/svm.c | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 154ac7601d02..c3006e9ef5ae 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1449,6 +1449,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) if (!ghcb_sw_scratch_is_valid(ghcb)) goto vmgexit_err; break; + case SVM_VMGEXIT_NMI_COMPLETE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; default: @@ -1770,6 +1771,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) control->exit_info_2, svm->ghcb_sa); break; + case SVM_VMGEXIT_NMI_COMPLETE: + ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET); + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(&svm->vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f1c32cdc9d49..2e43f79c2703 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2319,9 +2319,11 @@ static int cpuid_interception(struct vcpu_svm *svm) static int iret_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.nmi_window_exits; - svm_clr_intercept(svm, INTERCEPT_IRET); svm->vcpu.arch.hflags |= HF_IRET_MASK; - svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); + if (!sev_es_guest(svm->vcpu.kvm)) { + svm_clr_intercept(svm, INTERCEPT_IRET); + svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); + } kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); return 1; } @@ -3302,7 +3304,8 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; vcpu->arch.hflags |= HF_NMI_MASK; - svm_set_intercept(svm, INTERCEPT_IRET); + if (!sev_es_guest(svm->vcpu.kvm)) + svm_set_intercept(svm, INTERCEPT_IRET); ++vcpu->stat.nmi_injections; } @@ -3386,10 +3389,12 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) if (masked) { svm->vcpu.arch.hflags |= HF_NMI_MASK; - svm_set_intercept(svm, INTERCEPT_IRET); + if (!sev_es_guest(svm->vcpu.kvm)) + svm_set_intercept(svm, INTERCEPT_IRET); } else { svm->vcpu.arch.hflags &= ~HF_NMI_MASK; - svm_clr_intercept(svm, INTERCEPT_IRET); + if (!sev_es_guest(svm->vcpu.kvm)) + svm_clr_intercept(svm, INTERCEPT_IRET); } } @@ -3567,8 +3572,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) * If we've made progress since setting HF_IRET_MASK, we've * executed an IRET and can allow NMI injection. */ - if ((svm->vcpu.arch.hflags & HF_IRET_MASK) - && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { + if ((svm->vcpu.arch.hflags & HF_IRET_MASK) && + (sev_es_guest(svm->vcpu.kvm) || + kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip)) { svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); } -- cgit v1.2.3 From 85ca8be938c0e693b5ed5392279d5ecedf42901e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:04 -0600 Subject: KVM: SVM: Set the encryption mask for the SVM host save area The SVM host save area is used to restore some host state on VMEXIT of an SEV-ES guest. After allocating the save area, clear it and add the encryption mask to the SVM host save area physical address that is programmed into the VM_HSAVE_PA MSR. 
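A standalone sketch of what the VM_HSAVE_PA programming amounts to: shift the save-area PFN into a physical address and OR in the memory-encryption mask. The C-bit position used here is only an example; the real mask is discovered from CPUID at boot.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT  12
#define SME_MASK    (1ULL << 47)    /* example C-bit position, illustrative */

static uint64_t sme_page_pa(uint64_t pfn)
{
    return (pfn << PAGE_SHIFT) | SME_MASK;
}

int main(void)
{
    printf("VM_HSAVE_PA = %#llx\n", (unsigned long long)sme_page_pa(0x12345));
    return 0;
}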
Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 1 - arch/x86/kvm/svm/svm.c | 3 ++- arch/x86/kvm/svm/svm.h | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c3006e9ef5ae..0e922741023b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -30,7 +30,6 @@ unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; -#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) struct enc_region { struct list_head list; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2e43f79c2703..4be7d13d4462 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -497,7 +497,7 @@ static int svm_hardware_enable(void) wrmsrl(MSR_EFER, efer | EFER_SVME); - wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area)); if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); @@ -565,6 +565,7 @@ static int svm_cpu_init(int cpu) sd->save_area = alloc_page(GFP_KERNEL); if (!sd->save_area) goto free_cpu_data; + clear_page(page_address(sd->save_area)); if (svm_sev_enabled()) { sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index b3f03dede6ac..b85c162a8a1e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -21,6 +21,8 @@ #include +#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) + static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, -- cgit v1.2.3 From 80675b3ad45f79d97ce47a0faac3a6d22ab7e876 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:05 -0600 Subject: KVM: SVM: Update ASID allocation to support SEV-ES guests SEV and SEV-ES guests each have dedicated ASID ranges. Update the ASID allocation routine to return an ASID in the respective range. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0e922741023b..b81d12f1bd37 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -61,19 +61,19 @@ static int sev_flush_asids(void) } /* Must be called with the sev_bitmap_lock held */ -static bool __sev_recycle_asids(void) +static bool __sev_recycle_asids(int min_asid, int max_asid) { int pos; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, - max_sev_asid, min_sev_asid - 1); - if (pos >= max_sev_asid) + pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid); + if (pos >= max_asid) return false; if (sev_flush_asids()) return false; + /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, max_sev_asid); bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); @@ -81,20 +81,23 @@ static bool __sev_recycle_asids(void) return true; } -static int sev_asid_new(void) +static int sev_asid_new(struct kvm_sev_info *sev) { + int pos, min_asid, max_asid; bool retry = true; - int pos; mutex_lock(&sev_bitmap_lock); /* - * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid. 
+ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. + * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ + min_asid = sev->es_active ? 0 : min_sev_asid - 1; + max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); - if (pos >= max_sev_asid) { - if (retry && __sev_recycle_asids()) { + pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); + if (pos >= max_asid) { + if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; } @@ -176,7 +179,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) if (unlikely(sev->active)) return ret; - asid = sev_asid_new(); + asid = sev_asid_new(sev); if (asid < 0) return ret; -- cgit v1.2.3 From 376c6d285017419e35c7177bc60abe7915fb7497 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:06 -0600 Subject: KVM: SVM: Provide support for SEV-ES vCPU creation/loading An SEV-ES vCPU requires additional VMCB initialization requirements for vCPU creation and vCPU load/put requirements. This includes: General VMCB initialization changes: - Set a VMCB control bit to enable SEV-ES support on the vCPU. - Set the VMCB encrypted VM save area address. - CRx registers are part of the encrypted register state and cannot be updated. Remove the CRx register read and write intercepts and replace them with CRx register write traps to track the CRx register values. - Certain MSR values are part of the encrypted register state and cannot be updated. Remove certain MSR intercepts (EFER, CR_PAT, etc.). - Remove the #GP intercept (no support for "enable_vmware_backdoor"). - Remove the XSETBV intercept since the hypervisor cannot modify XCR0. General vCPU creation changes: - Set the initial GHCB gpa value as per the GHCB specification. 
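A standalone sketch of the SEV-ES-specific control setup from the list above, with the VMCB control structure trimmed to the fields of interest: enable the SEV-ES bit alongside SEV in nested_ctl and point vmsa_pa at the VMSA page without the encryption mask, since hardware accesses the VMSA with the guest key.

#include <stdint.h>
#include <stdio.h>

#define SVM_NESTED_CTL_NP_ENABLE        (1ULL << 0)
#define SVM_NESTED_CTL_SEV_ENABLE       (1ULL << 1)
#define SVM_NESTED_CTL_SEV_ES_ENABLE    (1ULL << 2)

struct vmcb_control {
    uint64_t nested_ctl;
    uint64_t vmsa_pa;
};

static void sev_es_init_control(struct vmcb_control *c, uint64_t vmsa_pa)
{
    c->nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE | SVM_NESTED_CTL_SEV_ES_ENABLE;
    c->vmsa_pa = vmsa_pa;   /* plain physical address, no C-bit */
}

int main(void)
{
    struct vmcb_control c = { .nested_ctl = SVM_NESTED_CTL_NP_ENABLE };

    sev_es_init_control(&c, 0x123456000ULL);
    printf("nested_ctl=%#llx vmsa_pa=%#llx\n",
           (unsigned long long)c.nested_ctl, (unsigned long long)c.vmsa_pa);
    return 0;
}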
Signed-off-by: Tom Lendacky Message-Id: <3a8aef366416eddd5556dfa3fdc212aafa1ad0a2.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 15 ++++++++++++- arch/x86/kvm/svm/sev.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 20 ++++++++++++++--- arch/x86/kvm/svm/svm.h | 6 ++++- 4 files changed, 92 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index caa8628f5fba..a57331de59e2 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -98,6 +98,16 @@ enum { INTERCEPT_MWAIT_COND, INTERCEPT_XSETBV, INTERCEPT_RDPRU, + TRAP_EFER_WRITE, + TRAP_CR0_WRITE, + TRAP_CR1_WRITE, + TRAP_CR2_WRITE, + TRAP_CR3_WRITE, + TRAP_CR4_WRITE, + TRAP_CR5_WRITE, + TRAP_CR6_WRITE, + TRAP_CR7_WRITE, + TRAP_CR8_WRITE, /* Byte offset 014h (word 5) */ INTERCEPT_INVLPGB = 160, INTERCEPT_INVLPGB_ILLEGAL, @@ -144,6 +154,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u8 reserved_6[8]; /* Offset 0xe8 */ u64 avic_logical_id; /* Offset 0xf0 */ u64 avic_physical_id; /* Offset 0xf8 */ + u8 reserved_7[8]; + u64 vmsa_pa; /* Used for an SEV-ES guest */ }; @@ -198,6 +210,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_NESTED_CTL_NP_ENABLE BIT(0) #define SVM_NESTED_CTL_SEV_ENABLE BIT(1) +#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) struct vmcb_seg { u16 selector; @@ -295,7 +308,7 @@ struct ghcb { #define EXPECTED_VMCB_SAVE_AREA_SIZE 1032 -#define EXPECTED_VMCB_CONTROL_AREA_SIZE 256 +#define EXPECTED_VMCB_CONTROL_AREA_SIZE 272 #define EXPECTED_GHCB_SIZE PAGE_SIZE static inline void __unused_size_checks(void) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index b81d12f1bd37..584fede0b733 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1796,3 +1796,59 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, svm->ghcb_sa_len, in); } + +void sev_es_init_vmcb(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; + svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; + + /* + * An SEV-ES guest requires a VMSA area that is a separate from the + * VMCB page. Do not include the encryption mask on the VMSA physical + * address since hardware will access it using the guest key. 
+ */ + svm->vmcb->control.vmsa_pa = __pa(svm->vmsa); + + /* Can't intercept CR register access, HV can't modify CR registers */ + svm_clr_intercept(svm, INTERCEPT_CR0_READ); + svm_clr_intercept(svm, INTERCEPT_CR4_READ); + svm_clr_intercept(svm, INTERCEPT_CR8_READ); + svm_clr_intercept(svm, INTERCEPT_CR0_WRITE); + svm_clr_intercept(svm, INTERCEPT_CR4_WRITE); + svm_clr_intercept(svm, INTERCEPT_CR8_WRITE); + + svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0); + + /* Track EFER/CR register changes */ + svm_set_intercept(svm, TRAP_EFER_WRITE); + svm_set_intercept(svm, TRAP_CR0_WRITE); + svm_set_intercept(svm, TRAP_CR4_WRITE); + svm_set_intercept(svm, TRAP_CR8_WRITE); + + /* No support for enable_vmware_backdoor */ + clr_exception_intercept(svm, GP_VECTOR); + + /* Can't intercept XSETBV, HV can't modify XCR0 directly */ + svm_clr_intercept(svm, INTERCEPT_XSETBV); + + /* Clear intercepts on selected MSRs */ + set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); +} + +void sev_es_create_vcpu(struct vcpu_svm *svm) +{ + /* + * Set the GHCB MSR value as per the GHCB specification when creating + * a vCPU for an SEV-ES guest. + */ + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + GHCB_VERSION_MIN, + sev_enc_bit)); +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4be7d13d4462..091f792498a2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -90,7 +90,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ - bool always; /* True if intercept is always on */ + bool always; /* True if intercept is initially cleared */ } direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = { { .index = MSR_STAR, .always = true }, { .index = MSR_IA32_SYSENTER_CS, .always = true }, @@ -108,6 +108,9 @@ static const struct svm_direct_access_msrs { { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, { .index = MSR_IA32_LASTINTTOIP, .always = false }, + { .index = MSR_EFER, .always = false }, + { .index = MSR_IA32_CR_PAT, .always = false }, + { .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, { .index = MSR_INVALID, .always = false }, }; @@ -676,8 +679,8 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm, msrpm[offset] = tmp; } -static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, - int read, int write) +void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, + int read, int write) { set_shadow_msr_intercept(vcpu, msr, read, write); set_msr_interception_bitmap(vcpu, msrpm, msr, read, write); @@ -1263,6 +1266,11 @@ static void init_vmcb(struct vcpu_svm *svm) if (sev_guest(svm->vcpu.kvm)) { svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; clr_exception_intercept(svm, UD_VECTOR); + + if (sev_es_guest(svm->vcpu.kvm)) { + /* Perform SEV-ES specific VMCB updates */ + sev_es_init_vmcb(svm); + } } vmcb_mark_all_dirty(svm->vmcb); @@ -1356,6 +1364,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm_init_osvw(vcpu); vcpu->arch.microcode_version = 0x01000065; + if (sev_es_guest(svm->vcpu.kvm)) + /* Perform SEV-ES specific VMCB creation 
updates */ + sev_es_create_vcpu(svm); + return 0; error_free_vmsa_page: @@ -1451,6 +1463,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) loadsegment(gs, svm->host.gs); #endif #endif + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); } @@ -3101,6 +3114,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page); pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id); pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id); + pr_err("%-20s%016llx\n", "vmsa_pa:", control->vmsa_pa); pr_err("VMCB State Save Area:\n"); pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", "es:", diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index b85c162a8a1e..03359185331c 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -34,7 +34,7 @@ static const u32 host_save_user_msrs[] = { #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) -#define MAX_DIRECT_ACCESS_MSRS 15 +#define MAX_DIRECT_ACCESS_MSRS 18 #define MSRPM_OFFSETS 16 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; @@ -417,6 +417,8 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu); bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); void svm_set_gif(struct vcpu_svm *svm, bool value); int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code); +void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, + int read, int write); /* nested.c */ @@ -576,5 +578,7 @@ void sev_hardware_teardown(void); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct vcpu_svm *svm); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); +void sev_es_init_vmcb(struct vcpu_svm *svm); +void sev_es_create_vcpu(struct vcpu_svm *svm); #endif -- cgit v1.2.3 From 861377730aa9db4cbaa0f3bd3f4d295c152732c4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:07 -0600 Subject: KVM: SVM: Provide support for SEV-ES vCPU loading An SEV-ES vCPU requires additional VMCB vCPU load/put requirements. SEV-ES hardware will restore certain registers on VMEXIT, but not save them on VMRUN (see Table B-3 and Table B-4 of the AMD64 APM Volume 2), so make the following changes: General vCPU load changes: - During vCPU loading, perform a VMSAVE to the per-CPU SVM save area and save the current values of XCR0, XSS and PKRU to the per-CPU SVM save area as these registers will be restored on VMEXIT. General vCPU put changes: - Do not attempt to restore registers that SEV-ES hardware has already restored on VMEXIT. 
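A standalone sketch of the table-driven approach this patch introduces: each host MSR is tagged with whether SEV-ES hardware restores it on VMEXIT, and the load/put paths only save and restore the ones it does not. The entries shown are a representative subset, named rather than indexed.

#include <stdbool.h>
#include <stdio.h>

struct host_save_msr {
    const char *name;           /* stand-in for the MSR index */
    bool sev_es_restored;       /* restored by hardware on VMEXIT */
};

static const struct host_save_msr host_save_user_msrs[] = {
    { "MSR_STAR",    true  },
    { "MSR_LSTAR",   true  },
    { "MSR_TSC_AUX", false },
};

int main(void)
{
    for (unsigned int i = 0;
         i < sizeof(host_save_user_msrs) / sizeof(host_save_user_msrs[0]); i++) {
        if (host_save_user_msrs[i].sev_es_restored)
            continue;   /* hardware restores this one, nothing to do */
        printf("manually save/restore %s\n", host_save_user_msrs[i].name);
    }
    return 0;
}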
Signed-off-by: Tom Lendacky Message-Id: <019390e9cb5e93cd73014fa5a040c17d42588733.1607620209.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 10 ++++++--- arch/x86/kvm/svm/sev.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 36 ++++++++++++++++++++----------- arch/x86/kvm/svm/svm.h | 22 +++++++++++++------ arch/x86/kvm/x86.c | 3 ++- arch/x86/kvm/x86.h | 1 + 6 files changed, 103 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index a57331de59e2..1c561945b426 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -234,7 +234,8 @@ struct vmcb_save_area { u8 cpl; u8 reserved_2[4]; u64 efer; - u8 reserved_3[112]; + u8 reserved_3[104]; + u64 xss; /* Valid for SEV-ES only */ u64 cr4; u64 cr3; u64 cr0; @@ -265,9 +266,12 @@ struct vmcb_save_area { /* * The following part of the save area is valid only for - * SEV-ES guests when referenced through the GHCB. + * SEV-ES guests when referenced through the GHCB or for + * saving to the host save area. */ - u8 reserved_7[104]; + u8 reserved_7[80]; + u32 pkru; + u8 reserved_7a[20]; u64 reserved_8; /* rax already available at 0x01f8 */ u64 rcx; u64 rdx; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 584fede0b733..0ddae41e4865 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -16,12 +16,15 @@ #include #include #include +#include #include "x86.h" #include "svm.h" #include "cpuid.h" #include "trace.h" +#define __ex(x) __kvm_handle_fault_on_reboot(x) + static u8 sev_enc_bit; static int sev_flush_asids(void); static DECLARE_RWSEM(sev_deactivate_lock); @@ -1852,3 +1855,54 @@ void sev_es_create_vcpu(struct vcpu_svm *svm) GHCB_VERSION_MIN, sev_enc_bit)); } + +void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu) +{ + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct vmcb_save_area *hostsa; + unsigned int i; + + /* + * As an SEV-ES guest, hardware will restore the host state on VMEXIT, + * of which one step is to perform a VMLOAD. Since hardware does not + * perform a VMSAVE on VMRUN, the host savearea must be updated. + */ + asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); + + /* + * Certain MSRs are restored on VMEXIT, only save ones that aren't + * restored. + */ + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) { + if (host_save_user_msrs[i].sev_es_restored) + continue; + + rdmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]); + } + + /* XCR0 is restored on VMEXIT, save the current host value */ + hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400); + hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + + /* PKRU is restored on VMEXIT, save the curent host value */ + hostsa->pkru = read_pkru(); + + /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */ + hostsa->xss = host_xss; +} + +void sev_es_vcpu_put(struct vcpu_svm *svm) +{ + unsigned int i; + + /* + * Certain MSRs are restored on VMEXIT and were saved with vmsave in + * sev_es_vcpu_load() above. Only restore ones that weren't. 
+ */ + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) { + if (host_save_user_msrs[i].sev_es_restored) + continue; + + wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]); + } +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 091f792498a2..3a2e48a8d05c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1417,15 +1417,20 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmcb_mark_all_dirty(svm->vmcb); } + if (sev_es_guest(svm->vcpu.kvm)) { + sev_es_vcpu_load(svm, cpu); + } else { #ifdef CONFIG_X86_64 - rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); + rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); #endif - savesegment(fs, svm->host.fs); - savesegment(gs, svm->host.gs); - svm->host.ldt = kvm_read_ldt(); + savesegment(fs, svm->host.fs); + savesegment(gs, svm->host.gs); + svm->host.ldt = kvm_read_ldt(); - for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) + rdmsrl(host_save_user_msrs[i].index, + svm->host_user_msrs[i]); + } if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; @@ -1453,19 +1458,24 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) avic_vcpu_put(vcpu); ++vcpu->stat.host_state_reload; - kvm_load_ldt(svm->host.ldt); + if (sev_es_guest(svm->vcpu.kvm)) { + sev_es_vcpu_put(svm); + } else { + kvm_load_ldt(svm->host.ldt); #ifdef CONFIG_X86_64 - loadsegment(fs, svm->host.fs); - wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); - load_gs_index(svm->host.gs); + loadsegment(fs, svm->host.fs); + wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); + load_gs_index(svm->host.gs); #else #ifdef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); + loadsegment(gs, svm->host.gs); #endif #endif - for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) + wrmsrl(host_save_user_msrs[i].index, + svm->host_user_msrs[i]); + } } static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 03359185331c..df9fe11a632c 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -23,15 +23,23 @@ #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) -static const u32 host_save_user_msrs[] = { +static const struct svm_host_save_msrs { + u32 index; /* Index of the MSR */ + bool sev_es_restored; /* True if MSR is restored on SEV-ES VMEXIT */ +} host_save_user_msrs[] = { #ifdef CONFIG_X86_64 - MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, - MSR_FS_BASE, + { .index = MSR_STAR, .sev_es_restored = true }, + { .index = MSR_LSTAR, .sev_es_restored = true }, + { .index = MSR_CSTAR, .sev_es_restored = true }, + { .index = MSR_SYSCALL_MASK, .sev_es_restored = true }, + { .index = MSR_KERNEL_GS_BASE, .sev_es_restored = true }, + { .index = MSR_FS_BASE, .sev_es_restored = true }, #endif - MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, - MSR_TSC_AUX, + { .index = MSR_IA32_SYSENTER_CS, .sev_es_restored = true }, + { .index = MSR_IA32_SYSENTER_ESP, .sev_es_restored = true }, + { .index = MSR_IA32_SYSENTER_EIP, .sev_es_restored = true }, + { .index = MSR_TSC_AUX, .sev_es_restored = false }, }; - #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) #define MAX_DIRECT_ACCESS_MSRS 18 @@ -580,5 +588,7 @@ int sev_handle_vmgexit(struct vcpu_svm *svm); int sev_es_string_io(struct vcpu_svm *svm, int size, 
unsigned int port, int in); void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_create_vcpu(struct vcpu_svm *svm); +void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu); +void sev_es_vcpu_put(struct vcpu_svm *svm); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eea0e9fed62a..3e58612babfe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -197,7 +197,8 @@ EXPORT_SYMBOL_GPL(host_efer); bool __read_mostly allow_smaller_maxphyaddr = 0; EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); -static u64 __read_mostly host_xss; +u64 __read_mostly host_xss; +EXPORT_SYMBOL_GPL(host_xss); u64 __read_mostly supported_xss; EXPORT_SYMBOL_GPL(supported_xss); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index fe7f3df91b2c..c5ee0f5ce0f1 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -279,6 +279,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); extern u64 host_xcr0; extern u64 supported_xcr0; +extern u64 host_xss; extern u64 supported_xss; static inline bool kvm_mpx_supported(void) -- cgit v1.2.3 From 16809ecdc1e8ab7278f1d60021ac809edd17d060 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:08 -0600 Subject: KVM: SVM: Provide an updated VMRUN invocation for SEV-ES guests The run sequence is different for an SEV-ES guest compared to a legacy or even an SEV guest. The guest vCPU register state of an SEV-ES guest will be restored on VMRUN and saved on VMEXIT. There is no need to restore the guest registers directly and through VMLOAD before VMRUN and no need to save the guest registers directly and through VMSAVE on VMEXIT. Update the svm_vcpu_run() function to skip register state saving and restoring and provide an alternative function for running an SEV-ES guest in vmenter.S Additionally, certain host state is restored across an SEV-ES VMRUN. As a result certain register states are not required to be restored upon VMEXIT (e.g. FS, GS, etc.), so only do that if the guest is not an SEV-ES guest. 
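A standalone sketch of the run-path split, reduced to a trace of which steps each flavour of guest performs: the SEV-ES path neither passes GPRs into VMRUN nor reads them back from the save area afterwards, and it skips the host segment and TSS reloads.

#include <stdbool.h>
#include <stdio.h>

static void vcpu_run(bool sev_es)
{
    if (sev_es) {
        printf("__svm_sev_es_vcpu_run(vmcb_pa)\n");
        return;     /* no GPR sync back, no host segment/TSS reloads */
    }
    printf("__svm_vcpu_run(vmcb_pa, regs)\n");
    printf("sync RAX/RSP/RIP back from vmcb->save\n");
    printf("reload_tss()\n");
}

int main(void)
{
    vcpu_run(false);
    vcpu_run(true);
    return 0;
}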
Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 25 ++++++++++++++--------- arch/x86/kvm/svm/svm.h | 5 +++++ arch/x86/kvm/svm/vmenter.S | 50 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 6 ++++++ 4 files changed, 77 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3a2e48a8d05c..941e5251e13f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3700,16 +3700,20 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); - __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs); + if (sev_es_guest(svm->vcpu.kvm)) { + __svm_sev_es_vcpu_run(svm->vmcb_pa); + } else { + __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs); #ifdef CONFIG_X86_64 - native_wrmsrl(MSR_GS_BASE, svm->host.gs_base); + native_wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else - loadsegment(fs, svm->host.fs); + loadsegment(fs, svm->host.fs); #ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); + loadsegment(gs, svm->host.gs); #endif #endif + } /* * VMEXIT disables interrupts (host state), but tracing and lockdep @@ -3807,14 +3811,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - reload_tss(vcpu); + if (!sev_es_guest(svm->vcpu.kvm)) + reload_tss(vcpu); x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); - vcpu->arch.cr2 = svm->vmcb->save.cr2; - vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; - vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; - vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + if (!sev_es_guest(svm->vcpu.kvm)) { + vcpu->arch.cr2 = svm->vmcb->save.cr2; + vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; + vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; + vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + } if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index df9fe11a632c..a5067f776ce0 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -591,4 +591,9 @@ void sev_es_create_vcpu(struct vcpu_svm *svm); void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu); void sev_es_vcpu_put(struct vcpu_svm *svm); +/* vmenter.S */ + +void __svm_sev_es_vcpu_run(unsigned long vmcb_pa); +void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); + #endif diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 1ec1ac40e328..6feb8c08f45a 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -168,3 +168,53 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP ret SYM_FUNC_END(__svm_vcpu_run) + +/** + * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode + * @vmcb_pa: unsigned long + */ +SYM_FUNC_START(__svm_sev_es_vcpu_run) + push %_ASM_BP +#ifdef CONFIG_X86_64 + push %r15 + push %r14 + push %r13 + push %r12 +#else + push %edi + push %esi +#endif + push %_ASM_BX + + /* Enter guest mode */ + mov %_ASM_ARG1, %_ASM_AX + sti + +1: vmrun %_ASM_AX + jmp 3f +2: cmpb $0, kvm_rebooting + jne 3f + ud2 + _ASM_EXTABLE(1b, 2b) + +3: cli + +#ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! 
*/ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +#endif + + pop %_ASM_BX + +#ifdef CONFIG_X86_64 + pop %r12 + pop %r13 + pop %r14 + pop %r15 +#else + pop %esi + pop %edi +#endif + pop %_ASM_BP + ret +SYM_FUNC_END(__svm_sev_es_vcpu_run) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3e58612babfe..648c677b12e9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -880,6 +880,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw); void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) { + if (vcpu->arch.guest_state_protected) + return; + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { if (vcpu->arch.xcr0 != host_xcr0) @@ -900,6 +903,9 @@ EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { + if (vcpu->arch.guest_state_protected) + return; + if (static_cpu_has(X86_FEATURE_PKU) && (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) { -- cgit v1.2.3 From ad73109ae7ec30d5bfb76be108e304f9f0af4829 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:10:09 -0600 Subject: KVM: SVM: Provide support to launch and run an SEV-ES guest An SEV-ES guest is started by invoking a new SEV initialization ioctl, KVM_SEV_ES_INIT. This identifies the guest as an SEV-ES guest, which is used to drive the appropriate ASID allocation, VMSA encryption, etc. Before being able to run an SEV-ES vCPU, the vCPU VMSA must be encrypted and measured. This is done using the LAUNCH_UPDATE_VMSA command after all calls to LAUNCH_UPDATE_DATA have been performed, but before LAUNCH_MEASURE has been performed. In order to establish the encrypted VMSA, the current (traditional) VMSA and the GPRs are synced to the page that will hold the encrypted VMSA and then LAUNCH_UPDATE_VMSA is invoked. The vCPU is then marked as having protected guest state. 
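To make the ordering described above concrete, a rough userspace sketch of the launch flow through the KVM_MEMORY_ENCRYPT_OP ioctl follows. It is illustrative only: the command payloads, error reporting via cmd.error, and the measurement buffer handling are elided, and it assumes the VM fd, the SEV device fd, and all vCPUs have already been created.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: issue one SEV command against an existing VM fd. */
static int sev_cmd(int vm_fd, int sev_fd, __u32 id, void *data)
{
	struct kvm_sev_cmd cmd = {
		.id	= id,
		.data	= (__u64)(unsigned long)data,
		.sev_fd	= sev_fd,
	};

	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}

/*
 * Launch an SEV-ES guest: ES_INIT, LAUNCH_START, all LAUNCH_UPDATE_DATA
 * calls, then LAUNCH_UPDATE_VMSA (which encrypts each vCPU's VMSA), and
 * only after that LAUNCH_MEASURE and LAUNCH_FINISH.
 */
static int launch_sev_es_guest(int vm_fd, int sev_fd,
			       struct kvm_sev_launch_start *start,
			       struct kvm_sev_launch_update_data *data,
			       struct kvm_sev_launch_measure *measure)
{
	if (sev_cmd(vm_fd, sev_fd, KVM_SEV_ES_INIT, NULL))
		return -1;
	if (sev_cmd(vm_fd, sev_fd, KVM_SEV_LAUNCH_START, start))
		return -1;
	if (sev_cmd(vm_fd, sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, data))
		return -1;
	if (sev_cmd(vm_fd, sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL))
		return -1;
	if (sev_cmd(vm_fd, sev_fd, KVM_SEV_LAUNCH_MEASURE, measure))
		return -1;
	return sev_cmd(vm_fd, sev_fd, KVM_SEV_LAUNCH_FINISH, NULL);
}

Once LAUNCH_UPDATE_VMSA has run, the in-kernel handler below marks each vCPU's guest state as protected, which is what gates the xsave-state loads above and the register handling in the earlier VMRUN patch.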
Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0ddae41e4865..6eb097714d43 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -201,6 +201,16 @@ e_free: return ret; } +static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + if (!sev_es) + return -ENOTTY; + + to_kvm_svm(kvm)->sev_info.es_active = true; + + return sev_guest_init(kvm, argp); +} + static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { struct sev_data_activate *data; @@ -500,6 +510,94 @@ e_free: return ret; } +static int sev_es_sync_vmsa(struct vcpu_svm *svm) +{ + struct vmcb_save_area *save = &svm->vmcb->save; + + /* Check some debug related fields before encrypting the VMSA */ + if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1)) + return -EINVAL; + + /* Sync registgers */ + save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX]; + save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX]; + save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX]; + save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP]; + save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP]; + save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI]; + save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI]; + save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8]; + save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9]; + save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10]; + save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11]; + save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12]; + save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13]; + save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14]; + save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15]; + save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP]; + + /* Sync some non-GPR registers before encrypting */ + save->xcr0 = svm->vcpu.arch.xcr0; + save->pkru = svm->vcpu.arch.pkru; + save->xss = svm->vcpu.arch.ia32_xss; + + /* + * SEV-ES will use a VMSA that is pointed to by the VMCB, not + * the traditional VMSA that is part of the VMCB. Copy the + * traditional VMSA as it has been built so far (in prep + * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state. + */ + memcpy(svm->vmsa, save, sizeof(*save)); + + return 0; +} + +static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct sev_data_launch_update_vmsa *vmsa; + int i, ret; + + if (!sev_es_guest(kvm)) + return -ENOTTY; + + vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL); + if (!vmsa) + return -ENOMEM; + + for (i = 0; i < kvm->created_vcpus; i++) { + struct vcpu_svm *svm = to_svm(kvm->vcpus[i]); + + /* Perform some pre-encryption checks against the VMSA */ + ret = sev_es_sync_vmsa(svm); + if (ret) + goto e_free; + + /* + * The LAUNCH_UPDATE_VMSA command will perform in-place + * encryption of the VMSA memory content (i.e it will write + * the same memory region with the guest's key), so invalidate + * it first. 
+ */ + clflush_cache_range(svm->vmsa, PAGE_SIZE); + + vmsa->handle = sev->handle; + vmsa->address = __sme_pa(svm->vmsa); + vmsa->len = PAGE_SIZE; + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa, + &argp->error); + if (ret) + goto e_free; + + svm->vcpu.arch.guest_state_protected = true; + } + +e_free: + kfree(vmsa); + return ret; +} + static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { void __user *measure = (void __user *)(uintptr_t)argp->data; @@ -957,12 +1055,18 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) case KVM_SEV_INIT: r = sev_guest_init(kvm, &sev_cmd); break; + case KVM_SEV_ES_INIT: + r = sev_es_guest_init(kvm, &sev_cmd); + break; case KVM_SEV_LAUNCH_START: r = sev_launch_start(kvm, &sev_cmd); break; case KVM_SEV_LAUNCH_UPDATE_DATA: r = sev_launch_update_data(kvm, &sev_cmd); break; + case KVM_SEV_LAUNCH_UPDATE_VMSA: + r = sev_launch_update_vmsa(kvm, &sev_cmd); + break; case KVM_SEV_LAUNCH_MEASURE: r = sev_launch_measure(kvm, &sev_cmd); break; -- cgit v1.2.3 From 8640ca588b032166d6be6b4d3632d565d6d88e89 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 15 Dec 2020 12:44:07 -0500 Subject: KVM: SVM: Add AP_JUMP_TABLE support in prep for AP booting The GHCB specification requires the hypervisor to save the address of an AP Jump Table so that, for example, vCPUs that have been parked by UEFI can be started by the OS. Provide support for the AP Jump Table set/get exit code. Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 28 ++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.h | 1 + 2 files changed, 29 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6eb097714d43..8b5ef0fe4490 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -18,6 +18,8 @@ #include #include +#include + #include "x86.h" #include "svm.h" #include "cpuid.h" @@ -1559,6 +1561,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) goto vmgexit_err; break; case SVM_VMGEXIT_NMI_COMPLETE: + case SVM_VMGEXIT_AP_JUMP_TABLE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; default: @@ -1883,6 +1886,31 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_NMI_COMPLETE: ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET); break; + case SVM_VMGEXIT_AP_JUMP_TABLE: { + struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + + switch (control->exit_info_1) { + case 0: + /* Set AP jump table address */ + sev->ap_jump_table = control->exit_info_2; + break; + case 1: + /* Get AP jump table address */ + ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table); + break; + default: + pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n", + control->exit_info_1); + ghcb_set_sw_exit_info_1(ghcb, 1); + ghcb_set_sw_exit_info_2(ghcb, + X86_TRAP_UD | + SVM_EVTINJ_TYPE_EXEPT | + SVM_EVTINJ_VALID); + } + + ret = 1; + break; + } case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(&svm->vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index a5067f776ce0..5431e6335e2e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -78,6 +78,7 @@ struct kvm_sev_info { int fd; /* SEV device fd */ unsigned long pages_locked; /* Number of pages locked */ struct list_head regions_list; /* List of registered regions */ + u64 ap_jump_table; /* SEV-ES AP Jump Table address */ }; struct kvm_svm { -- cgit v1.2.3 From d45f89f7437d0f2c8275b4434096164db106384d Mon Sep 17 00:00:00 2001 
From: Paolo Bonzini Date: Wed, 16 Dec 2020 13:08:21 -0500 Subject: KVM: SVM: fix 32-bit compilation VCPU_REGS_R8...VCPU_REGS_R15 are not defined on 32-bit x86, so cull them from the synchronization of the VMSA. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 8b5ef0fe4490..e57847ff8bd2 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -529,6 +529,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP]; save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI]; save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI]; +#ifdef CONFIG_X86_64 save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8]; save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9]; save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10]; @@ -537,6 +538,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13]; save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14]; save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15]; +#endif save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP]; /* Sync some non-GPR registers before encrypting */ -- cgit v1.2.3
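For background on why the 32-bit build broke: the GPR indices above RDI only exist on 64-bit builds. A rough sketch of the enum layout involved (the real definition lives in the x86 KVM headers and spells the values through __VCPU_REGS_* constants, with further VCPU_EXREG_* entries following) looks like:

enum kvm_reg {
	VCPU_REGS_RAX = 0,
	VCPU_REGS_RCX = 1,
	VCPU_REGS_RDX = 2,
	VCPU_REGS_RBX = 3,
	VCPU_REGS_RSP = 4,
	VCPU_REGS_RBP = 5,
	VCPU_REGS_RSI = 6,
	VCPU_REGS_RDI = 7,
#ifdef CONFIG_X86_64
	VCPU_REGS_R8  = 8,
	VCPU_REGS_R9  = 9,
	VCPU_REGS_R10 = 10,
	VCPU_REGS_R11 = 11,
	VCPU_REGS_R12 = 12,
	VCPU_REGS_R13 = 13,
	VCPU_REGS_R14 = 14,
	VCPU_REGS_R15 = 15,
#endif
	VCPU_REGS_RIP,
	NR_VCPU_REGS,
};

Without the CONFIG_X86_64 guard added above, sev_es_sync_vmsa() would reference VCPU_REGS_R8..VCPU_REGS_R15, which a 32-bit build never defines, hence the compile failure this patch addresses.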