From 675cabc89900732a8a90f19a3c6ed314327a96e0 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Thu, 9 Feb 2023 17:58:03 +0000 Subject: arm64: Add ARM64_HAS_NESTED_VIRT cpufeature Add a new ARM64_HAS_NESTED_VIRT feature to indicate that the CPU has the ARMv8.3 nested virtualization capability, together with the 'kvm-arm.mode=nested' command line option. This will be used to support nested virtualization in KVM. Reviewed-by: Russell King (Oracle) Acked-by: Catalin Marinas Signed-off-by: Jintack Lim Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall [maz: moved the command-line option to kvm-arm.mode] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-2-maz@kernel.org Signed-off-by: Oliver Upton --- Documentation/admin-guide/kernel-parameters.txt | 7 ++++++- arch/arm64/include/asm/kvm_host.h | 5 +++++ arch/arm64/kernel/cpufeature.c | 25 +++++++++++++++++++++++++ arch/arm64/kvm/arm.c | 5 +++++ arch/arm64/tools/cpucaps | 1 + 5 files changed, 42 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6cfa6e3996cf..b7b0704e360e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2553,9 +2553,14 @@ protected: nVHE-based mode with support for guests whose state is kept private from the host. + nested: VHE-based mode with support for nested + virtualization. Requires at least ARMv8.3 + hardware. + Defaults to VHE/nVHE based on hardware support. Setting mode to "protected" will disable kexec and hibernation - for the host. + for the host. "nested" is experimental and should be + used with extreme caution. 
kvm-arm.vgic_v3_group0_trap= [KVM,ARM] Trap guest accesses to GICv3 group-0 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 35a159d131b5..8919e971abdf 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -60,9 +60,14 @@ enum kvm_mode { KVM_MODE_DEFAULT, KVM_MODE_PROTECTED, + KVM_MODE_NV, KVM_MODE_NONE, }; +#ifdef CONFIG_KVM enum kvm_mode kvm_get_mode(void); +#else +static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; }; +#endif DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a77315b338e6..3fc14ee86239 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1956,6 +1956,20 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } +static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap, + int scope) +{ + if (kvm_get_mode() != KVM_MODE_NV) + return false; + + if (!has_cpuid_feature(cap, scope)) { + pr_warn("unavailable: %s\n", cap->desc); + return false; + } + + return true; +} + #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { @@ -2215,6 +2229,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, + { + .desc = "Nested Virtualization Support", + .capability = ARM64_HAS_NESTED_VIRT, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_nested_virt_support, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_EL1_NV_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64MMFR2_EL1_NV_IMP, + }, { .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE, .type = ARM64_CPUCAP_SYSTEM_FEATURE, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9c5573bc4614..3fd8f37830f2 100644 --- a/arch/arm64/kvm/arm.c +++ 
b/arch/arm64/kvm/arm.c @@ -2310,6 +2310,11 @@ static int __init early_kvm_mode_cfg(char *arg) return 0; } + if (strcmp(arg, "nested") == 0 && !WARN_ON(!is_kernel_in_hyp_mode())) { + kvm_mode = KVM_MODE_NV; + return 0; + } + return -EINVAL; } early_param("kvm-arm.mode", early_kvm_mode_cfg); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index dfeb2c51e257..1af77a3657f7 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -31,6 +31,7 @@ HAS_GENERIC_AUTH_IMP_DEF HAS_IRQ_PRIO_MASKING HAS_LDAPR HAS_LSE_ATOMICS +HAS_NESTED_VIRT HAS_NO_FPSIMD HAS_NO_HW_PREFETCH HAS_PAN -- cgit v1.2.3 From 8531bd63a8dc6c83fef2e1b7cbfd6f1a50bf87db Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 9 Feb 2023 17:58:04 +0000 Subject: KVM: arm64: Use the S2 MMU context to iterate over S2 table Most of our S2 helpers take a kvm_s2_mmu pointer, but quickly revert back to using the kvm structure. By doing so, we lose track of which S2 MMU context we were initially using, and fall back to the "canonical" context. If we were trying to unmap an S2 context managed by a guest hypervisor, we end up parsing the wrong set of page tables, and bad stuff happens (as this is often happening on the back of a trapped TLBI from the guest hypervisor). Instead, make sure we always use the provided MMU context all the way. This has no impact on non-NV, as we always pass the canonical MMU context. Signed-off-by: Marc Zyngier Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20230209175820.1939006-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index a3ee3b605c9b..892d6a5fb2f5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -46,16 +46,17 @@ static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end) * long will also starve other vCPUs. 
We have to also make sure that the page * tables are not freed while we released the lock. */ -static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr, +static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end, int (*fn)(struct kvm_pgtable *, u64, u64), bool resched) { + struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); int ret; u64 next; do { - struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; + struct kvm_pgtable *pgt = mmu->pgt; if (!pgt) return -EINVAL; @@ -71,8 +72,8 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr, return ret; } -#define stage2_apply_range_resched(kvm, addr, end, fn) \ - stage2_apply_range(kvm, addr, end, fn, true) +#define stage2_apply_range_resched(mmu, addr, end, fn) \ + stage2_apply_range(mmu, addr, end, fn, true) static bool memslot_is_logging(struct kvm_memory_slot *memslot) { @@ -235,7 +236,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 lockdep_assert_held_write(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap, + WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap, may_block)); } @@ -250,7 +251,7 @@ static void stage2_flush_memslot(struct kvm *kvm, phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = addr + PAGE_SIZE * memslot->npages; - stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_flush); + stage2_apply_range_resched(&kvm->arch.mmu, addr, end, kvm_pgtable_stage2_flush); } /** @@ -934,8 +935,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, */ static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) { - struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); - stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect); + stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect); } /** -- cgit v1.2.3 From 89b0e7de3451a17f23bc1c39bc40eee1811f1669 Mon Sep 17 00:00:00 2001 From: 
Christoffer Dall Date: Thu, 9 Feb 2023 17:58:05 +0000 Subject: KVM: arm64: nv: Introduce nested virtualization VCPU feature Introduce the feature bit and a primitive that checks if the feature is set behind a static key check based on the cpus_have_const_cap check. Checking vcpu_has_nv() on systems without nested virt enabled should have negligible overhead. We don't yet allow userspace to actually set this feature. Reviewed-by: Ganapatrao Kulkarni Reviewed-by: Russell King (Oracle) Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-4-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 14 ++++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 1 + 2 files changed, 15 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_nested.h diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h new file mode 100644 index 000000000000..fd601ea68d13 --- /dev/null +++ b/arch/arm64/include/asm/kvm_nested.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARM64_KVM_NESTED_H +#define __ARM64_KVM_NESTED_H + +#include + +static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) +{ + return (!__is_defined(__KVM_NVHE_HYPERVISOR__) && + cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && + test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features)); +} + +#endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index a7a857f1784d..f8129c624b07 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -109,6 +109,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */ #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ +#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ struct kvm_vcpu_init { __u32 target; -- cgit v1.2.3 
From 2fb32357ae672fa1cb63e046ba387963687300b0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 9 Feb 2023 17:58:06 +0000 Subject: KVM: arm64: nv: Reset VCPU to EL2 registers if VCPU nested virt is set Reset the VCPU with PSTATE.M = EL2h when the nested virtualization feature is enabled on the VCPU. Reviewed-by: Russell King (Oracle) Reviewed-by: Alexandru Elisei Signed-off-by: Christoffer Dall [maz: rework register reset not to use empty data structures] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-5-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/reset.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index e0267f672b8a..d061dcc21578 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -27,6 +27,7 @@ #include #include #include +#include #include /* Maximum phys_shift supported for any VM on this host */ @@ -38,6 +39,9 @@ static u32 kvm_ipa_limit; #define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \ PSR_F_BIT | PSR_D_BIT) +#define VCPU_RESET_PSTATE_EL2 (PSR_MODE_EL2h | PSR_A_BIT | PSR_I_BIT | \ + PSR_F_BIT | PSR_D_BIT) + #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ PSR_AA32_I_BIT | PSR_AA32_F_BIT) @@ -220,6 +224,10 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu) if (kvm_has_mte(kvm) && is32bit) return -EINVAL; + /* NV is incompatible with AArch32 */ + if (vcpu_has_nv(vcpu) && is32bit) + return -EINVAL; + if (is32bit) set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); @@ -272,6 +280,12 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) if (loaded) kvm_arch_vcpu_put(vcpu); + /* Disallow NV+SVE for the time being */ + if (vcpu_has_nv(vcpu) && vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) { + ret = -EINVAL; + goto out; + } + if (!kvm_arm_vcpu_sve_finalized(vcpu)) { if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) { ret = kvm_vcpu_enable_sve(vcpu); @@ -294,6 +308,8 @@ int 
kvm_reset_vcpu(struct kvm_vcpu *vcpu) default: if (vcpu_el1_is_32bit(vcpu)) { pstate = VCPU_RESET_PSTATE_SVC; + } else if (vcpu_has_nv(vcpu)) { + pstate = VCPU_RESET_PSTATE_EL2; } else { pstate = VCPU_RESET_PSTATE_EL1; } -- cgit v1.2.3 From 1d05d51bac78428764664e8f841ac52ee87f24db Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 9 Feb 2023 17:58:07 +0000 Subject: KVM: arm64: nv: Allow userspace to set PSR_MODE_EL2x We were not allowing userspace to set a more privileged mode for the VCPU than EL1, but we should allow this when nested virtualization is enabled for the VCPU. Reviewed-by: Russell King (Oracle) Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-6-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/guest.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 5626ddb540ce..63643c98e6c3 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "trace.h" @@ -253,6 +254,11 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!vcpu_el1_is_32bit(vcpu)) return -EINVAL; break; + case PSR_MODE_EL2h: + case PSR_MODE_EL2t: + if (!vcpu_has_nv(vcpu)) + return -EINVAL; + fallthrough; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: -- cgit v1.2.3 From 5305cc2c34004042ef2683bebd126cf54eb6fc74 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 9 Feb 2023 17:58:08 +0000 Subject: KVM: arm64: nv: Add EL2 system registers to vcpu context Add the minimal set of EL2 system registers to the vcpu context. Nothing uses them just yet. 
Reviewed-by: Andre Przywara Reviewed-by: Russell King (Oracle) Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-7-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8919e971abdf..f53473071bcb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -325,12 +325,43 @@ enum vcpu_sysreg { TFSR_EL1, /* Tag Fault Status Register (EL1) */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ - /* 32bit specific registers. Keep them at the end of the range */ + /* 32bit specific registers. */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ FPEXC32_EL2, /* Floating-Point Exception Control Register */ DBGVCR32_EL2, /* Debug Vector Catch Register */ + /* EL2 registers */ + VPIDR_EL2, /* Virtualization Processor ID Register */ + VMPIDR_EL2, /* Virtualization Multiprocessor ID Register */ + SCTLR_EL2, /* System Control Register (EL2) */ + ACTLR_EL2, /* Auxiliary Control Register (EL2) */ + HCR_EL2, /* Hypervisor Configuration Register */ + MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ + CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ + HSTR_EL2, /* Hypervisor System Trap Register */ + HACR_EL2, /* Hypervisor Auxiliary Control Register */ + TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ + TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ + TCR_EL2, /* Translation Control Register (EL2) */ + VTTBR_EL2, /* Virtualization Translation Table Base Register */ + VTCR_EL2, /* Virtualization Translation Control Register */ + SPSR_EL2, /* EL2 saved program status register */ + ELR_EL2, /* EL2 exception link register */ + AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */ + AFSR1_EL2, /* Auxiliary Fault Status Register 1 (EL2) 
*/ + ESR_EL2, /* Exception Syndrome Register (EL2) */ + FAR_EL2, /* Fault Address Register (EL2) */ + HPFAR_EL2, /* Hypervisor IPA Fault Address Register */ + MAIR_EL2, /* Memory Attribute Indirection Register (EL2) */ + AMAIR_EL2, /* Auxiliary Memory Attribute Indirection Register (EL2) */ + VBAR_EL2, /* Vector Base Address Register (EL2) */ + RVBAR_EL2, /* Reset Vector Base Address Register */ + CONTEXTIDR_EL2, /* Context ID Register (EL2) */ + TPIDR_EL2, /* EL2 Software Thread ID Register */ + CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ + SP_EL2, /* EL2 Stack Pointer */ + NR_SYS_REGS /* Nothing after this line! */ }; -- cgit v1.2.3 From 0043b29038e23573f529d8cd7c32967c2b6b976d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 9 Feb 2023 17:58:09 +0000 Subject: KVM: arm64: nv: Add nested virt VCPU primitives for vEL2 VCPU state When running a nested hypervisor we commonly have to figure out if the VCPU mode is running in the context of a guest hypervisor or guest guest, or just a normal guest. Add convenient primitives for this. 
Reviewed-by: Russell King (Oracle) Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-8-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_emulate.h | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 193583df2d9c..e5d826dc0b63 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -183,6 +183,62 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, vcpu_gp_regs(vcpu)->regs[reg_num] = val; } +static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt) +{ + switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) { + case PSR_MODE_EL2h: + case PSR_MODE_EL2t: + return true; + default: + return false; + } +} + +static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu) +{ + return vcpu_is_el2_ctxt(&vcpu->arch.ctxt); +} + +static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt) +{ + return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H; +} + +static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu) +{ + return __vcpu_el2_e2h_is_set(&vcpu->arch.ctxt); +} + +static inline bool __vcpu_el2_tge_is_set(const struct kvm_cpu_context *ctxt) +{ + return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_TGE; +} + +static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu) +{ + return __vcpu_el2_tge_is_set(&vcpu->arch.ctxt); +} + +static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt) +{ + /* + * We are in a hypervisor context if the vcpu mode is EL2 or + * E2H and TGE bits are set. The latter means we are in the user space + * of the VHE kernel. ARMv8.1 ARM describes this as 'InHost' + * + * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the + * rest of the KVM code, and will result in a misbehaving guest. 
+ */ + return vcpu_is_el2_ctxt(ctxt) || + (__vcpu_el2_e2h_is_set(ctxt) && __vcpu_el2_tge_is_set(ctxt)) || + __vcpu_el2_tge_is_set(ctxt); +} + +static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) +{ + return __is_hyp_ctxt(&vcpu->arch.ctxt); +} + /* * The layout of SPSR for an AArch32 state is different when observed from an * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 -- cgit v1.2.3 From 6ff9dc238a53f4554d97b561fe4e8467d86544d9 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Thu, 9 Feb 2023 17:58:10 +0000 Subject: KVM: arm64: nv: Handle HCR_EL2.NV system register traps ARM v8.3 introduces a new bit in the HCR_EL2, which is the NV bit. When this bit is set, accessing EL2 registers in EL1 traps to EL2. In addition, executing the following instructions in EL1 will trap to EL2: tlbi, at, eret, and msr/mrs instructions to access SP_EL1. Most of the instructions that trap to EL2 with the NV bit were undef at EL1 prior to ARM v8.3. The only instruction that was not undef is eret. This patch sets up a handler for EL2 registers and SP_EL1 register accesses at EL1. The host hypervisor keeps those register values in memory, and will emulate their behavior. This patch doesn't set the NV bit yet. It will be set in a later patch once nested virtualization support is completed. 
Reviewed-by: Russell King (Oracle) Signed-off-by: Jintack Lim [maz: EL2_REG() macros] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-9-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/sysreg.h | 38 +++++++++++++++- arch/arm64/kvm/sys_regs.c | 99 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 131 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1312fb48f18b..d13f168abe4b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -490,23 +490,51 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) +#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0) +#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5) + #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) +#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) +#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) +#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) +#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) +#define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3) #define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) #define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) +#define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7) + +#define SYS_TTBR0_EL2 sys_reg(3, 4, 2, 0, 0) +#define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1) +#define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2) +#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) +#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) + #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) +#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) +#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0) +#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1) #define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) #define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) #define 
SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) #define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) -#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) +#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) +#define SYS_HPFAR_EL2 sys_reg(3, 4, 6, 0, 4) + +#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0) +#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0) + +#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0) +#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1) +#define SYS_RMR_EL2 sys_reg(3, 4, 12, 0, 2) +#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) #define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) #define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0) #define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1) @@ -548,6 +576,12 @@ #define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6) #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) +#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1) +#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2) + +#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3) +#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) + /* VHE encodings for architectural EL0/1 system registers */ #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) @@ -570,6 +604,8 @@ #define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1) #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) +#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0) + /* Common SCTLR_ELx flags. 
*/ #define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c6cbfe6b854b..1e6ae3b2e6dd 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -102,6 +103,18 @@ static u32 get_ccsidr(u32 csselr) return ccsidr; } +static bool access_rw(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + vcpu_write_sys_reg(vcpu, p->regval, r->reg); + else + p->regval = vcpu_read_sys_reg(vcpu, r->reg); + + return true; +} + /* * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). */ @@ -260,6 +273,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, return read_zero(vcpu, p); } +static bool trap_undef(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + kvm_inject_undefined(vcpu); + return false; +} + /* * ARMv8.1 mandates at least a trivial LORegion implementation, where all the * RW registers are RES0 (which we can implement as RAZ/WI). 
On an ARMv8.0 @@ -370,12 +391,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (p->is_write) { - vcpu_write_sys_reg(vcpu, p->regval, r->reg); + access_rw(vcpu, p, r); + if (p->is_write) vcpu_set_flag(vcpu, DEBUG_DIRTY); - } else { - p->regval = vcpu_read_sys_reg(vcpu, r->reg); - } trace_trap_reg(__func__, r->reg, p->is_write, p->regval); @@ -1446,6 +1464,24 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, .visibility = mte_visibility, \ } +static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (vcpu_has_nv(vcpu)) + return 0; + + return REG_HIDDEN; +} + +#define EL2_REG(name, acc, rst, v) { \ + SYS_DESC(SYS_##name), \ + .access = acc, \ + .reset = rst, \ + .reg = name, \ + .visibility = el2_visibility, \ + .val = v, \ +} + /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ @@ -1490,6 +1526,18 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, .visibility = raz_visibility, \ } +static bool access_sp_el1(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + __vcpu_sys_reg(vcpu, SP_EL1) = p->regval; + else + p->regval = __vcpu_sys_reg(vcpu, SP_EL1); + + return true; +} + /* * Architected system registers. 
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -1913,9 +1961,50 @@ static const struct sys_reg_desc sys_reg_descs[] = { { PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper, .reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 }, + EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0), + EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0), + EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), + EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), + EL2_REG(HCR_EL2, access_rw, reset_val, 0), + EL2_REG(MDCR_EL2, access_rw, reset_val, 0), + EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_EL2_DEFAULT ), + EL2_REG(HSTR_EL2, access_rw, reset_val, 0), + EL2_REG(HACR_EL2, access_rw, reset_val, 0), + + EL2_REG(TTBR0_EL2, access_rw, reset_val, 0), + EL2_REG(TTBR1_EL2, access_rw, reset_val, 0), + EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1), + EL2_REG(VTTBR_EL2, access_rw, reset_val, 0), + EL2_REG(VTCR_EL2, access_rw, reset_val, 0), + { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, + EL2_REG(SPSR_EL2, access_rw, reset_val, 0), + EL2_REG(ELR_EL2, access_rw, reset_val, 0), + { SYS_DESC(SYS_SP_EL1), access_sp_el1}, + { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, + EL2_REG(AFSR0_EL2, access_rw, reset_val, 0), + EL2_REG(AFSR1_EL2, access_rw, reset_val, 0), + EL2_REG(ESR_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 }, + + EL2_REG(FAR_EL2, access_rw, reset_val, 0), + EL2_REG(HPFAR_EL2, access_rw, reset_val, 0), + + EL2_REG(MAIR_EL2, access_rw, reset_val, 0), + EL2_REG(AMAIR_EL2, access_rw, reset_val, 0), + + EL2_REG(VBAR_EL2, access_rw, reset_val, 0), + EL2_REG(RVBAR_EL2, access_rw, reset_val, 0), + { SYS_DESC(SYS_RMR_EL2), trap_undef }, + + EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0), + EL2_REG(TPIDR_EL2, access_rw, reset_val, 0), + + EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0), + EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), + + EL2_REG(SP_EL2, NULL, reset_unknown, 0), 
}; static bool trap_dbgdidr(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 47f3a2fc765ae2719e6daf39c0a1c757934b152e Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Thu, 9 Feb 2023 17:58:11 +0000 Subject: KVM: arm64: nv: Support virtual EL2 exceptions Support injecting exceptions and performing exception returns to and from virtual EL2. This must be done entirely in software except when taking an exception from vEL0 to vEL2 when the virtual HCR_EL2.{E2H,TGE} == {1,1} (a VHE guest hypervisor). [maz: switch to common exception injection framework, illegal exception return handling] Reviewed-by: Ganapatrao Kulkarni Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-10-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_arm.h | 17 +++ arch/arm64/include/asm/kvm_emulate.h | 10 ++ arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/emulate-nested.c | 203 +++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/exception.c | 48 ++++++--- arch/arm64/kvm/inject_fault.c | 61 +++++++++-- arch/arm64/kvm/trace_arm.h | 59 ++++++++++ 8 files changed, 382 insertions(+), 20 deletions(-) create mode 100644 arch/arm64/kvm/emulate-nested.c diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 26b0c97df986..bea827b76c22 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -350,4 +350,21 @@ #define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\ CPACR_EL1_ZEN_EL1EN) +#define kvm_mode_names \ + { PSR_MODE_EL0t, "EL0t" }, \ + { PSR_MODE_EL1t, "EL1t" }, \ + { PSR_MODE_EL1h, "EL1h" }, \ + { PSR_MODE_EL2t, "EL2t" }, \ + { PSR_MODE_EL2h, "EL2h" }, \ + { PSR_MODE_EL3t, "EL3t" }, \ + { PSR_MODE_EL3h, "EL3h" }, \ + { PSR_AA32_MODE_USR, "32-bit USR" }, \ + { PSR_AA32_MODE_FIQ, "32-bit FIQ" }, \ + { PSR_AA32_MODE_IRQ, "32-bit IRQ" }, \ + { PSR_AA32_MODE_SVC, "32-bit SVC" }, 
\ + { PSR_AA32_MODE_ABT, "32-bit ABT" }, \ + { PSR_AA32_MODE_HYP, "32-bit HYP" }, \ + { PSR_AA32_MODE_UND, "32-bit UND" }, \ + { PSR_AA32_MODE_SYS, "32-bit SYS" } + #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index e5d826dc0b63..7e3fa8b387f6 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -33,6 +33,12 @@ enum exception_type { except_type_serror = 0x180, }; +#define kvm_exception_type_names \ + { except_type_sync, "SYNC" }, \ + { except_type_irq, "IRQ" }, \ + { except_type_fiq, "FIQ" }, \ + { except_type_serror, "SERROR" } + bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu); @@ -44,6 +50,10 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu); void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); +void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); +int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); +int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); + #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f53473071bcb..1b585a4dd122 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -634,7 +634,7 @@ struct kvm_vcpu_arch { #define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1) #define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2) #define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3) -/* For AArch64 with NV (one day): */ +/* For AArch64 with NV: */ #define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4) #define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5) #define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 5e33c2d4645a..31b07f2b2186 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -14,7 +14,7 @@ kvm-y += 
arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \ vgic-sys-reg-v3.o fpsimd.o pkvm.o \ - arch_timer.o trng.o vmid.o \ + arch_timer.o trng.o vmid.o emulate-nested.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c new file mode 100644 index 000000000000..b96662029fb1 --- /dev/null +++ b/arch/arm64/kvm/emulate-nested.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 - Linaro and Columbia University + * Author: Jintack Lim + */ + +#include +#include + +#include +#include + +#include "hyp/include/hyp/adjust_pc.h" + +#include "trace.h" + +static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr) +{ + u64 mode = spsr & PSR_MODE_MASK; + + /* + * Possible causes for an Illegal Exception Return from EL2: + * - trying to return to EL3 + * - trying to return to an illegal M value + * - trying to return to a 32bit EL + * - trying to return to EL1 with HCR_EL2.TGE set + */ + if (mode == PSR_MODE_EL3t || mode == PSR_MODE_EL3h || + mode == 0b00001 || (mode & BIT(1)) || + (spsr & PSR_MODE32_BIT) || + (vcpu_el2_tge_is_set(vcpu) && (mode == PSR_MODE_EL1t || + mode == PSR_MODE_EL1h))) { + /* + * The guest is playing with our nerves. Preserve EL, SP, + * masks, flags from the existing PSTATE, and set IL. + * The HW will then generate an Illegal State Exception + * immediately after ERET. 
+ */ + spsr = *vcpu_cpsr(vcpu); + + spsr &= (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | + PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT | + PSR_MODE_MASK | PSR_MODE32_BIT); + spsr |= PSR_IL_BIT; + } + + return spsr; +} + +void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu) +{ + u64 spsr, elr, mode; + bool direct_eret; + + /* + * Going through the whole put/load motions is a waste of time + * if this is a VHE guest hypervisor returning to its own + * userspace, or the hypervisor performing a local exception + * return. No need to save/restore registers, no need to + * switch S2 MMU. Just do the canonical ERET. + */ + spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2); + spsr = kvm_check_illegal_exception_return(vcpu, spsr); + + mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT); + + direct_eret = (mode == PSR_MODE_EL0t && + vcpu_el2_e2h_is_set(vcpu) && + vcpu_el2_tge_is_set(vcpu)); + direct_eret |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t); + + if (direct_eret) { + *vcpu_pc(vcpu) = vcpu_read_sys_reg(vcpu, ELR_EL2); + *vcpu_cpsr(vcpu) = spsr; + trace_kvm_nested_eret(vcpu, *vcpu_pc(vcpu), spsr); + return; + } + + preempt_disable(); + kvm_arch_vcpu_put(vcpu); + + elr = __vcpu_sys_reg(vcpu, ELR_EL2); + + trace_kvm_nested_eret(vcpu, elr, spsr); + + /* + * Note that the current exception level is always the virtual EL2, + * since we set HCR_EL2.NV bit only when entering the virtual EL2. 
+ */ + *vcpu_pc(vcpu) = elr; + *vcpu_cpsr(vcpu) = spsr; + + kvm_arch_vcpu_load(vcpu, smp_processor_id()); + preempt_enable(); +} + +static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2, + enum exception_type type) +{ + trace_kvm_inject_nested_exception(vcpu, esr_el2, type); + + switch (type) { + case except_type_sync: + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC); + vcpu_write_sys_reg(vcpu, esr_el2, ESR_EL2); + break; + case except_type_irq: + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ); + break; + default: + WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type); + } +} + +/* + * Emulate taking an exception to EL2. + * See ARM ARM J8.1.2 AArch64.TakeException() + */ +static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2, + enum exception_type type) +{ + u64 pstate, mode; + bool direct_inject; + + if (!vcpu_has_nv(vcpu)) { + kvm_err("Unexpected call to %s for the non-nesting configuration\n", + __func__); + return -EINVAL; + } + + /* + * As for ERET, we can avoid doing too much on the injection path by + * checking that we either took the exception from a VHE host + * userspace or from vEL2. In these cases, there is no change in + * translation regime (or anything else), so let's do as little as + * possible. + */ + pstate = *vcpu_cpsr(vcpu); + mode = pstate & (PSR_MODE_MASK | PSR_MODE32_BIT); + + direct_inject = (mode == PSR_MODE_EL0t && + vcpu_el2_e2h_is_set(vcpu) && + vcpu_el2_tge_is_set(vcpu)); + direct_inject |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t); + + if (direct_inject) { + kvm_inject_el2_exception(vcpu, esr_el2, type); + return 1; + } + + preempt_disable(); + + /* + * We may have an exception or PC update in the EL0/EL1 context. + * Commit it before entering EL2. 
+ */ + __kvm_adjust_pc(vcpu); + + kvm_arch_vcpu_put(vcpu); + + kvm_inject_el2_exception(vcpu, esr_el2, type); + + /* + * A hard requirement is that a switch between EL1 and EL2 + * contexts has to happen between a put/load, so that we can + * pick the correct timer and interrupt configuration, among + * other things. + * + * Make sure the exception actually took place before we load + * the new context. + */ + __kvm_adjust_pc(vcpu); + + kvm_arch_vcpu_load(vcpu, smp_processor_id()); + preempt_enable(); + + return 1; +} + +int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2) +{ + return kvm_inject_nested(vcpu, esr_el2, except_type_sync); +} + +int kvm_inject_nested_irq(struct kvm_vcpu *vcpu) +{ + /* + * Do not inject an irq if the: + * - Current exception level is EL2, and + * - virtual HCR_EL2.TGE == 0 + * - virtual HCR_EL2.IMO == 0 + * + * See Table D1-17 "Physical interrupt target and masking when EL3 is + * not implemented and EL2 is implemented" in ARM DDI 0487C.a. + */ + + if (vcpu_is_el2(vcpu) && !vcpu_el2_tge_is_set(vcpu) && + !(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_IMO)) + return 1; + + /* esr_el2 value doesn't matter for exits due to irqs. */ + return kvm_inject_nested(vcpu, 0, except_type_irq); +} diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 791d3de76771..424a5107cddb 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -14,6 +14,7 @@ #include #include #include +#include #if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__) #error Hypervisor code only! 
@@ -23,7 +24,9 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { u64 val; - if (__vcpu_read_sys_reg_from_cpu(reg, &val)) + if (unlikely(vcpu_has_nv(vcpu))) + return vcpu_read_sys_reg(vcpu, reg); + else if (__vcpu_read_sys_reg_from_cpu(reg, &val)) return val; return __vcpu_sys_reg(vcpu, reg); @@ -31,18 +34,25 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) { - if (__vcpu_write_sys_reg_to_cpu(val, reg)) - return; - - __vcpu_sys_reg(vcpu, reg) = val; + if (unlikely(vcpu_has_nv(vcpu))) + vcpu_write_sys_reg(vcpu, val, reg); + else if (!__vcpu_write_sys_reg_to_cpu(val, reg)) + __vcpu_sys_reg(vcpu, reg) = val; } -static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) +static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, + u64 val) { - if (has_vhe()) + if (unlikely(vcpu_has_nv(vcpu))) { + if (target_mode == PSR_MODE_EL1h) + vcpu_write_sys_reg(vcpu, val, SPSR_EL1); + else + vcpu_write_sys_reg(vcpu, val, SPSR_EL2); + } else if (has_vhe()) { write_sysreg_el1(val, SYS_SPSR); - else + } else { __vcpu_sys_reg(vcpu, SPSR_EL1) = val; + } } static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) @@ -101,6 +111,11 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); break; + case PSR_MODE_EL2h: + vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL2); + sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL2); + __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL2); + break; default: /* Don't do that */ BUG(); @@ -153,7 +168,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, new |= target_mode; *vcpu_cpsr(vcpu) = new; - __vcpu_write_spsr(vcpu, old); + __vcpu_write_spsr(vcpu, target_mode, old); } /* @@ -323,11 +338,20 @@ static void kvm_inject_exception(struct 
kvm_vcpu *vcpu) case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC): enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); break; + + case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC): + enter_exception64(vcpu, PSR_MODE_EL2h, except_type_sync); + break; + + case unpack_vcpu_flag(EXCEPT_AA64_EL2_IRQ): + enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq); + break; + default: /* - * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} - * will be implemented at some point. Everything - * else gets silently ignored. + * Only EL1_SYNC and EL2_{SYNC,IRQ} makes + * sense so far. Everything else gets silently + * ignored. */ break; } diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index f32f4a2a347f..64c3aec0d937 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -12,17 +12,55 @@ #include #include +#include #include +static void pend_sync_exception(struct kvm_vcpu *vcpu) +{ + /* If not nesting, EL1 is the only possible exception target */ + if (likely(!vcpu_has_nv(vcpu))) { + kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); + return; + } + + /* + * With NV, we need to pick between EL1 and EL2. Note that we + * never deal with a nesting exception here, hence never + * changing context, and the exception itself can be delayed + * until the next entry. 
+ */ + switch(*vcpu_cpsr(vcpu) & PSR_MODE_MASK) { + case PSR_MODE_EL2h: + case PSR_MODE_EL2t: + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC); + break; + case PSR_MODE_EL1h: + case PSR_MODE_EL1t: + kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); + break; + case PSR_MODE_EL0t: + if (vcpu_el2_tge_is_set(vcpu)) + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC); + else + kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); + break; + default: + BUG(); + } +} + +static bool match_target_el(struct kvm_vcpu *vcpu, unsigned long target) +{ + return (vcpu_get_flag(vcpu, EXCEPT_MASK) == target); +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u64 esr = 0; - kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); - - vcpu_write_sys_reg(vcpu, addr, FAR_EL1); + pend_sync_exception(vcpu); /* * Build an {i,d}abort, depending on the level and the @@ -43,14 +81,22 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr if (!is_iabt) esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; - vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1); + esr |= ESR_ELx_FSC_EXTABT; + + if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) { + vcpu_write_sys_reg(vcpu, addr, FAR_EL1); + vcpu_write_sys_reg(vcpu, esr, ESR_EL1); + } else { + vcpu_write_sys_reg(vcpu, addr, FAR_EL2); + vcpu_write_sys_reg(vcpu, esr, ESR_EL2); + } } static void inject_undef64(struct kvm_vcpu *vcpu) { u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); + pend_sync_exception(vcpu); /* * Build an unknown exception, depending on the instruction @@ -59,7 +105,10 @@ static void inject_undef64(struct kvm_vcpu *vcpu) if (kvm_vcpu_trap_il_is32bit(vcpu)) esr |= ESR_ELx_IL; - vcpu_write_sys_reg(vcpu, esr, ESR_EL1); + if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) + vcpu_write_sys_reg(vcpu, esr, ESR_EL1); + else 
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL2); } #define DFSR_FSC_EXTABT_LPAE 0x10 diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 33e4e7dd2719..f3e46a976125 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -2,6 +2,7 @@ #if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_ARM_ARM64_KVM_H +#include #include #include @@ -301,6 +302,64 @@ TRACE_EVENT(kvm_timer_emulate, __entry->timer_idx, __entry->should_fire) ); +TRACE_EVENT(kvm_nested_eret, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long elr_el2, + unsigned long spsr_el2), + TP_ARGS(vcpu, elr_el2, spsr_el2), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(unsigned long, elr_el2) + __field(unsigned long, spsr_el2) + __field(unsigned long, target_mode) + __field(unsigned long, hcr_el2) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->elr_el2 = elr_el2; + __entry->spsr_el2 = spsr_el2; + __entry->target_mode = spsr_el2 & (PSR_MODE_MASK | PSR_MODE32_BIT); + __entry->hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2); + ), + + TP_printk("elr_el2: 0x%lx spsr_el2: 0x%08lx (M: %s) hcr_el2: %lx", + __entry->elr_el2, __entry->spsr_el2, + __print_symbolic(__entry->target_mode, kvm_mode_names), + __entry->hcr_el2) +); + +TRACE_EVENT(kvm_inject_nested_exception, + TP_PROTO(struct kvm_vcpu *vcpu, u64 esr_el2, int type), + TP_ARGS(vcpu, esr_el2, type), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(unsigned long, esr_el2) + __field(int, type) + __field(unsigned long, spsr_el2) + __field(unsigned long, pc) + __field(unsigned long, source_mode) + __field(unsigned long, hcr_el2) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->esr_el2 = esr_el2; + __entry->type = type; + __entry->spsr_el2 = *vcpu_cpsr(vcpu); + __entry->pc = *vcpu_pc(vcpu); + __entry->source_mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); + __entry->hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2); + ), + + TP_printk("%s: esr_el2 
0x%lx elr_el2: 0x%lx spsr_el2: 0x%08lx (M: %s) hcr_el2: %lx", + __print_symbolic(__entry->type, kvm_exception_type_names), + __entry->esr_el2, __entry->pc, __entry->spsr_el2, + __print_symbolic(__entry->source_mode, kvm_mode_names), + __entry->hcr_el2) +); + #endif /* _TRACE_ARM_ARM64_KVM_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 93c33702cd2beb0cb49a857afdbf231c92eb9df5 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Thu, 9 Feb 2023 17:58:12 +0000 Subject: KVM: arm64: nv: Inject HVC exceptions to the virtual EL2 As we expect all PSCI calls from the L1 hypervisor to be performed using SMC when nested virtualization is enabled, it is clear that all HVC instructions from the VM (including from the virtual EL2) are supposed to be handled in the virtual EL2. Forward these to EL2 as required. Reviewed-by: Russell King (Oracle) Reviewed-by: Alexandru Elisei Signed-off-by: Jintack Lim [maz: add handling of HCR_EL2.HCD] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-11-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/handle_exit.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index e778eefcf214..2d8c09cf3e49 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu) kvm_vcpu_hvc_get_imm(vcpu)); vcpu->stat.hvc_exit_stat++; + /* Forward hvc instructions to the virtual EL2 if the guest has EL2. 
*/ + if (vcpu_has_nv(vcpu)) { + if (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_HCD) + kvm_inject_undefined(vcpu); + else + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); + + return 1; + } + ret = kvm_hvc_call_handler(vcpu); if (ret < 0) { vcpu_set_reg(vcpu, 0, ~0UL); -- cgit v1.2.3 From 6898a55ce38c13e47a0647380504d309e9b0f631 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 9 Feb 2023 17:58:13 +0000 Subject: KVM: arm64: nv: Handle trapped ERET from virtual EL2 When a guest hypervisor running virtual EL2 in EL1 executes an ERET instruction, we will have set HCR_EL2.NV which traps ERET to EL2, so that we can emulate the exception return in software. Reviewed-by: Russell King (Oracle) Reviewed-by: Alexandru Elisei Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-12-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/esr.h | 4 ++++ arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/kvm/handle_exit.c | 10 ++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 206de10524e3..d4dd949b921e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -272,6 +272,10 @@ (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ ESR_ELx_SYS64_ISS_OP2_SHIFT)) +/* ISS field definitions for ERET/ERETAA/ERETAB trapping */ +#define ESR_ELx_ERET_ISS_ERET 0x2 +#define ESR_ELx_ERET_ISS_ERETA 0x1 + /* * ISS field definitions for floating-point exception traps * (FP_EXC_32/FP_EXC_64). 
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index bea827b76c22..e24825dcda6b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -344,7 +344,7 @@ ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ - ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) + ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET) #define CPACR_EL1_TTA (1 << 28) #define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 2d8c09cf3e49..e75101f2aa6c 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -207,6 +207,15 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu) return 1; } +static int kvm_handle_eret(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET) + return kvm_handle_ptrauth(vcpu); + + kvm_emulate_nested_eret(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec, [ESR_ELx_EC_WFx] = kvm_handle_wfx, @@ -222,6 +231,7 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_ELx_EC_SMC64] = handle_smc, [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg, [ESR_ELx_EC_SVE] = handle_sve, + [ESR_ELx_EC_ERET] = kvm_handle_eret, [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort, [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort, [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug, -- cgit v1.2.3 From bd36b1a9eb5a2842e1c44d8d8e7ff9b07cff7ed8 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Thu, 9 Feb 2023 17:58:14 +0000 Subject: KVM: arm64: nv: Handle SMCs taken from virtual EL2 Non-nested guests have used the hvc instruction to initiate SMCCC calls into KVM. This is quite a poor fit for NV as hvc exceptions are always taken to EL2. 
In other words, KVM needs to unconditionally forward the hvc exception back into vEL2 to uphold the architecture. Instead, treat the smc instruction from vEL2 as we would a guest hypercall, thereby allowing the vEL2 to interact with KVM's hypercall surface. Note that on NV-capable hardware HCR_EL2.TSC causes smc instructions executed in non-secure EL1 to trap to EL2, even if EL3 is not implemented. Reviewed-by: Alexandru Elisei Signed-off-by: Jintack Lim Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-13-maz@kernel.org [Oliver: redo commit message, only handle smc from vEL2] Signed-off-by: Oliver Upton --- arch/arm64/kvm/handle_exit.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index e75101f2aa6c..a798c0b4d717 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -63,6 +63,8 @@ static int handle_hvc(struct kvm_vcpu *vcpu) static int handle_smc(struct kvm_vcpu *vcpu) { + int ret; + /* * "If an SMC instruction executed at Non-secure EL1 is * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a @@ -70,10 +72,30 @@ static int handle_smc(struct kvm_vcpu *vcpu) * * We need to advance the PC after the trap, as it would * otherwise return to the same address... + * + * Only handle SMCs from the virtual EL2 with an immediate of zero and + * skip it otherwise. */ - vcpu_set_reg(vcpu, 0, ~0UL); + if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) { + vcpu_set_reg(vcpu, 0, ~0UL); + kvm_incr_pc(vcpu); + return 1; + } + + /* + * If imm is zero then it is likely an SMCCC call. + * + * Note that on ARMv8.3, even if EL3 is not implemented, SMC executed + * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than + * being treated as UNDEFINED. 
+ */ + ret = kvm_hvc_call_handler(vcpu); + if (ret < 0) + vcpu_set_reg(vcpu, 0, ~0UL); + kvm_incr_pc(vcpu); - return 1; + + return ret; } /* -- cgit v1.2.3 From 9da117eec9243f41c349071773784ca5f9c5d473 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Thu, 9 Feb 2023 17:58:15 +0000 Subject: KVM: arm64: nv: Add accessors for SPSR_EL1, ELR_EL1 and VBAR_EL1 from virtual EL2 For the same reason we trap virtual memory register accesses at virtual EL2, we need to trap SPSR_EL1, ELR_EL1 and VBAR_EL1 accesses. ARM v8.3 introduces the HCR_EL2.NV1 bit to be able to trap on those register accesses in EL1. Do not set this bit until the whole nesting support is completed, which happens further down the line... Reviewed-by: Russell King (Oracle) Signed-off-by: Jintack Lim Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-14-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1e6ae3b2e6dd..de209059fd34 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1538,6 +1538,30 @@ static bool access_sp_el1(struct kvm_vcpu *vcpu, return true; } +static bool access_elr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + vcpu_write_sys_reg(vcpu, p->regval, ELR_EL1); + else + p->regval = vcpu_read_sys_reg(vcpu, ELR_EL1); + + return true; +} + +static bool access_spsr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval; + else + p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1); + + return true; +} + /* * Architected system registers. 
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -1694,6 +1718,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { PTRAUTH_KEY(APDB), PTRAUTH_KEY(APGA), + { SYS_DESC(SYS_SPSR_EL1), access_spsr}, + { SYS_DESC(SYS_ELR_EL1), access_elr}, + { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 }, { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, @@ -1741,7 +1768,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_LORC_EL1), trap_loregion }, { SYS_DESC(SYS_LORID_EL1), trap_loregion }, - { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, + { SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 }, { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 }, { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only }, -- cgit v1.2.3 From d9552fe133f9f7c357460b041e2d3b20310adf8d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 9 Feb 2023 17:58:16 +0000 Subject: KVM: arm64: nv: Emulate PSTATE.M for a guest hypervisor We can no longer blindly copy the VCPU's PSTATE into SPSR_EL2 and return to the guest and vice versa when taking an exception to the hypervisor, because we emulate virtual EL2 in EL1 and therefore have to translate the mode field from EL2 to EL1 and vice versa. This requires keeping track of the state we enter the guest, for which we transiently use a dedicated flag. 
Reviewed-by: Russell King (Oracle) Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-15-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 19 ++++++++++++++++++- arch/arm64/kvm/hyp/vhe/switch.c | 24 ++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1b585a4dd122..70eab7a6386b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -645,6 +645,8 @@ struct kvm_vcpu_arch { #define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5)) /* Save TRBE context if active */ #define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6)) +/* vcpu running in HYP context */ +#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7)) /* SVE enabled for host EL0 */ #define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0)) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index baa5b9b3dde5..0fbbf2870b7b 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -156,9 +156,26 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); } +/* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. 
*/ +static inline u64 to_hw_pstate(const struct kvm_cpu_context *ctxt) +{ + u64 mode = ctxt->regs.pstate & (PSR_MODE_MASK | PSR_MODE32_BIT); + + switch (mode) { + case PSR_MODE_EL2t: + mode = PSR_MODE_EL1t; + break; + case PSR_MODE_EL2h: + mode = PSR_MODE_EL1h; + break; + } + + return (ctxt->regs.pstate & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode; +} + static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) { - u64 pstate = ctxt->regs.pstate; + u64 pstate = to_hw_pstate(ctxt); u64 mode = pstate & PSR_AA32_MODE_MASK; /* diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 1a97391fedd2..76ea9392363d 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -120,6 +120,25 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) { + /* + * If we were in HYP context on entry, adjust the PSTATE view + * so that the usual helpers work correctly. + */ + if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) { + u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); + + switch (mode) { + case PSR_MODE_EL1t: + mode = PSR_MODE_EL2t; + break; + case PSR_MODE_EL1h: + mode = PSR_MODE_EL2h; + break; + } + + *vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT); + *vcpu_cpsr(vcpu) |= mode; + } } /* Switch to the guest for VHE systems running in EL2 */ @@ -154,6 +173,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); + if (is_hyp_ctxt(vcpu)) + vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT); + else + vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT); + do { /* Jump in the fire! 
*/ exit_code = __guest_enter(vcpu); -- cgit v1.2.3 From e6b367db0f9192692a3f3057878f1259fe966d33 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 9 Feb 2023 17:58:17 +0000 Subject: KVM: arm64: nv: Allow a sysreg to be hidden from userspace only So far, we never needed to distinguish between registers hidden from userspace and being hidden from a guest (they are always either visible to both, or hidden from both). With NV, we have the ugly case of the EL02 and EL12 registers, which are only a view on the EL0 and EL1 registers. It makes absolutely no sense to expose them to userspace, since it already has the canonical view. Add a new visibility flag (REG_HIDDEN_USER) and a new helper that checks for it and REG_HIDDEN when checking whether to expose a sysreg to userspace. Subsequent patches will make use of it. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-16-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 6 +++--- arch/arm64/kvm/sys_regs.h | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index de209059fd34..55a14c86a455 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2980,7 +2980,7 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, int ret; r = id_to_sys_reg_desc(vcpu, reg->id, table, num); - if (!r) + if (!r || sysreg_hidden_user(vcpu, r)) return -ENOENT; if (r->get_user) { @@ -3024,7 +3024,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, return -EFAULT; r = id_to_sys_reg_desc(vcpu, reg->id, table, num); - if (!r) + if (!r || sysreg_hidden_user(vcpu, r)) return -ENOENT; if (sysreg_user_write_ignore(vcpu, r)) @@ -3118,7 +3118,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, if (!(rd->reg || rd->get_user)) return 0; - if (sysreg_hidden(vcpu, rd)) + if (sysreg_hidden_user(vcpu, 
rd)) return 0; if (!copy_reg_to_user(rd, uind)) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index e4ebb3a379fd..6b11f2cc7146 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -85,8 +85,9 @@ struct sys_reg_desc { }; #define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */ -#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */ -#define REG_USER_WI (1 << 2) /* WI from userspace only */ +#define REG_HIDDEN_USER (1 << 1) /* hidden from userspace only */ +#define REG_RAZ (1 << 2) /* RAZ from userspace and guest */ +#define REG_USER_WI (1 << 3) /* WI from userspace only */ static __printf(2, 3) inline void print_sys_reg_msg(const struct sys_reg_params *p, @@ -152,6 +153,15 @@ static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, return sysreg_visibility(vcpu, r) & REG_HIDDEN; } +static inline bool sysreg_hidden_user(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + if (likely(!r->visibility)) + return false; + + return r->visibility(vcpu, r) & (REG_HIDDEN | REG_HIDDEN_USER); +} + static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { -- cgit v1.2.3 From 280b748e871e5a9bd61fc13019b8cbda6e0242dd Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Thu, 9 Feb 2023 17:58:18 +0000 Subject: KVM: arm64: nv: Emulate EL12 register accesses from the virtual EL2 With the HCR_EL2.NV bit set, accesses to EL12 registers in the virtual EL2 trap to EL2. Handle those traps just like we do for EL1 registers. One exception is CNTKCTL_EL12. We don't trap on CNTKCTL_EL1 for non-VHE virtual EL2 because we don't have to. However, accessing CNTKCTL_EL12 will trap since it's one of the EL12 registers controlled by the HCR_EL2.NV bit. Therefore, add a handler for it and don't treat it as a non-trap register when preparing a shadow context. These registers, being only a view on their EL1 counterpart, are permanently hidden from userspace. 
Reviewed-by: Alexandru Elisei Signed-off-by: Jintack Lim [maz: EL12_REG(), register visibility] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-17-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 55a14c86a455..f5dd4f4eaaf0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1482,6 +1482,26 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, .val = v, \ } +/* + * EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when + * HCR_EL2.E2H==1, and only in the sysreg table for convenience of + * handling traps. Given that, they are always hidden from userspace. + */ +static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return REG_HIDDEN_USER; +} + +#define EL12_REG(name, acc, rst, v) { \ + SYS_DESC(SYS_##name##_EL12), \ + .access = acc, \ + .reset = rst, \ + .reg = name##_EL1, \ + .val = v, \ + .visibility = elx2_visibility, \ +} + /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ @@ -2031,6 +2051,23 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0), EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), + EL12_REG(SCTLR, access_vm_reg, reset_val, 0x00C50078), + EL12_REG(CPACR, access_rw, reset_val, 0), + EL12_REG(TTBR0, access_vm_reg, reset_unknown, 0), + EL12_REG(TTBR1, access_vm_reg, reset_unknown, 0), + EL12_REG(TCR, access_vm_reg, reset_val, 0), + { SYS_DESC(SYS_SPSR_EL12), access_spsr}, + { SYS_DESC(SYS_ELR_EL12), access_elr}, + EL12_REG(AFSR0, access_vm_reg, reset_unknown, 0), + EL12_REG(AFSR1, access_vm_reg, reset_unknown, 0), + EL12_REG(ESR, access_vm_reg, reset_unknown, 0), + EL12_REG(FAR, access_vm_reg, reset_unknown, 0), + EL12_REG(MAIR, 
access_vm_reg, reset_unknown, 0), + EL12_REG(AMAIR, access_vm_reg, reset_amair_el1, 0), + EL12_REG(VBAR, access_rw, reset_val, 0), + EL12_REG(CONTEXTIDR, access_vm_reg, reset_val, 0), + EL12_REG(CNTKCTL, access_rw, reset_val, 0), + EL2_REG(SP_EL2, NULL, reset_unknown, 0), }; -- cgit v1.2.3 From 9f75b6d447d712b6ed9abc869eedf456fe7f5e9b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 9 Feb 2023 17:58:19 +0000 Subject: KVM: arm64: nv: Filter out unsupported features from ID regs As there are a number of features that we either can't support, or don't want to support right away with NV, let's add some basic filtering so that we don't advertise silly things to the EL2 guest. Whilst we are at it, advertise FEAT_TTL as well as FEAT_GTG, which the NV implementation will implement. Reviewed-by: Ganapatrao Kulkarni Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-18-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 6 ++ arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/nested.c | 162 ++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs.c | 3 + 4 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/nested.c diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index fd601ea68d13..8fb67f032fd1 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -11,4 +11,10 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features)); } +struct sys_reg_params; +struct sys_reg_desc; + +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, + const struct sys_reg_desc *r); + #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 31b07f2b2186..c0c050e53157 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -14,7 +14,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ 
inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \ vgic-sys-reg-v3.o fpsimd.o pkvm.o \ - arch_timer.o trng.o vmid.o emulate-nested.o \ + arch_timer.o trng.o vmid.o emulate-nested.o nested.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c new file mode 100644 index 000000000000..f7ec27c27a4f --- /dev/null +++ b/arch/arm64/kvm/nested.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017 - Columbia University and Linaro Ltd. + * Author: Jintack Lim + */ + +#include +#include + +#include +#include +#include + +#include "sys_regs.h" + +/* Protection against the sysreg repainting madness... */ +#define NV_FTR(r, f) ID_AA64##r##_EL1_##f + +/* + * Our emulated CPU doesn't support all the possible features. For the + * sake of simplicity (and probably mental sanity), wipe out a number + * of feature bits we don't intend to support for the time being. + * This list should get updated as new features get added to the NV + * support, and new extensions to the architecture. + */ +void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, + (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + u64 val, tmp; + + val = p->regval; + + switch (id) { + case SYS_ID_AA64ISAR0_EL1: + /* Support everything but TME, O.S.
and Range TLBIs */ + val &= ~(NV_FTR(ISAR0, TLB) | + NV_FTR(ISAR0, TME)); + break; + + case SYS_ID_AA64ISAR1_EL1: + /* Support everything but PtrAuth and Spec Invalidation */ + val &= ~(GENMASK_ULL(63, 56) | + NV_FTR(ISAR1, SPECRES) | + NV_FTR(ISAR1, GPI) | + NV_FTR(ISAR1, GPA) | + NV_FTR(ISAR1, API) | + NV_FTR(ISAR1, APA)); + break; + + case SYS_ID_AA64PFR0_EL1: + /* No AMU, MPAM, S-EL2, RAS or SVE */ + val &= ~(GENMASK_ULL(55, 52) | + NV_FTR(PFR0, AMU) | + NV_FTR(PFR0, MPAM) | + NV_FTR(PFR0, SEL2) | + NV_FTR(PFR0, RAS) | + NV_FTR(PFR0, SVE) | + NV_FTR(PFR0, EL3) | + NV_FTR(PFR0, EL2) | + NV_FTR(PFR0, EL1)); + /* 64bit EL1/EL2/EL3 only */ + val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001); + val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001); + val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001); + break; + + case SYS_ID_AA64PFR1_EL1: + /* Only support SSBS */ + val &= NV_FTR(PFR1, SSBS); + break; + + case SYS_ID_AA64MMFR0_EL1: + /* Hide ECV, FGT, ExS, Secure Memory */ + val &= ~(GENMASK_ULL(63, 43) | + NV_FTR(MMFR0, TGRAN4_2) | + NV_FTR(MMFR0, TGRAN16_2) | + NV_FTR(MMFR0, TGRAN64_2) | + NV_FTR(MMFR0, SNSMEM)); + + /* Disallow unsupported S2 page sizes */ + switch (PAGE_SIZE) { + case SZ_64K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001); + fallthrough; + case SZ_16K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001); + fallthrough; + case SZ_4K: + /* Support everything */ + break; + } + /* + * Since we can't support a guest S2 page size smaller than + * the host's own page size (due to KVM only populating its + * own S2 using the kernel's page size), advertise the + * limitation using FEAT_GTG. 
+ */ + switch (PAGE_SIZE) { + case SZ_4K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010); + fallthrough; + case SZ_16K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010); + fallthrough; + case SZ_64K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010); + break; + } + /* Cap PARange to 48bits */ + tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val); + if (tmp > 0b0101) { + val &= ~NV_FTR(MMFR0, PARANGE); + val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101); + } + break; + + case SYS_ID_AA64MMFR1_EL1: + val &= (NV_FTR(MMFR1, PAN) | + NV_FTR(MMFR1, LO) | + NV_FTR(MMFR1, HPDS) | + NV_FTR(MMFR1, VH) | + NV_FTR(MMFR1, VMIDBits)); + break; + + case SYS_ID_AA64MMFR2_EL1: + val &= ~(NV_FTR(MMFR2, EVT) | + NV_FTR(MMFR2, BBM) | + NV_FTR(MMFR2, TTL) | + GENMASK_ULL(47, 44) | + NV_FTR(MMFR2, ST) | + NV_FTR(MMFR2, CCIDX) | + NV_FTR(MMFR2, VARange)); + + /* Force TTL support */ + val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001); + break; + + case SYS_ID_AA64DFR0_EL1: + /* Only limited support for PMU, Debug, BPs and WPs */ + val &= (NV_FTR(DFR0, PMUVer) | + NV_FTR(DFR0, WRPs) | + NV_FTR(DFR0, BRPs) | + NV_FTR(DFR0, DebugVer)); + + /* Cap Debug to ARMv8.1 */ + tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val); + if (tmp > 0b0111) { + val &= ~NV_FTR(DFR0, DebugVer); + val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111); + } + break; + + default: + /* Unknown register, just wipe it clean */ + val = 0; + break; + } + + p->regval = val; +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f5dd4f4eaaf0..82c1f8d786f7 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1223,6 +1223,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu, return write_to_read_only(vcpu, p, r); p->regval = read_id_reg(vcpu, r); + if (vcpu_has_nv(vcpu)) + access_nested_id_reg(vcpu, p, r); + return true; } -- cgit v1.2.3 From 191e0e155521182051fc2f32dde237b6fde2b0b4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 9 Feb 2023 17:58:20 +0000 Subject: KVM: arm64: 
nv: Only toggle cache for virtual EL2 when SCTLR_EL2 changes So far we were flushing almost the entire universe whenever a VM would load/unload the SCTLR_EL1 and the two versions of that register had different MMU enabled settings. This turned out to be so slow that it prevented forward progress for a nested VM, because a scheduler timer tick interrupt would always be pending when we reached the nested VM. To avoid this problem, we consider the SCTLR_EL2 when evaluating if caches are on or off when entering virtual EL2 (because this is the value that we end up shadowing onto the hardware EL1 register). Reviewed-by: Alexandru Elisei Signed-off-by: Christoffer Dall Signed-off-by: Jintack Lim Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230209175820.1939006-19-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_mmu.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e4a7e6369499..2890d57bec30 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -115,6 +115,7 @@ alternative_cb_end #include #include #include +#include #include void kvm_update_va_mask(struct alt_instr *alt, @@ -192,7 +193,15 @@ struct kvm; static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { - return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; + u64 cache_bits = SCTLR_ELx_M | SCTLR_ELx_C; + int reg; + + if (vcpu_is_el2(vcpu)) + reg = SCTLR_EL2; + else + reg = SCTLR_EL1; + + return (vcpu_read_sys_reg(vcpu, reg) & cache_bits) == cache_bits; } static inline void __clean_dcache_guest_page(void *va, size_t size) -- cgit v1.2.3 From 3fb901cdc9e4ab01aa32abe93004f94cdfe75648 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Sat, 11 Feb 2023 19:07:42 +0000 Subject: KVM: arm64: nv: Use reg_to_encoding() to get sysreg ID Avoid open-coding and just use the helper to encode the ID from the sysreg table entry. 
No functional change intended. Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20230211190742.49843-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index f7ec27c27a4f..315354d27978 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -26,8 +26,7 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, const struct sys_reg_desc *r) { - u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, - (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + u32 id = reg_to_encoding(r); u64 val, tmp; val = p->regval; -- cgit v1.2.3