From 4dfa1f3657a0d4fb556d4440322d35bdcf5e4970 Mon Sep 17 00:00:00 2001 From: Zhiyuan Dai Date: Mon, 21 Mar 2022 10:56:27 +0800 Subject: arm64: Fix comments in macro __init_el2_gicv3 Fix typo in comment. Signed-off-by: Zhiyuan Dai Link: https://lore.kernel.org/r/1647831387-3686-1-git-send-email-daizhiyuan@phytium.com.cn Signed-off-by: Will Deacon --- arch/arm64/include/asm/el2_setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 7f3c87f7a0ce..c31be7eda9df 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -107,7 +107,7 @@ isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICH_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm -- cgit v1.2.3 From 83bea32ac7ed37bbda58733de61fc9369513f9f9 Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Thu, 7 Apr 2022 18:11:28 +0900 Subject: arm64: Add part number for Arm Cortex-A78AE Add the MIDR part number info for the Arm Cortex-A78AE[1] and add it to spectre-BHB affected list[2]. [1]: https://developer.arm.com/Processors/Cortex-A78AE [2]: https://developer.arm.com/Arm%20Security%20Center/Spectre-BHB Cc: Catalin Marinas Cc: Mark Rutland Cc: Will Deacon Cc: James Morse Signed-off-by: Chanho Park Link: https://lore.kernel.org/r/20220407091128.8700-1-chanho61.park@samsung.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/proton-pack.c | 1 + 2 files changed, 3 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 232b439cbaf3..ff8f4511df71 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -130,6 +131,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 5777929d35bf..40be3a7c2c53 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -853,6 +853,7 @@ u8 spectre_bhb_loop_affected(int scope) if (scope == SCOPE_LOCAL_CPU) { static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), -- cgit v1.2.3 From 6a437208cb942a2dd98f7e1c3fd347ed3d425ffc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 19:27:46 +0100 Subject: arm64: Expand ESR_ELx_WFx_ISS_TI to 
match its ARMv8.7 definition Starting with FEAT_WFXT in ARMv8.7, the TI field in the ISS that is reported on a WFx trap is expanded by one bit to allow the description of WFET and WFIT. Special care is taken to exclude the WFxT bit from the mask used to match WFI so that it also matches WFIT when trapped from EL0. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419182755.601427-2-maz@kernel.org --- arch/arm64/include/asm/esr.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d52a0b269ee8..65c2201b11b2 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -133,7 +133,8 @@ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) -#define ESR_ELx_WFx_ISS_TI (UL(1) << 0) +#define ESR_ELx_WFx_ISS_TI (UL(3) << 0) +#define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0) #define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) @@ -146,7 +147,8 @@ #define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) /* ESR value templates for specific events */ -#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI) +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | \ + (ESR_ELx_WFx_ISS_TI & ~ESR_ELx_WFx_ISS_WFxT)) #define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ ESR_ELx_WFx_ISS_WFI) -- cgit v1.2.3 From bdcc2f280334e4e3f42a5a740494444f1026fb65 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 19:27:47 +0100 Subject: arm64: Add RV and RN fields for ESR_ELx_WFx_ISS The ISS field exposed by ESR_ELx contain two additional subfields with FEAT_WFxT: - RN, the register number containing the timeout - RV, indicating if the register number is valid Describe these two fields according to the arch spec. No functional change. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419182755.601427-3-maz@kernel.org --- arch/arm64/include/asm/esr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 65c2201b11b2..15156c478054 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -133,6 +133,8 @@ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_RN (UL(0x1F) << 5) +#define ESR_ELx_WFx_ISS_RV (UL(1) << 2) #define ESR_ELx_WFx_ISS_TI (UL(3) << 0) #define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0) #define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) -- cgit v1.2.3 From 69bb02ebc38ace438c9cd7c5315cfe43862b51fe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 20 Apr 2022 13:21:12 +0100 Subject: arm64: Add HWCAP advertising FEAT_WFXT In order to allow userspace to enjoy WFET, add a new HWCAP that advertises it when available. 
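As an illustration only (not part of this patch), userspace would be expected to check the new bit in AT_HWCAP2 before issuing WFET/WFIT. A minimal sketch, assuming a libc that provides getauxval() and the uapi <asm/hwcap.h> constant added below:

  #include <sys/auxv.h>
  #include <asm/hwcap.h>   /* HWCAP2_WFXT, added by this patch */

  static int cpu_has_wfxt(void)
  {
          /* AT_HWCAP2 carries the second hwcap word on arm64 */
          return !!(getauxval(AT_HWCAP2) & HWCAP2_WFXT);
  }
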
Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419182755.601427-9-maz@kernel.org --- Documentation/arm64/cpu-feature-registers.rst | 2 ++ Documentation/arm64/elf_hwcaps.rst | 4 ++++ arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/kernel/cpuinfo.c | 1 + arch/arm64/tools/cpucaps | 1 + 7 files changed, 12 insertions(+) (limited to 'arch/arm64/include') diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 749ae970c319..04ba83e1965f 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -290,6 +290,8 @@ infrastructure: +------------------------------+---------+---------+ | RPRES | [7-4] | y | +------------------------------+---------+---------+ + | WFXT | [3-0] | y | + +------------------------------+---------+---------+ Appendix I: Example diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index a8f30963e550..af3ab524e826 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -264,6 +264,10 @@ HWCAP2_MTE3 Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described by Documentation/arm64/memory-tagging-extension.rst. +HWCAP2_WFXT + + Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 8db5ec0089db..909337b50e1f 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -109,6 +109,7 @@ #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) #define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) +#define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 99cb5d383048..9dddde1d046c 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -79,5 +79,6 @@ #define HWCAP2_AFP (1 << 20) #define HWCAP2_RPRES (1 << 21) #define HWCAP2_MTE3 (1 << 22) +#define HWCAP2_WFXT (1 << 23) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d72c4b4d389c..a6db8ca04429 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -237,6 +237,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_WFXT_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2575,6 +2576,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_WFXT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_WFXT_SUPPORTED, CAP_HWCAP, KERNEL_HWCAP_WFXT), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c 
b/arch/arm64/kernel/cpuinfo.c index 330b92ea863a..781eab314794 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -98,6 +98,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_AFP] = "afp", [KERNEL_HWCAP_RPRES] = "rpres", [KERNEL_HWCAP_MTE3] = "mte3", + [KERNEL_HWCAP_WFXT] = "wfxt", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 3ed418f70e3b..01f7d253dec4 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -38,6 +38,7 @@ HAS_STAGE2_FWB HAS_SYSREG_GIC_CPUIF HAS_TLB_RANGE HAS_VIRT_HOST_EXTN +HAS_WFXT HW_DBM KVM_PROTECTED_MODE MISMATCHED_CACHE_TYPE -- cgit v1.2.3 From 9eae588529751f95834bca775b30b66291def7f6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 19:27:54 +0100 Subject: arm64: Add wfet()/wfit() helpers Just like we have helpers for WFI and WFE, add the WFxT versions. Note that the encoding is that reported by objdump, as no current toolchain knows about these instructions yet. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419182755.601427-10-maz@kernel.org --- arch/arm64/include/asm/barrier.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 62217be36217..9f3e2c3d2ca0 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -16,7 +16,11 @@ #define sev() asm volatile("sev" : : : "memory") #define wfe() asm volatile("wfe" : : : "memory") +#define wfet(val) asm volatile("msr s0_3_c1_c0_0, %0" \ + : : "r" (val) : "memory") #define wfi() asm volatile("wfi" : : : "memory") +#define wfit(val) asm volatile("msr s0_3_c1_c0_1, %0" \ + : : "r" (val) : "memory") #define isb() asm volatile("isb" : : : "memory") #define dmb(opt) asm volatile("dmb " #opt : : : "memory") -- cgit v1.2.3 From 89f5074c503b6b6f181c0240c931f67bcaf266e9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 19:27:50 +0100 Subject: KVM: arm64: Handle blocking WFIT instruction When trapping a blocking WFIT instruction, take it into account when computing the deadline of the background timer. The state is tracked with a new vcpu flag, and is gated by a new CPU capability, which isn't currently enabled.
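For context, a hedged sketch (not part of this patch): the value a guest passes to WFIT is an absolute CNTVCT_EL0 deadline, which is what wfit_delay_ns() below converts into a background-timer expiry. A guest built with the wfit() helper added earlier in this series might block roughly as follows, where timeout_ticks is an illustrative variable:

  /* Wait for an interrupt, or until the virtual counter reaches the deadline;
   * under KVM this traps, and the host programs its background timer from it.
   */
  u64 deadline = read_sysreg(cntvct_el0) + timeout_ticks;

  wfit(deadline);
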
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419182755.601427-6-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arch_timer.c | 22 ++++++++++++++++++++-- arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/handle_exit.c | 7 ++++++- 4 files changed, 28 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e3b25dc6c367..9e6e8701933e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -441,6 +441,7 @@ struct kvm_vcpu_arch { #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ #define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) #define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */ +#define KVM_ARM64_WFIT (1 << 16) /* WFIT instruction trapped */ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | \ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index c92a68190f6a..4e39ace073af 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -239,6 +239,20 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE); } +static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu) +{ + return (cpus_have_final_cap(ARM64_HAS_WFXT) && + (vcpu->arch.flags & KVM_ARM64_WFIT)); +} + +static u64 wfit_delay_ns(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *ctx = vcpu_vtimer(vcpu); + u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + + return kvm_counter_compute_delta(ctx, val); +} + /* * Returns the earliest expiration time in ns among guest timers. * Note that it will return 0 if none of timers can fire. @@ -256,6 +270,9 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) min_delta = min(min_delta, kvm_timer_compute_delta(ctx)); } + if (vcpu_has_wfit_active(vcpu)) + min_delta = min(min_delta, wfit_delay_ns(vcpu)); + /* If none of timers can fire, then return 0 */ if (min_delta == ULLONG_MAX) return 0; @@ -355,7 +372,7 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return 0; + return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0; } /* @@ -481,7 +498,8 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu) */ if (!kvm_timer_irq_can_fire(map.direct_vtimer) && !kvm_timer_irq_can_fire(map.direct_ptimer) && - !kvm_timer_irq_can_fire(map.emul_ptimer)) + !kvm_timer_irq_can_fire(map.emul_ptimer) && + !vcpu_has_wfit_active(vcpu)) return; /* diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 2122c699af06..e7cb8a4d2e81 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -634,6 +634,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_enable(); kvm_vcpu_halt(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_WFIT; kvm_clear_request(KVM_REQ_UNHALT, vcpu); preempt_disable(); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 97fe14aab1a3..4260f2cd1971 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -85,16 +85,21 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu) * WFI: Simply call kvm_vcpu_halt(), which will halt execution of * world-switches and schedule other host processes until there is an * incoming IRQ or FIQ to the VM. 
+ * WFIT: Same as WFI, with a timed wakeup implemented as a background timer */ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) { + u64 esr = kvm_vcpu_get_esr(vcpu); + + if (esr & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; + if ((esr & (ESR_ELx_WFx_ISS_RV | ESR_ELx_WFx_ISS_WFxT)) == (ESR_ELx_WFx_ISS_RV | ESR_ELx_WFx_ISS_WFxT)) + vcpu->arch.flags |= KVM_ARM64_WFIT; kvm_vcpu_wfi(vcpu); } -- cgit v1.2.3 From 23bc8f69f0eceecbb87c3801d2e48827d2dca92b Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 22 Apr 2022 14:00:33 +0800 Subject: arm64: mm: fix p?d_leaf() The pmd_leaf() is used to test a leaf mapped PMD, however, it misses the PROT_NONE mapped PMD on arm64. Fix it. A real world issue [1] caused by this was reported by Qian Cai. Also fix pud_leaf(). Link: https://patchwork.kernel.org/comment/24798260/ [1] Fixes: 8aa82df3c123 ("arm64: mm: add p?d_leaf() definitions") Reported-by: Qian Cai Signed-off-by: Muchun Song Link: https://lore.kernel.org/r/20220422060033.48711-1-songmuchun@bytedance.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 94e147e5456c..dff2b483ea50 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -535,7 +535,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PMD_TYPE_TABLE) #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) -#define pmd_leaf(pmd) pmd_sect(pmd) +#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd)) #define pmd_bad(pmd) (!pmd_table(pmd)) #define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE) @@ -625,7 +625,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!pud_table(pud)) #define pud_present(pud) pte_present(pud_pte(pud)) -#define pud_leaf(pud) pud_sect(pud) +#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) #define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) -- cgit v1.2.3 From b4adc83b07706042ad6e6a767f6c04636db69bcc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:13 +0100 Subject: arm64/sme: System register and exception syndrome definitions The arm64 Scalable Matrix Extension (SME) adds some new system registers, fields in existing system registers and exception syndromes. This patch adds definitions for these for use in future patches implementing support for this extension. Since SME will be the first user of FEAT_HCX in the kernel also include the definitions for enumerating it and the HCRX system register it adds. 
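As a rough sketch of how these syndrome values are intended to be consumed (illustrative only, not part of this patch; handle_sme_trap() is a hypothetical name, and the existing ESR_ELx_ISS_MASK definition is assumed), a handler keyed on ESR_ELx_EC_SME could distinguish the causes like this:

  static void handle_sme_trap(unsigned long esr)
  {
          switch (esr & ESR_ELx_ISS_MASK) {
          case ESR_ELx_SME_ISS_SME_DISABLED:
          case ESR_ELx_SME_ISS_SM_DISABLED:
          case ESR_ELx_SME_ISS_ZA_DISABLED:
                  /* the required SME/streaming/ZA state was not enabled */
                  break;
          case ESR_ELx_SME_ISS_ILL:
                  /* instruction not legal in the current SME state */
                  break;
          }
  }
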
Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-6-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 12 ++++++- arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/include/asm/sysreg.h | 67 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/traps.c | 1 + 4 files changed, 80 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d52a0b269ee8..43872e0cfd1e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -37,7 +37,8 @@ #define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ /* Unallocated EC: 0x1B */ #define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ -/* Unallocated EC: 0x1D - 0x1E */ +#define ESR_ELx_EC_SME (0x1D) +/* Unallocated EC: 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) @@ -327,6 +328,15 @@ #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ ESR_ELx_CP15_32_ISS_DIR_READ) +/* + * ISS values for SME traps + */ + +#define ESR_ELx_SME_ISS_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_ILL 1 +#define ESR_ELx_SME_ISS_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_ZA_DISABLED 3 + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1767ded83888..13ae232ec4a1 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -279,6 +279,7 @@ #define CPTR_EL2_TCPAC (1U << 31) #define CPTR_EL2_TAM (1 << 30) #define CPTR_EL2_TTA (1 << 20) +#define CPTR_EL2_TSM (1 << 12) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) #define CPTR_EL2_TZ (1 << 8) #define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fbf5f8bb9055..bebfdd27296a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -118,6 +118,10 @@ * System registers, organised loosely by encoding but grouped together * where the architected name contains an index. e.g. ID_MMFR_EL1. 
*/ +#define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3) +#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) +#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) + #define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) #define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) #define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) @@ -181,6 +185,7 @@ #define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) #define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) #define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) +#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5) #define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) #define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) @@ -204,6 +209,8 @@ #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) +#define SYS_SMPRI_EL1 sys_reg(3, 0, 1, 2, 4) +#define SYS_SMCR_EL1 sys_reg(3, 0, 1, 2, 6) #define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) #define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) @@ -396,6 +403,8 @@ #define TRBIDR_ALIGN_MASK GENMASK(3, 0) #define TRBIDR_ALIGN_SHIFT 0 +#define SMPRI_EL1_PRIORITY_MASK 0xf + #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) @@ -451,8 +460,13 @@ #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) #define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) +#define SYS_SMIDR_EL1 sys_reg(3, 1, 0, 0, 6) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) +#define SYS_SMIDR_EL1_IMPLEMENTER_SHIFT 24 +#define SYS_SMIDR_EL1_SMPS_SHIFT 15 +#define SYS_SMIDR_EL1_AFFINITY_SHIFT 0 + #define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) @@ -461,6 +475,10 @@ #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) +#define SYS_SVCR_EL0 sys_reg(3, 3, 4, 2, 2) +#define SYS_SVCR_EL0_ZA_MASK 2 +#define SYS_SVCR_EL0_SM_MASK 1 + #define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) #define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) @@ -477,6 +495,7 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +#define SYS_TPIDR2_EL0 sys_reg(3, 3, 13, 0, 5) #define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) @@ -546,6 +565,9 @@ #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) +#define SYS_SMPRIMAP_EL2 sys_reg(3, 4, 1, 2, 5) +#define SYS_SMCR_EL2 sys_reg(3, 4, 1, 2, 6) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) @@ -605,6 +627,7 @@ #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) #define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) #define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) @@ -628,6 +651,7 @@ #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) /* Common SCTLR_ELx flags. 
*/ +#define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) #define SCTLR_ELx_ATA (BIT(43)) @@ -836,6 +860,7 @@ #define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_SME_SHIFT 24 #define ID_AA64PFR1_MPAMFRAC_SHIFT 16 #define ID_AA64PFR1_RASFRAC_SHIFT 12 #define ID_AA64PFR1_MTE_SHIFT 8 @@ -846,6 +871,7 @@ #define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 #define ID_AA64PFR1_BT_BTI 0x1 +#define ID_AA64PFR1_SME 1 #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 @@ -874,6 +900,23 @@ #define ID_AA64ZFR0_AES_PMULL 0x2 #define ID_AA64ZFR0_SVEVER_SVE2 0x1 +/* id_aa64smfr0 */ +#define ID_AA64SMFR0_FA64_SHIFT 63 +#define ID_AA64SMFR0_I16I64_SHIFT 52 +#define ID_AA64SMFR0_F64F64_SHIFT 48 +#define ID_AA64SMFR0_I8I32_SHIFT 36 +#define ID_AA64SMFR0_F16F32_SHIFT 35 +#define ID_AA64SMFR0_B16F32_SHIFT 34 +#define ID_AA64SMFR0_F32F32_SHIFT 32 + +#define ID_AA64SMFR0_FA64 0x1 +#define ID_AA64SMFR0_I16I64 0x4 +#define ID_AA64SMFR0_F64F64 0x1 +#define ID_AA64SMFR0_I8I32 0x4 +#define ID_AA64SMFR0_F16F32 0x1 +#define ID_AA64SMFR0_B16F32 0x1 +#define ID_AA64SMFR0_F32F32 0x1 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_ECV_SHIFT 60 #define ID_AA64MMFR0_FGT_SHIFT 56 @@ -926,6 +969,7 @@ /* id_aa64mmfr1 */ #define ID_AA64MMFR1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_HCX_SHIFT 40 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 #define ID_AA64MMFR1_TWED_SHIFT 32 @@ -1119,9 +1163,24 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff +#define SMCR_ELx_FA64_SHIFT 31 +#define SMCR_ELx_FA64_MASK (1 << SMCR_ELx_FA64_SHIFT) + +/* + * The SMCR_ELx_LEN_* definitions intentionally include bits [8:4] which + * are reserved by the SME architecture for future expansion of the LEN + * field, with compatible semantics. + */ +#define SMCR_ELx_LEN_SHIFT 0 +#define SMCR_ELx_LEN_SIZE 9 +#define SMCR_ELx_LEN_MASK 0x1ff + #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_SMEN_EL1EN (BIT(24)) /* enable EL1 access */ +#define CPACR_EL1_SMEN_EL0EN (BIT(25)) /* enable EL0 access, if EL1EN set */ + #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ @@ -1170,6 +1229,8 @@ #define TRFCR_ELx_ExTRE BIT(1) #define TRFCR_ELx_E0TRE BIT(0) +/* HCRX_EL2 definitions */ +#define HCRX_EL2_SMPME_MASK (1 << 5) /* GIC Hypervisor interface registers */ /* ICH_MISR_EL2 bit definitions */ @@ -1233,6 +1294,12 @@ #define ICH_VTR_TDS_SHIFT 19 #define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) +/* HFG[WR]TR_EL2 bit definitions */ +#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT 55 +#define HFGxTR_EL2_nTPIDR2_EL0_MASK BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT) +#define HFGxTR_EL2_nSMPRI_EL1_SHIFT 54 +#define HFGxTR_EL2_nSMPRI_EL1_MASK BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT) + #define ARM64_FEATURE_FIELD_BITS 4 /* Create a mask for the feature bits of the specified feature. 
*/ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0529fd57567e..6751621e5bea 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -821,6 +821,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_SVE] = "SVE", [ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB", [ESR_ELx_EC_FPAC] = "FPAC", + [ESR_ELx_EC_SME] = "SME", [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", -- cgit v1.2.3 From ca8a4ebcff4465f0272637433c789a5e4a272626 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:14 +0100 Subject: arm64/sme: Manually encode SME instructions As with SVE rather than impose ambitious toolchain requirements for SME we manually encode the few instructions which we require in order to perform the work the kernel needs to do. The instructions used to save and restore context are provided as assembler macros while those for entering and leaving streaming mode are done in asm volatile blocks since they are expected to be used from C. We could do the SMSTART and SMSTOP operations with read/modify/write cycles on SVCR but using the aliases provided for individual field accesses should be slightly faster. These instructions are aliases for MSR but since our minimum toolchain requirements are old enough to mean that we can't use the sX_X_cX_cX_X form and they always use xzr rather than taking a value like write_sysreg_s() wants we just use .inst. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-7-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 25 ++++++++++++++++ arch/arm64/include/asm/fpsimdmacros.h | 54 +++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index cb24385e3632..6e2dc9dcbf49 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -249,6 +249,31 @@ static inline void sve_setup(void) { } #endif /* ! CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static inline void sme_smstart_sm(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr")); +} + +static inline void sme_smstop_sm(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr")); +} + +static inline void sme_smstop(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr")); +} + +#else + +static inline void sme_smstart_sm(void) { } +static inline void sme_smstop_sm(void) { } +static inline void sme_smstop(void) { } + +#endif /* ! CONFIG_ARM64_SME */ + /* For use by EFI runtime services calls only */ extern void __efi_fpsimd_begin(void); extern void __efi_fpsimd_end(void); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 2509d7dde55a..2e9a33155081 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -93,6 +93,12 @@ .endif .endm +.macro _sme_check_wv v + .if (\v) < 12 || (\v) > 15 + .error "Bad vector select register \v." 
+ .endif +.endm + /* SVE instruction encodings for non-SVE-capable assemblers */ /* (pre binutils 2.28, all kernel capable clang versions support SVE) */ @@ -174,6 +180,54 @@ | (\np) .endm +/* SME instruction encodings for non-SME-capable assemblers */ +/* (pre binutils 2.38/LLVM 13) */ + +/* RDSVL X\nx, #\imm */ +.macro _sme_rdsvl nx, imm + _check_general_reg \nx + _check_num (\imm), -0x20, 0x1f + .inst 0x04bf5800 \ + | (\nx) \ + | (((\imm) & 0x3f) << 5) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _sme_str_zav nw, nxbase, offset=0 + _sme_check_wv \nw + _check_general_reg \nxbase + _check_num (\offset), -0x100, 0xff + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _sme_ldr_zav nw, nxbase, offset=0 + _sme_check_wv \nw + _check_general_reg \nxbase + _check_num (\offset), -0x100, 0xff + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * Zero the entire ZA array + * ZERO ZA + */ +.macro zero_za + .inst 0xc00800ff +.endm + .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from -- cgit v1.2.3 From b2cf6a23289b3268cc7915a09c0c8372147b2727 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:15 +0100 Subject: arm64/sme: Early CPU setup for SME SME requires similar setup to that for SVE: disable traps to EL2 and make sure that the maximum vector length is available to EL1, for SME we have two traps - one for SME itself and one for TPIDR2. In addition since we currently make no active use of priority control for SCMUs we map all SME priorities lower ELs may configure to 0, the architecture specified minimum priority, to ensure that nothing we manage is able to configure itself to consume excessive resources. This will need to be revisited should there be a need to manage SME priorities at runtime. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-8-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/el2_setup.h | 64 +++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index c31be7eda9df..fabdbde0fe02 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -143,6 +143,50 @@ .Lskip_sve_\@: .endm +/* SME register access and priority mapping */ +.macro __init_el2_nvhe_sme + mrs x1, id_aa64pfr1_el1 + ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 + cbz x1, .Lskip_sme_\@ + + bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps + msr cptr_el2, x0 // Disable copro. traps to EL2 + isb + + mrs x1, sctlr_el2 + orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps + msr sctlr_el2, x1 + isb + + mov x1, #0 // SMCR controls + + mrs_s x2, SYS_ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM? + cbz x2, .Lskip_sme_fa64_\@ + + orr x1, x1, SMCR_ELx_FA64_MASK +.Lskip_sme_fa64_\@: + + orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector + msr_s SYS_SMCR_EL2, x1 // length for EL1. + + mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? + ubfx x1, x1, #SYS_SMIDR_EL1_SMPS_SHIFT, #1 + cbz x1, .Lskip_sme_\@ + + msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal + + mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? 
+ ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 + cbz x1, .Lskip_sme_\@ + + mrs_s x1, SYS_HCRX_EL2 + orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping + msr_s SYS_HCRX_EL2, x1 + +.Lskip_sme_\@: +.endm + /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 @@ -153,15 +197,26 @@ mrs x1, id_aa64dfr0_el1 ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 cmp x1, #3 - b.lt .Lset_fgt_\@ + b.lt .Lset_debug_fgt_\@ /* Disable PMSNEVFR_EL1 read and write traps */ orr x0, x0, #(1 << 62) -.Lset_fgt_\@: +.Lset_debug_fgt_\@: msr_s SYS_HDFGRTR_EL2, x0 msr_s SYS_HDFGWTR_EL2, x0 - msr_s SYS_HFGRTR_EL2, xzr - msr_s SYS_HFGWTR_EL2, xzr + + mov x0, xzr + mrs x1, id_aa64pfr1_el1 + ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 + cbz x1, .Lset_fgt_\@ + + /* Disable nVHE traps of TPIDR2 and SMPRI */ + orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK + orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK + +.Lset_fgt_\@: + msr_s SYS_HFGRTR_EL2, x0 + msr_s SYS_HFGWTR_EL2, x0 msr_s SYS_HFGITR_EL2, xzr mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU @@ -196,6 +251,7 @@ __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve + __init_el2_nvhe_sme __init_el2_fgt __init_el2_nvhe_prepare_eret .endm -- cgit v1.2.3 From 5e64b862c4823ab53aac028042abd918c2f27041 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:16 +0100 Subject: arm64/sme: Basic enumeration support This patch introduces basic cpufeature support for discovering the presence of the Scalable Matrix Extension. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-9-broonie@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arm64/elf_hwcaps.rst | 33 +++++++++++++++++++++ arch/arm64/include/asm/cpu.h | 1 + arch/arm64/include/asm/cpufeature.h | 12 ++++++++ arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/include/asm/hwcap.h | 8 +++++ arch/arm64/include/uapi/asm/hwcap.h | 8 +++++ arch/arm64/kernel/cpufeature.c | 59 +++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 9 ++++++ arch/arm64/kernel/fpsimd.c | 30 +++++++++++++++++++ arch/arm64/tools/cpucaps | 2 ++ 10 files changed, 164 insertions(+) (limited to 'arch/arm64/include') diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index a8f30963e550..f8d818eaaff5 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -264,6 +264,39 @@ HWCAP2_MTE3 Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described by Documentation/arm64/memory-tagging-extension.rst. +HWCAP2_SME + + Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described + by Documentation/arm64/sme.rst. + +HWCAP2_SME_I16I64 + + Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111. + +HWCAP2_SME_F64F64 + + Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1. + +HWCAP2_SME_I8I32 + + Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111. + +HWCAP2_SME_F16F32 + + Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1. + +HWCAP2_SME_B16F32 + + Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1. + +HWCAP2_SME_F32F32 + + Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1. + +HWCAP2_SME_FA64 + + Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1. + 4. 
Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index a58e366f0b07..d08062bcb9c1 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -58,6 +58,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; u64 reg_id_aa64zfr0; + u64 reg_id_aa64smfr0; struct cpuinfo_32bit aarch32; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c62e7e5e2f0c..8ac12e4094aa 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -759,6 +759,18 @@ static __always_inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static __always_inline bool system_supports_sme(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME); +} + +static __always_inline bool system_supports_fa64(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME_FA64); +} + static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 6e2dc9dcbf49..2e8ef00e7520 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -74,6 +74,8 @@ extern void sve_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 8db5ec0089db..9f0ce004fdbc 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -109,6 +109,14 @@ #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) #define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) +#define KERNEL_HWCAP_SME __khwcap2_feature(SME) +#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64) +#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64) +#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32) +#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32) +#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32) +#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32) +#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 99cb5d383048..b0256cec63b5 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -79,5 +79,13 @@ #define HWCAP2_AFP (1 << 20) #define HWCAP2_RPRES (1 << 21) #define HWCAP2_MTE3 (1 << 22) +#define HWCAP2_SME (1 << 23) +#define HWCAP2_SME_I16I64 (1 << 24) +#define HWCAP2_SME_F64F64 (1 << 25) +#define HWCAP2_SME_I8I32 (1 << 26) +#define HWCAP2_SME_F16F32 (1 << 27) +#define HWCAP2_SME_B16F32 (1 << 28) +#define HWCAP2_SME_F32F32 (1 << 29) +#define HWCAP2_SME_FA64 (1 << 30) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d72c4b4d389c..0f2d7ddd69ae 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -261,6 +261,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + 
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), @@ -293,6 +295,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), @@ -645,6 +665,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, &id_aa64pfr1_override), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), + ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0), /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), @@ -960,6 +981,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); + init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0); if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) init_32bit_cpu_features(&info->aarch32); @@ -2442,6 +2464,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .min_field_value = 1, }, +#ifdef CONFIG_ARM64_SME + { + .desc = "Scalable Matrix Extension", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_SME, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_SME_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_SME, + .matches = has_cpuid_feature, + .cpu_enable = sme_kernel_enable, + }, + /* FA64 should be sorted after the base SME capability */ + { + .desc = "FA64", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_SME_FA64, + .sys_reg = SYS_ID_AA64SMFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64SMFR0_FA64_SHIFT, + .field_width = 1, + .min_field_value = ID_AA64SMFR0_FA64, + .matches = has_cpuid_feature, + .cpu_enable = fa64_kernel_enable, + }, +#endif /* CONFIG_ARM64_SME */ {}, }; @@ -2575,6 +2624,16 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, 
FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), +#ifdef CONFIG_ARM64_SME + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), +#endif /* CONFIG_ARM64_SME */ {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 330b92ea863a..a73fe2888b7e 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -98,6 +98,14 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_AFP] = "afp", [KERNEL_HWCAP_RPRES] = "rpres", [KERNEL_HWCAP_MTE3] = "mte3", + [KERNEL_HWCAP_SME] = "sme", + [KERNEL_HWCAP_SME_I16I64] = "smei16i64", + [KERNEL_HWCAP_SME_F64F64] = "smef64f64", + [KERNEL_HWCAP_SME_I8I32] = "smei8i32", + [KERNEL_HWCAP_SME_F16F32] = "smef16f32", + [KERNEL_HWCAP_SME_B16F32] = "smeb16f32", + [KERNEL_HWCAP_SME_F32F32] = "smef32f32", + [KERNEL_HWCAP_SME_FA64] = "smefa64", }; #ifdef CONFIG_COMPAT @@ -401,6 +409,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); + info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1); if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) info->reg_gmid = read_cpuid(GMID_EL1); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 47af76e53221..e4fba0bfb55e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -993,6 +993,32 @@ void fpsimd_release_task(struct task_struct *dead_task) #endif /* CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + /* Set priority for all PEs to architecturally defined minimum */ + write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK, + SYS_SMPRI_EL1); + + /* Allow SME in kernel */ + write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1); + isb(); +} + +/* + * This must be called after sme_kernel_enable(), we rely on the + * feature table being sorted to ensure this. 
+ */ +void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + /* Allow use of FA64 */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK, + SYS_SMCR_EL1); +} + +#endif /* CONFIG_ARM64_SVE */ + /* * Trapped SVE access * @@ -1538,6 +1564,10 @@ static int __init fpsimd_init(void) if (!cpu_have_named_feature(ASIMD)) pr_notice("Advanced SIMD is not implemented\n"); + + if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE)) + pr_notice("SME is implemented but not SVE\n"); + return sve_sysctl_init(); } core_initcall(fpsimd_init); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 3ed418f70e3b..e52b289a27c2 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -43,6 +43,8 @@ KVM_PROTECTED_MODE MISMATCHED_CACHE_TYPE MTE MTE_ASYMM +SME +SME_FA64 SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 -- cgit v1.2.3 From b42990d3bf77cc29d7c33e21518c1f806dae6b21 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:17 +0100 Subject: arm64/sme: Identify supported SME vector lengths at boot The vector lengths used for SME are controlled through a similar set of registers to those for SVE and enumerated using a similar algorithm with some slight differences due to the fact that unlike SVE there are no restrictions on which combinations of vector lengths can be supported nor any mandatory vector lengths which must be implemented. Add a new vector type and implement support for enumerating it. One slightly awkward feature is that we need to read the current vector length using a different instruction (or enter streaming mode which would have the same issue and be higher cost). Rather than add an ops structure we add special cases directly in the otherwise generic vec_probe_vqs() function, this is a bit inelegant but it's the only place where this is an issue. 
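For orientation, an illustrative sketch (not part of this patch): RDSVL X0, #1, which the new sme_get_vl() helper wraps, returns the streaming vector length in bytes, and the probing code converts that to a vector quadword count (VQ, units of 128 bits) with the existing SVE helpers:

  unsigned int vl = sme_get_vl();         /* e.g. 64 bytes for a 512-bit SVL */
  unsigned int vq = sve_vq_from_vl(vl);   /* 64 bytes -> VQ 4 */
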
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-10-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpu.h | 3 + arch/arm64/include/asm/cpufeature.h | 7 ++ arch/arm64/include/asm/fpsimd.h | 26 ++++++++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/cpufeature.c | 47 ++++++++++++++ arch/arm64/kernel/cpuinfo.c | 4 ++ arch/arm64/kernel/entry-fpsimd.S | 9 +++ arch/arm64/kernel/fpsimd.c | 123 +++++++++++++++++++++++++++++++++++- 8 files changed, 218 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index d08062bcb9c1..115cdec1ae87 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -64,6 +64,9 @@ struct cpuinfo_arm64 { /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */ u64 reg_zcr; + + /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */ + u64 reg_smcr; }; DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8ac12e4094aa..5ddfae233ea5 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0) return val > 0; } +static inline bool id_aa64pfr1_sme(u64 pfr1) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT); + + return val > 0; +} + static inline bool id_aa64pfr1_mte(u64 pfr1) { u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 2e8ef00e7520..32cd682258d9 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -78,6 +78,7 @@ extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); +extern u64 read_smcr_features(void); /* * Helpers to translate bit indices in sve_vq_map to VQ values (and @@ -172,6 +173,12 @@ static inline void write_vl(enum vec_type type, u64 val) tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK; write_sysreg_s(tmp | val, SYS_ZCR_EL1); break; +#endif +#ifdef CONFIG_ARM64_SME + case ARM64_VEC_SME: + tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK; + write_sysreg_s(tmp | val, SYS_SMCR_EL1); + break; #endif default: WARN_ON_ONCE(1); @@ -268,12 +275,31 @@ static inline void sme_smstop(void) asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr")); } +extern void __init sme_setup(void); + +static inline int sme_max_vl(void) +{ + return vec_max_vl(ARM64_VEC_SME); +} + +static inline int sme_max_virtualisable_vl(void) +{ + return vec_max_virtualisable_vl(ARM64_VEC_SME); +} + +extern unsigned int sme_get_vl(void); + #else static inline void sme_smstart_sm(void) { } static inline void sme_smstop_sm(void) { } static inline void sme_smstop(void) { } +static inline void sme_setup(void) { } +static inline unsigned int sme_get_vl(void) { return 0; } +static inline int sme_max_vl(void) { return 0; } +static inline int sme_max_virtualisable_vl(void) { return 0; } + #endif /* ! 
CONFIG_ARM64_SME */ /* For use by EFI runtime services calls only */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 73e38d9a540c..abf34a9c2eab 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -118,6 +118,7 @@ struct debug_info { enum vec_type { ARM64_VEC_SVE = 0, + ARM64_VEC_SME, ARM64_VEC_MAX, }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0f2d7ddd69ae..082b3f48cbfd 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -581,6 +581,12 @@ static const struct arm64_ftr_bits ftr_zcr[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_smcr[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, + SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_SIZE, 0), /* LEN */ + ARM64_FTR_END, +}; + /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -687,6 +693,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 1, CRm = 2 */ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), + ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr), /* Op1 = 1, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), @@ -991,6 +998,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) vec_init_vq_map(ARM64_VEC_SVE); } + if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr); + if (IS_ENABLED(CONFIG_ARM64_SME)) + vec_init_vq_map(ARM64_VEC_SME); + } + if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); @@ -1217,6 +1230,9 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu, info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu, + info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0); + if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, info->reg_zcr, boot->reg_zcr); @@ -1227,6 +1243,16 @@ void update_cpu_features(int cpu, vec_update_vq_map(ARM64_VEC_SVE); } + if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu, + info->reg_smcr, boot->reg_smcr); + + /* Probe vector lengths, unless we already gave up on SME */ + if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) && + !system_capabilities_finalized()) + vec_update_vq_map(ARM64_VEC_SME); + } + /* * The kernel uses the LDGM/STGM instructions and the number of tags * they read/write depends on the GMID_EL1.BS field. 
Check that the @@ -2931,6 +2957,23 @@ static void verify_sve_features(void) /* Add checks on other ZCR bits here if necessary */ } +static void verify_sme_features(void) +{ + u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); + u64 smcr = read_smcr_features(); + + unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK; + unsigned int len = smcr & SMCR_ELx_LEN_MASK; + + if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) { + pr_crit("CPU%d: SME: vector length support mismatch\n", + smp_processor_id()); + cpu_die_early(); + } + + /* Add checks on other SMCR bits here if necessary */ +} + static void verify_hyp_capabilities(void) { u64 safe_mmfr1, mmfr0, mmfr1; @@ -2983,6 +3026,9 @@ static void verify_local_cpu_capabilities(void) if (system_supports_sve()) verify_sve_features(); + if (system_supports_sme()) + verify_sme_features(); + if (is_hyp_mode_available()) verify_hyp_capabilities(); } @@ -3100,6 +3146,7 @@ void __init setup_cpu_features(void) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); sve_setup(); + sme_setup(); minsigstksz_setup(); /* Advertise that we have computed the system capabilities */ diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index a73fe2888b7e..8a8136a096ac 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -421,6 +421,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) id_aa64pfr0_sve(info->reg_id_aa64pfr0)) info->reg_zcr = read_zcr_features(); + if (IS_ENABLED(CONFIG_ARM64_SME) && + id_aa64pfr1_sme(info->reg_id_aa64pfr1)) + info->reg_smcr = read_smcr_features(); + cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index dc242e269f9a..deee5f01462e 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -86,3 +86,12 @@ SYM_FUNC_START(sve_flush_live) SYM_FUNC_END(sve_flush_live) #endif /* CONFIG_ARM64_SVE */ + +#ifdef CONFIG_ARM64_SME + +SYM_FUNC_START(sme_get_vl) + _sme_rdsvl 0, 1 + ret +SYM_FUNC_END(sme_get_vl) + +#endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e4fba0bfb55e..5e5fbd9cba75 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -136,6 +136,12 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { .max_virtualisable_vl = SVE_VL_MIN, }, #endif +#ifdef CONFIG_ARM64_SME + [ARM64_VEC_SME] = { + .type = ARM64_VEC_SME, + .name = "SME", + }, +#endif }; static unsigned int vec_vl_inherit_flag(enum vec_type type) @@ -186,6 +192,20 @@ extern void __percpu *efi_sve_state; #endif /* ! 
CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int get_sme_default_vl(void) +{ + return get_default_vl(ARM64_VEC_SME); +} + +static void set_sme_default_vl(int val) +{ + set_default_vl(ARM64_VEC_SME, val); +} + +#endif + DEFINE_PER_CPU(bool, fpsimd_context_busy); EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); @@ -409,6 +429,8 @@ static unsigned int find_supported_vector_length(enum vec_type type, if (vl > max_vl) vl = max_vl; + if (vl < info->min_vl) + vl = info->min_vl; bit = find_next_bit(info->vq_map, SVE_VQ_MAX, __vq_to_bit(sve_vq_from_vl(vl))); @@ -770,7 +792,23 @@ static void vec_probe_vqs(struct vl_info *info, for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) { write_vl(info->type, vq - 1); /* self-syncing */ - vl = sve_get_vl(); + + switch (info->type) { + case ARM64_VEC_SVE: + vl = sve_get_vl(); + break; + case ARM64_VEC_SME: + vl = sme_get_vl(); + break; + default: + vl = 0; + break; + } + + /* Minimum VL identified? */ + if (sve_vq_from_vl(vl) > vq) + break; + vq = sve_vq_from_vl(vl); /* skip intervening lengths */ set_bit(__vq_to_bit(vq), map); } @@ -1017,7 +1055,88 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) SYS_SMCR_EL1); } -#endif /* CONFIG_ARM64_SVE */ +/* + * Read the pseudo-SMCR used by cpufeatures to identify the supported + * vector length. + * + * Use only if SME is present. + * This function clobbers the SME vector length. + */ +u64 read_smcr_features(void) +{ + u64 smcr; + unsigned int vq_max; + + sme_kernel_enable(NULL); + sme_smstart_sm(); + + /* + * Set the maximum possible VL. + */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK, + SYS_SMCR_EL1); + + smcr = read_sysreg_s(SYS_SMCR_EL1); + smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */ + vq_max = sve_vq_from_vl(sve_get_vl()); + smcr |= vq_max - 1; /* set LEN field to maximum effective value */ + + sme_smstop_sm(); + + return smcr; +} + +void __init sme_setup(void) +{ + struct vl_info *info = &vl_info[ARM64_VEC_SME]; + u64 smcr; + int min_bit; + + if (!system_supports_sme()) + return; + + /* + * SME doesn't require any particular vector length be + * supported but it does require at least one. We should have + * disabled the feature entirely while bringing up CPUs but + * let's double check here. + */ + WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX)); + + min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX); + info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit)); + + smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); + info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1); + + /* + * Sanity-check that the max VL we determined through CPU features + * corresponds properly to sme_vq_map. If not, do our best: + */ + if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME, + info->max_vl))) + info->max_vl = find_supported_vector_length(ARM64_VEC_SME, + info->max_vl); + + WARN_ON(info->min_vl > info->max_vl); + + /* + * For the default VL, pick the maximum supported value <= 32 + * (256 bits) if there is one since this is guaranteed not to + * grow the signal frame when in streaming mode, otherwise the + * minimum available VL will be used. 
+ */ + set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32)); + + pr_info("SME: minimum available vector length %u bytes per vector\n", + info->min_vl); + pr_info("SME: maximum available vector length %u bytes per vector\n", + info->max_vl); + pr_info("SME: default vector length %u bytes per vector\n", + get_sme_default_vl()); +} + +#endif /* CONFIG_ARM64_SME */ /* * Trapped SVE access -- cgit v1.2.3 From 9e4ab6c89109472082616f8d2f6ada7deaffe161 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:19 +0100 Subject: arm64/sme: Implement vector length configuration prctl()s As for SVE provide a prctl() interface which allows processes to configure their SME vector length. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-12-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 4 ++++ arch/arm64/include/asm/processor.h | 4 +++- arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/fpsimd.c | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/prctl.h | 9 +++++++++ kernel/sys.c | 12 ++++++++++++ 6 files changed, 61 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 32cd682258d9..38fd6aab7feb 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -288,6 +288,8 @@ static inline int sme_max_virtualisable_vl(void) } extern unsigned int sme_get_vl(void); +extern int sme_set_current_vl(unsigned long arg); +extern int sme_get_current_vl(void); #else @@ -299,6 +301,8 @@ static inline void sme_setup(void) { } static inline unsigned int sme_get_vl(void) { return 0; } static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } +static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } +static inline int sme_get_current_vl(void) { return -EINVAL; } #endif /* ! 
CONFIG_ARM64_SME */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index abf34a9c2eab..7a57cbff8a03 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -355,9 +355,11 @@ extern void __init minsigstksz_setup(void); */ #include -/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ +/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() +#define SME_SET_VL(arg) sme_set_current_vl(arg) +#define SME_GET_VL() sme_get_current_vl() /* PR_PAC_RESET_KEYS prctl */ #define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e1317b7c4525..4e6b58dcd6f9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -82,6 +82,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ +#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 754a96563f6f..39f44fcb9b99 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -149,6 +149,8 @@ static unsigned int vec_vl_inherit_flag(enum vec_type type) switch (type) { case ARM64_VEC_SVE: return TIF_SVE_VL_INHERIT; + case ARM64_VEC_SME: + return TIF_SME_VL_INHERIT; default: WARN_ON_ONCE(1); return 0; @@ -807,6 +809,36 @@ int sve_get_current_vl(void) return vec_prctl_status(ARM64_VEC_SVE, 0); } +#ifdef CONFIG_ARM64_SME +/* PR_SME_SET_VL */ +int sme_set_current_vl(unsigned long arg) +{ + unsigned long vl, flags; + int ret; + + vl = arg & PR_SME_VL_LEN_MASK; + flags = arg & ~vl; + + if (!system_supports_sme() || is_compat_task()) + return -EINVAL; + + ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags); + if (ret) + return ret; + + return vec_prctl_status(ARM64_VEC_SME, flags); +} + +/* PR_SME_GET_VL */ +int sme_get_current_vl(void) +{ + if (!system_supports_sme() || is_compat_task()) + return -EINVAL; + + return vec_prctl_status(ARM64_VEC_SME, 0); +} +#endif /* CONFIG_ARM64_SME */ + static void vec_probe_vqs(struct vl_info *info, DECLARE_BITMAP(map, SVE_VQ_MAX)) { diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index e998764f0262..a5e06dcbba13 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -272,6 +272,15 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 +/* arm64 Scalable Matrix Extension controls */ +/* Flag values must be in sync with SVE versions */ +#define PR_SME_SET_VL 63 /* set task vector length */ +# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SME_GET_VL 64 /* get task vector length */ +/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */ +# define PR_SME_VL_LEN_MASK 0xffff +# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */ + #define PR_SET_VMA 0x53564d41 # define PR_SET_VMA_ANON_NAME 0 diff --git a/kernel/sys.c b/kernel/sys.c index 374f83e95239..b911fa6d81ab 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -117,6 +117,12 @@ #ifndef SVE_GET_VL # define SVE_GET_VL() (-EINVAL) #endif +#ifndef SME_SET_VL 
+# define SME_SET_VL(a) (-EINVAL) +#endif +#ifndef SME_GET_VL +# define SME_GET_VL() (-EINVAL) +#endif #ifndef PAC_RESET_KEYS # define PAC_RESET_KEYS(a, b) (-EINVAL) #endif @@ -2541,6 +2547,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SVE_GET_VL: error = SVE_GET_VL(); break; + case PR_SME_SET_VL: + error = SME_SET_VL(arg2); + break; + case PR_SME_GET_VL: + error = SME_GET_VL(); + break; case PR_GET_SPECULATION_CTRL: if (arg3 || arg4 || arg5) return -EINVAL; -- cgit v1.2.3 From a9d69158595017d260ab37bf88b8f125e5e8144c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:20 +0100 Subject: arm64/sme: Implement support for TPIDR2 The Scalable Matrix Extension introduces support for a new thread specific data register TPIDR2 intended for use by libc. The kernel must save the value of TPIDR2 on context switch and should ensure that all new threads start off with a default value of 0. Add a field to the thread_struct to store TPIDR2 and context switch it with the other thread specific data. In case there are future extensions which also use TPIDR2 we introduce system_supports_tpidr2() and use that rather than system_supports_sme() for TPIDR2 handling. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-13-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 5 +++++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/fpsimd.c | 4 ++++ arch/arm64/kernel/process.c | 14 ++++++++++++-- 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5ddfae233ea5..14a8f3d93add 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -778,6 +778,11 @@ static __always_inline bool system_supports_fa64(void) cpus_have_const_cap(ARM64_SME_FA64); } +static __always_inline bool system_supports_tpidr2(void) +{ + return system_supports_sme(); +} + static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7a57cbff8a03..849e97d418a8 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -169,6 +169,7 @@ struct thread_struct { u64 mte_ctrl; #endif u64 sctlr_user; + u64 tpidr2_el0; }; static inline unsigned int thread_get_vl(struct thread_struct *thread, diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 39f44fcb9b99..231f2d85b65e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1098,6 +1098,10 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) /* Allow SME in kernel */ write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1); isb(); + + /* Allow EL0 to access TPIDR2 */ + write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1); + isb(); } /* diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7fa97df55e3a..e20571f19718 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -250,6 +250,8 @@ void show_regs(struct pt_regs *regs) static void tls_thread_flush(void) { write_sysreg(0, tpidr_el0); + if (system_supports_tpidr2()) + write_sysreg_s(0, SYS_TPIDR2_EL0); if (is_compat_task()) { current->thread.uw.tp_value = 0; @@ -343,6 +345,8 @@ int copy_thread(unsigned long clone_flags, unsigned long 
stack_start, * out-of-sync with the saved value. */ *task_user_tls(p) = read_sysreg(tpidr_el0); + if (system_supports_tpidr2()) + p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); if (stack_start) { if (is_compat_thread(task_thread_info(p))) @@ -353,10 +357,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, /* * If a TLS pointer was passed to clone, use it for the new - * thread. + * thread. We also reset TPIDR2 if it's in use. */ - if (clone_flags & CLONE_SETTLS) + if (clone_flags & CLONE_SETTLS) { p->thread.uw.tp_value = tls; + p->thread.tpidr2_el0 = 0; + } } else { /* * A kthread has no context to ERET to, so ensure any buggy @@ -387,6 +393,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, void tls_preserve_current_state(void) { *task_user_tls(current) = read_sysreg(tpidr_el0); + if (system_supports_tpidr2() && !is_compat_task()) + current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); } static void tls_thread_switch(struct task_struct *next) @@ -399,6 +407,8 @@ static void tls_thread_switch(struct task_struct *next) write_sysreg(0, tpidrro_el0); write_sysreg(*task_user_tls(next), tpidr_el0); + if (system_supports_tpidr2()) + write_sysreg_s(next->thread.tpidr2_el0, SYS_TPIDR2_EL0); } /* -- cgit v1.2.3 From b40c559b45bec736f588c57dd5be967fe573058b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:21 +0100 Subject: arm64/sme: Implement SVCR context switching In SME the use of both streaming SVE mode and ZA are tracked through PSTATE.SM and PSTATE.ZA, visible through the system register SVCR. In order to context switch the floating point state for SME we need to context switch the contents of this register as part of context switching the floating point state. Since changing the vector length exits streaming SVE mode and disables ZA we also make sure we update SVCR appropriately when setting vector length, and similarly ensure that new threads have streaming SVE mode and ZA disabled. 
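For illustration only (not part of this patch): the PSTATE bits that SVCR reflects can be toggled from userspace with the SMSTART/SMSTOP operations. The function names below are invented for the example, and a toolchain that accepts the SME mnemonics is assumed (the kernel itself uses the MSR encodings of these operations):

static inline void example_enter_streaming_mode(void)
{
	asm volatile("smstart sm");	/* sets PSTATE.SM, visible as SVCR.SM */
}

static inline void example_exit_streaming_mode(void)
{
	asm volatile("smstop sm");	/* clears PSTATE.SM; the FP/SVE registers are invalidated */
}

static inline void example_enable_za(void)
{
	asm volatile("smstart za");	/* sets PSTATE.ZA, visible as SVCR.ZA */
}
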
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-14-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 3 ++- arch/arm64/include/asm/processor.h | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/fpsimd.c | 18 +++++++++++++++++- arch/arm64/kernel/process.c | 2 ++ arch/arm64/kvm/fpsimd.c | 7 ++++++- 6 files changed, 29 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 38fd6aab7feb..821d270980da 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -46,7 +46,8 @@ extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, - void *sve_state, unsigned int sve_vl); + void *sve_state, unsigned int sve_vl, + u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 849e97d418a8..22cd11e86854 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -169,6 +169,7 @@ struct thread_struct { u64 mte_ctrl; #endif u64 sctlr_user; + u64 svcr; u64 tpidr2_el0; }; diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 4e6b58dcd6f9..848739c15de8 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -82,6 +82,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ +#define TIF_SME 27 /* SME in use */ #define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 231f2d85b65e..1c113349f6cc 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -121,6 +121,7 @@ struct fpsimd_last_state_struct { struct user_fpsimd_state *st; void *sve_state; + u64 *svcr; unsigned int sve_vl; }; @@ -359,6 +360,9 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); + if (IS_ENABLED(CONFIG_ARM64_SME) && test_thread_flag(TIF_SME)) + write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); sve_load_state(sve_pffr(¤t->thread), @@ -390,6 +394,12 @@ static void fpsimd_save(void) if (test_thread_flag(TIF_FOREIGN_FPSTATE)) return; + if (IS_ENABLED(CONFIG_ARM64_SME) && + test_thread_flag(TIF_SME)) { + u64 *svcr = last->svcr; + *svcr = read_sysreg_s(SYS_SVCR_EL0); + } + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { if (WARN_ON(sve_get_vl() != last->sve_vl)) { @@ -741,6 +751,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) sve_to_fpsimd(task); + if (system_supports_sme() && type == ARM64_VEC_SME) + task->thread.svcr &= ~(SYS_SVCR_EL0_SM_MASK | + SYS_SVCR_EL0_ZA_MASK); + if (task == current) put_cpu_fpsimd_context(); @@ -1404,6 +1418,7 @@ static void fpsimd_bind_task_to_cpu(void) last->st = 
¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = task_get_sve_vl(current); + last->svcr = ¤t->thread.svcr; current->thread.fpsimd_cpu = smp_processor_id(); if (system_supports_sve()) { @@ -1418,7 +1433,7 @@ static void fpsimd_bind_task_to_cpu(void) } void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl) + unsigned int sve_vl, u64 *svcr) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1427,6 +1442,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; + last->svcr = svcr; last->sve_state = sve_state; last->sve_vl = sve_vl; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index e20571f19718..07f235b46cf5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -310,6 +310,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + dst->thread.svcr = 0; + /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 397fdac75cb1..ac09f1f682ff 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -109,9 +109,14 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { + /* + * Currently we do not support SME guests so SVCR is + * always 0 and we just need a variable to point to. + */ fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, - vcpu->arch.sve_max_vl); + vcpu->arch.sve_max_vl, + NULL); clear_thread_flag(TIF_FOREIGN_FPSTATE); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); -- cgit v1.2.3 From af7167d6d2675f3343eff3ad6c9b4a8e30122e2c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:22 +0100 Subject: arm64/sme: Implement streaming SVE context switching When in streaming mode we need to save and restore the streaming mode SVE register state rather than the regular SVE register state. This uses the streaming mode vector length and omits FFR but is otherwise identical, if TIF_SVE is enabled when we are in streaming mode then streaming mode takes precedence. This does not handle use of streaming SVE state with KVM, ptrace or signals. This will be updated in further patches. 
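A simplified sketch of the resulting save-path policy (illustrative only, relying on helpers introduced in this series rather than being a drop-in function): both the effective vector length and whether FFR is saved depend on PSTATE.SM:

/* Sketch: pick the effective vector length and FFR policy for a save. */
static unsigned int sketch_save_vl(struct task_struct *tsk, u64 svcr, bool *save_ffr)
{
	if (svcr & SYS_SVCR_EL0_SM_MASK) {
		*save_ffr = system_supports_fa64();	/* FFR only exists with FEAT_SME_FA64 */
		return task_get_sme_vl(tsk);		/* streaming mode vector length */
	}

	*save_ffr = true;				/* regular SVE always has FFR */
	return task_get_sve_vl(tsk);
}
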
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-15-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 22 ++++++- arch/arm64/include/asm/fpsimdmacros.h | 11 ++++ arch/arm64/include/asm/processor.h | 10 ++++ arch/arm64/kernel/entry-fpsimd.S | 5 ++ arch/arm64/kernel/fpsimd.c | 109 +++++++++++++++++++++++++++------- arch/arm64/kvm/fpsimd.c | 2 +- 6 files changed, 136 insertions(+), 23 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 821d270980da..cd94f5c5b516 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -47,11 +47,21 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl, - u64 *svcr); + unsigned int sme_vl, u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); +static inline bool thread_sm_enabled(struct thread_struct *thread) +{ + return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK); +} + +static inline bool thread_za_enabled(struct thread_struct *thread) +{ + return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_ZA_MASK); +} + /* Maximum VL that SVE/SME VL-agnostic software can transparently support */ #define VL_ARCH_MAX 0x100 @@ -63,7 +73,14 @@ static inline size_t sve_ffr_offset(int vl) static inline void *sve_pffr(struct thread_struct *thread) { - return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread)); + unsigned int vl; + + if (system_supports_sme() && thread_sm_enabled(thread)) + vl = thread_get_sme_vl(thread); + else + vl = thread_get_sve_vl(thread); + + return (char *)thread->sve_state + sve_ffr_offset(vl); } extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); @@ -72,6 +89,7 @@ extern void sve_load_state(void const *state, u32 const *pfpsr, extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); +extern void sme_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 2e9a33155081..f6ab36e0cd8d 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -262,6 +262,17 @@ 921: .endm +/* Update SMCR_EL1.LEN with the new VQ */ +.macro sme_load_vq xvqminus1, xtmp, xtmp2 + mrs_s \xtmp, SYS_SMCR_EL1 + bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, \xtmp + b.eq 921f + msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising +921: +.endm + /* Preserve the first 128-bits of Znz and zero the rest. 
*/ .macro _sve_flush_z nz _sve_check_zreg \nz diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 22cd11e86854..7542310b4e6b 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -184,6 +184,11 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread) return thread_get_vl(thread, ARM64_VEC_SVE); } +static inline unsigned int thread_get_sme_vl(struct thread_struct *thread) +{ + return thread_get_vl(thread, ARM64_VEC_SME); +} + unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); void task_set_vl(struct task_struct *task, enum vec_type type, unsigned long vl); @@ -197,6 +202,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task) return task_get_vl(task, ARM64_VEC_SVE); } +static inline unsigned int task_get_sme_vl(const struct task_struct *task) +{ + return task_get_vl(task, ARM64_VEC_SME); +} + static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl) { task_set_vl(task, ARM64_VEC_SVE, vl); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index deee5f01462e..6f88c0f86d50 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -94,4 +94,9 @@ SYM_FUNC_START(sme_get_vl) ret SYM_FUNC_END(sme_get_vl) +SYM_FUNC_START(sme_set_vq) + sme_load_vq x0, x1, x2 + ret +SYM_FUNC_END(sme_set_vq) + #endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1c113349f6cc..f8506a875eb2 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -123,6 +123,7 @@ struct fpsimd_last_state_struct { void *sve_state; u64 *svcr; unsigned int sve_vl; + unsigned int sme_vl; }; static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); @@ -301,17 +302,28 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, task->thread.vl_onexec[type] = vl; } +/* + * TIF_SME controls whether a task can use SME without trapping while + * in userspace, when TIF_SME is set then we must have storage + * alocated in sve_state and za_state to store the contents of both ZA + * and the SVE registers for both streaming and non-streaming modes. + * + * If both SVCR.ZA and SVCR.SM are disabled then at any point we + * may disable TIF_SME and reenable traps. + */ + + /* * TIF_SVE controls whether a task can use SVE without trapping while - * in userspace, and also the way a task's FPSIMD/SVE state is stored - * in thread_struct. + * in userspace, and also (together with TIF_SME) the way a task's + * FPSIMD/SVE state is stored in thread_struct. * * The kernel uses this flag to track whether a user task is actively * using SVE, and therefore whether full SVE register state needs to * be tracked. If not, the cheaper FPSIMD context handling code can * be used instead of the more costly SVE equivalents. * - * * TIF_SVE set: + * * TIF_SVE or SVCR.SM set: * * The task can execute SVE instructions while in userspace without * trapping to the kernel. @@ -319,7 +331,8 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the * corresponding Zn), P0-P15 and FFR are encoded in in * task->thread.sve_state, formatted appropriately for vector - * length task->thread.sve_vl. + * length task->thread.sve_vl or, if SVCR.SM is set, + * task->thread.sme_vl. * * task->thread.sve_state must point to a valid buffer at least * sve_state_size(task) bytes in size. 
@@ -357,19 +370,40 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, */ static void task_fpsimd_load(void) { + bool restore_sve_regs = false; + bool restore_ffr; + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (IS_ENABLED(CONFIG_ARM64_SME) && test_thread_flag(TIF_SME)) - write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); - + /* Check if we should restore SVE first */ if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); + restore_sve_regs = true; + restore_ffr = true; + } + + /* Restore SME, override SVE register configuration if needed */ + if (system_supports_sme()) { + unsigned long sme_vl = task_get_sme_vl(current); + + if (test_thread_flag(TIF_SME)) + sme_set_vq(sve_vq_from_vl(sme_vl) - 1); + + write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); + + if (thread_sm_enabled(¤t->thread)) { + restore_sve_regs = true; + restore_ffr = system_supports_fa64(); + } + } + + if (restore_sve_regs) sve_load_state(sve_pffr(¤t->thread), - ¤t->thread.uw.fpsimd_state.fpsr, true); - } else { + ¤t->thread.uw.fpsimd_state.fpsr, + restore_ffr); + else fpsimd_load_state(¤t->thread.uw.fpsimd_state); - } } /* @@ -387,6 +421,9 @@ static void fpsimd_save(void) struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + bool save_sve_regs = false; + bool save_ffr; + unsigned int vl; WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); @@ -394,15 +431,33 @@ static void fpsimd_save(void) if (test_thread_flag(TIF_FOREIGN_FPSTATE)) return; - if (IS_ENABLED(CONFIG_ARM64_SME) && - test_thread_flag(TIF_SME)) { + if (test_thread_flag(TIF_SVE)) { + save_sve_regs = true; + save_ffr = true; + vl = last->sve_vl; + } + + if (system_supports_sme()) { u64 *svcr = last->svcr; *svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (thread_za_enabled(¤t->thread)) { + /* ZA state managment is not implemented yet */ + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); + return; + } + + /* If we are in streaming mode override regular SVE. */ + if (*svcr & SYS_SVCR_EL0_SM_MASK) { + save_sve_regs = true; + save_ffr = system_supports_fa64(); + vl = last->sme_vl; + } } - if (IS_ENABLED(CONFIG_ARM64_SVE) && - test_thread_flag(TIF_SVE)) { - if (WARN_ON(sve_get_vl() != last->sve_vl)) { + if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) { + /* Get the configured VL from RDVL, will account for SM */ + if (WARN_ON(sve_get_vl() != vl)) { /* * Can't save the user regs, so current would * re-enter user with corrupt state. 
@@ -413,8 +468,8 @@ static void fpsimd_save(void) } sve_save_state((char *)last->sve_state + - sve_ffr_offset(last->sve_vl), - &last->st->fpsr, true); + sve_ffr_offset(vl), + &last->st->fpsr, save_ffr); } else { fpsimd_save_state(last->st); } @@ -619,7 +674,14 @@ static void sve_to_fpsimd(struct task_struct *task) */ static size_t sve_state_size(struct task_struct const *task) { - return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task))); + unsigned int vl = 0; + + if (system_supports_sve()) + vl = task_get_sve_vl(task); + if (system_supports_sme()) + vl = max(vl, task_get_sme_vl(task)); + + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)); } /* @@ -748,7 +810,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, } fpsimd_flush_task_state(task); - if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) + if (test_and_clear_tsk_thread_flag(task, TIF_SVE) || + thread_sm_enabled(&task->thread)) sve_to_fpsimd(task); if (system_supports_sme() && type == ARM64_VEC_SME) @@ -1375,6 +1438,9 @@ void fpsimd_flush_thread(void) fpsimd_flush_thread_vl(ARM64_VEC_SVE); } + if (system_supports_sme()) + fpsimd_flush_thread_vl(ARM64_VEC_SME); + put_cpu_fpsimd_context(); } @@ -1418,6 +1484,7 @@ static void fpsimd_bind_task_to_cpu(void) last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = task_get_sve_vl(current); + last->sme_vl = task_get_sme_vl(current); last->svcr = ¤t->thread.svcr; current->thread.fpsimd_cpu = smp_processor_id(); @@ -1433,7 +1500,8 @@ static void fpsimd_bind_task_to_cpu(void) } void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl, u64 *svcr) + unsigned int sve_vl, unsigned int sme_vl, + u64 *svcr) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1445,6 +1513,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, last->svcr = svcr; last->sve_state = sve_state; last->sve_vl = sve_vl; + last->sme_vl = sme_vl; } /* diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index ac09f1f682ff..394e583bb73e 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -116,7 +116,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, vcpu->arch.sve_max_vl, - NULL); + 0, NULL); clear_thread_flag(TIF_FOREIGN_FPSTATE); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); -- cgit v1.2.3 From 0033cd9339642f9b7bef23f96aa2e7277ab51cce Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:23 +0100 Subject: arm64/sme: Implement ZA context switching Allocate space for storing ZA on first access to SME and use that to save and restore ZA state when context switching. We do this by using the vector form of the LDR and STR ZA instructions, these do not require streaming mode and have implementation recommendations that they avoid contention issues in shared SMCU implementations. Since ZA is architecturally guaranteed to be zeroed when enabled we do not need to explicitly zero ZA, either we will be restoring from a saved copy or trapping on first use of SME so we know that ZA must be disabled. 
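For sizing intuition (an illustrative helper, not part of the patch): ZA is a square array of SVL/8 rows of SVL/8 bytes each, which is what the row-at-a-time LDR/STR ZA loops added below walk through:

/* Bytes needed to back ZA for a streaming vector length of svl_bytes
 * (i.e. SVL/8); matches the (vq * __SVE_VQ_BYTES) squared sizing used
 * later in the series. */
static inline size_t za_backing_bytes(unsigned int svl_bytes)
{
	return (size_t)svl_bytes * svl_bytes;
}
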
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-16-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 5 ++++- arch/arm64/include/asm/fpsimdmacros.h | 22 ++++++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/entry-fpsimd.S | 22 ++++++++++++++++++++++ arch/arm64/kernel/fpsimd.c | 20 +++++++++++++------- arch/arm64/kvm/fpsimd.c | 2 +- 7 files changed, 66 insertions(+), 9 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index cd94f5c5b516..1a709c03bb6c 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -47,7 +47,8 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl, - unsigned int sme_vl, u64 *svcr); + void *za_state, unsigned int sme_vl, + u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); @@ -90,6 +91,8 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); extern void sme_set_vq(unsigned long vq_minus_1); +extern void za_save_state(void *state); +extern void za_load_state(void const *state); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index f6ab36e0cd8d..5e0910cf4832 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -319,3 +319,25 @@ ldr w\nxtmp, [\xpfpsr, #4] msr fpcr, x\nxtmp .endm + +.macro sme_save_za nxbase, xvl, nw + mov w\nw, #0 + +423: + _sme_str_zav \nw, \nxbase + add x\nxbase, x\nxbase, \xvl + add x\nw, x\nw, #1 + cmp \xvl, x\nw + bne 423b +.endm + +.macro sme_load_za nxbase, xvl, nw + mov w\nw, #0 + +423: + _sme_ldr_zav \nw, \nxbase + add x\nxbase, x\nxbase, \xvl + add x\nw, x\nw, #1 + cmp \xvl, x\nw + bne 423b +.endm diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 94a27a7520f4..8a7c442d5b57 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -295,8 +295,11 @@ struct vcpu_reset_state { struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; + + /* Guest floating point state */ void *sve_state; unsigned int sve_max_vl; + u64 svcr; /* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7542310b4e6b..6a3a6c3dec90 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -154,6 +154,7 @@ struct thread_struct { unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ + void *za_state; /* ZA register, if any */ unsigned int vl[ARM64_VEC_MAX]; /* vector length */ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 6f88c0f86d50..229436f33df5 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -99,4 +99,26 @@ SYM_FUNC_START(sme_set_vq) ret SYM_FUNC_END(sme_set_vq) +/* + * Save 
the SME state + * + * x0 - pointer to buffer for state + */ +SYM_FUNC_START(za_save_state) + _sme_rdsvl 1, 1 // x1 = VL/8 + sme_save_za 0, x1, 12 + ret +SYM_FUNC_END(za_save_state) + +/* + * Load the SME state + * + * x0 - pointer to buffer for state + */ +SYM_FUNC_START(za_load_state) + _sme_rdsvl 1, 1 // x1 = VL/8 + sme_load_za 0, x1, 12 + ret +SYM_FUNC_END(za_load_state) + #endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index f8506a875eb2..dc38f3f2a28a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -121,6 +121,7 @@ struct fpsimd_last_state_struct { struct user_fpsimd_state *st; void *sve_state; + void *za_state; u64 *svcr; unsigned int sve_vl; unsigned int sme_vl; @@ -387,11 +388,15 @@ static void task_fpsimd_load(void) if (system_supports_sme()) { unsigned long sme_vl = task_get_sme_vl(current); + /* Ensure VL is set up for restoring data */ if (test_thread_flag(TIF_SME)) sme_set_vq(sve_vq_from_vl(sme_vl) - 1); write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); + if (thread_za_enabled(¤t->thread)) + za_load_state(current->thread.za_state); + if (thread_sm_enabled(¤t->thread)) { restore_sve_regs = true; restore_ffr = system_supports_fa64(); @@ -441,11 +446,10 @@ static void fpsimd_save(void) u64 *svcr = last->svcr; *svcr = read_sysreg_s(SYS_SVCR_EL0); - if (thread_za_enabled(¤t->thread)) { - /* ZA state managment is not implemented yet */ - force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); - return; - } + *svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (*svcr & SYS_SVCR_EL0_ZA_MASK) + za_save_state(last->za_state); /* If we are in streaming mode override regular SVE. */ if (*svcr & SYS_SVCR_EL0_SM_MASK) { @@ -1483,6 +1487,7 @@ static void fpsimd_bind_task_to_cpu(void) WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; + last->za_state = current->thread.za_state; last->sve_vl = task_get_sve_vl(current); last->sme_vl = task_get_sme_vl(current); last->svcr = ¤t->thread.svcr; @@ -1500,8 +1505,8 @@ static void fpsimd_bind_task_to_cpu(void) } void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl, unsigned int sme_vl, - u64 *svcr) + unsigned int sve_vl, void *za_state, + unsigned int sme_vl, u64 *svcr) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1512,6 +1517,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, last->st = st; last->svcr = svcr; last->sve_state = sve_state; + last->za_state = za_state; last->sve_vl = sve_vl; last->sme_vl = sme_vl; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 394e583bb73e..57d7ac3cfa0c 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -116,7 +116,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, vcpu->arch.sve_max_vl, - 0, NULL); + NULL, 0, &vcpu->arch.svcr); clear_thread_flag(TIF_FOREIGN_FPSTATE); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); -- cgit v1.2.3 From 8bd7f91c03d886f41d35f6108078d20be5a4a1bd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:24 +0100 Subject: arm64/sme: Implement traps and syscall handling for SME By default all SME operations in userspace will trap. When this happens we allocate storage space for the SME register state, set up the SVE registers and disable traps. 
We do not need to initialize ZA since the architecture guarantees that it will be zeroed when enabled and when we trap ZA is disabled. On syscall we exit streaming mode if we were previously in it and ensure that all but the lower 128 bits of the registers are zeroed while preserving the state of ZA. This follows the aarch64 PCS for SME, ZA state is preserved over a function call and streaming mode is exited. Since the traps for SME do not distinguish between streaming mode SVE and ZA usage if ZA is in use rather than reenabling traps we instead zero the parts of the SVE registers not shared with FPSIMD and leave SME enabled, this simplifies handling SME traps. If ZA is not in use then we reenable SME traps and fall through to normal handling of SVE. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-17-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/include/asm/exception.h | 1 + arch/arm64/include/asm/fpsimd.h | 39 +++++++++ arch/arm64/kernel/entry-common.c | 11 +++ arch/arm64/kernel/fpsimd.c | 167 +++++++++++++++++++++++++++++++++---- arch/arm64/kernel/process.c | 30 ++++++- arch/arm64/kernel/syscall.c | 29 ++++++- 7 files changed, 255 insertions(+), 23 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 43872e0cfd1e..0467837fd66b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -76,6 +76,7 @@ #define ESR_ELx_IL_SHIFT (25) #define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) /* ISS field definitions shared by different classes */ #define ESR_ELx_WNR_SHIFT (6) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 339477dca551..2add7f33b7c2 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -64,6 +64,7 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); void do_sve_acc(unsigned int esr, struct pt_regs *regs); +void do_sme_acc(unsigned int esr, struct pt_regs *regs); void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); void do_sysinstr(unsigned int esr, struct pt_regs *regs); void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 1a709c03bb6c..6c33bc832ed4 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -239,6 +239,8 @@ static inline bool sve_vq_available(unsigned int vq) return vq_available(ARM64_VEC_SVE, vq); } +size_t sve_state_size(struct task_struct const *task); + #else /* ! CONFIG_ARM64_SVE */ static inline void sve_alloc(struct task_struct *task) { } @@ -278,10 +280,25 @@ static inline void vec_update_vq_map(enum vec_type t) { } static inline int vec_verify_vq_map(enum vec_type t) { return 0; } static inline void sve_setup(void) { } +static inline size_t sve_state_size(struct task_struct const *task) +{ + return 0; +} + #endif /* ! 
CONFIG_ARM64_SVE */ #ifdef CONFIG_ARM64_SME +static inline void sme_user_disable(void) +{ + sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0); +} + +static inline void sme_user_enable(void) +{ + sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN); +} + static inline void sme_smstart_sm(void) { asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr")); @@ -309,16 +326,33 @@ static inline int sme_max_virtualisable_vl(void) return vec_max_virtualisable_vl(ARM64_VEC_SME); } +extern void sme_alloc(struct task_struct *task); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +/* + * Return how many bytes of memory are required to store the full SME + * specific state (currently just ZA) for task, given task's currently + * configured vector length. + */ +static inline size_t za_state_size(struct task_struct const *task) +{ + unsigned int vl = task_get_sme_vl(task); + + return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); +} + #else +static inline void sme_user_disable(void) { BUILD_BUG(); } +static inline void sme_user_enable(void) { BUILD_BUG(); } + static inline void sme_smstart_sm(void) { } static inline void sme_smstop_sm(void) { } static inline void sme_smstop(void) { } +static inline void sme_alloc(struct task_struct *task) { } static inline void sme_setup(void) { } static inline unsigned int sme_get_vl(void) { return 0; } static inline int sme_max_vl(void) { return 0; } @@ -326,6 +360,11 @@ static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline size_t za_state_size(struct task_struct const *task) +{ + return 0; +} + #endif /* ! CONFIG_ARM64_SME */ /* For use by EFI runtime services calls only */ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 878c65aa7206..29139e9a1517 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -537,6 +537,14 @@ static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } +static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_sme_acc(esr, regs); + exit_to_user_mode(regs); +} + static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); @@ -645,6 +653,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_SVE: el0_sve_acc(regs, esr); break; + case ESR_ELx_EC_SME: + el0_sme_acc(regs, esr); + break; case ESR_ELx_EC_FP_EXC64: el0_fpsimd_exc(regs, esr); break; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index dc38f3f2a28a..00a0cbd01ce5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -209,6 +209,12 @@ static void set_sme_default_vl(int val) set_default_vl(ARM64_VEC_SME, val); } +static void sme_free(struct task_struct *); + +#else + +static inline void sme_free(struct task_struct *t) { } + #endif DEFINE_PER_CPU(bool, fpsimd_context_busy); @@ -676,7 +682,7 @@ static void sve_to_fpsimd(struct task_struct *task) * Return how many bytes of memory are required to store the full SVE * state for task, given task's currently configured vector length. 
*/ -static size_t sve_state_size(struct task_struct const *task) +size_t sve_state_size(struct task_struct const *task) { unsigned int vl = 0; @@ -818,18 +824,22 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, thread_sm_enabled(&task->thread)) sve_to_fpsimd(task); - if (system_supports_sme() && type == ARM64_VEC_SME) + if (system_supports_sme() && type == ARM64_VEC_SME) { task->thread.svcr &= ~(SYS_SVCR_EL0_SM_MASK | SYS_SVCR_EL0_ZA_MASK); + clear_thread_flag(TIF_SME); + } if (task == current) put_cpu_fpsimd_context(); /* - * Force reallocation of task SVE state to the correct size - * on next use: + * Force reallocation of task SVE and SME state to the correct + * size on next use: */ sve_free(task); + if (system_supports_sme() && type == ARM64_VEC_SME) + sme_free(task); task_set_vl(task, type, vl); @@ -1164,12 +1174,43 @@ void __init sve_setup(void) void fpsimd_release_task(struct task_struct *dead_task) { __sve_free(dead_task); + sme_free(dead_task); } #endif /* CONFIG_ARM64_SVE */ #ifdef CONFIG_ARM64_SME +/* This will move to uapi/asm/sigcontext.h when signals are implemented */ +#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) + +/* + * Ensure that task->thread.za_state is allocated and sufficiently large. + * + * This function should be used only in preparation for replacing + * task->thread.za_state with new data. The memory is always zeroed + * here to prevent stale data from showing through: this is done in + * the interest of testability and predictability, the architecture + * guarantees that when ZA is enabled it will be zeroed. + */ +void sme_alloc(struct task_struct *task) +{ + if (task->thread.za_state) { + memset(task->thread.za_state, 0, za_state_size(task)); + return; + } + + /* This could potentially be up to 64K. */ + task->thread.za_state = + kzalloc(za_state_size(task), GFP_KERNEL); +} + +static void sme_free(struct task_struct *task) +{ + kfree(task->thread.za_state); + task->thread.za_state = NULL; +} + void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) { /* Set priority for all PEs to architecturally defined minimum */ @@ -1279,6 +1320,29 @@ void __init sme_setup(void) #endif /* CONFIG_ARM64_SME */ +static void sve_init_regs(void) +{ + /* + * Convert the FPSIMD state to SVE, zeroing all the state that + * is not shared with FPSIMD. If (as is likely) the current + * state is live in the registers then do this there and + * update our metadata for the current task including + * disabling the trap, otherwise update our in-memory copy. + * We are guaranteed to not be in streaming mode, we can only + * take a SVE trap when not in streaming mode and we can't be + * in streaming mode when taking a SME trap. + */ + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { + unsigned long vq_minus_one = + sve_vq_from_vl(task_get_sve_vl(current)) - 1; + sve_set_vq(vq_minus_one); + sve_flush_live(true, vq_minus_one); + fpsimd_bind_task_to_cpu(); + } else { + fpsimd_to_sve(current); + } +} + /* * Trapped SVE access * @@ -1310,22 +1374,77 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) WARN_ON(1); /* SVE access shouldn't have trapped */ /* - * Convert the FPSIMD state to SVE, zeroing all the state that - * is not shared with FPSIMD. If (as is likely) the current - * state is live in the registers then do this there and - * update our metadata for the current task including - * disabling the trap, otherwise update our in-memory copy. 
+ * Even if the task can have used streaming mode we can only + * generate SVE access traps in normal SVE mode and + * transitioning out of streaming mode may discard any + * streaming mode state. Always clear the high bits to avoid + * any potential errors tracking what is properly initialised. + */ + sve_init_regs(); + + put_cpu_fpsimd_context(); +} + +/* + * Trapped SME access + * + * Storage is allocated for the full SVE and SME state, the current + * FPSIMD register contents are migrated to SVE if SVE is not already + * active, and the access trap is disabled. + * + * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state() + * would have disabled the SME access trap for userspace during + * ret_to_user, making an SVE access trap impossible in that case. + */ +void do_sme_acc(unsigned int esr, struct pt_regs *regs) +{ + /* Even if we chose not to use SME, the hardware could still trap: */ + if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) { + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + return; + } + + /* + * If this not a trap due to SME being disabled then something + * is being used in the wrong mode, report as SIGILL. */ + if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) { + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + return; + } + + sve_alloc(current); + sme_alloc(current); + if (!current->thread.sve_state || !current->thread.za_state) { + force_sig(SIGKILL); + return; + } + + get_cpu_fpsimd_context(); + + /* With TIF_SME userspace shouldn't generate any traps */ + if (test_and_set_thread_flag(TIF_SME)) + WARN_ON(1); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { unsigned long vq_minus_one = - sve_vq_from_vl(task_get_sve_vl(current)) - 1; - sve_set_vq(vq_minus_one); - sve_flush_live(true, vq_minus_one); + sve_vq_from_vl(task_get_sme_vl(current)) - 1; + sme_set_vq(vq_minus_one); + fpsimd_bind_task_to_cpu(); - } else { - fpsimd_to_sve(current); } + /* + * If SVE was not already active initialise the SVE registers, + * any non-shared state between the streaming and regular SVE + * registers is architecturally guaranteed to be zeroed when + * we enter streaming mode. We do not need to initialize ZA + * since ZA must be disabled at this point and enabling ZA is + * architecturally defined to zero ZA. + */ + if (system_supports_sve() && !test_thread_flag(TIF_SVE)) + sve_init_regs(); + put_cpu_fpsimd_context(); } @@ -1442,8 +1561,12 @@ void fpsimd_flush_thread(void) fpsimd_flush_thread_vl(ARM64_VEC_SVE); } - if (system_supports_sme()) + if (system_supports_sme()) { + clear_thread_flag(TIF_SME); + sme_free(current); fpsimd_flush_thread_vl(ARM64_VEC_SME); + current->thread.svcr = 0; + } put_cpu_fpsimd_context(); } @@ -1493,14 +1616,22 @@ static void fpsimd_bind_task_to_cpu(void) last->svcr = ¤t->thread.svcr; current->thread.fpsimd_cpu = smp_processor_id(); + /* + * Toggle SVE and SME trapping for userspace if needed, these + * are serialsied by ret_to_user(). 
+ */ + if (system_supports_sme()) { + if (test_thread_flag(TIF_SME)) + sme_user_enable(); + else + sme_user_disable(); + } + if (system_supports_sve()) { - /* Toggle SVE trapping for userspace if needed */ if (test_thread_flag(TIF_SVE)) sve_user_enable(); else sve_user_disable(); - - /* Serialised by exception return to user */ } } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 07f235b46cf5..99c293513817 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -300,17 +300,41 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) /* * Detach src's sve_state (if any) from dst so that it does not - * get erroneously used or freed prematurely. dst's sve_state + * get erroneously used or freed prematurely. dst's copies * will be allocated on demand later on if dst uses SVE. * For consistency, also clear TIF_SVE here: this could be done * later in copy_process(), but to avoid tripping up future - * maintainers it is best not to leave TIF_SVE and sve_state in + * maintainers it is best not to leave TIF flags and buffers in * an inconsistent state, even temporarily. */ dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); - dst->thread.svcr = 0; + /* + * In the unlikely event that we create a new thread with ZA + * enabled we should retain the ZA state so duplicate it here. + * This may be shortly freed if we exec() or if CLONE_SETTLS + * but it's simpler to do it here. To avoid confusing the rest + * of the code ensure that we have a sve_state allocated + * whenever za_state is allocated. + */ + if (thread_za_enabled(&src->thread)) { + dst->thread.sve_state = kzalloc(sve_state_size(src), + GFP_KERNEL); + if (!dst->thread.za_state) + return -ENOMEM; + dst->thread.za_state = kmemdup(src->thread.za_state, + za_state_size(src), + GFP_KERNEL); + if (!dst->thread.za_state) { + kfree(dst->thread.sve_state); + dst->thread.sve_state = NULL; + return -ENOMEM; + } + } else { + dst->thread.za_state = NULL; + clear_tsk_thread_flag(dst, TIF_SME); + } /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index c938603b3ba0..92c69e5ac269 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -158,11 +158,36 @@ trace_exit: syscall_trace_exit(regs); } -static inline void sve_user_discard(void) +/* + * As per the ABI exit SME streaming mode and clear the SVE state not + * shared with FPSIMD on syscall entry. + */ +static inline void fp_user_discard(void) { + /* + * If SME is active then exit streaming mode. If ZA is active + * then flush the SVE registers but leave userspace access to + * both SVE and SME enabled, otherwise disable SME for the + * task and fall through to disabling SVE too. This means + * that after a syscall we never have any streaming mode + * register state to track, if this changes the KVM code will + * need updating. + */ + if (system_supports_sme() && test_thread_flag(TIF_SME)) { + u64 svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (svcr & SYS_SVCR_EL0_SM_MASK) + sme_smstop_sm(); + } + if (!system_supports_sve()) return; + /* + * If SME is not active then disable SVE, the registers will + * be cleared when userspace next attempts to access them and + * we do not need to track the SVE register state until then. 
+ */ clear_thread_flag(TIF_SVE); /* @@ -177,7 +202,7 @@ static inline void sve_user_discard(void) void do_el0_svc(struct pt_regs *regs) { - sve_user_discard(); + fp_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } -- cgit v1.2.3 From 85ed24dad2904f7c141911d91b7807ab02694b5e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:26 +0100 Subject: arm64/sme: Implement streaming SVE signal handling When in streaming mode we have the same set of SVE registers as we do in regular SVE mode with the exception of FFR and the use of the SME vector length. Provide signal handling for these registers by taking one of the reserved words in the SVE signal context as a flags field and defining a flag which is set for streaming mode. When the flag is set the vector length is set to the streaming mode vector length and we save and restore streaming mode data. We support entering or leaving streaming mode based on the value of the flag but do not support changing the vector length, this is not currently supported SVE signal handling. We could instead allocate a separate record in the signal frame for the streaming mode SVE context but this inflates the size of the maximal signal frame required and adds complication when validating signal frames from userspace, especially given the current structure of the code. Any implementation of support for streaming mode vectors in signals will have some potential for causing issues for applications that attempt to handle SVE vectors in signals, use streaming mode but do not understand streaming mode in their signal handling code, it is hard to identify a case that is clearly better than any other - they all have cases where they could cause unexpected register corruption or faults. 
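To illustrate the new ABI from the userspace side (a hedged sketch, not part of the patch; the helper name is invented for the example): a signal handler can tell a streaming-mode SVE record apart via the flags word this patch adds to sve_context:

#include <asm/sigcontext.h>

/* Nonzero if the record describes streaming-mode state, in which case
 * sve->vl is the streaming (SME) vector length rather than the SVE one. */
static int sve_record_is_streaming(const struct sve_context *sve)
{
	return sve->flags & SVE_SIG_FLAG_SM;
}
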
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-19-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 8 ++++++ arch/arm64/include/uapi/asm/sigcontext.h | 16 +++++++++--- arch/arm64/kernel/signal.c | 42 ++++++++++++++++++++++++-------- 3 files changed, 53 insertions(+), 13 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6a3a6c3dec90..1d2ca4870b84 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -190,6 +190,14 @@ static inline unsigned int thread_get_sme_vl(struct thread_struct *thread) return thread_get_vl(thread, ARM64_VEC_SME); } +static inline unsigned int thread_get_cur_vl(struct thread_struct *thread) +{ + if (system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK)) + return thread_get_sme_vl(thread); + else + return thread_get_sve_vl(thread); +} + unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); void task_set_vl(struct task_struct *task, enum vec_type type, unsigned long vl); diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 0c796c795dbe..57e9f8c3ee9e 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -134,9 +134,12 @@ struct extra_context { struct sve_context { struct _aarch64_ctx head; __u16 vl; - __u16 __reserved[3]; + __u16 flags; + __u16 __reserved[2]; }; +#define SVE_SIG_FLAG_SM 0x1 /* Context describes streaming mode */ + #endif /* !__ASSEMBLY__ */ #include @@ -186,9 +189,16 @@ struct sve_context { * sve_context.vl must equal the thread's current vector length when * doing a sigreturn. * + * On systems with support for SME the SVE register state may reflect either + * streaming or non-streaming mode. In streaming mode the streaming mode + * vector length will be used and the flag SVE_SIG_FLAG_SM will be set in + * the flags field. It is permitted to enter or leave streaming mode in + * a signal return, applications should take care to ensure that any difference + * in vector length between the two modes is handled, including any resizing + * and movement of context blocks. * - * Note: for all these macros, the "vq" argument denotes the SVE - * vector length in quadwords (i.e., units of 128 bits). + * Note: for all these macros, the "vq" argument denotes the vector length + * in quadwords (i.e., units of 128 bits). * * The correct way to obtain vq is to use sve_vq_from_vl(vl). The * result is valid if and only if sve_vl_valid(vl) is true. 
This is diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 42efa464e46e..0ddce6afd2a3 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -226,11 +226,17 @@ static int preserve_sve_context(struct sve_context __user *ctx) { int err = 0; u16 reserved[ARRAY_SIZE(ctx->__reserved)]; + u16 flags = 0; unsigned int vl = task_get_sve_vl(current); unsigned int vq = 0; - if (test_thread_flag(TIF_SVE)) + if (thread_sm_enabled(¤t->thread)) { + vl = task_get_sme_vl(current); vq = sve_vq_from_vl(vl); + flags |= SVE_SIG_FLAG_SM; + } else if (test_thread_flag(TIF_SVE)) { + vq = sve_vq_from_vl(vl); + } memset(reserved, 0, sizeof(reserved)); @@ -238,6 +244,7 @@ static int preserve_sve_context(struct sve_context __user *ctx) __put_user_error(round_up(SVE_SIG_CONTEXT_SIZE(vq), 16), &ctx->head.size, err); __put_user_error(vl, &ctx->vl, err); + __put_user_error(flags, &ctx->flags, err); BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); @@ -258,18 +265,28 @@ static int preserve_sve_context(struct sve_context __user *ctx) static int restore_sve_fpsimd_context(struct user_ctxs *user) { int err; - unsigned int vq; + unsigned int vl, vq; struct user_fpsimd_state fpsimd; struct sve_context sve; if (__copy_from_user(&sve, user->sve, sizeof(sve))) return -EFAULT; - if (sve.vl != task_get_sve_vl(current)) + if (sve.flags & SVE_SIG_FLAG_SM) { + if (!system_supports_sme()) + return -EINVAL; + + vl = task_get_sme_vl(current); + } else { + vl = task_get_sve_vl(current); + } + + if (sve.vl != vl) return -EINVAL; if (sve.head.size <= sizeof(*user->sve)) { clear_thread_flag(TIF_SVE); + current->thread.svcr &= ~SYS_SVCR_EL0_SM_MASK; goto fpsimd_only; } @@ -301,7 +318,10 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (err) return -EFAULT; - set_thread_flag(TIF_SVE); + if (sve.flags & SVE_SIG_FLAG_SM) + current->thread.svcr |= SYS_SVCR_EL0_SM_MASK; + else + set_thread_flag(TIF_SVE); fpsimd_only: /* copy the FP and status/control registers */ @@ -393,7 +413,7 @@ static int parse_user_sigframe(struct user_ctxs *user, break; case SVE_MAGIC: - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) goto invalid; if (user->sve) @@ -594,11 +614,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, if (system_supports_sve()) { unsigned int vq = 0; - if (add_all || test_thread_flag(TIF_SVE)) { - int vl = sve_max_vl(); + if (add_all || test_thread_flag(TIF_SVE) || + thread_sm_enabled(¤t->thread)) { + int vl = max(sve_max_vl(), sme_max_vl()); if (!add_all) - vl = task_get_sve_vl(current); + vl = thread_get_cur_vl(¤t->thread); vq = sve_vq_from_vl(vl); } @@ -649,8 +670,9 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); } - /* Scalable Vector Extension state, if present */ - if (system_supports_sve() && err == 0 && user->sve_offset) { + /* Scalable Vector Extension state (including streaming), if present */ + if ((system_supports_sve() || system_supports_sme()) && + err == 0 && user->sve_offset) { struct sve_context __user *sve_ctx = apply_user_offset(user, user->sve_offset); err |= preserve_sve_context(sve_ctx); -- cgit v1.2.3 From 39782210eb7e87634d96cacb6ece370bc59d74ba Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:27 +0100 Subject: arm64/sme: Implement ZA signal handling Implement support for ZA in signal handling in a very similar way to 
how we implement support for SVE registers, using a signal context structure with optional register state after it. Where present this register state stores the ZA matrix as a series of horizontal vectors numbered from 0 to VL/8 in the endinanness independent format used for vectors. As with SVE we do not allow changes in the vector length during signal return but we do allow ZA to be enabled or disabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-20-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/sigcontext.h | 41 +++++++++ arch/arm64/kernel/fpsimd.c | 3 - arch/arm64/kernel/signal.c | 139 +++++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 57e9f8c3ee9e..4aaf31e3bf16 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -140,6 +140,14 @@ struct sve_context { #define SVE_SIG_FLAG_SM 0x1 /* Context describes streaming mode */ +#define ZA_MAGIC 0x54366345 + +struct za_context { + struct _aarch64_ctx head; + __u16 vl; + __u16 __reserved[3]; +}; + #endif /* !__ASSEMBLY__ */ #include @@ -259,4 +267,37 @@ struct sve_context { #define SVE_SIG_CONTEXT_SIZE(vq) \ (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq)) +/* + * If the ZA register is enabled for the thread at signal delivery then, + * za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl)) + * and the register data may be accessed using the ZA_SIG_*() macros. + * + * If za_context.head.size < ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl)) + * then ZA was not enabled and no register data was included in which case + * ZA register was not enabled for the thread and no register data + * the ZA_SIG_*() macros should not be used except for this check. + * + * The same convention applies when returning from a signal: a caller + * will need to remove or resize the za_context block if it wants to + * enable the ZA register when it was previously non-live or vice-versa. + * This may require the caller to allocate fresh memory and/or move other + * context blocks in the signal frame. + * + * Changing the vector length during signal return is not permitted: + * za_context.vl must equal the thread's current SME vector length when + * doing a sigreturn. + */ + +#define ZA_SIG_REGS_OFFSET \ + ((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) + +#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) + +#define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \ + (SVE_SIG_ZREG_SIZE(vq) * n)) + +#define ZA_SIG_CONTEXT_SIZE(vq) \ + (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq)) + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 00a0cbd01ce5..80f7ca12f855 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1181,9 +1181,6 @@ void fpsimd_release_task(struct task_struct *dead_task) #ifdef CONFIG_ARM64_SME -/* This will move to uapi/asm/sigcontext.h when signals are implemented */ -#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) - /* * Ensure that task->thread.za_state is allocated and sufficiently large. 
* diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0ddce6afd2a3..2295948d97fd 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -56,6 +56,7 @@ struct rt_sigframe_user_layout { unsigned long fpsimd_offset; unsigned long esr_offset; unsigned long sve_offset; + unsigned long za_offset; unsigned long extra_offset; unsigned long end_offset; }; @@ -218,6 +219,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) struct user_ctxs { struct fpsimd_context __user *fpsimd; struct sve_context __user *sve; + struct za_context __user *za; }; #ifdef CONFIG_ARM64_SVE @@ -346,6 +348,101 @@ extern int restore_sve_fpsimd_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int preserve_za_context(struct za_context __user *ctx) +{ + int err = 0; + u16 reserved[ARRAY_SIZE(ctx->__reserved)]; + unsigned int vl = task_get_sme_vl(current); + unsigned int vq; + + if (thread_za_enabled(¤t->thread)) + vq = sve_vq_from_vl(vl); + else + vq = 0; + + memset(reserved, 0, sizeof(reserved)); + + __put_user_error(ZA_MAGIC, &ctx->head.magic, err); + __put_user_error(round_up(ZA_SIG_CONTEXT_SIZE(vq), 16), + &ctx->head.size, err); + __put_user_error(vl, &ctx->vl, err); + BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); + err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); + + if (vq) { + /* + * This assumes that the ZA state has already been saved to + * the task struct by calling the function + * fpsimd_signal_preserve_current_state(). + */ + err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET, + current->thread.za_state, + ZA_SIG_REGS_SIZE(vq)); + } + + return err ? -EFAULT : 0; +} + +static int restore_za_context(struct user_ctxs __user *user) +{ + int err; + unsigned int vq; + struct za_context za; + + if (__copy_from_user(&za, user->za, sizeof(za))) + return -EFAULT; + + if (za.vl != task_get_sme_vl(current)) + return -EINVAL; + + if (za.head.size <= sizeof(*user->za)) { + current->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + return 0; + } + + vq = sve_vq_from_vl(za.vl); + + if (za.head.size < ZA_SIG_CONTEXT_SIZE(vq)) + return -EINVAL; + + /* + * Careful: we are about __copy_from_user() directly into + * thread.za_state with preemption enabled, so protection is + * needed to prevent a racing context switch from writing stale + * registers back over the new data. + */ + + fpsimd_flush_task_state(current); + /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ + + sme_alloc(current); + if (!current->thread.za_state) { + current->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + clear_thread_flag(TIF_SME); + return -ENOMEM; + } + + err = __copy_from_user(current->thread.za_state, + (char __user const *)user->za + + ZA_SIG_REGS_OFFSET, + ZA_SIG_REGS_SIZE(vq)); + if (err) + return -EFAULT; + + set_thread_flag(TIF_SME); + current->thread.svcr |= SYS_SVCR_EL0_ZA_MASK; + + return 0; +} +#else /* ! CONFIG_ARM64_SME */ + +/* Turn any non-optimised out attempts to use these into a link error: */ +extern int preserve_za_context(void __user *ctx); +extern int restore_za_context(struct user_ctxs *user); + +#endif /* ! 
CONFIG_ARM64_SME */ static int parse_user_sigframe(struct user_ctxs *user, struct rt_sigframe __user *sf) @@ -360,6 +457,7 @@ static int parse_user_sigframe(struct user_ctxs *user, user->fpsimd = NULL; user->sve = NULL; + user->za = NULL; if (!IS_ALIGNED((unsigned long)base, 16)) goto invalid; @@ -425,6 +523,19 @@ static int parse_user_sigframe(struct user_ctxs *user, user->sve = (struct sve_context __user *)head; break; + case ZA_MAGIC: + if (!system_supports_sme()) + goto invalid; + + if (user->za) + goto invalid; + + if (size < sizeof(*user->za)) + goto invalid; + + user->za = (struct za_context __user *)head; + break; + case EXTRA_MAGIC: if (have_extra_context) goto invalid; @@ -548,6 +659,9 @@ static int restore_sigframe(struct pt_regs *regs, } } + if (err == 0 && system_supports_sme() && user.za) + err = restore_za_context(&user); + return err; } @@ -630,6 +744,24 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } + if (system_supports_sme()) { + unsigned int vl; + unsigned int vq = 0; + + if (add_all) + vl = sme_max_vl(); + else + vl = task_get_sme_vl(current); + + if (thread_za_enabled(¤t->thread)) + vq = sve_vq_from_vl(vl); + + err = sigframe_alloc(user, &user->za_offset, + ZA_SIG_CONTEXT_SIZE(vq)); + if (err) + return err; + } + return sigframe_alloc_end(user); } @@ -678,6 +810,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= preserve_sve_context(sve_ctx); } + /* ZA state if present */ + if (system_supports_sme() && err == 0 && user->za_offset) { + struct za_context __user *za_ctx = + apply_user_offset(user, user->za_offset); + err |= preserve_za_context(za_ctx); + } + if (err == 0 && user->extra_offset) { char __user *sfp = (char __user *)user->sigframe; char __user *userp = -- cgit v1.2.3 From e12310a0d30f260b26297bc8d7c95769489af038 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:28 +0100 Subject: arm64/sme: Implement ptrace support for streaming mode SVE registers The streaming mode SVE registers are represented using the same data structures as for SVE but since the vector lengths supported and in use may not be the same as SVE we represent them with a new type NT_ARM_SSVE. Unfortunately we only have a single 16 bit reserved field available in the header so there is no space to fit the current and maximum vector length for both standard and streaming SVE mode without redefining the structure in a way the creates a complicatd and fragile ABI. Since FFR is not present in streaming mode it is read and written as zero. Setting NT_ARM_SSVE registers will put the task into streaming mode, similarly setting NT_ARM_SVE registers will exit it. Reads that do not correspond to the current mode of the task will return the header with no register data. For compatibility reasons on write setting no flag for the register type will be interpreted as setting SVE registers, though users can provide no register data as an alternative mechanism for doing so. 
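A minimal tracer-side sketch of the new regset follows, assuming the tracee is already ptrace-stopped; NT_ARM_SSVE is defined locally as a fallback for older installed headers, using the value this patch adds to elf.h, and dump_ssve_header() is an illustrative helper rather than kernel or selftest code.

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <asm/ptrace.h>

#ifndef NT_ARM_SSVE
#define NT_ARM_SSVE 0x40b	/* value added to elf.h by this patch */
#endif

static int dump_ssve_header(pid_t pid)
{
	struct user_sve_header header;
	struct iovec iov = {
		.iov_base = &header,
		.iov_len = sizeof(header),
	};

	if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SSVE, &iov))
		return -1;

	printf("SSVE: vl=%u max_vl=%u size=%u\n",
	       (unsigned int)header.vl, (unsigned int)header.max_vl,
	       (unsigned int)header.size);

	if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE)
		printf("streaming mode SVE register data follows the header\n");
	else
		printf("no streaming mode SVE register data\n");

	return 0;
}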
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-21-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/include/uapi/asm/ptrace.h | 13 ++- arch/arm64/kernel/fpsimd.c | 31 +++-- arch/arm64/kernel/ptrace.c | 214 +++++++++++++++++++++++++++-------- include/uapi/linux/elf.h | 1 + 5 files changed, 201 insertions(+), 59 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 6c33bc832ed4..5afcd0709aae 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -144,6 +144,7 @@ struct vl_info { extern void sve_alloc(struct task_struct *task); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); +extern void fpsimd_force_sync_to_sve(struct task_struct *task); extern void sve_sync_to_fpsimd(struct task_struct *task); extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 758ae984ff97..522b925a78c1 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -109,7 +109,7 @@ struct user_hwdebug_state { } dbg_regs[16]; }; -/* SVE/FP/SIMD state (NT_ARM_SVE) */ +/* SVE/FP/SIMD state (NT_ARM_SVE & NT_ARM_SSVE) */ struct user_sve_header { __u32 size; /* total meaningful regset content in bytes */ @@ -220,6 +220,7 @@ struct user_sve_header { (SVE_PT_SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - \ SVE_PT_SVE_PREGS_OFFSET(vq)) +/* For streaming mode SVE (SSVE) FFR must be read and written as zero */ #define SVE_PT_SVE_FFR_OFFSET(vq) \ (SVE_PT_REGS_OFFSET + __SVE_FFR_OFFSET(vq)) @@ -240,10 +241,12 @@ struct user_sve_header { - SVE_PT_SVE_OFFSET + (__SVE_VQ_BYTES - 1)) \ / __SVE_VQ_BYTES * __SVE_VQ_BYTES) -#define SVE_PT_SIZE(vq, flags) \ - (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ - SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ - : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags)) +#define SVE_PT_SIZE(vq, flags) \ + (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ + SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ + : ((((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD ? 
\ + SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags) \ + : SVE_PT_REGS_OFFSET))) /* pointer authentication masks (NT_ARM_PAC_MASK) */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 80f7ca12f855..94f06e9d37cf 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -643,7 +643,7 @@ static void fpsimd_to_sve(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); __fpsimd_to_sve(sst, fst, vq); } @@ -660,7 +660,7 @@ static void fpsimd_to_sve(struct task_struct *task) */ static void sve_to_fpsimd(struct task_struct *task) { - unsigned int vq; + unsigned int vq, vl; void const *sst = task->thread.sve_state; struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; unsigned int i; @@ -669,7 +669,8 @@ static void sve_to_fpsimd(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vl = thread_get_cur_vl(&task->thread); + vq = sve_vq_from_vl(vl); for (i = 0; i < SVE_NUM_ZREGS; ++i) { p = (__uint128_t const *)ZREG(sst, vq, i); fst->vregs[i] = arm64_le128_to_cpu(*p); @@ -717,6 +718,19 @@ void sve_alloc(struct task_struct *task) } +/* + * Force the FPSIMD state shared with SVE to be updated in the SVE state + * even if the SVE state is the current active state. + * + * This should only be called by ptrace. task must be non-runnable. + * task->thread.sve_state must point to at least sve_state_size(task) + * bytes of allocated kernel memory. + */ +void fpsimd_force_sync_to_sve(struct task_struct *task) +{ + fpsimd_to_sve(task); +} + /* * Ensure that task->thread.sve_state is up to date with respect to * the user task, irrespective of when SVE is in use or not. @@ -727,7 +741,8 @@ void sve_alloc(struct task_struct *task) */ void fpsimd_sync_to_sve(struct task_struct *task) { - if (!test_tsk_thread_flag(task, TIF_SVE)) + if (!test_tsk_thread_flag(task, TIF_SVE) && + !thread_sm_enabled(&task->thread)) fpsimd_to_sve(task); } @@ -741,7 +756,8 @@ void fpsimd_sync_to_sve(struct task_struct *task) */ void sve_sync_to_fpsimd(struct task_struct *task) { - if (test_tsk_thread_flag(task, TIF_SVE)) + if (test_tsk_thread_flag(task, TIF_SVE) || + thread_sm_enabled(&task->thread)) sve_to_fpsimd(task); } @@ -766,7 +782,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) if (!test_tsk_thread_flag(task, TIF_SVE)) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); __fpsimd_to_sve(sst, fst, vq); @@ -810,8 +826,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, /* * To ensure the FPSIMD bits of the SVE vector registers are preserved, * write any live register state back to task_struct, and convert to a - * regular FPSIMD thread. Since the vector length can only be changed - * with a syscall we can't be in streaming mode while reconfiguring. + * regular FPSIMD thread. 
*/ if (task == current) { get_cpu_fpsimd_context(); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 230a47b9189e..60185c27b394 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -713,21 +713,51 @@ static int system_call_set(struct task_struct *target, #ifdef CONFIG_ARM64_SVE static void sve_init_header_from_task(struct user_sve_header *header, - struct task_struct *target) + struct task_struct *target, + enum vec_type type) { unsigned int vq; + bool active; + bool fpsimd_only; + enum vec_type task_type; memset(header, 0, sizeof(*header)); - header->flags = test_tsk_thread_flag(target, TIF_SVE) ? - SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD; - if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) - header->flags |= SVE_PT_VL_INHERIT; + /* Check if the requested registers are active for the task */ + if (thread_sm_enabled(&target->thread)) + task_type = ARM64_VEC_SME; + else + task_type = ARM64_VEC_SVE; + active = (task_type == type); + + switch (type) { + case ARM64_VEC_SVE: + if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; + fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); + break; + case ARM64_VEC_SME: + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; + fpsimd_only = false; + break; + default: + WARN_ON_ONCE(1); + return; + } - header->vl = task_get_sve_vl(target); + if (active) { + if (fpsimd_only) { + header->flags |= SVE_PT_REGS_FPSIMD; + } else { + header->flags |= SVE_PT_REGS_SVE; + } + } + + header->vl = task_get_vl(target, type); vq = sve_vq_from_vl(header->vl); - header->max_vl = sve_max_vl(); + header->max_vl = vec_max_vl(type); header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); @@ -738,19 +768,17 @@ static unsigned int sve_size_from_header(struct user_sve_header const *header) return ALIGN(header->size, SVE_VQ_BYTES); } -static int sve_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) +static int sve_get_common(struct task_struct *target, + const struct user_regset *regset, + struct membuf to, + enum vec_type type) { struct user_sve_header header; unsigned int vq; unsigned long start, end; - if (!system_supports_sve()) - return -EINVAL; - /* Header */ - sve_init_header_from_task(&header, target); + sve_init_header_from_task(&header, target, type); vq = sve_vq_from_vl(header.vl); membuf_write(&to, &header, sizeof(header)); @@ -758,49 +786,61 @@ static int sve_get(struct task_struct *target, if (target == current) fpsimd_preserve_current_state(); - /* Registers: FPSIMD-only case */ - BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); - if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) + BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); + + switch ((header.flags & SVE_PT_REGS_MASK)) { + case SVE_PT_REGS_FPSIMD: return __fpr_get(target, regset, to); - /* Otherwise: full SVE case */ + case SVE_PT_REGS_SVE: + start = SVE_PT_SVE_OFFSET; + end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); + membuf_write(&to, target->thread.sve_state, end - start); - BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); - start = SVE_PT_SVE_OFFSET; - end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); - membuf_write(&to, target->thread.sve_state, end - start); + start = end; + end = SVE_PT_SVE_FPSR_OFFSET(vq); + membuf_zero(&to, end - start); - start = end; - end = SVE_PT_SVE_FPSR_OFFSET(vq); - membuf_zero(&to, end - start); + 
/* + * Copy fpsr, and fpcr which must follow contiguously in + * struct fpsimd_state: + */ + start = end; + end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; + membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, + end - start); - /* - * Copy fpsr, and fpcr which must follow contiguously in - * struct fpsimd_state: - */ - start = end; - end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; - membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, end - start); + start = end; + end = sve_size_from_header(&header); + return membuf_zero(&to, end - start); - start = end; - end = sve_size_from_header(&header); - return membuf_zero(&to, end - start); + default: + return 0; + } } -static int sve_set(struct task_struct *target, +static int sve_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + struct membuf to) +{ + if (!system_supports_sve()) + return -EINVAL; + + return sve_get_common(target, regset, to, ARM64_VEC_SVE); +} + +static int sve_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + enum vec_type type) { int ret; struct user_sve_header header; unsigned int vq; unsigned long start, end; - if (!system_supports_sve()) - return -EINVAL; - /* Header */ if (count < sizeof(header)) return -EINVAL; @@ -813,13 +853,37 @@ static int sve_set(struct task_struct *target, * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by * vec_set_vector_length(), which will also validate them for us: */ - ret = vec_set_vector_length(target, ARM64_VEC_SVE, header.vl, + ret = vec_set_vector_length(target, type, header.vl, ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); if (ret) goto out; /* Actual VL set may be less than the user asked for: */ - vq = sve_vq_from_vl(task_get_sve_vl(target)); + vq = sve_vq_from_vl(task_get_vl(target, type)); + + /* Enter/exit streaming mode */ + if (system_supports_sme()) { + u64 old_svcr = target->thread.svcr; + + switch (type) { + case ARM64_VEC_SVE: + target->thread.svcr &= ~SYS_SVCR_EL0_SM_MASK; + break; + case ARM64_VEC_SME: + target->thread.svcr |= SYS_SVCR_EL0_SM_MASK; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + /* + * If we switched then invalidate any existing SVE + * state and ensure there's storage. + */ + if (target->thread.svcr != old_svcr) + sve_alloc(target); + } /* Registers: FPSIMD-only case */ @@ -828,10 +892,15 @@ static int sve_set(struct task_struct *target, ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, SVE_PT_FPSIMD_OFFSET); clear_tsk_thread_flag(target, TIF_SVE); + if (type == ARM64_VEC_SME) + fpsimd_force_sync_to_sve(target); goto out; } - /* Otherwise: full SVE case */ + /* + * Otherwise: no registers or full SVE case. For backwards + * compatibility reasons we treat empty flags as SVE registers. + */ /* * If setting a different VL from the requested VL and there is @@ -852,8 +921,9 @@ static int sve_set(struct task_struct *target, /* * Ensure target->thread.sve_state is up to date with target's - * FPSIMD regs, so that a short copyin leaves trailing registers - * unmodified. + * FPSIMD regs, so that a short copyin leaves trailing + * registers unmodified. Always enable SVE even if going into + * streaming mode. 
*/ fpsimd_sync_to_sve(target); set_tsk_thread_flag(target, TIF_SVE); @@ -889,8 +959,46 @@ out: return ret; } +static int sve_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (!system_supports_sve()) + return -EINVAL; + + return sve_set_common(target, regset, pos, count, kbuf, ubuf, + ARM64_VEC_SVE); +} + #endif /* CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int ssve_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!system_supports_sme()) + return -EINVAL; + + return sve_get_common(target, regset, to, ARM64_VEC_SME); +} + +static int ssve_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (!system_supports_sme()) + return -EINVAL; + + return sve_set_common(target, regset, pos, count, kbuf, ubuf, + ARM64_VEC_SME); +} + +#endif /* CONFIG_ARM64_SME */ + #ifdef CONFIG_ARM64_PTR_AUTH static int pac_mask_get(struct task_struct *target, const struct user_regset *regset, @@ -1108,6 +1216,9 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SVE REGSET_SVE, #endif +#ifdef CONFIG_ARM64_SVE + REGSET_SSVE, +#endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, REGSET_PAC_ENABLED_KEYS, @@ -1188,6 +1299,17 @@ static const struct user_regset aarch64_regsets[] = { .set = sve_set, }, #endif +#ifdef CONFIG_ARM64_SME + [REGSET_SSVE] = { /* Streaming mode SVE */ + .core_note_type = NT_ARM_SSVE, + .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + SVE_VQ_BYTES), + .size = SVE_VQ_BYTES, + .align = SVE_VQ_BYTES, + .regset_get = ssve_get, + .set = ssve_set, + }, +#endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { .core_note_type = NT_ARM_PAC_MASK, diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 787c657bfae8..a8dc688e1826 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -431,6 +431,7 @@ typedef struct elf64_shdr { #define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ +#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From 776b4a1cf36411e96972455ca72906b722b80ea1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:29 +0100 Subject: arm64/sme: Add ptrace support for ZA The ZA array can be read and written with the NT_ARM_ZA. Similarly to our interface for the SVE vector registers the regset consists of a header with information on the current vector length followed by an optional register data payload, represented as for signals as a series of horizontal vectors from 0 to VL/8 in the endianness independent format used for vectors. On get if ZA is enabled then register data will be provided, otherwise it will be omitted. On set if register data is provided then ZA is enabled and initialized using the provided data, otherwise it is disabled. 
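A minimal tracer-side sketch of the NT_ARM_ZA regset follows, again assuming a ptrace-stopped tracee and uapi headers updated by this patch; NT_ARM_ZA is defined locally as a fallback using the value added to elf.h here, and dump_za() is an illustrative helper only.

#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <asm/ptrace.h>
#include <asm/sigcontext.h>

#ifndef NT_ARM_ZA
#define NT_ARM_ZA 0x40c		/* value added to elf.h by this patch */
#endif

static int dump_za(pid_t pid)
{
	struct user_za_header header;
	struct iovec iov = {
		.iov_base = &header,
		.iov_len = sizeof(header),
	};
	unsigned int vq;
	void *buf;

	if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
		return -1;

	vq = sve_vq_from_vl(header.vl);
	printf("SME vl=%u: ", (unsigned int)header.vl);

	if (header.size <= ZA_PT_ZA_OFFSET) {
		/* Header only: ZA is not enabled for the tracee */
		printf("ZA disabled\n");
		return 0;
	}

	/* ZA enabled: re-read the header together with the ZA matrix */
	buf = malloc(ZA_PT_SIZE(vq));
	if (!buf)
		return -1;

	iov.iov_base = buf;
	iov.iov_len = ZA_PT_SIZE(vq);
	if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov)) {
		free(buf);
		return -1;
	}

	printf("ZA enabled, %u bytes of register data\n",
	       (unsigned int)ZA_PT_ZA_SIZE(vq));
	free(buf);
	return 0;
}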
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-22-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/ptrace.h | 56 ++++++++++++++ arch/arm64/kernel/ptrace.c | 144 +++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 1 + 3 files changed, 201 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 522b925a78c1..7fa2f7036aa7 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -268,6 +268,62 @@ struct user_pac_generic_keys { __uint128_t apgakey; }; +/* ZA state (NT_ARM_ZA) */ + +struct user_za_header { + __u32 size; /* total meaningful regset content in bytes */ + __u32 max_size; /* maxmium possible size for this thread */ + __u16 vl; /* current vector length */ + __u16 max_vl; /* maximum possible vector length */ + __u16 flags; + __u16 __reserved; +}; + +/* + * Common ZA_PT_* flags: + * These must be kept in sync with prctl interface in + */ +#define ZA_PT_VL_INHERIT ((1 << 17) /* PR_SME_VL_INHERIT */ >> 16) +#define ZA_PT_VL_ONEXEC ((1 << 18) /* PR_SME_SET_VL_ONEXEC */ >> 16) + + +/* + * The remainder of the ZA state follows struct user_za_header. The + * total size of the ZA state (including header) depends on the + * metadata in the header: ZA_PT_SIZE(vq, flags) gives the total size + * of the state in bytes, including the header. + * + * Refer to for details of how to pass the correct + * "vq" argument to these macros. + */ + +/* Offset from the start of struct user_za_header to the register data */ +#define ZA_PT_ZA_OFFSET \ + ((sizeof(struct user_za_header) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) + +/* + * The payload starts at offset ZA_PT_ZA_OFFSET, and is of size + * ZA_PT_ZA_SIZE(vq, flags). + * + * The ZA array is stored as a sequence of horizontal vectors ZAV of SVL/8 + * bytes each, starting from vector 0. + * + * Additional data might be appended in the future. + * + * The ZA matrix is represented in memory in an endianness-invariant layout + * which differs from the layout used for the FPSIMD V-registers on big-endian + * systems: see sigcontext.h for more explanation. 
+ */ + +#define ZA_PT_ZAV_OFFSET(vq, n) \ + (ZA_PT_ZA_OFFSET + ((vq * __SVE_VQ_BYTES) * n)) + +#define ZA_PT_ZA_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) + +#define ZA_PT_SIZE(vq) \ + (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq)) + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 60185c27b394..47d8a7472171 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -997,6 +997,141 @@ static int ssve_set(struct task_struct *target, ARM64_VEC_SME); } +static int za_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_za_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sme()) + return -EINVAL; + + /* Header */ + memset(&header, 0, sizeof(header)); + + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header.flags |= ZA_PT_VL_INHERIT; + + header.vl = task_get_sme_vl(target); + vq = sve_vq_from_vl(header.vl); + header.max_vl = sme_max_vl(); + header.max_size = ZA_PT_SIZE(vq); + + /* If ZA is not active there is only the header */ + if (thread_za_enabled(&target->thread)) + header.size = ZA_PT_SIZE(vq); + else + header.size = ZA_PT_ZA_OFFSET; + + membuf_write(&to, &header, sizeof(header)); + + BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header)); + end = ZA_PT_ZA_OFFSET; + + if (target == current) + fpsimd_preserve_current_state(); + + /* Any register data to include? */ + if (thread_za_enabled(&target->thread)) { + start = end; + end = ZA_PT_SIZE(vq); + membuf_write(&to, target->thread.za_state, end - start); + } + + /* Zero any trailing padding */ + start = end; + end = ALIGN(header.size, SVE_VQ_BYTES); + return membuf_zero(&to, end - start); +} + +static int za_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_za_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sme()) + return -EINVAL; + + /* Header */ + if (count < sizeof(header)) + return -EINVAL; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, + 0, sizeof(header)); + if (ret) + goto out; + + /* + * All current ZA_PT_* flags are consumed by + * vec_set_vector_length(), which will also validate them for + * us: + */ + ret = vec_set_vector_length(target, ARM64_VEC_SME, header.vl, + ((unsigned long)header.flags) << 16); + if (ret) + goto out; + + /* Actual VL set may be less than the user asked for: */ + vq = sve_vq_from_vl(task_get_sme_vl(target)); + + /* Ensure there is some SVE storage for streaming mode */ + if (!target->thread.sve_state) { + sve_alloc(target); + if (!target->thread.sve_state) { + clear_thread_flag(TIF_SME); + ret = -ENOMEM; + goto out; + } + } + + /* Allocate/reinit ZA storage */ + sme_alloc(target); + if (!target->thread.za_state) { + ret = -ENOMEM; + clear_tsk_thread_flag(target, TIF_SME); + goto out; + } + + /* If there is no data then disable ZA */ + if (!count) { + target->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + goto out; + } + + /* + * If setting a different VL from the requested VL and there is + * register data, the data layout will be wrong: don't even + * try to set the registers in this case. 
+ */ + if (vq != sve_vq_from_vl(header.vl)) { + ret = -EIO; + goto out; + } + + BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header)); + start = ZA_PT_ZA_OFFSET; + end = ZA_PT_SIZE(vq); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.za_state, + start, end); + if (ret) + goto out; + + /* Mark ZA as active and let userspace use it */ + set_tsk_thread_flag(target, TIF_SME); + target->thread.svcr |= SYS_SVCR_EL0_ZA_MASK; + +out: + fpsimd_flush_task_state(target); + return ret; +} + #endif /* CONFIG_ARM64_SME */ #ifdef CONFIG_ARM64_PTR_AUTH @@ -1218,6 +1353,7 @@ enum aarch64_regset { #endif #ifdef CONFIG_ARM64_SVE REGSET_SSVE, + REGSET_ZA, #endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, @@ -1309,6 +1445,14 @@ static const struct user_regset aarch64_regsets[] = { .regset_get = ssve_get, .set = ssve_set, }, + [REGSET_ZA] = { /* SME ZA */ + .core_note_type = NT_ARM_ZA, + .n = DIV_ROUND_UP(ZA_PT_ZA_SIZE(SVE_VQ_MAX), SVE_VQ_BYTES), + .size = SVE_VQ_BYTES, + .align = SVE_VQ_BYTES, + .regset_get = za_get, + .set = za_set, + }, #endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index a8dc688e1826..97808f958903 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -432,6 +432,7 @@ typedef struct elf64_shdr { #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ #define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ +#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From 861262ab862702061ae3355b811a07b15d1b2fc0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:34 +0100 Subject: KVM: arm64: Handle SME host state when running guests While we don't currently support SME in guests we do currently support it for the host system so we need to take care of SME's impact, including the floating point register state, when running guests. Simiarly to SVE we need to manage the traps in CPACR_RL1, what is new is the handling of streaming mode and ZA. Normally we defer any handling of the floating point register state until the guest first uses it however if the system is in streaming mode FPSIMD and SVE operations may generate SME traps which we would need to distinguish from actual attempts by the guest to use SME. Rather than do this for the time being if we are in streaming mode when entering the guest we force the floating point state to be saved immediately and exit streaming mode, meaning that the guest won't generate SME traps for supported operations. We could handle ZA in the access trap similarly to the FPSIMD/SVE state without the disruption caused by streaming mode but for simplicity handle it the same way as streaming mode for now. This will be revisited when we support SME for guests (hopefully before SME hardware becomes available), for now it will only incur additional cost on systems with SME and even there only if streaming mode or ZA are enabled. 
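The vcpu-load half of this can be condensed into the following non-literal sketch of the logic added in the fpsimd.c hunks below; the function name is made up for illustration, the symbols are taken from the diff.

static void host_sme_vcpu_load(struct kvm_vcpu *vcpu)
{
	if (!system_supports_sme())
		return;

	/* Remember whether the host had EL0 SME access enabled */
	if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
		vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;

	/*
	 * If the host is in streaming mode or has ZA enabled, save and
	 * flush its FP state up front and leave streaming mode so that
	 * the guest's FPSIMD/SVE usage cannot generate SME traps.
	 */
	if (read_sysreg_s(SYS_SVCR_EL0) &
	    (SYS_SVCR_EL0_SM_MASK | SYS_SVCR_EL0_ZA_MASK)) {
		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
		fpsimd_save_and_flush_cpu_state();
	}
}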
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419112247.711548-27-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/fpsimd.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8a7c442d5b57..f8f0d30dd1a2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -454,6 +454,7 @@ struct kvm_vcpu_arch { #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ #define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) #define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */ +#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | \ diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 57d7ac3cfa0c..441edb9c398c 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -82,6 +82,26 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; + + /* + * We don't currently support SME guests but if we leave + * things in streaming mode then when the guest starts running + * FPSIMD or SVE code it may generate SME traps so as a + * special case if we are in streaming mode we force the host + * state to be saved now and exit streaming mode so that we + * don't have to handle any SME traps for valid guest + * operations. Do this for ZA as well for now for simplicity. + */ + if (system_supports_sme()) { + if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) + vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; + + if (read_sysreg_s(SYS_SVCR_EL0) & + (SYS_SVCR_EL0_SM_MASK | SYS_SVCR_EL0_ZA_MASK)) { + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; + fpsimd_save_and_flush_cpu_state(); + } + } } /* @@ -135,6 +155,22 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); + /* + * If we have VHE then the Hyp code will reset CPACR_EL1 to + * CPACR_EL1_DEFAULT and we need to reenable SME. + */ + if (has_vhe() && system_supports_sme()) { + /* Also restore EL0 state seen on entry */ + if (vcpu->arch.flags & KVM_ARM64_HOST_SME_ENABLED) + sysreg_clear_set(CPACR_EL1, 0, + CPACR_EL1_SMEN_EL0EN | + CPACR_EL1_SMEN_EL1EN); + else + sysreg_clear_set(CPACR_EL1, + CPACR_EL1_SMEN_EL0EN, + CPACR_EL1_SMEN_EL1EN); + } + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); -- cgit v1.2.3 From 85ea6b1ec915c9dd90caf3674b203999d8c7e062 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Apr 2022 15:38:10 +0100 Subject: KVM: arm64: Inject exception on out-of-IPA-range translation fault When taking a translation fault for an IPA that is outside of the range defined by the hypervisor (between the HW PARange and the IPA range), we stupidly treat it as an IO and forward the access to userspace. Of course, userspace can't do much with it, and things end badly. Arguably, the guest is braindead, but we should at least catch the case and inject an exception. 
Check the faulting IPA against: - the sanitised PARange: inject an address size fault - the IPA size: inject an abort Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 1 + arch/arm64/kvm/inject_fault.c | 28 ++++++++++++++++++++++++++++ arch/arm64/kvm/mmu.c | 19 +++++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 7496deab025a..f71358271b71 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -40,6 +40,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_size_fault(struct kvm_vcpu *vcpu); void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index b47df73e98d7..ba20405d2dc2 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -145,6 +145,34 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) inject_abt64(vcpu, true, addr); } +void kvm_inject_size_fault(struct kvm_vcpu *vcpu) +{ + unsigned long addr, esr; + + addr = kvm_vcpu_get_fault_ipa(vcpu); + addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + + if (kvm_vcpu_trap_is_iabt(vcpu)) + kvm_inject_pabt(vcpu, addr); + else + kvm_inject_dabt(vcpu, addr); + + /* + * If AArch64 or LPAE, set FSC to 0 to indicate an Address + * Size Fault at level 0, as if exceeding PARange. + * + * Non-LPAE guests will only get the external abort, as there + * is no way to to describe the ASF. + */ + if (vcpu_el1_is_32bit(vcpu) && + !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE)) + return; + + esr = vcpu_read_sys_reg(vcpu, ESR_EL1); + esr &= ~GENMASK_ULL(5, 0); + vcpu_write_sys_reg(vcpu, esr, ESR_EL1); +} + /** * kvm_inject_undefined - inject an undefined instruction into the guest * @vcpu: The vCPU in which to inject the exception diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 53ae2c0640bc..5400fc020164 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1337,6 +1337,25 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + if (fault_status == FSC_FAULT) { + /* Beyond sanitised PARange (which is the IPA limit) */ + if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { + kvm_inject_size_fault(vcpu); + return 1; + } + + /* Falls between the IPA range and the PARange? */ + if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) { + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + + if (is_iabt) + kvm_inject_pabt(vcpu, fault_ipa); + else + kvm_inject_dabt(vcpu, fault_ipa); + return 1; + } + } + /* Synchronous External Abort? */ if (kvm_vcpu_abt_issea(vcpu)) { /* -- cgit v1.2.3 From 92abe0f81e1385afd8f1dc66206b5be9a514899b Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Wed, 20 Apr 2022 14:42:52 -0700 Subject: KVM: arm64: Introduce hyp_alloc_private_va_range() hyp_alloc_private_va_range() can be used to reserve private VA ranges in the nVHE hypervisor. Allocations are aligned based on the order of the requested size. This will be used to implement stack guard pages for KVM nVHE hypervisor (nVHE Hyp mode / not pKVM), in a subsequent patch in the series. 
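The "aligned based on the order of the requested size" rule can be illustrated with the following standalone userspace arithmetic (assuming 4K pages; get_order() is reimplemented locally and the base address is made up), which mirrors the ALIGN_DOWN(base, PAGE_SIZE << get_order(size)) step in the diff below.

#include <stdio.h>

#define PAGE_SHIFT 12UL			/* assume 4K pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Smallest order such that PAGE_SIZE << order covers size */
static unsigned long get_order(unsigned long size)
{
	unsigned long order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long io_map_base = 0x0000800000000000UL;	/* made-up VA */
	unsigned long sizes[] = { PAGE_SIZE, 3 * PAGE_SIZE, 16 * PAGE_SIZE };
	unsigned long i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned long align = PAGE_SIZE << get_order(sizes[i]);
		unsigned long base = io_map_base - sizes[i];

		base &= ~(align - 1);	/* ALIGN_DOWN(base, align) */
		printf("size %6lu -> alignment %6lu, base 0x%016lx\n",
		       sizes[i], align, base);
	}
	return 0;
}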
Signed-off-by: Kalesh Singh Tested-by: Fuad Tabba Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220420214317.3303360-2-kaleshsingh@google.com --- arch/arm64/include/asm/kvm_mmu.h | 1 + arch/arm64/kvm/mmu.c | 64 +++++++++++++++++++++++++++------------- 2 files changed, 44 insertions(+), 21 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 74735a864eee..a50cbb5ba402 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -154,6 +154,7 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) int kvm_share_hyp(void *from, void *to); void kvm_unshare_hyp(void *from, void *to); int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot); +int hyp_alloc_private_va_range(size_t size, unsigned long *haddr); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, void __iomem **haddr); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 53ae2c0640bc..7de1e02ebfd1 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -457,23 +457,22 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return 0; } -static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, - unsigned long *haddr, - enum kvm_pgtable_prot prot) + +/** + * hyp_alloc_private_va_range - Allocates a private VA range. + * @size: The size of the VA range to reserve. + * @haddr: The hypervisor virtual start address of the allocation. + * + * The private virtual address (VA) range is allocated below io_map_base + * and aligned based on the order of @size. + * + * Return: 0 on success or negative error code on failure. + */ +int hyp_alloc_private_va_range(size_t size, unsigned long *haddr) { unsigned long base; int ret = 0; - if (!kvm_host_owns_hyp_mappings()) { - base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping, - phys_addr, size, prot); - if (IS_ERR_OR_NULL((void *)base)) - return PTR_ERR((void *)base); - *haddr = base; - - return 0; - } - mutex_lock(&kvm_hyp_pgd_mutex); /* @@ -484,8 +483,10 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, * * The allocated size is always a multiple of PAGE_SIZE. 
*/ - size = PAGE_ALIGN(size + offset_in_page(phys_addr)); - base = io_map_base - size; + base = io_map_base - PAGE_ALIGN(size); + + /* Align the allocation based on the order of its size */ + base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size)); /* * Verify that BIT(VA_BITS - 1) hasn't been flipped by @@ -495,19 +496,40 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, if ((base ^ io_map_base) & BIT(VA_BITS - 1)) ret = -ENOMEM; else - io_map_base = base; + *haddr = io_map_base = base; mutex_unlock(&kvm_hyp_pgd_mutex); + return ret; +} + +static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, + unsigned long *haddr, + enum kvm_pgtable_prot prot) +{ + unsigned long addr; + int ret = 0; + + if (!kvm_host_owns_hyp_mappings()) { + addr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping, + phys_addr, size, prot); + if (IS_ERR_VALUE(addr)) + return addr; + *haddr = addr; + + return 0; + } + + size = PAGE_ALIGN(size + offset_in_page(phys_addr)); + ret = hyp_alloc_private_va_range(size, &addr); if (ret) - goto out; + return ret; - ret = __create_hyp_mappings(base, size, phys_addr, prot); + ret = __create_hyp_mappings(addr, size, phys_addr, prot); if (ret) - goto out; + return ret; - *haddr = base + offset_in_page(phys_addr); -out: + *haddr = addr + offset_in_page(phys_addr); return ret; } -- cgit v1.2.3 From ce3354318a57875dc59f4bb841662e95bfba03db Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Wed, 20 Apr 2022 14:42:54 -0700 Subject: KVM: arm64: Add guard pages for KVM nVHE hypervisor stack Map the stack pages in the flexible private VA range and allocate guard pages below the stack as unbacked VA space. The stack is aligned so that any valid stack address has PAGE_SHIFT bit as 1 - this is used for overflow detection (implemented in a subsequent patch in the series). 
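The overflow-detection property can be shown with a short standalone sketch (assuming 4K pages; the addresses are made up): because the two-page private VA region is aligned to twice the page size and the stack occupies the higher page, bit PAGE_SHIFT of any valid stack pointer is 1 and becomes 0 once the stack overflows into the guard page.

#include <stdio.h>

#define PAGE_SHIFT 12UL			/* assume 4K pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	/* Made-up private VA, aligned to 2 * PAGE_SIZE as in the patch */
	unsigned long hyp_addr = 0xffff800000a00000UL;
	unsigned long stack_top = hyp_addr + 2 * PAGE_SIZE;
	unsigned long valid_sp = stack_top - 0x40;		   /* stack page */
	unsigned long overflowed_sp = hyp_addr + PAGE_SIZE - 0x10; /* guard page */

	printf("valid SP      0x%lx: bit %lu = %lu\n",
	       valid_sp, PAGE_SHIFT, (valid_sp >> PAGE_SHIFT) & 1);
	printf("overflowed SP 0x%lx: bit %lu = %lu\n",
	       overflowed_sp, PAGE_SHIFT, (overflowed_sp >> PAGE_SHIFT) & 1);
	return 0;
}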
Signed-off-by: Kalesh Singh Tested-by: Fuad Tabba Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220420214317.3303360-4-kaleshsingh@google.com --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/include/asm/kvm_mmu.h | 2 ++ arch/arm64/kvm/arm.c | 37 ++++++++++++++++++++++++++++++++++--- arch/arm64/kvm/mmu.c | 4 ++-- 4 files changed, 39 insertions(+), 5 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index d5b0386ef765..2e277f2ed671 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -169,6 +169,7 @@ struct kvm_nvhe_init_params { unsigned long tcr_el2; unsigned long tpidr_el2; unsigned long stack_hyp_va; + unsigned long stack_pa; phys_addr_t pgd_pa; unsigned long hcr_el2; unsigned long vttbr; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index a50cbb5ba402..b208da3bebec 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -154,6 +154,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) int kvm_share_hyp(void *from, void *to); void kvm_unshare_hyp(void *from, void *to); int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot); +int __create_hyp_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot); int hyp_alloc_private_va_range(size_t size, unsigned long *haddr); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 523bc934fe2f..dd257d9f21a2 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1483,7 +1483,6 @@ static void cpu_prepare_hyp_mode(int cpu) tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; params->tcr_el2 = tcr; - params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE); params->pgd_pa = kvm_mmu_get_httbr(); if (is_protected_kvm_enabled()) params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; @@ -1933,14 +1932,46 @@ static int init_hyp_mode(void) * Map the Hyp stack pages */ for_each_possible_cpu(cpu) { + struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); - err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE, - PAGE_HYP); + unsigned long hyp_addr; + /* + * Allocate a contiguous HYP private VA range for the stack + * and guard page. The allocation is also aligned based on + * the order of its size. + */ + err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); + if (err) { + kvm_err("Cannot allocate hyp stack guard page\n"); + goto out_err; + } + + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. + */ + err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE, + __pa(stack_page), PAGE_HYP); if (err) { kvm_err("Cannot map hyp stack\n"); goto out_err; } + + /* + * Save the stack PA in nvhe_init_params. This will be needed + * to recreate the stack mapping in protected nVHE mode. + * __hyp_pa() won't do the right thing there, since the stack + * has been mapped in the flexible private VA space. 
+ */ + params->stack_pa = __pa(stack_page); + + params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); } for_each_possible_cpu(cpu) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7de1e02ebfd1..088e14eae4cf 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -258,8 +258,8 @@ static bool kvm_host_owns_hyp_mappings(void) return true; } -static int __create_hyp_mappings(unsigned long start, unsigned long size, - unsigned long phys, enum kvm_pgtable_prot prot) +int __create_hyp_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot) { int err; -- cgit v1.2.3 From 9369bc5c5e35985f38d04bd98c6d28a032e84b17 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 3 May 2022 06:02:02 +0000 Subject: KVM: arm64: Plumb cp10 ID traps through the AArch64 sysreg handler In order to enable HCR_EL2.TID3 for AArch32 guests KVM needs to handle traps where ESR_EL2.EC=0x8, which corresponds to an attempted VMRS access from an ID group register. Specifically, the MVFR{0-2} registers are accessed this way from AArch32. Conveniently, these registers are architecturally mapped to MVFR{0-2}_EL1 in AArch64. Furthermore, KVM already handles reads to these aliases in AArch64. Plumb VMRS read traps through to the general AArch64 system register handler. Signed-off-by: Oliver Upton Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220503060205.2823727-5-oupton@google.com --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/handle_exit.c | 1 + arch/arm64/kvm/sys_regs.c | 71 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 94a27a7520f4..05081b9b7369 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -683,6 +683,7 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); +int kvm_handle_cp10_id(struct kvm_vcpu *vcpu); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 97fe14aab1a3..5088a86ace5b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -167,6 +167,7 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, [ESR_ELx_EC_CP14_MR] = kvm_handle_cp14_32, [ESR_ELx_EC_CP14_LS] = kvm_handle_cp14_load_store, + [ESR_ELx_EC_CP10_ID] = kvm_handle_cp10_id, [ESR_ELx_EC_CP14_64] = kvm_handle_cp14_64, [ESR_ELx_EC_HVC32] = handle_hvc, [ESR_ELx_EC_SMC32] = handle_smc, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f403ea47b8a3..586b292ca94f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2346,6 +2346,77 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, static bool emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params); +/* + * The CP10 ID registers are architecturally mapped to AArch64 feature + * registers. Abuse that fact so we can rely on the AArch64 handler for accesses + * from AArch32. 
+ */ +static bool kvm_esr_cp10_id_to_sys64(u32 esr, struct sys_reg_params *params) +{ + u8 reg_id = (esr >> 10) & 0xf; + bool valid; + + params->is_write = ((esr & 1) == 0); + params->Op0 = 3; + params->Op1 = 0; + params->CRn = 0; + params->CRm = 3; + + /* CP10 ID registers are read-only */ + valid = !params->is_write; + + switch (reg_id) { + /* MVFR0 */ + case 0b0111: + params->Op2 = 0; + break; + /* MVFR1 */ + case 0b0110: + params->Op2 = 1; + break; + /* MVFR2 */ + case 0b0101: + params->Op2 = 2; + break; + default: + valid = false; + } + + if (valid) + return true; + + kvm_pr_unimpl("Unhandled cp10 register %s: %u\n", + params->is_write ? "write" : "read", reg_id); + return false; +} + +/** + * kvm_handle_cp10_id() - Handles a VMRS trap on guest access to a 'Media and + * VFP Register' from AArch32. + * @vcpu: The vCPU pointer + * + * MVFR{0-2} are architecturally mapped to the AArch64 MVFR{0-2}_EL1 registers. + * Work out the correct AArch64 system register encoding and reroute to the + * AArch64 system register emulation. + */ +int kvm_handle_cp10_id(struct kvm_vcpu *vcpu) +{ + int Rt = kvm_vcpu_sys_get_rt(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); + struct sys_reg_params params; + + /* UNDEF on any unhandled register access */ + if (!kvm_esr_cp10_id_to_sys64(esr, ¶ms)) { + kvm_inject_undefined(vcpu); + return 1; + } + + if (emulate_sys_reg(vcpu, ¶ms)) + vcpu_set_reg(vcpu, Rt, params.regval); + + return 1; +} + /** * kvm_emulate_cp15_id_reg() - Handles an MRC trap on a guest CP15 access where * CRn=0, which corresponds to the AArch32 feature -- cgit v1.2.3 From fd1264c4ca610a99d52c35a37e5551eec442723d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 3 May 2022 06:02:03 +0000 Subject: KVM: arm64: Start trapping ID registers for 32 bit guests To date KVM has not trapped ID register accesses from AArch32, meaning that guests get an unconstrained view of what hardware supports. This can be a serious problem because we try to base the guest's feature registers on values that are safe system-wide. Furthermore, KVM does not implement the latest ISA in the PMU and Debug architecture, so we constrain these fields to supported values. Since KVM now correctly handles CP15 and CP10 register traps, we no longer need to clear HCR_EL2.TID3 for 32 bit guests and will instead emulate reads with their safe values. 
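Illustration (editor's addition, not part of this series): a freestanding sketch that mirrors the VMRS decode added for CP10 above, showing how a trapped AArch32 MVFR{0-2} read is routed to the Op2 of the matching MVFRx_EL1 encoding (Op0=3, Op1=0, CRn=0, CRm=3). The helper name and the sample ESR value are made up for the example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* ESR layout used by the trap handler: bits [13:10] = CP10 register id,
 * bit [0] = direction (set for a read). */
static bool cp10_id_to_mvfr_op2(uint32_t esr, unsigned int *op2)
{
	unsigned int reg_id = (esr >> 10) & 0xf;

	/* The CP10 ID registers are read-only; a write is invalid. */
	if ((esr & 1) == 0)
		return false;

	switch (reg_id) {
	case 0x7: *op2 = 0; return true;	/* MVFR0 -> MVFR0_EL1 */
	case 0x6: *op2 = 1; return true;	/* MVFR1 -> MVFR1_EL1 */
	case 0x5: *op2 = 2; return true;	/* MVFR2 -> MVFR2_EL1 */
	default:  return false;			/* anything else UNDEFs */
	}
}

int main(void)
{
	unsigned int op2;
	uint32_t esr = (0x6u << 10) | 1;	/* hypothetical VMRS read of MVFR1 */

	if (cp10_id_to_mvfr_op2(esr, &op2))
		printf("routes to Op0=3, Op1=0, CRn=0, CRm=3, Op2=%u\n", op2);
	return 0;
}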
Signed-off-by: Oliver Upton Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220503060205.2823727-6-oupton@google.com --- arch/arm64/include/asm/kvm_arm.h | 3 ++- arch/arm64/include/asm/kvm_emulate.h | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1767ded83888..b5de102928d8 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -80,11 +80,12 @@ * FMO: Override CPSR.F and enable signaling with VF * SWIO: Turn set/way invalidates into set/way clean+invalidate * PTW: Take a stage2 fault if a stage1 walk steps in device memory + * TID3: Trap EL1 reads of group 3 ID registers */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ - HCR_FMO | HCR_IMO | HCR_PTW ) + HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 7496deab025a..ab5c66b77bb0 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -86,13 +86,6 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) if (vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 &= ~HCR_RW; - else - /* - * TID3: trap feature register accesses that we virtualise. - * For now this is conditional, since no AArch32 feature regs - * are currently virtualised. - */ - vcpu->arch.hcr_el2 |= HCR_TID3; if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || vcpu_el1_is_32bit(vcpu)) -- cgit v1.2.3 From 05714cab7d63b189894235cf310fae7d6ffc2e9b Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Mon, 2 May 2022 23:38:46 +0000 Subject: KVM: arm64: Setup a framework for hypercall bitmap firmware registers KVM regularly introduces new hypercall services to the guests without any consent from the userspace. This means, the guests can observe hypercall services in and out as they migrate across various host kernel versions. This could be a major problem if the guest discovered a hypercall, started using it, and after getting migrated to an older kernel realizes that it's no longer available. Depending on how the guest handles the change, there's a potential chance that the guest would just panic. As a result, there's a need for the userspace to elect the services that it wishes the guest to discover. It can elect these services based on the kernels spread across its (migration) fleet. To remedy this, extend the existing firmware pseudo-registers, such as KVM_REG_ARM_PSCI_VERSION, but by creating a new COPROC register space for all the hypercall services available. These firmware registers are categorized based on the service call owners, but unlike the existing firmware pseudo-registers, they hold the features supported in the form of a bitmap. During the VM initialization, the registers are set to upper-limit of the features supported by the corresponding registers. It's expected that the VMMs discover the features provided by each register via GET_ONE_REG, and write back the desired values using SET_ONE_REG. KVM allows this modification only until the VM has started. Some of the standard features are not mapped to any bits of the registers. 
But since they can recreate the original problem of making it available without userspace's consent, they need to be explicitly added to the case-list in kvm_hvc_call_default_allowed(). Any function-id that's not enabled via the bitmap, or not listed in kvm_hvc_call_default_allowed, will be returned as SMCCC_RET_NOT_SUPPORTED to the guest. Older userspace code can simply ignore the feature and the hypercall services will be exposed unconditionally to the guests, thus ensuring backward compatibility. In this patch, the framework adds the register only for ARM's standard secure services (owner value 4). Currently, this includes support only for ARM True Random Number Generator (TRNG) service, with bit-0 of the register representing mandatory features of v1.0. Other services are momentarily added in the upcoming patches. Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Gavin Shan [maz: reduced the scope of some helpers, tidy-up bitmap max values, dropped error-only fast path] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220502233853.1233742-3-rananta@google.com --- arch/arm64/include/asm/kvm_host.h | 12 +++++ arch/arm64/include/uapi/asm/kvm.h | 13 +++++ arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/guest.c | 8 ++- arch/arm64/kvm/hypercalls.c | 104 ++++++++++++++++++++++++++++++++++++++ include/kvm/arm_hypercalls.h | 1 + 6 files changed, 137 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 94a27a7520f4..fef597af0beb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -101,6 +101,15 @@ struct kvm_s2_mmu { struct kvm_arch_memory_slot { }; +/** + * struct kvm_smccc_features: Descriptor of the hypercall services exposed to the guests + * + * @std_bmap: Bitmap of standard secure service calls + */ +struct kvm_smccc_features { + unsigned long std_bmap; +}; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -150,6 +159,9 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + + /* Hypercall features firmware registers' descriptor */ + struct kvm_smccc_features smccc_feat; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index c1b6ddc02d2f..b0be5ec196b9 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -332,6 +332,19 @@ struct kvm_arm_copy_mte_tags { #define KVM_ARM64_SVE_VLS_WORDS \ ((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1) +/* Bitmap feature firmware registers */ +#define KVM_REG_ARM_FW_FEAT_BMAP (0x0016 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_FEAT_BMAP_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW_FEAT_BMAP | \ + ((r) & 0xffff)) + +#define KVM_REG_ARM_STD_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(0) + +enum { + KVM_REG_ARM_STD_BIT_TRNG_V1_0 = 0, + KVM_REG_ARM_STD_BMAP_BIT_COUNT, +}; + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 523bc934fe2f..a37fadbd617e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -156,6 +156,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); set_default_spectre(kvm); + kvm_arm_init_hypercalls(kvm); return ret; out_free_stage2_pgd: diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 0d5cca56cbda..8c607199cad1 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c 
@@ -756,7 +756,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) switch (reg->id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: return get_core_reg(vcpu, reg); - case KVM_REG_ARM_FW: return kvm_arm_get_fw_reg(vcpu, reg); + case KVM_REG_ARM_FW: + case KVM_REG_ARM_FW_FEAT_BMAP: + return kvm_arm_get_fw_reg(vcpu, reg); case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg); } @@ -774,7 +776,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) switch (reg->id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg); - case KVM_REG_ARM_FW: return kvm_arm_set_fw_reg(vcpu, reg); + case KVM_REG_ARM_FW: + case KVM_REG_ARM_FW_FEAT_BMAP: + return kvm_arm_set_fw_reg(vcpu, reg); case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg); } diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index dd4e20b8d656..22f3a919e699 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -9,6 +9,9 @@ #include #include +#define KVM_ARM_SMCCC_STD_FEATURES \ + GENMASK(KVM_REG_ARM_STD_BMAP_BIT_COUNT - 1, 0) + static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) { struct system_time_snapshot systime_snapshot; @@ -58,6 +61,56 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) val[3] = lower_32_bits(cycles); } +static bool kvm_hvc_call_default_allowed(u32 func_id) +{ + switch (func_id) { + /* + * List of function-ids that are not gated with the bitmapped + * feature firmware registers, and are to be allowed for + * servicing the call by default. + */ + case ARM_SMCCC_VERSION_FUNC_ID: + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: + case ARM_SMCCC_HV_PV_TIME_FEATURES: + case ARM_SMCCC_HV_PV_TIME_ST: + case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID: + case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: + case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: + return true; + default: + /* PSCI 0.2 and up is in the 0:0x1f range */ + if (ARM_SMCCC_OWNER_NUM(func_id) == ARM_SMCCC_OWNER_STANDARD && + ARM_SMCCC_FUNC_NUM(func_id) <= 0x1f) + return true; + + /* + * KVM's PSCI 0.1 doesn't comply with SMCCC, and has + * its own function-id base and range + */ + if (func_id >= KVM_PSCI_FN(0) && func_id <= KVM_PSCI_FN(3)) + return true; + + return false; + } +} + +static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) +{ + struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; + + switch (func_id) { + case ARM_SMCCC_TRNG_VERSION: + case ARM_SMCCC_TRNG_FEATURES: + case ARM_SMCCC_TRNG_GET_UUID: + case ARM_SMCCC_TRNG_RND32: + case ARM_SMCCC_TRNG_RND64: + return test_bit(KVM_REG_ARM_STD_BIT_TRNG_V1_0, + &smccc_feat->std_bmap); + default: + return kvm_hvc_call_default_allowed(func_id); + } +} + int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) { u32 func_id = smccc_get_function(vcpu); @@ -65,6 +118,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) u32 feature; gpa_t gpa; + if (!kvm_hvc_call_allowed(vcpu, func_id)) + goto out; + switch (func_id) { case ARM_SMCCC_VERSION_FUNC_ID: val[0] = ARM_SMCCC_VERSION_1_1; @@ -155,6 +211,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) return kvm_psci_call(vcpu); } +out: smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]); return 1; } @@ -164,8 +221,16 @@ static const u64 kvm_arm_fw_reg_ids[] = { KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, + KVM_REG_ARM_STD_BMAP, }; +void kvm_arm_init_hypercalls(struct kvm *kvm) +{ + struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat; + + 
smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES; +} + int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { return ARRAY_SIZE(kvm_arm_fw_reg_ids); @@ -236,6 +301,7 @@ static int get_kernel_wa_level(u64 regid) int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { + struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; void __user *uaddr = (void __user *)(long)reg->addr; u64 val; @@ -248,6 +314,9 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; break; + case KVM_REG_ARM_STD_BMAP: + val = READ_ONCE(smccc_feat->std_bmap); + break; default: return -ENOENT; } @@ -258,6 +327,39 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return 0; } +static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) +{ + int ret = 0; + struct kvm *kvm = vcpu->kvm; + struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat; + unsigned long *fw_reg_bmap, fw_reg_features; + + switch (reg_id) { + case KVM_REG_ARM_STD_BMAP: + fw_reg_bmap = &smccc_feat->std_bmap; + fw_reg_features = KVM_ARM_SMCCC_STD_FEATURES; + break; + default: + return -ENOENT; + } + + /* Check for unsupported bit */ + if (val & ~fw_reg_features) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) { + ret = -EBUSY; + goto out; + } + + WRITE_ONCE(*fw_reg_bmap, val); +out: + mutex_unlock(&kvm->lock); + return ret; +} + int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { void __user *uaddr = (void __user *)(long)reg->addr; @@ -336,6 +438,8 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return -EINVAL; return 0; + case KVM_REG_ARM_STD_BMAP: + return kvm_arm_set_fw_reg_bmap(vcpu, reg->id, val); default: return -ENOENT; } diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h index 5d38628a8d04..1188f116cf4e 100644 --- a/include/kvm/arm_hypercalls.h +++ b/include/kvm/arm_hypercalls.h @@ -42,6 +42,7 @@ static inline void smccc_set_retval(struct kvm_vcpu *vcpu, struct kvm_one_reg; +void kvm_arm_init_hypercalls(struct kvm *kvm); int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); -- cgit v1.2.3 From 428fd6788d4d0e0d390de9fb4486be3c1187310d Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Mon, 2 May 2022 23:38:47 +0000 Subject: KVM: arm64: Add standard hypervisor firmware register Introduce the firmware register to hold the standard hypervisor service calls (owner value 5) as a bitmap. The bitmap represents the features that'll be enabled for the guest, as configured by the user-space. Currently, this includes support only for Paravirtualized time, represented by bit-0. 
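Illustration (editor's addition, not part of this patch): a minimal VMM-side sketch of how this register is meant to be driven, reading the bitmap with KVM_GET_ONE_REG, clearing the PV time bit, and writing it back with KVM_SET_ONE_REG before the VM first runs. The helper name is hypothetical; the register and bit macros are the uapi additions from this series, so headers carrying them are assumed.

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Hypothetical VMM helper: hide paravirtualized time from the guest. */
static int disable_pv_time(int vcpu_fd)
{
	uint64_t bmap;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_STD_HYP_BMAP,
		.addr = (uintptr_t)&bmap,
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
		return -1;

	bmap &= ~(1ULL << KVM_REG_ARM_STD_HYP_BIT_PV_TIME);

	/* Must happen before the first KVM_RUN; KVM returns -EBUSY afterwards. */
	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}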
Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Gavin Shan [maz: tidy-up bitmap values] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220502233853.1233742-4-rananta@google.com --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/include/uapi/asm/kvm.h | 7 +++++++ arch/arm64/kvm/hypercalls.c | 23 ++++++++++++++++++++--- 3 files changed, 29 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index fef597af0beb..281dfcfd0a4e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -105,9 +105,11 @@ struct kvm_arch_memory_slot { * struct kvm_smccc_features: Descriptor of the hypercall services exposed to the guests * * @std_bmap: Bitmap of standard secure service calls + * @std_hyp_bmap: Bitmap of standard hypervisor service calls */ struct kvm_smccc_features { unsigned long std_bmap; + unsigned long std_hyp_bmap; }; struct kvm_arch { diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index b0be5ec196b9..7ff5a2cf7547 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -345,6 +345,13 @@ enum { KVM_REG_ARM_STD_BMAP_BIT_COUNT, }; +#define KVM_REG_ARM_STD_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(1) + +enum { + KVM_REG_ARM_STD_HYP_BIT_PV_TIME = 0, + KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT, +}; + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 22f3a919e699..c43f78c33d64 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -11,6 +11,8 @@ #define KVM_ARM_SMCCC_STD_FEATURES \ GENMASK(KVM_REG_ARM_STD_BMAP_BIT_COUNT - 1, 0) +#define KVM_ARM_SMCCC_STD_HYP_FEATURES \ + GENMASK(KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT - 1, 0) static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) { @@ -71,8 +73,6 @@ static bool kvm_hvc_call_default_allowed(u32 func_id) */ case ARM_SMCCC_VERSION_FUNC_ID: case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: - case ARM_SMCCC_HV_PV_TIME_FEATURES: - case ARM_SMCCC_HV_PV_TIME_ST: case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID: case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: @@ -106,6 +106,10 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) case ARM_SMCCC_TRNG_RND64: return test_bit(KVM_REG_ARM_STD_BIT_TRNG_V1_0, &smccc_feat->std_bmap); + case ARM_SMCCC_HV_PV_TIME_FEATURES: + case ARM_SMCCC_HV_PV_TIME_ST: + return test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME, + &smccc_feat->std_hyp_bmap); default: return kvm_hvc_call_default_allowed(func_id); } @@ -113,6 +117,7 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) { + struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; u32 func_id = smccc_get_function(vcpu); u64 val[4] = {SMCCC_RET_NOT_SUPPORTED}; u32 feature; @@ -176,7 +181,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) } break; case ARM_SMCCC_HV_PV_TIME_FEATURES: - val[0] = SMCCC_RET_SUCCESS; + if (test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME, + &smccc_feat->std_hyp_bmap)) + val[0] = SMCCC_RET_SUCCESS; break; } break; @@ -222,6 +229,7 @@ static const u64 kvm_arm_fw_reg_ids[] = { KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, KVM_REG_ARM_STD_BMAP, + KVM_REG_ARM_STD_HYP_BMAP, }; void kvm_arm_init_hypercalls(struct kvm *kvm) @@ -229,6 +237,7 @@ void 
kvm_arm_init_hypercalls(struct kvm *kvm) struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat; smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES; + smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; } int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) @@ -317,6 +326,9 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM_STD_BMAP: val = READ_ONCE(smccc_feat->std_bmap); break; + case KVM_REG_ARM_STD_HYP_BMAP: + val = READ_ONCE(smccc_feat->std_hyp_bmap); + break; default: return -ENOENT; } @@ -339,6 +351,10 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) fw_reg_bmap = &smccc_feat->std_bmap; fw_reg_features = KVM_ARM_SMCCC_STD_FEATURES; break; + case KVM_REG_ARM_STD_HYP_BMAP: + fw_reg_bmap = &smccc_feat->std_hyp_bmap; + fw_reg_features = KVM_ARM_SMCCC_STD_HYP_FEATURES; + break; default: return -ENOENT; } @@ -439,6 +455,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return 0; case KVM_REG_ARM_STD_BMAP: + case KVM_REG_ARM_STD_HYP_BMAP: return kvm_arm_set_fw_reg_bmap(vcpu, reg->id, val); default: return -ENOENT; -- cgit v1.2.3 From b22216e1a617ca55b41337cd1e057ebc784a65d4 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Mon, 2 May 2022 23:38:48 +0000 Subject: KVM: arm64: Add vendor hypervisor firmware register Introduce the firmware register to hold the vendor specific hypervisor service calls (owner value 6) as a bitmap. The bitmap represents the features that'll be enabled for the guest, as configured by the user-space. Currently, this includes support for KVM-vendor features along with reading the UID, represented by bit-0, and Precision Time Protocol (PTP), represented by bit-1. Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Gavin Shan [maz: tidy-up bitmap values] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220502233853.1233742-5-rananta@google.com --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/include/uapi/asm/kvm.h | 8 ++++++++ arch/arm64/kvm/hypercalls.c | 25 ++++++++++++++++++++----- 3 files changed, 30 insertions(+), 5 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 281dfcfd0a4e..35a60d766fba 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -106,10 +106,12 @@ struct kvm_arch_memory_slot { * * @std_bmap: Bitmap of standard secure service calls * @std_hyp_bmap: Bitmap of standard hypervisor service calls + * @vendor_hyp_bmap: Bitmap of vendor specific hypervisor service calls */ struct kvm_smccc_features { unsigned long std_bmap; unsigned long std_hyp_bmap; + unsigned long vendor_hyp_bmap; }; struct kvm_arch { diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 7ff5a2cf7547..e523bb6eac67 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -352,6 +352,14 @@ enum { KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT, }; +#define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2) + +enum { + KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT = 0, + KVM_REG_ARM_VENDOR_HYP_BIT_PTP = 1, + KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT, +}; + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index c43f78c33d64..ccbd3cefb91a 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -13,6 +13,8 @@ 
GENMASK(KVM_REG_ARM_STD_BMAP_BIT_COUNT - 1, 0) #define KVM_ARM_SMCCC_STD_HYP_FEATURES \ GENMASK(KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT - 1, 0) +#define KVM_ARM_SMCCC_VENDOR_HYP_FEATURES \ + GENMASK(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT - 1, 0) static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) { @@ -73,9 +75,6 @@ static bool kvm_hvc_call_default_allowed(u32 func_id) */ case ARM_SMCCC_VERSION_FUNC_ID: case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: - case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID: - case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: - case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: return true; default: /* PSCI 0.2 and up is in the 0:0x1f range */ @@ -110,6 +109,13 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) case ARM_SMCCC_HV_PV_TIME_ST: return test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME, &smccc_feat->std_hyp_bmap); + case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: + case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID: + return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT, + &smccc_feat->vendor_hyp_bmap); + case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: + return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP, + &smccc_feat->vendor_hyp_bmap); default: return kvm_hvc_call_default_allowed(func_id); } @@ -202,8 +208,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3; break; case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: - val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES); - val[0] |= BIT(ARM_SMCCC_KVM_FUNC_PTP); + val[0] = smccc_feat->vendor_hyp_bmap; break; case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: kvm_ptp_get_time(vcpu, val); @@ -230,6 +235,7 @@ static const u64 kvm_arm_fw_reg_ids[] = { KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, KVM_REG_ARM_STD_BMAP, KVM_REG_ARM_STD_HYP_BMAP, + KVM_REG_ARM_VENDOR_HYP_BMAP, }; void kvm_arm_init_hypercalls(struct kvm *kvm) @@ -238,6 +244,7 @@ void kvm_arm_init_hypercalls(struct kvm *kvm) smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES; smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; + smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; } int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) @@ -329,6 +336,9 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM_STD_HYP_BMAP: val = READ_ONCE(smccc_feat->std_hyp_bmap); break; + case KVM_REG_ARM_VENDOR_HYP_BMAP: + val = READ_ONCE(smccc_feat->vendor_hyp_bmap); + break; default: return -ENOENT; } @@ -355,6 +365,10 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) fw_reg_bmap = &smccc_feat->std_hyp_bmap; fw_reg_features = KVM_ARM_SMCCC_STD_HYP_FEATURES; break; + case KVM_REG_ARM_VENDOR_HYP_BMAP: + fw_reg_bmap = &smccc_feat->vendor_hyp_bmap; + fw_reg_features = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; + break; default: return -ENOENT; } @@ -456,6 +470,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return 0; case KVM_REG_ARM_STD_BMAP: case KVM_REG_ARM_STD_HYP_BMAP: + case KVM_REG_ARM_VENDOR_HYP_BMAP: return kvm_arm_set_fw_reg_bmap(vcpu, reg->id, val); default: return -ENOENT; -- cgit v1.2.3 From 1e5794295c5dbfcc31cf5de840c9e095ae50efb7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:36 +0000 Subject: KVM: arm64: Dedupe vCPU power off helpers vcpu_power_off() and kvm_psci_vcpu_off() are equivalent; rename the former and replace all callsites to the latter. No functional change intended. 
Signed-off-by: Oliver Upton Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-3-oupton@google.com --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/arm.c | 6 +++--- arch/arm64/kvm/psci.c | 11 ++--------- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 94a27a7520f4..490cd7f3a905 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -841,4 +841,6 @@ void __init kvm_hyp_reserve(void); static inline void kvm_hyp_reserve(void) { } #endif +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 523bc934fe2f..28c83c6ddbae 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -432,7 +432,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -static void vcpu_power_off(struct kvm_vcpu *vcpu) +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) { vcpu->arch.power_off = true; kvm_make_request(KVM_REQ_SLEEP, vcpu); @@ -460,7 +460,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu->arch.power_off = false; break; case KVM_MP_STATE_STOPPED: - vcpu_power_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); break; default: ret = -EINVAL; @@ -1124,7 +1124,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, * Handle the "start in power-off" case. */ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - vcpu_power_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); else vcpu->arch.power_off = false; diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 67fbd6ef022c..9b1f3acae155 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -51,13 +51,6 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) return PSCI_RET_SUCCESS; } -static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) -{ - vcpu->arch.power_off = true; - kvm_make_request(KVM_REQ_SLEEP, vcpu); - kvm_vcpu_kick(vcpu); -} - static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu, unsigned long affinity) { @@ -245,7 +238,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) val = kvm_psci_vcpu_suspend(vcpu); break; case PSCI_0_2_FN_CPU_OFF: - kvm_psci_vcpu_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); val = PSCI_RET_SUCCESS; break; case PSCI_0_2_FN_CPU_ON: @@ -379,7 +372,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) switch (psci_fn) { case KVM_PSCI_FN_CPU_OFF: - kvm_psci_vcpu_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: -- cgit v1.2.3 From b171f9bbb130cb323f2101edd32da2a25d43ebfa Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:37 +0000 Subject: KVM: arm64: Track vCPU power state using MP state values A subsequent change to KVM will add support for additional power states. Store the MP state by value rather than keeping track of it as a boolean. No functional change intended. 
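Illustration (editor's addition, not part of this patch): the values stored here are the same ones userspace already exchanges through the standard MP state ioctls, so a VMM can park and resume a vCPU as sketched below (the helper name is made up).

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical VMM helper built on KVM_SET_MP_STATE. */
static int set_vcpu_mp_state(int vcpu_fd, __u32 state)
{
	struct kvm_mp_state mp_state = { .mp_state = state };

	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp_state);
}

/*
 * set_vcpu_mp_state(vcpu_fd, KVM_MP_STATE_STOPPED);   powers the vCPU off
 * set_vcpu_mp_state(vcpu_fd, KVM_MP_STATE_RUNNABLE);  makes it runnable again
 */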
Signed-off-by: Oliver Upton Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-4-oupton@google.com --- arch/arm64/include/asm/kvm_host.h | 5 +++-- arch/arm64/kvm/arm.c | 22 ++++++++++++---------- arch/arm64/kvm/psci.c | 12 ++++++------ 3 files changed, 21 insertions(+), 18 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 490cd7f3a905..f3f93d48e21a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -365,8 +365,8 @@ struct kvm_vcpu_arch { u32 mdscr_el1; } guest_debug_preserved; - /* vcpu power-off state */ - bool power_off; + /* vcpu power state */ + struct kvm_mp_state mp_state; /* Don't run the guest (internal implementation need) */ bool pause; @@ -842,5 +842,6 @@ static inline void kvm_hyp_reserve(void) { } #endif void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); +bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 28c83c6ddbae..29e107457c4d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -434,18 +434,20 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = true; + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } +bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (vcpu->arch.power_off) - mp_state->mp_state = KVM_MP_STATE_STOPPED; - else - mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + *mp_state = vcpu->arch.mp_state; return 0; } @@ -457,7 +459,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.power_off = false; + vcpu->arch.mp_state = *mp_state; break; case KVM_MP_STATE_STOPPED: kvm_arm_vcpu_power_off(vcpu); @@ -480,7 +482,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) - && !v->arch.power_off && !v->arch.pause); + && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); } bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) @@ -597,10 +599,10 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu) struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); rcuwait_wait_event(wait, - (!vcpu->arch.power_off) &&(!vcpu->arch.pause), + (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); - if (vcpu->arch.power_off || vcpu->arch.pause) { + if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) { /* Awaken to handle a signal, request we sleep again later. 
*/ kvm_make_request(KVM_REQ_SLEEP, vcpu); } @@ -1126,7 +1128,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) kvm_arm_vcpu_power_off(vcpu); else - vcpu->arch.power_off = false; + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; return 0; } diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 9b1f3acae155..2e6f060214a7 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -76,7 +76,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ if (!vcpu) return PSCI_RET_INVALID_PARAMS; - if (!vcpu->arch.power_off) { + if (!kvm_arm_vcpu_stopped(vcpu)) { if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) return PSCI_RET_ALREADY_ON; else @@ -100,12 +100,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); /* - * Make sure the reset request is observed if the change to - * power_off is observed. + * Make sure the reset request is observed if the RUNNABLE mp_state is + * observed. */ smp_wmb(); - vcpu->arch.power_off = false; + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; kvm_vcpu_wake_up(vcpu); return PSCI_RET_SUCCESS; @@ -143,7 +143,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) mpidr = kvm_vcpu_get_mpidr_aff(tmp); if ((mpidr & target_affinity_mask) == target_affinity) { matching_cpus++; - if (!tmp->arch.power_off) + if (!kvm_arm_vcpu_stopped(tmp)) return PSCI_0_2_AFFINITY_LEVEL_ON; } } @@ -169,7 +169,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) * re-initialized. */ kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.power_off = true; + tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); -- cgit v1.2.3 From 7b33a09d036ffd9a04506122840629c7e870cf08 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:40 +0000 Subject: KVM: arm64: Add support for userspace to suspend a vCPU Introduce a new MP state, KVM_MP_STATE_SUSPENDED, which indicates a vCPU is in a suspended state. In the suspended state the vCPU will block until a wakeup event (pending interrupt) is recognized. Add a new system event type, KVM_SYSTEM_EVENT_WAKEUP, to indicate to userspace that KVM has recognized one such wakeup event. It is the responsibility of userspace to then make the vCPU runnable, or leave it suspended until the next wakeup event. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-7-oupton@google.com --- Documentation/virt/kvm/api.rst | 37 ++++++++++++++++++++++++++-- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 51 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 2 ++ 4 files changed, 89 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 4a900cdbc62e..46ca84600dca 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1476,14 +1476,43 @@ Possible values are: [s390] KVM_MP_STATE_LOAD the vcpu is in a special load/startup state [s390] + KVM_MP_STATE_SUSPENDED the vcpu is in a suspend state and is waiting + for a wakeup event [arm64] ========================== =============================================== On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. 
Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. -For arm64/riscv: -^^^^^^^^^^^^^^^^ +For arm64: +^^^^^^^^^^ + +If a vCPU is in the KVM_MP_STATE_SUSPENDED state, KVM will emulate the +architectural execution of a WFI instruction. + +If a wakeup event is recognized, KVM will exit to userspace with a +KVM_SYSTEM_EVENT exit, where the event type is KVM_SYSTEM_EVENT_WAKEUP. If +userspace wants to honor the wakeup, it must set the vCPU's MP state to +KVM_MP_STATE_RUNNABLE. If it does not, KVM will continue to await a wakeup +event in subsequent calls to KVM_RUN. + +.. warning:: + + If userspace intends to keep the vCPU in a SUSPENDED state, it is + strongly recommended that userspace take action to suppress the + wakeup event (such as masking an interrupt). Otherwise, subsequent + calls to KVM_RUN will immediately exit with a KVM_SYSTEM_EVENT_WAKEUP + event and inadvertently waste CPU cycles. + + Additionally, if userspace takes action to suppress a wakeup event, + it is strongly recommended that it also restores the vCPU to its + original state when the vCPU is made RUNNABLE again. For example, + if userspace masked a pending interrupt to suppress the wakeup, + the interrupt should be unmasked before returning control to the + guest. + +For riscv: +^^^^^^^^^^ The only states that are valid are KVM_MP_STATE_STOPPED and KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not. @@ -5985,6 +6014,7 @@ should put the acknowledged interrupt vector into the 'epr' field. #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 + #define KVM_SYSTEM_EVENT_WAKEUP 4 __u32 type; __u32 ndata; __u64 data[16]; @@ -6009,6 +6039,9 @@ Valid values for 'type' are: has requested a crash condition maintenance. Userspace can choose to ignore the request, or to gather VM memory core dump and/or reset/shutdown of the VM. + - KVM_SYSTEM_EVENT_WAKEUP -- the exiting vCPU is in a suspended state and + KVM has recognized a wakeup event. Userspace may honor this event by + marking the exiting vCPU as runnable, or deny it and call KVM_RUN again. If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain architecture specific information for the system-level event. 
Only diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f3f93d48e21a..46027b9b80ca 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) #define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) +#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index efe54aba5cce..abd32a84ed7a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -444,6 +444,18 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED; } +static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED; + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + kvm_vcpu_kick(vcpu); +} + +static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -464,6 +476,9 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, case KVM_MP_STATE_STOPPED: kvm_arm_vcpu_power_off(vcpu); break; + case KVM_MP_STATE_SUSPENDED: + kvm_arm_vcpu_suspend(vcpu); + break; default: ret = -EINVAL; } @@ -648,6 +663,39 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_enable(); } +static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_vcpu_suspended(vcpu)) + return 1; + + kvm_vcpu_wfi(vcpu); + + /* + * The suspend state is sticky; we do not leave it until userspace + * explicitly marks the vCPU as runnable. Request that we suspend again + * later. + */ + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + + /* + * Check to make sure the vCPU is actually runnable. If so, exit to + * userspace informing it of the wakeup condition. + */ + if (kvm_arch_vcpu_runnable(vcpu)) { + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + return 0; + } + + /* + * Otherwise, we were unblocked to process a different event, such as a + * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to + * process the event. 
+ */ + return 1; +} + /** * check_vcpu_requests - check and handle pending vCPU requests * @vcpu: the VCPU pointer @@ -686,6 +734,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) kvm_pmu_handle_pmcr(vcpu, __vcpu_sys_reg(vcpu, PMCR_EL0)); + + if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) + return kvm_vcpu_suspend(vcpu); } return 1; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6a184d260c7f..7f72fb7b05f2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -444,6 +444,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_WAKEUP 4 __u32 type; __u32 ndata; union { @@ -646,6 +647,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 #define KVM_MP_STATE_AP_RESET_HOLD 9 +#define KVM_MP_STATE_SUSPENDED 10 struct kvm_mp_state { __u32 mp_state; -- cgit v1.2.3 From bfbab44568779e1682bc6f63688bb9c965f0e74a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:41 +0000 Subject: KVM: arm64: Implement PSCI SYSTEM_SUSPEND ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND" describes a PSCI call that allows software to request that a system be placed in the deepest possible low-power state. Effectively, software can use this to suspend itself to RAM. Unfortunately, there really is no good way to implement a system-wide PSCI call in KVM. Any precondition checks done in the kernel will need to be repeated by userspace since there is no good way to protect a critical section that spans an exit to userspace. SYSTEM_RESET and SYSTEM_OFF are equally plagued by this issue, although no users have seemingly cared for the relatively long time these calls have been supported. The solution is to just make the whole implementation userspace's problem. Introduce a new system event, KVM_SYSTEM_EVENT_SUSPEND, that indicates to userspace a calling vCPU has invoked PSCI SYSTEM_SUSPEND. Additionally, add a CAP to get buy-in from userspace for this new exit type. Only advertise the SYSTEM_SUSPEND PSCI call if userspace has opted in. If a vCPU calls SYSTEM_SUSPEND, punt straight to userspace. Provide explicit documentation of userspace's responsibilites for the exit and point to the PSCI specification to describe the actual PSCI call. Reviewed-by: Reiji Watanabe Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-8-oupton@google.com --- Documentation/virt/kvm/api.rst | 39 +++++++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/arm.c | 5 +++++ arch/arm64/kvm/psci.c | 29 +++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 2 ++ 5 files changed, 77 insertions(+) (limited to 'arch/arm64/include') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 46ca84600dca..d8d7859fc556 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6015,6 +6015,7 @@ should put the acknowledged interrupt vector into the 'epr' field. #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 #define KVM_SYSTEM_EVENT_WAKEUP 4 + #define KVM_SYSTEM_EVENT_SUSPEND 5 __u32 type; __u32 ndata; __u64 data[16]; @@ -6042,6 +6043,34 @@ Valid values for 'type' are: - KVM_SYSTEM_EVENT_WAKEUP -- the exiting vCPU is in a suspended state and KVM has recognized a wakeup event. 
Userspace may honor this event by marking the exiting vCPU as runnable, or deny it and call KVM_RUN again. + - KVM_SYSTEM_EVENT_SUSPEND -- the guest has requested a suspension of + the VM. + +For arm/arm64: +^^^^^^^^^^^^^^ + + KVM_SYSTEM_EVENT_SUSPEND exits are enabled with the + KVM_CAP_ARM_SYSTEM_SUSPEND VM capability. If a guest invokes the PSCI + SYSTEM_SUSPEND function, KVM will exit to userspace with this event + type. + + It is the sole responsibility of userspace to implement the PSCI + SYSTEM_SUSPEND call according to ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND". + KVM does not change the vCPU's state before exiting to userspace, so + the call parameters are left in-place in the vCPU registers. + + Userspace is _required_ to take action for such an exit. It must + either: + + - Honor the guest request to suspend the VM. Userspace can request + in-kernel emulation of suspension by setting the calling vCPU's + state to KVM_MP_STATE_SUSPENDED. Userspace must configure the vCPU's + state according to the parameters passed to the PSCI function when + the calling vCPU is resumed. See ARM DEN0022D.b 5.19.1 "Intended use" + for details on the function parameters. + + - Deny the guest request to suspend the VM. See ARM DEN0022D.b 5.19.2 + "Caller responsibilities" for possible return values. If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain architecture specific information for the system-level event. Only @@ -7767,6 +7796,16 @@ At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting this capability will disable PMU virtualization for that VM. Usermode should adjust CPUID leaf 0xA to reflect that the PMU is disabled. +8.36 KVM_CAP_ARM_SYSTEM_SUSPEND +------------------------------- + +:Capability: KVM_CAP_ARM_SYSTEM_SUSPEND +:Architectures: arm64 +:Type: vm + +When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of +type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request. + 9. 
Known KVM API problems ========================= diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 46027b9b80ca..d9df81949f76 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -137,6 +137,8 @@ struct kvm_arch { */ #define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 #define KVM_ARCH_FLAG_EL1_32BIT 4 + /* PSCI SYSTEM_SUSPEND enabled for the guest */ +#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 unsigned long flags; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index abd32a84ed7a..f8a89ae52710 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -97,6 +97,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, } mutex_unlock(&kvm->lock); break; + case KVM_CAP_ARM_SYSTEM_SUSPEND: + r = 0; + set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags); + break; default: r = -EINVAL; break; @@ -210,6 +214,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: + case KVM_CAP_ARM_SYSTEM_SUSPEND: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 2e6f060214a7..5de30e72ad40 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -195,6 +195,15 @@ static void kvm_psci_system_reset2(struct kvm_vcpu *vcpu) KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2); } +static void kvm_psci_system_suspend(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + memset(&run->system_event, 0, sizeof(vcpu->run->system_event)); + run->system_event.type = KVM_SYSTEM_EVENT_SUSPEND; + run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) { int i; @@ -300,6 +309,7 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) { unsigned long val = PSCI_RET_NOT_SUPPORTED; u32 psci_fn = smccc_get_function(vcpu); + struct kvm *kvm = vcpu->kvm; u32 arg; int ret = 1; @@ -331,6 +341,11 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) case ARM_SMCCC_VERSION_FUNC_ID: val = 0; break; + case PSCI_1_0_FN_SYSTEM_SUSPEND: + case PSCI_1_0_FN64_SYSTEM_SUSPEND: + if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags)) + val = 0; + break; case PSCI_1_1_FN_SYSTEM_RESET2: case PSCI_1_1_FN64_SYSTEM_RESET2: if (minor >= 1) @@ -338,6 +353,20 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) break; } break; + case PSCI_1_0_FN_SYSTEM_SUSPEND: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_1_0_FN64_SYSTEM_SUSPEND: + /* + * Return directly to userspace without changing the vCPU's + * registers. Userspace depends on reading the SMCCC parameters + * to implement SYSTEM_SUSPEND. 
+ */ + if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags)) { + kvm_psci_system_suspend(vcpu); + return 0; + } + break; case PSCI_1_1_FN_SYSTEM_RESET2: kvm_psci_narrow_to_32bit(vcpu); fallthrough; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7f72fb7b05f2..32c56384fd08 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -445,6 +445,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 #define KVM_SYSTEM_EVENT_WAKEUP 4 +#define KVM_SYSTEM_EVENT_SUSPEND 5 __u32 type; __u32 ndata; union { @@ -1154,6 +1155,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DISABLE_QUIRKS2 213 /* #define KVM_CAP_VM_TSC_CONTROL 214 */ #define KVM_CAP_SYSTEM_EVENT_DATA 215 +#define KVM_CAP_ARM_SYSTEM_SUSPEND 216 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From f1f0c0cfeaa7c10eb536d9919bf3902af0e17bce Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 28 Apr 2022 11:34:04 +0100 Subject: KVM: arm64: Don't BUG_ON() if emulated register table is unsorted To emulate a register access, KVM uses a table of registers sorted by register encoding to speed up queries using binary search. When Linux boots, KVM checks that the table is sorted and uses a BUG_ON() statement to let the user know if it's not. The unfortunate side effect is that an unsorted sysreg table brings down the whole kernel, not just KVM, even though the rest of the kernel can function just fine without KVM. To make matters worse, on machines which lack a serial console, the user is left pondering why the machine is taking so long to boot. Improve this situation by returning an error from kvm_arch_init() if the sysreg tables are not in the correct order. The machine is still very much usable for the user, with the exception of virtualization, who can now easily determine what went wrong. A minor typo has also been corrected in the check_sysreg_table() function. 
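Illustration (editor's addition, not part of this patch): the generic shape of the change, a freestanding sketch of an init-time table check that reports failure to its caller instead of BUG()ing, so the rest of the kernel keeps running. Types and names are illustrative, not the kernel's.

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

struct desc { unsigned int encoding; };

static int cmp_desc(const struct desc *a, const struct desc *b)
{
	if (a->encoding < b->encoding)
		return -1;
	return a->encoding > b->encoding ? 1 : 0;
}

static bool table_is_sorted(const struct desc *table, size_t n)
{
	size_t i;

	for (i = 1; i < n; i++)
		if (cmp_desc(&table[i - 1], &table[i]) >= 0)
			return false;
	return true;
}

/* Propagate an error from init instead of bringing the whole kernel down. */
static int subsystem_table_init(const struct desc *table, size_t n)
{
	return table_is_sorted(table, n) ? 0 : -EINVAL;
}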
Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220428103405.70884-2-alexandru.elisei@arm.com --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/arm.c | 8 ++++++-- arch/arm64/kvm/sys_regs.c | 33 +++++++++++++++++++-------------- 3 files changed, 26 insertions(+), 17 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 94a27a7520f4..39ca91cc1f01 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -686,7 +686,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); -void kvm_sys_reg_table_init(void); +int kvm_sys_reg_table_init(void); /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 523bc934fe2f..ab365358747a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1761,8 +1761,6 @@ static int init_subsystems(void) kvm_register_perf_callbacks(NULL); - kvm_sys_reg_table_init(); - out: if (err || !is_protected_kvm_enabled()) on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); @@ -2089,6 +2087,12 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + err = kvm_sys_reg_table_init(); + if (err) { + kvm_info("Error initializing system register tables"); + return err; + } + in_hyp_mode = is_kernel_in_hyp_mode(); if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7b45c040cc27..57302048afd0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2187,25 +2187,24 @@ static const struct sys_reg_desc cp15_64_regs[] = { { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, }; -static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, - bool is_32) +static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, + bool is_32) { unsigned int i; for (i = 0; i < n; i++) { if (!is_32 && table[i].reg && !table[i].reset) { - kvm_err("sys_reg table %p entry %d has lacks reset\n", - table, i); - return 1; + kvm_err("sys_reg table %p entry %d lacks reset\n", table, i); + return false; } if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) { kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); - return 1; + return false; } } - return 0; + return true; } int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu) @@ -2860,18 +2859,22 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } -void kvm_sys_reg_table_init(void) +int kvm_sys_reg_table_init(void) { + bool valid = true; unsigned int i; struct sys_reg_desc clidr; /* Make sure tables are unique and in order. 
*/ - BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false)); - BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true)); - BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true)); - BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true)); - BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true)); - BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false)); + valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false); + valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true); + valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true); + valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true); + valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true); + valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false); + + if (!valid) + return -EINVAL; /* We abuse the reset function to overwrite the table itself. */ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) @@ -2894,4 +2897,6 @@ void kvm_sys_reg_table_init(void) break; /* Clear all higher bits. */ cache_levels &= (1 << (i*3))-1; + + return 0; } -- cgit v1.2.3 From cae889302ebf5a9b22ca3580996118b8d20b3ae6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 14 May 2022 11:25:24 +0100 Subject: KVM: arm64: vgic-v3: List M1 Pro/Max as requiring the SEIS workaround Unsusprisingly, Apple M1 Pro/Max have the exact same defect as the original M1 and generate random SErrors in the host when a guest tickles the GICv3 CPU interface the wrong way. Add the part numbers for both the CPU types found in these two new implementations, and add them to the hall of shame. This also applies to the Ultra version, as it is composed of 2 Max SoCs. 
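Illustration (editor's addition, not part of this patch): a freestanding sketch of how an entry such as the new M1 Pro/Max part numbers is matched, the implementer and part-number fields are pulled out of MIDR_EL1 and compared against the quirk list. Field positions follow the MIDR_EL1 layout; the structure and helper are simplified stand-ins.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* MIDR_EL1: implementer in bits [31:24], part number in bits [15:4]. */
#define MIDR_IMPLEMENTER(m)	(((m) >> 24) & 0xff)
#define MIDR_PARTNUM(m)		(((m) >> 4) & 0xfff)

struct midr_entry { uint8_t implementer; uint16_t partnum; };

static bool midr_in_list(uint32_t midr, const struct midr_entry *list, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (MIDR_IMPLEMENTER(midr) == list[i].implementer &&
		    MIDR_PARTNUM(midr) == list[i].partnum)
			return true;
	return false;
}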
Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220514102524.3188730-1-maz@kernel.org --- arch/arm64/include/asm/cputype.h | 8 ++++++++ arch/arm64/kvm/vgic/vgic-v3.c | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ff8f4511df71..60647bdc0b09 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -118,6 +118,10 @@ #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 +#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024 +#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025 +#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 +#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -164,6 +168,10 @@ #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) +#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) +#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) +#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) +#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index b549af8b1dc2..826ff6f2a4e7 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -612,6 +612,10 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); static const struct midr_range broken_seis[] = { MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), {}, }; -- cgit v1.2.3 From 84d751a019a9792f5b4884e1d598b603c360ec22 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 10 May 2022 09:57:09 +0000 Subject: KVM: arm64: Pass pmu events to hyp via vcpu Instead of the host accessing hyp data directly, pass the pmu events of the current cpu to hyp via the vcpu. This adds 64 bits (in two fields) to the vcpu that need to be synced before every vcpu run in nvhe and protected modes. However, it isolates the hypervisor from the host, which allows us to use pmu in protected mode in a subsequent patch. No visible side effects in behavior intended. 
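Illustration (editor's addition, not part of this patch): a freestanding sketch of the hand-off described above, the host snapshots its per-CPU event masks into the vCPU just before entry, with interrupts disabled, so the hypervisor only ever reads vCPU-local state. Types and names are simplified stand-ins.

#include <stdint.h>

struct pmu_events { uint32_t events_host; uint32_t events_guest; };
struct vcpu_pmu   { struct pmu_events events; };
struct vcpu       { struct vcpu_pmu pmu; };

/* Stand-in for the host's per-CPU bookkeeping. */
static struct pmu_events this_cpu_pmu_events;

/*
 * Called on the guest-entry path with interrupts disabled, so the
 * snapshot cannot change between the copy and the actual entry.
 */
static void sync_pmu_events_to_vcpu(struct vcpu *vcpu)
{
	vcpu->pmu.events = this_cpu_pmu_events;
}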
Signed-off-by: Fuad Tabba Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220510095710.148178-4-tabba@google.com --- arch/arm64/include/asm/kvm_host.h | 7 +------ arch/arm64/kvm/arm.c | 15 +++++++++++++++ arch/arm64/kvm/hyp/nvhe/switch.c | 20 ++++++-------------- arch/arm64/kvm/pmu.c | 12 ++++-------- include/kvm/arm_pmu.h | 6 ++++++ 5 files changed, 32 insertions(+), 28 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 94a27a7520f4..efca5a63bdaf 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -254,14 +254,8 @@ struct kvm_cpu_context { struct kvm_vcpu *__hyp_running_vcpu; }; -struct kvm_pmu_events { - u32 events_host; - u32 events_guest; -}; - struct kvm_host_data { struct kvm_cpu_context host_ctxt; - struct kvm_pmu_events pmu_events; }; struct kvm_host_psci_config { @@ -796,6 +790,7 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void kvm_clr_pmu_events(u32 clr); +struct kvm_pmu_events *kvm_get_pmu_events(void); void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); #else diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 523bc934fe2f..aa1b15e9d5d9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -751,6 +751,19 @@ static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu) return ret; } +/* + * Updates the vcpu's view of the pmu events for this cpu. + * Must be called before every vcpu run after disabling interrupts, to ensure + * that an interrupt cannot fire and update the structure. + */ +static void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) +{ + if (has_vhe() || !kvm_vcpu_has_pmu(vcpu)) + return; + + vcpu->arch.pmu.events = *kvm_get_pmu_events(); +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -815,6 +828,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_vgic_flush_hwstate(vcpu); + kvm_pmu_update_vcpu_events(vcpu); + /* * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. 
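The hyp-side switch in the next hunks works by writing the host and guest event masks to the PMU counter-enable clear/set registers on guest entry, and the mirror image on exit. A hedged sketch of the entry half, condensed from the __pmu_switch_to_guest() logic below (the standalone function shown here is for illustration only):

static bool pmu_switch_to_guest(struct kvm_pmu_events *pmu)
{
	/* Stop the host-only counters, start the guest-only ones. */
	if (pmu->events_host)
		write_sysreg(pmu->events_host, pmcntenclr_el0);
	if (pmu->events_guest)
		write_sysreg(pmu->events_guest, pmcntenset_el0);
	return pmu->events_host || pmu->events_guest;
}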
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6410d21d8695..ff7b29fb9787 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -123,13 +123,9 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) /** * Disable host events, enable guest events */ -static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu) { - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; + struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events; if (pmu->events_host) write_sysreg(pmu->events_host, pmcntenclr_el0); @@ -143,13 +139,9 @@ static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) /** * Disable guest events, enable host events */ -static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +static void __pmu_switch_to_host(struct kvm_vcpu *vcpu) { - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; + struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events; if (pmu->events_guest) write_sysreg(pmu->events_guest, pmcntenclr_el0); @@ -274,7 +266,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; - pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); + pmu_switch_needed = __pmu_switch_to_guest(vcpu); __sysreg_save_state_nvhe(host_ctxt); /* @@ -336,7 +328,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __debug_restore_host_buffers_nvhe(vcpu); if (pmu_switch_needed) - __pmu_switch_to_host(host_ctxt); + __pmu_switch_to_host(vcpu); /* Returning to host will clear PSR.I, remask PMR if needed */ if (system_uses_irq_prio_masking()) diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 4bd38ff34221..7887133d15f0 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -5,7 +5,8 @@ */ #include #include -#include + +static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events); /* * Given the perf event attributes and system type, determine @@ -25,14 +26,9 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr) return (attr->exclude_host != attr->exclude_guest); } -static struct kvm_pmu_events *kvm_get_pmu_events(void) +struct kvm_pmu_events *kvm_get_pmu_events(void) { - struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data); - - if (!ctx) - return NULL; - - return &ctx->pmu_events; + return this_cpu_ptr(&kvm_pmu_events); } /* diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index eaa8290b116f..35a0903cae32 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -20,8 +20,14 @@ struct kvm_pmc { struct perf_event *perf_event; }; +struct kvm_pmu_events { + u32 events_host; + u32 events_guest; +}; + struct kvm_pmu { struct irq_work overflow_work; + struct kvm_pmu_events events; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS); int irq_num; -- cgit v1.2.3 From 2cde51f1e10f260076899bf41add74ed4a6de034 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 15 May 2022 11:36:24 +0100 Subject: KVM: arm64: Hide KVM_REG_ARM_*_BMAP_BIT_COUNT from userspace These constants will change over time, and userspace has no business knowing about them. Hide them behind __KERNEL__. 
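The *_BMAP_BIT_COUNT enumerators exist only so the kernel can size the corresponding feature bitmaps; userspace never needs them. As a hedged illustration of that in-kernel use (the macro name is modelled on the firmware-register handling and is shown here purely as an example):

/* Kernel-only: mask covering every feature bit currently defined. */
#define KVM_ARM_SMCCC_STD_FEATURES \
	GENMASK_ULL(KVM_REG_ARM_STD_BMAP_BIT_COUNT - 1, 0)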
Signed-off-by: Marc Zyngier --- arch/arm64/include/uapi/asm/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index e523bb6eac67..d4722b08f0d4 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -342,14 +342,18 @@ struct kvm_arm_copy_mte_tags { enum { KVM_REG_ARM_STD_BIT_TRNG_V1_0 = 0, +#ifdef __KERNEL__ KVM_REG_ARM_STD_BMAP_BIT_COUNT, +#endif }; #define KVM_REG_ARM_STD_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(1) enum { KVM_REG_ARM_STD_HYP_BIT_PV_TIME = 0, +#ifdef __KERNEL__ KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT, +#endif }; #define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2) @@ -357,7 +361,9 @@ enum { enum { KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT = 0, KVM_REG_ARM_VENDOR_HYP_BIT_PTP = 1, +#ifdef __KERNEL__ KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT, +#endif }; /* Device Control API: ARM VGIC */ -- cgit v1.2.3 From 20492a62b99bd4367b79a76ca288d018f11980db Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 May 2022 13:02:24 +0100 Subject: KVM: arm64: pmu: Restore compilation when HW_PERF_EVENTS isn't selected Moving kvm_pmu_events into the vcpu (and referring to it) broke the somewhat unusual case where the kernel has no support for a PMU at all. In order to solve this, move things around a bit so that we can easily avoid referring to the pmu structure outside of PMU-aware code. As a bonus, pmu.c isn't compiled in when HW_PERF_EVENTS isn't selected. Reported-by: kernel test robot Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/202205161814.KQHpOzsJ-lkp@intel.com --- arch/arm64/include/asm/kvm_host.h | 6 ------ arch/arm64/kvm/Makefile | 4 ++-- arch/arm64/kvm/arm.c | 13 ------------- arch/arm64/kvm/hyp/nvhe/switch.c | 5 +++++ include/kvm/arm_pmu.h | 24 ++++++++++++++++++++++++ 5 files changed, 31 insertions(+), 21 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index efca5a63bdaf..ef774b8955bc 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -789,10 +789,6 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void kvm_clr_pmu_events(u32 clr); - -struct kvm_pmu_events *kvm_get_pmu_events(void); -void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); -void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); #else static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} @@ -824,8 +820,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_has_mte(kvm) \ (system_supports_mte() && \ test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) -#define kvm_vcpu_has_pmu(vcpu) \ - (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 261644b1a6bb..aa127ae9f675 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ - vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \ + vgic-sys-reg-v3.o fpsimd.o pkvm.o \ arch_timer.o trng.o vmid.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ @@
-22,7 +22,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/vgic-debug.o -kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o +kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o always-y := hyp_constants.h hyp-constants.s diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index aa1b15e9d5d9..b3821c430ec9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -751,19 +751,6 @@ static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu) return ret; } -/* - * Updates the vcpu's view of the pmu events for this cpu. - * Must be called before every vcpu run after disabling interrupts, to ensure - * that an interrupt cannot fire and update the structure. - */ -static void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) -{ - if (has_vhe() || !kvm_vcpu_has_pmu(vcpu)) - return; - - vcpu->arch.pmu.events = *kvm_get_pmu_events(); -} - /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index ff7b29fb9787..c7cd2036a75e 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -123,6 +123,7 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) /** * Disable host events, enable guest events */ +#ifdef CONFIG_HW_PERF_EVENTS static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu) { struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events; @@ -149,6 +150,10 @@ static void __pmu_switch_to_host(struct kvm_vcpu *vcpu) if (pmu->events_host) write_sysreg(pmu->events_host, pmcntenset_el0); } +#else +#define __pmu_switch_to_guest(v) ({ false; }) +#define __pmu_switch_to_host(v) do {} while (0) +#endif /** * Handler for protected VM MSR, MRS or System instruction execution in AArch64. diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 35a0903cae32..c0b868ce6a8f 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -72,6 +72,25 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); + +struct kvm_pmu_events *kvm_get_pmu_events(void); +void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); +void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); + +#define kvm_vcpu_has_pmu(vcpu) \ + (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) + +/* + * Updates the vcpu's view of the pmu events for this cpu. + * Must be called before every vcpu run after disabling interrupts, to ensure + * that an interrupt cannot fire and update the structure. + */ +#define kvm_pmu_update_vcpu_events(vcpu) \ + do { \ + if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \ + vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ + } while (0) + #else struct kvm_pmu { }; @@ -133,6 +152,11 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) return 0; } +#define kvm_vcpu_has_pmu(vcpu) ({ false; }) +static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {} +static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} +static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} + #endif #endif -- cgit v1.2.3
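Taken together, the switch.c and arm_pmu.h hunks above follow the usual kernel stub pattern: real helpers when CONFIG_HW_PERF_EVENTS is selected, no-op macros or empty static inlines otherwise, so call sites such as the nVHE switch code stay free of #ifdefs. A minimal sketch of that pattern under an illustrative config symbol and function names (not part of the patch):

#ifdef CONFIG_EXAMPLE_PMU
bool example_pmu_switch_to_guest(struct kvm_vcpu *vcpu);	/* real helper, built only with PMU support */
void example_pmu_switch_to_host(struct kvm_vcpu *vcpu);
#else
/* Stubs keep callers compiling; the compiler discards the dead branches. */
#define example_pmu_switch_to_guest(v)	({ false; })
static inline void example_pmu_switch_to_host(struct kvm_vcpu *vcpu) {}
#endif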