diff options
Diffstat (limited to 'arch/arm64/include/asm')
-rw-r--r-- | arch/arm64/include/asm/cpufeature.h | 5 | ||||
-rw-r--r-- | arch/arm64/include/asm/esr.h | 15 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_arm.h | 63 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_emulate.h | 34 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 138 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_nested.h | 56 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_pgtable.h | 80 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_pkvm.h | 5 | ||||
-rw-r--r-- | arch/arm64/include/asm/pgtable-prot.h | 2 | ||||
-rw-r--r-- | arch/arm64/include/asm/sysreg.h | 25 | ||||
-rw-r--r-- | arch/arm64/include/asm/tlb.h | 15 | ||||
-rw-r--r-- | arch/arm64/include/asm/tlbflush.h | 100 | ||||
-rw-r--r-- | arch/arm64/include/asm/vncr_mapping.h | 103 |
13 files changed, 478 insertions, 163 deletions
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f6d416fe49b0..acf109581ac0 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -819,6 +819,11 @@ static inline bool system_supports_tlb_range(void) return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE); } +static inline bool system_supports_lpa2(void) +{ + return cpus_have_final_cap(ARM64_HAS_LPA2); +} + int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ae35939f395b..353fe08546cf 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -392,6 +392,21 @@ static inline bool esr_is_data_abort(unsigned long esr) return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; } +static inline bool esr_fsc_is_translation_fault(unsigned long esr) +{ + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT; +} + +static inline bool esr_fsc_is_permission_fault(unsigned long esr) +{ + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM; +} + +static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) +{ + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS; +} + const char *esr_get_class_string(unsigned long esr); #endif /* __ASSEMBLY */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index b85f46a73e21..3c6f8ba1e479 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -108,6 +108,7 @@ #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) /* TCR_EL2 Registers bits */ +#define TCR_EL2_DS (1UL << 32) #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 @@ -122,6 +123,7 @@ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) /* VTCR_EL2 Registers bits */ +#define VTCR_EL2_DS TCR_EL2_DS #define VTCR_EL2_RES1 (1U << 31) #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) @@ -344,36 +346,47 @@ * Once we get to a point where the two describe the same thing, we'll * merge the definitions. One day. */ -#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51)) +#define __HFGRTR_EL2_RES0 HFGxTR_EL2_RES0 #define __HFGRTR_EL2_MASK GENMASK(49, 0) -#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) +#define __HFGRTR_EL2_nMASK ~(__HFGRTR_EL2_RES0 | __HFGRTR_EL2_MASK) -#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \ - BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ - GENMASK(26, 25) | BIT(21) | BIT(18) | \ +/* + * The HFGWTR bits are a subset of HFGRTR bits. To ensure we don't miss any + * future additions, define __HFGWTR* macros relative to __HFGRTR* ones. + */ +#define __HFGRTR_ONLY_MASK (BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ + GENMASK(26, 25) | BIT(21) | BIT(18) | \ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2)) -#define __HFGWTR_EL2_MASK GENMASK(49, 0) -#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) - -#define __HFGITR_EL2_RES0 GENMASK(63, 57) -#define __HFGITR_EL2_MASK GENMASK(54, 0) -#define __HFGITR_EL2_nMASK GENMASK(56, 55) - -#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \ - GENMASK(21, 20) | BIT(8)) -#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK -#define __HDFGRTR_EL2_nMASK GENMASK(62, 59) - -#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \ - BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \ - BIT(22) | BIT(9) | BIT(6)) -#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK -#define __HDFGWTR_EL2_nMASK GENMASK(62, 60) +#define __HFGWTR_EL2_RES0 (__HFGRTR_EL2_RES0 | __HFGRTR_ONLY_MASK) +#define __HFGWTR_EL2_MASK (__HFGRTR_EL2_MASK & ~__HFGRTR_ONLY_MASK) +#define __HFGWTR_EL2_nMASK ~(__HFGWTR_EL2_RES0 | __HFGWTR_EL2_MASK) + +#define __HFGITR_EL2_RES0 HFGITR_EL2_RES0 +#define __HFGITR_EL2_MASK (BIT(62) | BIT(60) | GENMASK(54, 0)) +#define __HFGITR_EL2_nMASK ~(__HFGITR_EL2_RES0 | __HFGITR_EL2_MASK) + +#define __HDFGRTR_EL2_RES0 HDFGRTR_EL2_RES0 +#define __HDFGRTR_EL2_MASK (BIT(63) | GENMASK(58, 50) | GENMASK(48, 43) | \ + GENMASK(41, 40) | GENMASK(37, 22) | \ + GENMASK(19, 9) | GENMASK(7, 0)) +#define __HDFGRTR_EL2_nMASK ~(__HDFGRTR_EL2_RES0 | __HDFGRTR_EL2_MASK) + +#define __HDFGWTR_EL2_RES0 HDFGWTR_EL2_RES0 +#define __HDFGWTR_EL2_MASK (GENMASK(57, 52) | GENMASK(50, 48) | \ + GENMASK(46, 44) | GENMASK(42, 41) | \ + GENMASK(37, 35) | GENMASK(33, 31) | \ + GENMASK(29, 23) | GENMASK(21, 10) | \ + GENMASK(8, 7) | GENMASK(5, 0)) +#define __HDFGWTR_EL2_nMASK ~(__HDFGWTR_EL2_RES0 | __HDFGWTR_EL2_MASK) + +#define __HAFGRTR_EL2_RES0 HAFGRTR_EL2_RES0 +#define __HAFGRTR_EL2_MASK (GENMASK(49, 17) | GENMASK(4, 0)) +#define __HAFGRTR_EL2_nMASK ~(__HAFGRTR_EL2_RES0 | __HAFGRTR_EL2_MASK) /* Similar definitions for HCRX_EL2 */ -#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12)) -#define __HCRX_EL2_MASK (0) -#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0)) +#define __HCRX_EL2_RES0 HCRX_EL2_RES0 +#define __HCRX_EL2_MASK (BIT(6)) +#define __HCRX_EL2_nMASK ~(__HCRX_EL2_RES0 | __HCRX_EL2_MASK) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 78a550537b67..b804fe832184 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -17,6 +17,7 @@ #include <asm/esr.h> #include <asm/kvm_arm.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_nested.h> #include <asm/ptrace.h> #include <asm/cputype.h> #include <asm/virt.h> @@ -54,11 +55,6 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); -static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature) -{ - return test_bit(feature, vcpu->kvm->arch.vcpu_features); -} - #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -248,7 +244,7 @@ static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt) static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) { - return __is_hyp_ctxt(&vcpu->arch.ctxt); + return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt); } /* @@ -404,14 +400,25 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; } -static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) +static inline +bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; + return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu)); } -static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu) +static inline +bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL; + return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu)); +} + +static inline +u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu) +{ + unsigned long esr = kvm_vcpu_get_esr(vcpu); + + BUG_ON(!esr_fsc_is_permission_fault(esr)); + return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL)); } static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) @@ -454,12 +461,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) * first), then a permission fault to allow the flags * to be set. */ - switch (kvm_vcpu_trap_get_fault_type(vcpu)) { - case ESR_ELx_FSC_PERM: - return true; - default: - return false; - } + return kvm_vcpu_trap_is_permission_fault(vcpu); } if (kvm_vcpu_trap_is_iabt(vcpu)) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 567bbc417e4b..21c57b812569 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -27,6 +27,7 @@ #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> +#include <asm/vncr_mapping.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -306,6 +307,7 @@ struct kvm_arch { * Atomic access to multiple idregs are guarded by kvm_arch.config_lock. */ #define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id)) +#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask) #define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)]) #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; @@ -324,33 +326,33 @@ struct kvm_vcpu_fault_info { u64 disr_el1; /* Deferred [SError] Status Register */ }; +/* + * VNCR() just places the VNCR_capable registers in the enum after + * __VNCR_START__, and the value (after correction) to be an 8-byte offset + * from the VNCR base. As we don't require the enum to be otherwise ordered, + * we need the terrible hack below to ensure that we correctly size the + * sys_regs array, no matter what. + * + * The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful + * treasure trove of bit hacks: + * https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + */ +#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y)))) +#define VNCR(r) \ + __before_##r, \ + r = __VNCR_START__ + ((VNCR_ ## r) / 8), \ + __after_##r = __MAX__(__before_##r - 1, r) + enum vcpu_sysreg { __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ CLIDR_EL1, /* Cache Level ID Register */ CSSELR_EL1, /* Cache Size Selection Register */ - SCTLR_EL1, /* System Control Register */ - ACTLR_EL1, /* Auxiliary Control Register */ - CPACR_EL1, /* Coprocessor Access Control */ - ZCR_EL1, /* SVE Control */ - TTBR0_EL1, /* Translation Table Base Register 0 */ - TTBR1_EL1, /* Translation Table Base Register 1 */ - TCR_EL1, /* Translation Control Register */ - TCR2_EL1, /* Extended Translation Control Register */ - ESR_EL1, /* Exception Syndrome Register */ - AFSR0_EL1, /* Auxiliary Fault Status Register 0 */ - AFSR1_EL1, /* Auxiliary Fault Status Register 1 */ - FAR_EL1, /* Fault Address Register */ - MAIR_EL1, /* Memory Attribute Indirection Register */ - VBAR_EL1, /* Vector Base Address Register */ - CONTEXTIDR_EL1, /* Context ID Register */ TPIDR_EL0, /* Thread ID, User R/W */ TPIDRRO_EL0, /* Thread ID, User R/O */ TPIDR_EL1, /* Thread ID, Privileged */ - AMAIR_EL1, /* Aux Memory Attribute Indirection Register */ CNTKCTL_EL1, /* Timer Control Register (EL1) */ PAR_EL1, /* Physical Address Register */ - MDSCR_EL1, /* Monitor Debug System Control Register */ MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ OSLSR_EL1, /* OS Lock Status Register */ DISR_EL1, /* Deferred Interrupt Status Register */ @@ -381,26 +383,11 @@ enum vcpu_sysreg { APGAKEYLO_EL1, APGAKEYHI_EL1, - ELR_EL1, - SP_EL1, - SPSR_EL1, - - CNTVOFF_EL2, - CNTV_CVAL_EL0, - CNTV_CTL_EL0, - CNTP_CVAL_EL0, - CNTP_CTL_EL0, - /* Memory Tagging Extension registers */ RGSR_EL1, /* Random Allocation Tag Seed Register */ GCR_EL1, /* Tag Control Register */ - TFSR_EL1, /* Tag Fault Status Register (EL1) */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ - /* Permission Indirection Extension registers */ - PIR_EL1, /* Permission Indirection Register 1 (EL1) */ - PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */ - /* 32bit specific registers. */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -408,21 +395,14 @@ enum vcpu_sysreg { DBGVCR32_EL2, /* Debug Vector Catch Register */ /* EL2 registers */ - VPIDR_EL2, /* Virtualization Processor ID Register */ - VMPIDR_EL2, /* Virtualization Multiprocessor ID Register */ SCTLR_EL2, /* System Control Register (EL2) */ ACTLR_EL2, /* Auxiliary Control Register (EL2) */ - HCR_EL2, /* Hypervisor Configuration Register */ MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ - HSTR_EL2, /* Hypervisor System Trap Register */ HACR_EL2, /* Hypervisor Auxiliary Control Register */ - HCRX_EL2, /* Extended Hypervisor Configuration Register */ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ TCR_EL2, /* Translation Control Register (EL2) */ - VTTBR_EL2, /* Virtualization Translation Table Base Register */ - VTCR_EL2, /* Virtualization Translation Control Register */ SPSR_EL2, /* EL2 saved program status register */ ELR_EL2, /* EL2 exception link register */ AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */ @@ -435,19 +415,62 @@ enum vcpu_sysreg { VBAR_EL2, /* Vector Base Address Register (EL2) */ RVBAR_EL2, /* Reset Vector Base Address Register */ CONTEXTIDR_EL2, /* Context ID Register (EL2) */ - TPIDR_EL2, /* EL2 Software Thread ID Register */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ SP_EL2, /* EL2 Stack Pointer */ - HFGRTR_EL2, - HFGWTR_EL2, - HFGITR_EL2, - HDFGRTR_EL2, - HDFGWTR_EL2, CNTHP_CTL_EL2, CNTHP_CVAL_EL2, CNTHV_CTL_EL2, CNTHV_CVAL_EL2, + __VNCR_START__, /* Any VNCR-capable reg goes after this point */ + + VNCR(SCTLR_EL1),/* System Control Register */ + VNCR(ACTLR_EL1),/* Auxiliary Control Register */ + VNCR(CPACR_EL1),/* Coprocessor Access Control */ + VNCR(ZCR_EL1), /* SVE Control */ + VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */ + VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */ + VNCR(TCR_EL1), /* Translation Control Register */ + VNCR(TCR2_EL1), /* Extended Translation Control Register */ + VNCR(ESR_EL1), /* Exception Syndrome Register */ + VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */ + VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */ + VNCR(FAR_EL1), /* Fault Address Register */ + VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */ + VNCR(VBAR_EL1), /* Vector Base Address Register */ + VNCR(CONTEXTIDR_EL1), /* Context ID Register */ + VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */ + VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */ + VNCR(ELR_EL1), + VNCR(SP_EL1), + VNCR(SPSR_EL1), + VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */ + VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */ + VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */ + VNCR(HCR_EL2), /* Hypervisor Configuration Register */ + VNCR(HSTR_EL2), /* Hypervisor System Trap Register */ + VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */ + VNCR(VTCR_EL2), /* Virtualization Translation Control Register */ + VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */ + VNCR(HCRX_EL2), /* Extended Hypervisor Configuration Register */ + + /* Permission Indirection Extension registers */ + VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */ + VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */ + + VNCR(HFGRTR_EL2), + VNCR(HFGWTR_EL2), + VNCR(HFGITR_EL2), + VNCR(HDFGRTR_EL2), + VNCR(HDFGWTR_EL2), + VNCR(HAFGRTR_EL2), + + VNCR(CNTVOFF_EL2), + VNCR(CNTV_CVAL_EL0), + VNCR(CNTV_CTL_EL0), + VNCR(CNTP_CVAL_EL0), + VNCR(CNTP_CTL_EL0), + NR_SYS_REGS /* Nothing after this line! */ }; @@ -464,6 +487,9 @@ struct kvm_cpu_context { u64 sys_regs[NR_SYS_REGS]; struct kvm_vcpu *__hyp_running_vcpu; + + /* This pointer has to be 4kB aligned. */ + u64 *vncr_array; }; struct kvm_host_data { @@ -826,8 +852,19 @@ struct kvm_vcpu_arch { * accessed by a running VCPU. For example, for userspace access or * for system registers that are never context switched, but only * emulated. + * + * Don't bother with VNCR-based accesses in the nVHE code, it has no + * business dealing with NV. */ -#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)]) +static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) +{ +#if !defined (__KVM_NVHE_HYPERVISOR__) + if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && + r >= __VNCR_START__ && ctxt->vncr_array)) + return &ctxt->vncr_array[r - __VNCR_START__]; +#endif + return (u64 *)&ctxt->sys_regs[r]; +} #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) @@ -871,6 +908,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; + case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break; case PAR_EL1: *val = read_sysreg_par(); break; case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; @@ -915,6 +953,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; @@ -1175,6 +1214,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_vm_has_ran_once(kvm) \ (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags)) +static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) +{ + return test_bit(feature, ka->vcpu_features); +} + +#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 6cec8e9c6c91..4882905357f4 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -2,8 +2,9 @@ #ifndef __ARM64_KVM_NESTED_H #define __ARM64_KVM_NESTED_H -#include <asm/kvm_emulate.h> +#include <linux/bitfield.h> #include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) { @@ -12,12 +13,55 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2)); } -extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); +/* Translation helpers from non-VHE EL2 to EL1 */ +static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2) +{ + return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT; +} + +static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr) +{ + return TCR_EPD1_MASK | /* disable TTBR1_EL1 */ + ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) | + tcr_el2_ps_to_tcr_el1_ips(tcr) | + (tcr & TCR_EL2_TG0_MASK) | + (tcr & TCR_EL2_ORGN0_MASK) | + (tcr & TCR_EL2_IRGN0_MASK) | + (tcr & TCR_EL2_T0SZ_MASK); +} + +static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2) +{ + u64 cpacr_el1 = 0; + + if (cptr_el2 & CPTR_EL2_TTA) + cpacr_el1 |= CPACR_ELx_TTA; + if (!(cptr_el2 & CPTR_EL2_TFP)) + cpacr_el1 |= CPACR_ELx_FPEN; + if (!(cptr_el2 & CPTR_EL2_TZ)) + cpacr_el1 |= CPACR_ELx_ZEN; -struct sys_reg_params; -struct sys_reg_desc; + return cpacr_el1; +} + +static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val) +{ + /* Only preserve the minimal set of bits we support */ + val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA | + SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE); + val |= SCTLR_EL1_RES1; + + return val; +} + +static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) +{ + /* Clear the ASID field */ + return ttbr0 & ~GENMASK_ULL(63, 48); +} + +extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); -void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, - const struct sys_reg_desc *r); +int kvm_init_nv_sysregs(struct kvm *kvm); #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..cfdf40f734b1 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -11,7 +11,8 @@ #include <linux/kvm_host.h> #include <linux/types.h> -#define KVM_PGTABLE_MAX_LEVELS 4U +#define KVM_PGTABLE_FIRST_LEVEL -1 +#define KVM_PGTABLE_LAST_LEVEL 3 /* * The largest supported block sizes for KVM (no 52-bit PA support): @@ -20,17 +21,29 @@ * - 64K (level 2): 512MB */ #ifdef CONFIG_ARM64_4K_PAGES -#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1U +#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1 #else -#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U +#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2 #endif +#define kvm_lpa2_is_enabled() system_supports_lpa2() + +static inline u64 kvm_get_parange_max(void) +{ + if (kvm_lpa2_is_enabled() || + (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16)) + return ID_AA64MMFR0_EL1_PARANGE_52; + else + return ID_AA64MMFR0_EL1_PARANGE_48; +} + static inline u64 kvm_get_parange(u64 mmfr0) { + u64 parange_max = kvm_get_parange_max(); u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); - if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX) - parange = ID_AA64MMFR0_EL1_PARANGE_MAX; + if (parange > parange_max) + parange = parange_max; return parange; } @@ -41,6 +54,8 @@ typedef u64 kvm_pte_t; #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) +#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT) +#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8) #define KVM_PHYS_INVALID (-1ULL) @@ -51,21 +66,34 @@ static inline bool kvm_pte_valid(kvm_pte_t pte) static inline u64 kvm_pte_to_phys(kvm_pte_t pte) { - u64 pa = pte & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) - pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + u64 pa; + + if (kvm_lpa2_is_enabled()) { + pa = pte & KVM_PTE_ADDR_MASK_LPA2; + pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50; + } else { + pa = pte & KVM_PTE_ADDR_MASK; + if (PAGE_SHIFT == 16) + pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + } return pa; } static inline kvm_pte_t kvm_phys_to_pte(u64 pa) { - kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) { - pa &= GENMASK(51, 48); - pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + kvm_pte_t pte; + + if (kvm_lpa2_is_enabled()) { + pte = pa & KVM_PTE_ADDR_MASK_LPA2; + pa &= GENMASK(51, 50); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50); + } else { + pte = pa & KVM_PTE_ADDR_MASK; + if (PAGE_SHIFT == 16) { + pa &= GENMASK(51, 48); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + } } return pte; @@ -76,28 +104,28 @@ static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte) return __phys_to_pfn(kvm_pte_to_phys(pte)); } -static inline u64 kvm_granule_shift(u32 level) +static inline u64 kvm_granule_shift(s8 level) { - /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ + /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */ return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); } -static inline u64 kvm_granule_size(u32 level) +static inline u64 kvm_granule_size(s8 level) { return BIT(kvm_granule_shift(level)); } -static inline bool kvm_level_supports_block_mapping(u32 level) +static inline bool kvm_level_supports_block_mapping(s8 level) { return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL; } static inline u32 kvm_supported_block_sizes(void) { - u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL; + s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL; u32 r = 0; - for (; level < KVM_PGTABLE_MAX_LEVELS; level++) + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) r |= BIT(kvm_granule_shift(level)); return r; @@ -142,7 +170,7 @@ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); void* (*zalloc_pages_exact)(size_t size); void (*free_pages_exact)(void *addr, size_t size); - void (*free_unlinked_table)(void *addr, u32 level); + void (*free_unlinked_table)(void *addr, s8 level); void (*get_page)(void *addr); void (*put_page)(void *addr); int (*page_count)(void *addr); @@ -238,7 +266,7 @@ struct kvm_pgtable_visit_ctx { u64 start; u64 addr; u64 end; - u32 level; + s8 level; enum kvm_pgtable_walk_flags flags; }; @@ -341,7 +369,7 @@ static inline bool kvm_pgtable_walk_lock_held(void) */ struct kvm_pgtable { u32 ia_bits; - u32 start_level; + s8 start_level; kvm_pteref_t pgd; struct kvm_pgtable_mm_ops *mm_ops; @@ -475,7 +503,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); * The page-table is assumed to be unreachable by any hardware walkers prior to * freeing and therefore no TLB invalidation is performed. */ -void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level); +void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); /** * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure. @@ -499,7 +527,7 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p * an ERR_PTR(error) on failure. */ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, - u64 phys, u32 level, + u64 phys, s8 level, enum kvm_pgtable_prot prot, void *mc, bool force_pte); @@ -725,7 +753,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, * Return: 0 on success, negative error code on failure. */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, - kvm_pte_t *ptep, u32 *level); + kvm_pte_t *ptep, s8 *level); /** * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index e46250a02017..ad9cfb5c1ff4 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -56,10 +56,11 @@ static inline unsigned long hyp_vm_table_pages(void) static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { - unsigned long total = 0, i; + unsigned long total = 0; + int i; /* Provision the worst case scenario */ - for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) { + for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) { nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE); total += nr_pages; } diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index e9624f6326dd..483dbfa39c4c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -71,6 +71,8 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) +#define lpa2_is_enabled() false + /* * If we have userspace only BTI we don't want to mark kernel pages * guarded even if the system does support BTI. diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5e65f51c10d2..c3b19b376c86 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -645,6 +645,7 @@ #define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3) #define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0) #define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1) +#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2) #define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0) #define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1) #define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4) @@ -781,10 +782,16 @@ #define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6) /* Misc instructions */ +#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4) +#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5) +#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6) +#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0) + #define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4) #define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5) #define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4) #define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) +#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6) #define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) /* Common SCTLR_ELx flags. */ @@ -871,10 +878,12 @@ /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf #define ARM64_MIN_PARANGE_BITS 32 @@ -882,6 +891,7 @@ #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7 #ifdef CONFIG_ARM64_PA_BITS_52 @@ -892,11 +902,13 @@ #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT #elif defined(CONFIG_ARM64_16K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT @@ -1039,6 +1051,19 @@ #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) +/* + * Permission Overlay Extension (POE) permission encodings. + */ +#define POE_NONE UL(0x0) +#define POE_R UL(0x1) +#define POE_X UL(0x2) +#define POE_RX UL(0x3) +#define POE_W UL(0x4) +#define POE_RW UL(0x5) +#define POE_XW UL(0x6) +#define POE_RXW UL(0x7) +#define POE_MASK UL(0xf) + #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. */ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 846c563689a8..0150deb332af 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -22,15 +22,15 @@ static void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> /* - * get the tlbi levels in arm64. Default value is 0 if more than one - * of cleared_* is set or neither is set. - * Arm64 doesn't support p4ds now. + * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than + * one of cleared_* is set or neither is set - this elides the level hinting to + * the hardware. */ static inline int tlb_get_level(struct mmu_gather *tlb) { /* The TTL field is only valid for the leaf entry. */ if (tlb->freed_tables) - return 0; + return TLBI_TTL_UNKNOWN; if (tlb->cleared_ptes && !(tlb->cleared_pmds || tlb->cleared_puds || @@ -47,7 +47,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb) tlb->cleared_p4ds)) return 1; - return 0; + if (tlb->cleared_p4ds && !(tlb->cleared_ptes || + tlb->cleared_pmds || + tlb->cleared_puds)) + return 0; + + return TLBI_TTL_UNKNOWN; } static inline void tlb_flush(struct mmu_gather *tlb) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bb2c2833a987..1deb5d789c2e 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -94,19 +94,22 @@ static inline unsigned long get_trans_granule(void) * When ARMv8.4-TTL exists, TLBI operations take an additional hint for * the level at which the invalidation must take place. If the level is * wrong, no invalidation may take place. In the case where the level - * cannot be easily determined, a 0 value for the level parameter will - * perform a non-hinted invalidation. + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform + * a non-hinted invalidation. Any provided level outside the hint range + * will also cause fall-back to non-hinted invalidation. * * For Stage-2 invalidation, use the level values provided to that effect * in asm/stage2_pgtable.h. */ #define TLBI_TTL_MASK GENMASK_ULL(47, 44) +#define TLBI_TTL_UNKNOWN INT_MAX + #define __tlbi_level(op, addr, level) do { \ u64 arg = addr; \ \ if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \ - level) { \ + level >= 0 && level <= 3) { \ u64 ttl = level & 3; \ ttl |= get_trans_granule() << 2; \ arg &= ~TLBI_TTL_MASK; \ @@ -122,28 +125,34 @@ static inline unsigned long get_trans_granule(void) } while (0) /* - * This macro creates a properly formatted VA operand for the TLB RANGE. - * The value bit assignments are: + * This macro creates a properly formatted VA operand for the TLB RANGE. The + * value bit assignments are: * * +----------+------+-------+-------+-------+----------------------+ * | ASID | TG | SCALE | NUM | TTL | BADDR | * +-----------------+-------+-------+-------+----------------------+ * |63 48|47 46|45 44|43 39|38 37|36 0| * - * The address range is determined by below formula: - * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) + * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) * + * 2^(5*SCALE + 1) * PAGESIZE) + * + * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR + * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI + * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA, + * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (addr) >> PAGE_SHIFT; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= (unsigned long)(ttl) << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = (baddr); \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta &= GENMASK_ULL(36, 0); \ + __ta |= __ttl << 37; \ + __ta |= (unsigned long)(num) << 39; \ + __ta |= (unsigned long)(scale) << 44; \ + __ta |= get_trans_granule() << 46; \ + __ta |= (unsigned long)(asid) << 48; \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -216,12 +225,16 @@ static inline unsigned long get_trans_granule(void) * CPUs, ensuring that any walk-cache entries associated with the * translation are also invalidated. * - * __flush_tlb_range(vma, start, end, stride, last_level) + * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level) * Invalidate the virtual-address range '[start, end)' on all * CPUs for the user address space corresponding to 'vma->mm'. * The invalidation operations are issued at a granularity * determined by 'stride' and only affect any walk-cache entries - * if 'last_level' is equal to false. + * if 'last_level' is equal to false. tlb_level is the level at + * which the invalidation must take place. If the level is wrong, + * no invalidation may take place. In the case where the level + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will + * perform a non-hinted invalidation. * * * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented @@ -345,34 +358,44 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * @tlb_level: Translation Table level hint, if known * @tlbi_user: If 'true', call an additional __tlbi_user() * (typically for user ASIDs). 'flase' for IPA instructions + * @lpa2: If 'true', the lpa2 scheme is used as set out below * * When the CPU does not support TLB range operations, flush the TLB * entries one by one at the granularity of 'stride'. If the TLB * range ops are supported, then: * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; + * 1. If FEAT_LPA2 is in use, the start address of a range operation must be + * 64KB aligned, so flush pages one by one until the alignment is reached + * using the non-range operations. This step is skipped if LPA2 is not in + * use. + * + * 2. The minimum range granularity is decided by 'scale', so multiple range + * TLBI operations may be required. Start from scale = 3, flush the largest + * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the + * requested range, then decrement scale and continue until one or zero pages + * are left. We must start from highest scale to ensure 64KB start alignment + * is maintained in the LPA2 case. * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. + * 3. If there is 1 page remaining, flush it through non-range operations. Range + * operations can only span an even number of pages. We save this for last to + * ensure 64KB start alignment is maintained for the LPA2 case. * * Note that certain ranges can be represented by either num = 31 and * scale or num = 0 and scale + 1. The loop below favours the latter * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. */ #define __flush_tlb_range_op(op, start, pages, stride, \ - asid, tlb_level, tlbi_user) \ + asid, tlb_level, tlbi_user, lpa2) \ do { \ int num = 0; \ - int scale = 0; \ + int scale = 3; \ + int shift = lpa2 ? 16 : PAGE_SHIFT; \ unsigned long addr; \ \ while (pages > 0) { \ if (!system_supports_tlb_range() || \ - pages % 2 == 1) { \ + pages == 1 || \ + (lpa2 && start != ALIGN(start, SZ_64K))) { \ addr = __TLBI_VADDR(start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ @@ -384,20 +407,20 @@ do { \ \ num = __TLBI_RANGE_NUM(pages, scale); \ if (num >= 0) { \ - addr = __TLBI_VADDR_RANGE(start, asid, scale, \ - num, tlb_level); \ + addr = __TLBI_VADDR_RANGE(start >> shift, asid, \ + scale, num, tlb_level); \ __tlbi(r##op, addr); \ if (tlbi_user) \ __tlbi_user(r##op, addr); \ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ pages -= __TLBI_RANGE_PAGES(num, scale); \ } \ - scale++; \ + scale--; \ } \ } while (0) #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ - __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled()); static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, @@ -427,9 +450,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, asid = ASID(vma->vm_mm); if (last_level) - __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); + __flush_tlb_range_op(vale1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); else - __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); + __flush_tlb_range_op(vae1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); dsb(ish); mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); @@ -441,9 +466,10 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, /* * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. - * Set the tlb_level to 0 because we can not get enough information here. + * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough + * information here. */ - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h new file mode 100644 index 000000000000..df2c47c55972 --- /dev/null +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * System register offsets in the VNCR page + * All offsets are *byte* displacements! + */ + +#ifndef __ARM64_VNCR_MAPPING_H__ +#define __ARM64_VNCR_MAPPING_H__ + +#define VNCR_VTTBR_EL2 0x020 +#define VNCR_VTCR_EL2 0x040 +#define VNCR_VMPIDR_EL2 0x050 +#define VNCR_CNTVOFF_EL2 0x060 +#define VNCR_HCR_EL2 0x078 +#define VNCR_HSTR_EL2 0x080 +#define VNCR_VPIDR_EL2 0x088 +#define VNCR_TPIDR_EL2 0x090 +#define VNCR_HCRX_EL2 0x0A0 +#define VNCR_VNCR_EL2 0x0B0 +#define VNCR_CPACR_EL1 0x100 +#define VNCR_CONTEXTIDR_EL1 0x108 +#define VNCR_SCTLR_EL1 0x110 +#define VNCR_ACTLR_EL1 0x118 +#define VNCR_TCR_EL1 0x120 +#define VNCR_AFSR0_EL1 0x128 +#define VNCR_AFSR1_EL1 0x130 +#define VNCR_ESR_EL1 0x138 +#define VNCR_MAIR_EL1 0x140 +#define VNCR_AMAIR_EL1 0x148 +#define VNCR_MDSCR_EL1 0x158 +#define VNCR_SPSR_EL1 0x160 +#define VNCR_CNTV_CVAL_EL0 0x168 +#define VNCR_CNTV_CTL_EL0 0x170 +#define VNCR_CNTP_CVAL_EL0 0x178 +#define VNCR_CNTP_CTL_EL0 0x180 +#define VNCR_SCXTNUM_EL1 0x188 +#define VNCR_TFSR_EL1 0x190 +#define VNCR_HFGRTR_EL2 0x1B8 +#define VNCR_HFGWTR_EL2 0x1C0 +#define VNCR_HFGITR_EL2 0x1C8 +#define VNCR_HDFGRTR_EL2 0x1D0 +#define VNCR_HDFGWTR_EL2 0x1D8 +#define VNCR_ZCR_EL1 0x1E0 +#define VNCR_HAFGRTR_EL2 0x1E8 +#define VNCR_TTBR0_EL1 0x200 +#define VNCR_TTBR1_EL1 0x210 +#define VNCR_FAR_EL1 0x220 +#define VNCR_ELR_EL1 0x230 +#define VNCR_SP_EL1 0x240 +#define VNCR_VBAR_EL1 0x250 +#define VNCR_TCR2_EL1 0x270 +#define VNCR_PIRE0_EL1 0x290 +#define VNCR_PIRE0_EL2 0x298 +#define VNCR_PIR_EL1 0x2A0 +#define VNCR_ICH_LR0_EL2 0x400 +#define VNCR_ICH_LR1_EL2 0x408 +#define VNCR_ICH_LR2_EL2 0x410 +#define VNCR_ICH_LR3_EL2 0x418 +#define VNCR_ICH_LR4_EL2 0x420 +#define VNCR_ICH_LR5_EL2 0x428 +#define VNCR_ICH_LR6_EL2 0x430 +#define VNCR_ICH_LR7_EL2 0x438 +#define VNCR_ICH_LR8_EL2 0x440 +#define VNCR_ICH_LR9_EL2 0x448 +#define VNCR_ICH_LR10_EL2 0x450 +#define VNCR_ICH_LR11_EL2 0x458 +#define VNCR_ICH_LR12_EL2 0x460 +#define VNCR_ICH_LR13_EL2 0x468 +#define VNCR_ICH_LR14_EL2 0x470 +#define VNCR_ICH_LR15_EL2 0x478 +#define VNCR_ICH_AP0R0_EL2 0x480 +#define VNCR_ICH_AP0R1_EL2 0x488 +#define VNCR_ICH_AP0R2_EL2 0x490 +#define VNCR_ICH_AP0R3_EL2 0x498 +#define VNCR_ICH_AP1R0_EL2 0x4A0 +#define VNCR_ICH_AP1R1_EL2 0x4A8 +#define VNCR_ICH_AP1R2_EL2 0x4B0 +#define VNCR_ICH_AP1R3_EL2 0x4B8 +#define VNCR_ICH_HCR_EL2 0x4C0 +#define VNCR_ICH_VMCR_EL2 0x4C8 +#define VNCR_VDISR_EL2 0x500 +#define VNCR_PMBLIMITR_EL1 0x800 +#define VNCR_PMBPTR_EL1 0x810 +#define VNCR_PMBSR_EL1 0x820 +#define VNCR_PMSCR_EL1 0x828 +#define VNCR_PMSEVFR_EL1 0x830 +#define VNCR_PMSICR_EL1 0x838 +#define VNCR_PMSIRR_EL1 0x840 +#define VNCR_PMSLATFR_EL1 0x848 +#define VNCR_TRFCR_EL1 0x880 +#define VNCR_MPAM1_EL1 0x900 +#define VNCR_MPAMHCR_EL2 0x930 +#define VNCR_MPAMVPMV_EL2 0x938 +#define VNCR_MPAMVPM0_EL2 0x940 +#define VNCR_MPAMVPM1_EL2 0x948 +#define VNCR_MPAMVPM2_EL2 0x950 +#define VNCR_MPAMVPM3_EL2 0x958 +#define VNCR_MPAMVPM4_EL2 0x960 +#define VNCR_MPAMVPM5_EL2 0x968 +#define VNCR_MPAMVPM6_EL2 0x970 +#define VNCR_MPAMVPM7_EL2 0x978 + +#endif /* __ARM64_VNCR_MAPPING_H__ */ |