From 53eaeb7fbe2702520125ae7d72742362c071a1f2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 22 Jan 2024 18:13:35 +0000
Subject: arm64: Add macro to compose a sysreg field value

A common idiom is to compose a tuple (reg, field, val) into a symbol
matching an autogenerated definition. Add a helper performing the
concatenation, and use it to replace the existing open-coded
implementations.

Suggested-by: Oliver Upton
Signed-off-by: Marc Zyngier
Reviewed-by: Catalin Marinas
Link: https://lore.kernel.org/r/20240122181344.258974-2-maz@kernel.org
Signed-off-by: Oliver Upton
---
 arch/arm64/include/asm/sysreg.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index c3b19b376c86..9e8999592f3a 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1181,6 +1181,8 @@
 	par;								\
 })

+#define SYS_FIELD_VALUE(reg, field, val)	reg##_##field##_##val
+
 #define SYS_FIELD_GET(reg, field, val)		\
		 FIELD_GET(reg##_##field##_MASK, val)

@@ -1188,7 +1190,8 @@
		 FIELD_PREP(reg##_##field##_MASK, val)

 #define SYS_FIELD_PREP_ENUM(reg, field, val)		\
-		 FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+		 FIELD_PREP(reg##_##field##_MASK,	\
+			    SYS_FIELD_VALUE(reg, field, val))

 #endif
--
cgit v1.2.3

From d9a065914dccce0bbd967cdef1ec21b4de545daf Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 22 Jan 2024 18:13:36 +0000
Subject: arm64: cpufeatures: Correctly handle signed values

Although we've had signed values for some features such as PMUv3 and FP,
the code that handles the comparison with some limit has a couple of
annoying issues:

- the min_field_value is always unsigned, meaning that we cannot easily
  compare it with a negative value

- it is not possible to have a range of values, let alone a range of
  negative values

Fix this by:

- adding an upper limit to the comparison, defaulting to all bits being
  set to the maximum positive value

- ensuring that the signedness of the min and max values is taken into
  account

An ARM64_CPUID_FIELDS_NEG() macro is provided for signed features, but
nothing is using it yet.

Signed-off-by: Marc Zyngier
Reviewed-by: Catalin Marinas
Link: https://lore.kernel.org/r/20240122181344.258974-3-maz@kernel.org
Signed-off-by: Oliver Upton
---
 arch/arm64/include/asm/cpufeature.h |  1 +
 arch/arm64/kernel/cpufeature.c      | 65 ++++++++++++++++++++++++++++++++-----
 2 files changed, 57 insertions(+), 9 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 21c824edf8ce..a98d95f3492b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -363,6 +363,7 @@ struct arm64_cpu_capabilities {
 	u8 field_pos;
 	u8 field_width;
 	u8 min_field_value;
+	u8 max_field_value;
 	u8 hwcap_type;
 	bool sign;
 	unsigned long hwcap;

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 8d1a634a403e..92b1546f2622 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -140,12 +140,42 @@ void dump_cpu_features(void)
 	pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps);
 }

+#define __ARM64_MAX_POSITIVE(reg, field)				\
+		((reg##_##field##_SIGNED ?				\
+		  BIT(reg##_##field##_WIDTH - 1) :			\
+		  BIT(reg##_##field##_WIDTH)) - 1)
+
+#define __ARM64_MIN_NEGATIVE(reg, field)  BIT(reg##_##field##_WIDTH - 1)
+
+#define __ARM64_CPUID_FIELDS(reg, field, min_value, max_value)		\
+		.sys_reg = SYS_##reg,					\
+		.field_pos = reg##_##field##_SHIFT,			\
+		.field_width = reg##_##field##_WIDTH,			\
+		.sign = reg##_##field##_SIGNED,				\
+		.min_field_value = min_value,				\
+		.max_field_value = max_value,
+
+/*
+ * ARM64_CPUID_FIELDS() encodes a field with a range from min_value to
+ * an implicit maximum that depends on the signedness of the field.
+ *
+ * An unsigned field will be capped at all ones, while a signed field
+ * will be limited to the positive half only.
+ */
 #define ARM64_CPUID_FIELDS(reg, field, min_value)			\
-	.sys_reg = SYS_##reg,						\
-	.field_pos = reg##_##field##_SHIFT,				\
-	.field_width = reg##_##field##_WIDTH,				\
-	.sign = reg##_##field##_SIGNED,					\
-	.min_field_value = reg##_##field##_##min_value,
+	__ARM64_CPUID_FIELDS(reg, field,				\
+			     SYS_FIELD_VALUE(reg, field, min_value),	\
+			     __ARM64_MAX_POSITIVE(reg, field))
+
+/*
+ * ARM64_CPUID_FIELDS_NEG() encodes a field with a range from an
+ * implicit minimum value to max_value. This should be used when
+ * matching a non-implemented property.
+ */
+#define ARM64_CPUID_FIELDS_NEG(reg, field, max_value)			\
+	__ARM64_CPUID_FIELDS(reg, field,				\
+			     __ARM64_MIN_NEGATIVE(reg, field),		\
+			     SYS_FIELD_VALUE(reg, field, max_value))

 #define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
 	{						\

@@ -1451,11 +1481,28 @@ has_always(const struct arm64_cpu_capabilities *entry, int scope)
 static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
-	int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
-						    entry->field_width,
-						    entry->sign);
+	int val, min, max;
+	u64 tmp;
+
+	val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+						entry->field_width,
+						entry->sign);
+
+	tmp = entry->min_field_value;
+	tmp <<= entry->field_pos;
+
+	min = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+						entry->field_width,
+						entry->sign);
+
+	tmp = entry->max_field_value;
+	tmp <<= entry->field_pos;
+
+	max = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+						entry->field_width,
+						entry->sign);

-	return val >= entry->min_field_value;
+	return val >= min && val <= max;
 }

 static u64
--
cgit v1.2.3

From 805bb61f827997c59b92669ae8cc3740ebcf1087 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 22 Jan 2024 18:13:39 +0000
Subject: arm64: cpufeature: Add ID_AA64MMFR4_EL1 handling

Add ID_AA64MMFR4_EL1 to the list of idregs the kernel knows about, and
describe the E2H0 field.
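As a sketch of what consuming the new field might look like (illustrative
only; the extraction helper and the generated E2H0 constants are assumed
from elsewhere in the tree, and are not part of this patch):

	u64 mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
	/* E2H0 is signed: NI (0b1111) is -1, NI_NV1 (0b1110) is -2 */
	int e2h0 = cpuid_feature_extract_signed_field(mmfr4,
						      ID_AA64MMFR4_EL1_E2H0_SHIFT);

	if (e2h0 < 0) {
		/* FEAT_E2H0 absent: HCR_EL2.E2H behaves as RES1 */
	}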
Reviewed-by: Oliver Upton Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20240122181344.258974-6-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/kernel/cpufeature.c | 7 +++++++ arch/arm64/kernel/cpuinfo.c | 1 + 3 files changed, 9 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index b1e43f56ee46..6c13fd47e170 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -56,6 +56,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; u64 reg_id_aa64mmfr3; + u64 reg_id_aa64mmfr4; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; u64 reg_id_aa64zfr0; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index bae5d0655262..ad3753fbdcb1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -437,6 +437,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = { + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1), @@ -754,6 +759,7 @@ static const struct __ftr_reg_entry { &id_aa64mmfr1_override), ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3), + ARM64_FTR_REG(SYS_ID_AA64MMFR4_EL1, ftr_id_aa64mmfr4), /* Op1 = 1, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), @@ -1078,6 +1084,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3); + init_cpu_ftr_reg(SYS_ID_AA64MMFR4_EL1, info->reg_id_aa64mmfr4); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 47043c0d95ec..7ca3fbd200f0 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -447,6 +447,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1); + info->reg_id_aa64mmfr4 = read_cpuid(ID_AA64MMFR4_EL1); info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); -- cgit v1.2.3 From 94f29ab2d8018c035098b58d89fc4ae36894a706 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 22 Jan 2024 18:13:43 +0000 Subject: KVM: arm64: Force guest's HCR_EL2.E2H RES1 when NV1 is not implemented If NV1 isn't supported on a system, make sure we always evaluate the guest's HCR_EL2.E2H as RES1, irrespective of what the guest may have written there. 
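In sketch form, the intended evaluation is simply (a restatement of the
hunk below for clarity; ARM64_HAS_HCR_NV1 is the capability the diff
relies on):

	/* E2H reads as 1 when NV1 is not implemented, whatever the guest wrote */
	e2h_set = !cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
		  (ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H);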
Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240122181344.258974-10-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_emulate.h | 3 ++- arch/arm64/kvm/sys_regs.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index b804fe832184..debc3753d2ef 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -209,7 +209,8 @@ static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu) static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt) { - return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H; + return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) || + (ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H)); } static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 08a9571fa809..041b11825578 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2175,6 +2175,16 @@ static bool access_spsr(struct kvm_vcpu *vcpu, return true; } +static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 val = r->val; + + if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1)) + val |= HCR_E2H; + + return __vcpu_sys_reg(vcpu, r->reg) = val; +} + /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -2666,7 +2676,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0), EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), - EL2_REG_VNCR(HCR_EL2, reset_val, 0), + EL2_REG_VNCR(HCR_EL2, reset_hcr, 0), EL2_REG(MDCR_EL2, access_rw, reset_val, 0), EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1), EL2_REG_VNCR(HSTR_EL2, reset_val, 0), -- cgit v1.2.3 From d198e2668e247f40f68b008ce1d2656dbf2d47eb Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 8 Feb 2024 10:54:21 +0000 Subject: KVM: arm64: add comments to __kern_hyp_va Document this function a little, to make it easier to understand. The assembly comments were copied from the kern_hyp_va asm macro. Signed-off-by: Joey Gouly Cc: Marc Zyngier Cc: Oliver Upton Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240208105422.3444159-2-joey.gouly@arm.com [oliver: migrate a bit more detail from the asm variant] Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_mmu.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e3e793d0ec30..7e7f276bcdd0 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -56,11 +56,6 @@ /* * Convert a kernel VA into a HYP VA. * reg: VA to be converted. - * - * The actual code generation takes place in kvm_update_va_mask, and - * the instructions below are only there to reserve the space and - * perform the register allocation (kvm_update_va_mask uses the - * specific registers encoded in the instructions). */ .macro kern_hyp_va reg #ifndef __KVM_VHE_HYPERVISOR__ @@ -127,14 +122,29 @@ void kvm_apply_hyp_relocations(void); #define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset) +/* + * Convert a kernel VA into a HYP VA. + * + * Can be called from hyp or non-hyp context. 
+ * + * The actual code generation takes place in kvm_update_va_mask(), and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_update_va_mask() uses the + * specific registers encoded in the instructions). + */ static __always_inline unsigned long __kern_hyp_va(unsigned long v) { +/* + * This #ifndef is an optimisation for when this is called from VHE hyp + * context. When called from a VHE non-hyp context, kvm_update_va_mask() will + * replace the instructions with `nop`s. + */ #ifndef __KVM_VHE_HYPERVISOR__ - asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" - "ror %0, %0, #1\n" - "add %0, %0, #0\n" - "add %0, %0, #0, lsl 12\n" - "ror %0, %0, #63\n", + asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" /* mask with va_mask */ + "ror %0, %0, #1\n" /* rotate to the first tag bit */ + "add %0, %0, #0\n" /* insert the low 12 bits of the tag */ + "add %0, %0, #0, lsl 12\n" /* insert the top 12 bits of the tag */ + "ror %0, %0, #63\n", /* rotate back */ ARM64_ALWAYS_SYSTEM, kvm_update_va_mask) : "+r" (v)); -- cgit v1.2.3 From a02395d0f3bfa4b2d2a0aa53cc0f2eaeea0e3e66 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 8 Feb 2024 10:54:22 +0000 Subject: KVM: arm64: removed unused kern_hyp_va asm macro The last usage of this macro was removed in: commit 5dc33bd199ca ("KVM: arm64: nVHE: Pass pointers consistently to hyp-init") Signed-off-by: Joey Gouly Cc: Marc Zyngier Cc: Oliver Upton Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240208105422.3444159-3-joey.gouly@arm.com Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_mmu.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7e7f276bcdd0..d5e48d870461 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -53,22 +53,6 @@ #include -/* - * Convert a kernel VA into a HYP VA. - * reg: VA to be converted. - */ -.macro kern_hyp_va reg -#ifndef __KVM_VHE_HYPERVISOR__ -alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask - and \reg, \reg, #1 /* mask with va_mask */ - ror \reg, \reg, #1 /* rotate to the first tag bit */ - add \reg, \reg, #0 /* insert the low 12 bits of the tag */ - add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */ - ror \reg, \reg, #63 /* rotate back */ -alternative_cb_end -#endif -.endm - /* * Convert a hypervisor VA to a PA * reg: hypervisor address to be converted in place -- cgit v1.2.3 From c62d7a23b9479b946f00d58046e0bdf7f233a2b9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Feb 2024 13:18:03 +0000 Subject: KVM: arm64: Add feature checking helpers In order to make it easier to check whether a particular feature is exposed to a guest, add a new set of helpers, with kvm_has_feat() being the most useful. Let's start making use of them in the PMU code (courtesy of Oliver). Follow-up changes will introduce additional use patterns. 
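As a sketch of how the pieces compose (an illustration of the macro
expansion, not code from this patch):

	/*
	 * kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P5) unfolds,
	 * roughly, into:
	 */
	get_idreg_field(kvm, ID_AA64DFR0_EL1, PMUVer) >=
		expand_field_sign(ID_AA64DFR0_EL1, PMUVer, V3P5)

	/*
	 * PMUVer being a signed field, both sides go through
	 * sign_extend64(..., ID_AA64DFR0_EL1_PMUVer_WIDTH - 1), so an
	 * IMPLEMENTATION DEFINED PMU (0b1111, i.e. -1) fails the check
	 * instead of passing an unsigned ">=" comparison.
	 */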
Reviewed-by: Suzuki K Poulose
Co-developed-by: Oliver Upton
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20240214131827.2856277-3-maz@kernel.org
Signed-off-by: Oliver Upton
---
 arch/arm64/include/asm/kvm_host.h | 44 +++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/pmu-emul.c         | 11 +++++-----
 arch/arm64/kvm/sys_regs.c         |  6 ++----
 include/kvm/arm_pmu.h             | 11 ----------
 4 files changed, 52 insertions(+), 20 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 21c57b812569..527da3d2cedb 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1233,4 +1233,48 @@ static inline void kvm_hyp_reserve(void) { }
 void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
 bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);

+#define __expand_field_sign_unsigned(id, fld, val)			\
+	((u64)SYS_FIELD_VALUE(id, fld, val))
+
+#define __expand_field_sign_signed(id, fld, val)			\
+	({								\
+		u64 __val = SYS_FIELD_VALUE(id, fld, val);		\
+		sign_extend64(__val, id##_##fld##_WIDTH - 1);		\
+	})
+
+#define expand_field_sign(id, fld, val)					\
+	(id##_##fld##_SIGNED ?						\
+	 __expand_field_sign_signed(id, fld, val) :			\
+	 __expand_field_sign_unsigned(id, fld, val))
+
+#define get_idreg_field_unsigned(kvm, id, fld)				\
+	({								\
+		u64 __val = IDREG((kvm), SYS_##id);			\
+		FIELD_GET(id##_##fld##_MASK, __val);			\
+	})
+
+#define get_idreg_field_signed(kvm, id, fld)				\
+	({								\
+		u64 __val = get_idreg_field_unsigned(kvm, id, fld);	\
+		sign_extend64(__val, id##_##fld##_WIDTH - 1);		\
+	})
+
+#define get_idreg_field_enum(kvm, id, fld)				\
+	get_idreg_field_unsigned(kvm, id, fld)
+
+#define get_idreg_field(kvm, id, fld)					\
+	(id##_##fld##_SIGNED ?						\
+	 get_idreg_field_signed(kvm, id, fld) :				\
+	 get_idreg_field_unsigned(kvm, id, fld))
+
+#define kvm_has_feat(kvm, id, fld, limit)				\
+	(get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, limit))
+
+#define kvm_has_feat_enum(kvm, id, fld, val)				\
+	(get_idreg_field_unsigned((kvm), id, fld) == __expand_field_sign_unsigned(id, fld, val))
+
+#define kvm_has_feat_range(kvm, id, fld, min, max)			\
+	(get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) &&	\
+	 get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max))
+
 #endif /* __ARM64_KVM_HOST_H__ */

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 3d9467ff73bc..925522470b2b 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -64,12 +64,11 @@ u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
 {
 	u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 |
 		   kvm_pmu_event_mask(kvm);
-	u64 pfr0 = IDREG(kvm, SYS_ID_AA64PFR0_EL1);

-	if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL2, pfr0))
+	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL2, IMP))
 		mask |= ARMV8_PMU_INCLUDE_EL2;

-	if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr0))
+	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP))
 		mask |= ARMV8_PMU_EXCLUDE_NS_EL0 |
 			ARMV8_PMU_EXCLUDE_NS_EL1 |
 			ARMV8_PMU_EXCLUDE_EL3;
@@ -83,8 +82,10 @@ u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
  */
 static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
 {
+	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
+
 	return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
-		kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
+		kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5));
 }

 static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
@@ -556,7 +557,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 		return;

 	/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
-	if (!kvm_pmu_is_3p5(vcpu))
+	if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
 		val &= ~ARMV8_PMU_PMCR_LP;

 	/* The reset bits don't indicate any state, and shouldn't be saved. */

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 041b11825578..3c31f8cb9eef 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -505,10 +505,9 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
			   struct sys_reg_params *p,
			   const struct sys_reg_desc *r)
 {
-	u64 val = IDREG(vcpu->kvm, SYS_ID_AA64MMFR1_EL1);
 	u32 sr = reg_to_encoding(r);

-	if (!(val & (0xfUL << ID_AA64MMFR1_EL1_LO_SHIFT))) {
+	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, LO, IMP)) {
 		kvm_inject_undefined(vcpu);
 		return false;
 	}
@@ -2748,8 +2747,7 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
 		return ignore_write(vcpu, p);
 	} else {
 		u64 dfr = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1);
-		u64 pfr = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1);
-		u32 el3 = !!SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr);
+		u32 el3 = kvm_has_feat(vcpu->kvm, ID_AA64PFR0_EL1, EL3, IMP);

 		p->regval = ((SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr) << 28) |
			     (SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr) << 24) |

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 4b9d8fb393a8..eb4c369a79eb 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -90,16 +90,6 @@ void kvm_vcpu_pmu_resync_el0(void);
 		vcpu->arch.pmu.events = *kvm_get_pmu_events();	\
 	} while (0)

-/*
- * Evaluates as true when emulating PMUv3p5, and false otherwise.
- */
-#define kvm_pmu_is_3p5(vcpu) ({					\
-	u64 val = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1);	\
-	u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, val);	\
-								\
-	pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5;			\
-})
-
 u8 kvm_arm_pmu_get_pmuver_limit(void);
 u64 kvm_pmu_evtyper_mask(struct kvm *kvm);
 int kvm_arm_set_default_pmu(struct kvm *kvm);
@@ -168,7 +158,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
 }

 #define kvm_vcpu_has_pmu(vcpu)		({ false; })
-#define kvm_pmu_is_3p5(vcpu)		({ false; })
 static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
--
cgit v1.2.3

From 888f0880702293096619b300150cd7e59fcd9743 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 14 Feb 2024 13:18:04 +0000
Subject: KVM: arm64: nv: Add sanitising to VNCR-backed sysregs

VNCR-backed "registers" are actually only memory, which means that
there is zero control over what the guest can write, and that it is
the hypervisor's job to actually sanitise the content of the backing
store. Yeah, this is fun.

In order to preserve some form of sanity, add a repainting mechanism
that makes use of a per-VM set of RES0/RES1 masks, one pair per VNCR
register. These masks get applied on access to the backing store via
__vcpu_sys_reg(), ensuring that the state that is consumed by KVM is
correct.

So far, nothing populates these masks, but stay tuned.
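The repainting itself is plain mask arithmetic; as a sketch with made-up
mask values (the real per-register values are populated later in the
series):

	/* e.g. res0 = GENMASK(63, 56), res1 = BIT(1) for some register */
	v = ctxt_sys_reg(&vcpu->arch.ctxt, sr);
	v &= ~masks->mask[sr - __VNCR_START__].res0;	/* strip guest-written RES0 bits */
	v |= masks->mask[sr - __VNCR_START__].res1;	/* force RES1 bits to 1 */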
Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20240214131827.2856277-4-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 22 ++++++++++++++++++++- arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/nested.c | 41 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 527da3d2cedb..8cda003d6267 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -238,6 +238,8 @@ static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) return index; } +struct kvm_sysreg_masks; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -312,6 +314,9 @@ struct kvm_arch { #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; + /* Masks for VNCR-baked sysregs */ + struct kvm_sysreg_masks *sysreg_masks; + /* * For an untrusted host VM, 'pkvm.handle' is used to lookup * the associated pKVM instance in the hypervisor. @@ -474,6 +479,13 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! */ }; +struct kvm_sysreg_masks { + struct { + u64 res0; + u64 res1; + } mask[NR_SYS_REGS - __VNCR_START__]; +}; + struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -868,7 +880,15 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) -#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) +u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg); +#define __vcpu_sys_reg(v,r) \ + (*({ \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 *__r = __ctxt_sys_reg(ctxt, (r)); \ + if (vcpu_has_nv((v)) && (r) >= __VNCR_START__) \ + *__r = kvm_vcpu_sanitise_vncr_reg((v), (r)); \ + __r; \ + })) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a25265aca432..c063e84fc72c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -206,6 +206,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) pkvm_destroy_hyp_vm(kvm); kfree(kvm->arch.mpidr_data); + kfree(kvm->arch.sysreg_masks); kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index d55e809e26cb..c976cd4b8379 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -163,15 +163,54 @@ static u64 limit_nv_id_reg(u32 id, u64 val) return val; } + +u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr) +{ + u64 v = ctxt_sys_reg(&vcpu->arch.ctxt, sr); + struct kvm_sysreg_masks *masks; + + masks = vcpu->kvm->arch.sysreg_masks; + + if (masks) { + sr -= __VNCR_START__; + + v &= ~masks->mask[sr].res0; + v |= masks->mask[sr].res1; + } + + return v; +} + +static void __maybe_unused set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) +{ + int i = sr - __VNCR_START__; + + kvm->arch.sysreg_masks->mask[i].res0 = res0; + kvm->arch.sysreg_masks->mask[i].res1 = res1; +} + int kvm_init_nv_sysregs(struct kvm *kvm) { + int ret = 0; + mutex_lock(&kvm->arch.config_lock); + if (kvm->arch.sysreg_masks) + goto out; + + kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)), + GFP_KERNEL); + if (!kvm->arch.sysreg_masks) { + ret = -ENOMEM; + goto out; + } + for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++) 
kvm->arch.id_regs[i] = limit_nv_id_reg(IDX_IDREG(i), kvm->arch.id_regs[i]); +out: mutex_unlock(&kvm->arch.config_lock); - return 0; + return ret; } -- cgit v1.2.3 From 19f3e7ea29f8f485f06b47f7c38f9e9e81013ada Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Feb 2024 13:18:15 +0000 Subject: KVM: arm64: Register AArch64 system register entries with the sysreg xarray In order to reduce the number of lookups that we have to perform when handling a sysreg, register each AArch64 sysreg descriptor with the global xarray. The index of the descriptor is stored as a 10 bit field in the data word. Subsequent patches will retrieve and use the stored index. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240214131827.2856277-15-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/emulate-nested.c | 39 +++++++++++++++++++++++++++++++++++++-- arch/arm64/kvm/sys_regs.c | 11 ++++++++++- 3 files changed, 50 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8cda003d6267..914fd54179bb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1078,6 +1078,9 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); int __init kvm_sys_reg_table_init(void); +struct sys_reg_desc; +int __init populate_sysreg_config(const struct sys_reg_desc *sr, + unsigned int idx); int __init populate_nv_trap_config(void); bool lock_all_vcpus(struct kvm *kvm); diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 42f5b4483c80..ddf760b4dda5 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -427,12 +427,14 @@ static const complex_condition_check ccc[] = { * [19:14] bit number in the FGT register (6 bits) * [20] trap polarity (1 bit) * [25:21] FG filter (5 bits) - * [62:26] Unused (37 bits) + * [35:26] Main SysReg table index (10 bits) + * [62:36] Unused (27 bits) * [63] RES0 - Must be zero, as lost on insertion in the xarray */ #define TC_CGT_BITS 10 #define TC_FGT_BITS 4 #define TC_FGF_BITS 5 +#define TC_SRI_BITS 10 union trap_config { u64 val; @@ -442,7 +444,8 @@ union trap_config { unsigned long bit:6; /* Bit number */ unsigned long pol:1; /* Polarity */ unsigned long fgf:TC_FGF_BITS; /* Fine Grained Filter */ - unsigned long unused:37; /* Unused, should be zero */ + unsigned long sri:TC_SRI_BITS; /* SysReg Index */ + unsigned long unused:27; /* Unused, should be zero */ unsigned long mbz:1; /* Must Be Zero */ }; }; @@ -1868,6 +1871,38 @@ check_mcb: return ret; } +int __init populate_sysreg_config(const struct sys_reg_desc *sr, + unsigned int idx) +{ + union trap_config tc; + u32 encoding; + void *ret; + + /* + * 0 is a valid value for the index, but not for the storage. + * We'll store (idx+1), so check against an offset'd limit. 
+	 */
+	if (idx >= (BIT(TC_SRI_BITS) - 1)) {
+		kvm_err("sysreg %s (%d) out of range\n", sr->name, idx);
+		return -EINVAL;
+	}
+
+	encoding = sys_reg(sr->Op0, sr->Op1, sr->CRn, sr->CRm, sr->Op2);
+	tc = get_trap_config(encoding);
+
+	if (tc.sri) {
+		kvm_err("sysreg %s (%d) duplicate entry (%d)\n",
+			sr->name, idx - 1, tc.sri);
+		return -EINVAL;
+	}
+
+	tc.sri = idx + 1;
+	ret = xa_store(&sr_forward_xa, encoding,
+		       xa_mk_value(tc.val), GFP_KERNEL);
+
+	return xa_err(ret);
+}
+
 static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu,
					  const struct trap_bits *tb)
 {

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 57f3d0c53fc3..a410e99f827e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -3972,6 +3972,7 @@ int __init kvm_sys_reg_table_init(void)
 	struct sys_reg_params params;
 	bool valid = true;
 	unsigned int i;
+	int ret = 0;

 	/* Make sure tables are unique and in order. */
 	valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
@@ -3995,5 +3996,13 @@ int __init kvm_sys_reg_table_init(void)
 	if (!first_idreg)
 		return -EINVAL;

-	return populate_nv_trap_config();
+	ret = populate_nv_trap_config();
+
+	for (i = 0; !ret && i < ARRAY_SIZE(sys_reg_descs); i++)
+		ret = populate_sysreg_config(sys_reg_descs + i, i);
+
+	for (i = 0; !ret && i < ARRAY_SIZE(sys_insn_descs); i++)
+		ret = populate_sysreg_config(sys_insn_descs + i, i);
+
+	return ret;
 }
--
cgit v1.2.3

From cc5f84fbb008ff0904e0a8c0fb207cee2eb99bc9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 14 Feb 2024 13:18:16 +0000
Subject: KVM: arm64: Use the xarray as the primary sysreg/sysinsn walker

Since we always start sysreg/sysinsn handling by searching the xarray,
use it as the source of the index in the correct sys_reg_desc array.

This allows some cleanup, such as moving the handling of unknown
sysregs into a single location.
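A sketch of how a stored index is meant to be consumed (the value is
stored offset by one so that 0 keeps meaning "no entry"):

	union trap_config tc = get_trap_config(encoding);

	if (tc.sri)
		desc = &sys_reg_descs[tc.sri - 1];	/* stored as idx + 1 */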
Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240214131827.2856277-16-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 2 +- arch/arm64/kvm/emulate-nested.c | 40 +++++++++++++++++------ arch/arm64/kvm/sys_regs.c | 63 +++++++++++-------------------------- 3 files changed, 50 insertions(+), 55 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 4882905357f4..68465f87d308 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -60,7 +60,7 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) return ttbr0 & ~GENMASK_ULL(63, 48); } -extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); +extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu, int *sr_idx); int kvm_init_nv_sysregs(struct kvm *kvm); diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index ddf760b4dda5..5aeaa688755f 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2007,7 +2007,7 @@ static bool check_fgt_bit(struct kvm *kvm, bool is_read, return !(kvm_get_sysreg_res0(kvm, sr) & BIT(tc.bit)); } -bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) +bool __check_nv_sr_forward(struct kvm_vcpu *vcpu, int *sr_index) { union trap_config tc; enum trap_behaviour b; @@ -2015,9 +2015,6 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) u32 sysreg; u64 esr, val; - if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) - return false; - esr = kvm_vcpu_get_esr(vcpu); sysreg = esr_sys64_to_sysreg(esr); is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; @@ -2028,13 +2025,16 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) * A value of 0 for the whole entry means that we know nothing * for this sysreg, and that it cannot be re-injected into the * nested hypervisor. In this situation, let's cut it short. - * - * Note that ultimately, we could also make use of the xarray - * to store the index of the sysreg in the local descriptor - * array, avoiding another search... Hint, hint... */ if (!tc.val) - return false; + goto local; + + /* + * If we're not nesting, immediately return to the caller, with the + * sysreg index, should we have it. + */ + if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + goto local; switch ((enum fgt_group_id)tc.fgt) { case __NO_FGT_GROUP__: @@ -2076,7 +2076,7 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) case __NR_FGT_GROUP_IDS__: /* Something is really wrong, bail out */ WARN_ONCE(1, "__NR_FGT_GROUP_IDS__"); - return false; + goto local; } if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(vcpu->kvm, is_read, @@ -2089,6 +2089,26 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) ((b & BEHAVE_FORWARD_WRITE) && !is_read)) goto inject; +local: + if (!tc.sri) { + struct sys_reg_params params; + + params = esr_sys64_to_params(esr); + + /* + * Check for the IMPDEF range, as per DDI0487 J.a, + * D18.3.2 Reserved encodings for IMPLEMENTATION + * DEFINED registers. 
+ */ + if (!(params.Op0 == 3 && (params.CRn & 0b1011) == 0b1011)) + print_sys_reg_msg(¶ms, + "Unsupported guest access at: %lx\n", + *vcpu_pc(vcpu)); + kvm_inject_undefined(vcpu); + return true; + } + + *sr_index = tc.sri - 1; return false; inject: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a410e99f827e..bc076ef8440b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3395,12 +3395,6 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu) return kvm_handle_cp_32(vcpu, ¶ms, cp14_regs, ARRAY_SIZE(cp14_regs)); } -static bool is_imp_def_sys_reg(struct sys_reg_params *params) -{ - // See ARM DDI 0487E.a, section D12.3.2 - return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011; -} - /** * emulate_sys_reg - Emulate a guest access to an AArch64 system register * @vcpu: The VCPU pointer @@ -3409,44 +3403,22 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params) * Return: true if the system register access was successful, false otherwise. */ static bool emulate_sys_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *params) + struct sys_reg_params *params) { const struct sys_reg_desc *r; r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); - if (likely(r)) { perform_access(vcpu, params, r); return true; } - if (is_imp_def_sys_reg(params)) { - kvm_inject_undefined(vcpu); - } else { - print_sys_reg_msg(params, - "Unsupported guest sys_reg access at: %lx [%08lx]\n", - *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); - kvm_inject_undefined(vcpu); - } - return false; -} - -static int emulate_sys_instr(struct kvm_vcpu *vcpu, struct sys_reg_params *p) -{ - const struct sys_reg_desc *r; - - /* Search from the system instruction table. */ - r = find_reg(p, sys_insn_descs, ARRAY_SIZE(sys_insn_descs)); + print_sys_reg_msg(params, + "Unsupported guest sys_reg access at: %lx [%08lx]\n", + *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); + kvm_inject_undefined(vcpu); - if (likely(r)) { - perform_access(vcpu, p, r); - } else { - kvm_err("Unsupported guest sys instruction at: %lx\n", - *vcpu_pc(vcpu)); - print_sys_reg_instr(p); - kvm_inject_undefined(vcpu); - } - return 1; + return false; } static void kvm_reset_id_regs(struct kvm_vcpu *vcpu) @@ -3502,31 +3474,34 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) */ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) { + const struct sys_reg_desc *desc = NULL; struct sys_reg_params params; unsigned long esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); + int sr_idx; trace_kvm_handle_sys_reg(esr); - if (__check_nv_sr_forward(vcpu)) + if (__check_nv_sr_forward(vcpu, &sr_idx)) return 1; params = esr_sys64_to_params(esr); params.regval = vcpu_get_reg(vcpu, Rt); /* System registers have Op0=={2,3}, as per DDI487 J.a C5.1.2 */ - if (params.Op0 == 2 || params.Op0 == 3) { - if (!emulate_sys_reg(vcpu, ¶ms)) - return 1; + if (params.Op0 == 2 || params.Op0 == 3) + desc = &sys_reg_descs[sr_idx]; + else + desc = &sys_insn_descs[sr_idx]; - if (!params.is_write) - vcpu_set_reg(vcpu, Rt, params.regval); + perform_access(vcpu, ¶ms, desc); - return 1; - } + /* Read from system register? 
*/
+	if (!params.is_write &&
+	    (params.Op0 == 2 || params.Op0 == 3))
+		vcpu_set_reg(vcpu, Rt, params.regval);

-	/* Hints, PSTATE (Op0 == 0) and System instructions (Op0 == 1) */
-	return emulate_sys_instr(vcpu, &params);
+	return 1;
 }

 /******************************************************************************
--
cgit v1.2.3

From 085eabaa74a12345b7dd083b160519fe3a52f2ce Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 14 Feb 2024 13:18:17 +0000
Subject: KVM: arm64: Rename __check_nv_sr_forward() to triage_sysreg_trap()

__check_nv_sr_forward() is not specific to NV anymore, and does a lot
more. Rename it to triage_sysreg_trap(), making it plain that its role
is to decide where an exception is to be handled.

Reviewed-by: Joey Gouly
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20240214131827.2856277-17-maz@kernel.org
Signed-off-by: Oliver Upton
---
 arch/arm64/include/asm/kvm_nested.h | 1 -
 arch/arm64/kvm/emulate-nested.c     | 2 +-
 arch/arm64/kvm/sys_regs.c           | 2 +-
 arch/arm64/kvm/sys_regs.h           | 2 ++
 4 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 68465f87d308..c77d795556e1 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -60,7 +60,6 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
 	return ttbr0 & ~GENMASK_ULL(63, 48);
 }

-extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu, int *sr_idx);

 int kvm_init_nv_sysregs(struct kvm *kvm);

diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 5aeaa688755f..4f2a32a8f247 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2007,7 +2007,7 @@ static bool check_fgt_bit(struct kvm *kvm, bool is_read,
 	return !(kvm_get_sysreg_res0(kvm, sr) & BIT(tc.bit));
 }

-bool __check_nv_sr_forward(struct kvm_vcpu *vcpu, int *sr_index)
+bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
 {
 	union trap_config tc;
 	enum trap_behaviour b;

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index bc076ef8440b..938df5dc3684 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -3482,7 +3482,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)

 	trace_kvm_handle_sys_reg(esr);

-	if (__check_nv_sr_forward(vcpu, &sr_idx))
+	if (triage_sysreg_trap(vcpu, &sr_idx))
 		return 1;

 	params = esr_sys64_to_params(esr);

diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index c65c129b3500..997eea21ba2a 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -233,6 +233,8 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
 int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
			  const struct sys_reg_desc table[], unsigned int num);

+bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index);
+
 #define AA32(_x)	.aarch32_map = AA32_##_x
 #define Op0(_x)	.Op0 = _x
 #define Op1(_x)	.Op1 = _x
--
cgit v1.2.3

From 2fd8f31c32f061822c18d13d17c1ea6a531cc443 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 14 Feb 2024 13:18:18 +0000
Subject: KVM: arm64: Add Fine-Grained UNDEF tracking information

In order to efficiently handle a system register access being disabled
and resulting in an UNDEF exception being injected, we introduce the
(slightly dubious) concept of Fine-Grained UNDEF, modeled after the
architectural Fine-Grained Traps.
For each FGT group, we keep a 64 bit word that has the exact same bit assignment as the corresponding FGT register, where a 1 indicates that trapping this register should result in an UNDEF exception being reinjected. So far, nothing populates this information, nor sets the corresponding trap bits. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240214131827.2856277-18-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 21 +++++++++++++++++++++ arch/arm64/kvm/emulate-nested.c | 12 ------------ 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 914fd54179bb..c69b258fc989 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -240,9 +240,30 @@ static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) struct kvm_sysreg_masks; +enum fgt_group_id { + __NO_FGT_GROUP__, + HFGxTR_GROUP, + HDFGRTR_GROUP, + HDFGWTR_GROUP = HDFGRTR_GROUP, + HFGITR_GROUP, + HAFGRTR_GROUP, + + /* Must be last */ + __NR_FGT_GROUP_IDS__ +}; + struct kvm_arch { struct kvm_s2_mmu mmu; + /* + * Fine-Grained UNDEF, mimicking the FGT layout defined by the + * architecture. We track them globally, as we present the + * same feature-set to all vcpus. + * + * Index 0 is currently spare. + */ + u64 fgu[__NR_FGT_GROUP_IDS__]; + /* Interrupt controller */ struct vgic_dist vgic; diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 4f2a32a8f247..b67078b8271b 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -1009,18 +1009,6 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { static DEFINE_XARRAY(sr_forward_xa); -enum fgt_group_id { - __NO_FGT_GROUP__, - HFGxTR_GROUP, - HDFGRTR_GROUP, - HDFGWTR_GROUP = HDFGRTR_GROUP, - HFGITR_GROUP, - HAFGRTR_GROUP, - - /* Must be last */ - __NR_FGT_GROUP_IDS__ -}; - enum fg_filter_id { __NO_FGF__, HCRX_FGTnXS, -- cgit v1.2.3 From c5bac1ef7df6bcce4feaa5a609119cab5d5e4730 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Feb 2024 13:18:20 +0000 Subject: KVM: arm64: Move existing feature disabling over to FGU infrastructure We already trap a bunch of existing features for the purpose of disabling them (MAIR2, POR, ACCDATA, SME...). Let's move them over to our brand new FGU infrastructure. 
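The net effect, in sketch form (an approximation of what the clr/set
plumbing in the hypervisor switch code computes for a register marked in
the FGU word, not a literal excerpt):

	/*
	 * A bit set in kvm->arch.fgu[HFGxTR_GROUP] clears the matching
	 * negative-polarity (nXX) bit when HFGxTR_EL2 is programmed, so
	 * the guest access traps and is then reinjected as an UNDEF.
	 */
	r_val = __HFGRTR_EL2_nMASK & ~kvm->arch.fgu[HFGxTR_GROUP];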
Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240214131827.2856277-20-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 4 ++++ arch/arm64/kvm/arm.c | 6 ++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 17 +++-------------- arch/arm64/kvm/sys_regs.c | 23 +++++++++++++++++++++++ 4 files changed, 36 insertions(+), 14 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c69b258fc989..968a997b3f02 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -297,6 +297,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6 /* Initial ID reg values loaded */ #define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7 + /* Fine-Grained UNDEF initialised */ +#define KVM_ARCH_FLAG_FGU_INITIALIZED 8 unsigned long flags; /* VM-wide vCPU feature set */ @@ -1107,6 +1109,8 @@ int __init populate_nv_trap_config(void); bool lock_all_vcpus(struct kvm *kvm); void unlock_all_vcpus(struct kvm *kvm); +void kvm_init_sysreg(struct kvm_vcpu *); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c063e84fc72c..9f806c9b7d5d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -675,6 +675,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) return ret; } + /* + * This needs to happen after NV has imposed its own restrictions on + * the feature set + */ + kvm_init_sysreg(vcpu); + ret = kvm_timer_enable(vcpu); if (ret) return ret; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index a09149fd91ed..245f9c1ca666 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -157,7 +157,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; struct kvm *kvm = kern_hyp_va(vcpu->kvm); - u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp; + u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0; u64 r_val, w_val; CHECK_FGT_MASKS(HFGRTR_EL2); @@ -174,13 +174,6 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2); ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2); - if (cpus_have_final_cap(ARM64_SME)) { - tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK; - - r_clr |= tmp; - w_clr |= tmp; - } - /* * Trap guest writes to TCR_EL1 to prevent it from enabling HA or HD. */ @@ -195,15 +188,11 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) compute_undef_clr_set(vcpu, kvm, HFGRTR_EL2, r_clr, r_set); compute_undef_clr_set(vcpu, kvm, HFGWTR_EL2, w_clr, w_set); - /* The default to trap everything not handled or supported in KVM. 
*/ - tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 | - HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1; - - r_val = __HFGRTR_EL2_nMASK & ~tmp; + r_val = __HFGRTR_EL2_nMASK; r_val |= r_set; r_val &= ~r_clr; - w_val = __HFGWTR_EL2_nMASK & ~tmp; + w_val = __HFGWTR_EL2_nMASK; w_val |= w_set; w_val &= ~w_clr; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 938df5dc3684..39e7c7f74717 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3942,6 +3942,29 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range * return 0; } +void kvm_init_sysreg(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + mutex_lock(&kvm->arch.config_lock); + + if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags)) + goto out; + + kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1 | + HFGxTR_EL2_nMAIR2_EL1 | + HFGxTR_EL2_nS2POR_EL1 | + HFGxTR_EL2_nPOR_EL1 | + HFGxTR_EL2_nPOR_EL0 | + HFGxTR_EL2_nACCDATA_EL1 | + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK); + + set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags); +out: + mutex_unlock(&kvm->arch.config_lock); +} + int __init kvm_sys_reg_table_init(void) { struct sys_reg_params params; -- cgit v1.2.3 From 84de212d739ecd16c6289ec4ed47e27b0080bac6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Feb 2024 13:18:25 +0000 Subject: KVM: arm64: Make FEAT_MOPS UNDEF if not advertised to the guest We unconditionally enable FEAT_MOPS, which is obviously wrong. So let's only do that when it is advertised to the guest. Which means we need to rely on a per-vcpu HCRX_EL2 shadow register. Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20240214131827.2856277-25-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_arm.h | 4 +--- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- arch/arm64/kvm/sys_regs.c | 7 +++++++ 4 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3c6f8ba1e479..a1769e415d72 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -102,9 +102,7 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) -#define HCRX_GUEST_FLAGS \ - (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \ - (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0)) +#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 968a997b3f02..3feabde9c926 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -584,6 +584,7 @@ struct kvm_vcpu_arch { /* Values of trap registers for the guest. 
*/ u64 hcr_el2; + u64 hcrx_el2; u64 mdcr_el2; u64 cptr_el2; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 2d5891518006..e3fcf8c4d5b4 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -236,7 +236,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); if (cpus_have_final_cap(ARM64_HAS_HCX)) { - u64 hcrx = HCRX_GUEST_FLAGS; + u64 hcrx = vcpu->arch.hcrx_el2; if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { u64 clr = 0, set = 0; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 38ed47bd29db..2cb69efac1dc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3956,6 +3956,13 @@ void kvm_init_sysreg(struct kvm_vcpu *vcpu) if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) vcpu->arch.hcr_el2 |= HCR_TTLBOS; + if (cpus_have_final_cap(ARM64_HAS_HCX)) { + vcpu->arch.hcrx_el2 = HCRX_GUEST_FLAGS; + + if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP)) + vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2); + } + if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags)) goto out; -- cgit v1.2.3 From 891766581deaf84919911c6a046592ce0ac49bc6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Feb 2024 13:18:27 +0000 Subject: KVM: arm64: Add debugfs file for guest's ID registers Debugging ID register setup can be a complicated affair. Give the kernel hacker a way to dump that state in an easy to parse way. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240214131827.2856277-27-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 3 ++ arch/arm64/kvm/sys_regs.c | 81 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3feabde9c926..181fef12e8e8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -319,6 +319,9 @@ struct kvm_arch { /* PMCR_EL0.N value for the guest */ u8 pmcr_n; + /* Iterator for idreg debugfs */ + u8 idreg_debugfs_iter; + /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; struct maple_tree smccc_filter; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b5335e5a6e6e..e3faedc8b7a3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -3422,6 +3423,81 @@ static bool emulate_sys_reg(struct kvm_vcpu *vcpu, return false; } +static void *idregs_debug_start(struct seq_file *s, loff_t *pos) +{ + struct kvm *kvm = s->private; + u8 *iter; + + mutex_lock(&kvm->arch.config_lock); + + iter = &kvm->arch.idreg_debugfs_iter; + if (*iter == (u8)~0) { + *iter = *pos; + if (*iter >= KVM_ARM_ID_REG_NUM) + iter = NULL; + } else { + iter = ERR_PTR(-EBUSY); + } + + mutex_unlock(&kvm->arch.config_lock); + + return iter; +} + +static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct kvm *kvm = s->private; + + (*pos)++; + + if ((kvm->arch.idreg_debugfs_iter + 1) < KVM_ARM_ID_REG_NUM) { + kvm->arch.idreg_debugfs_iter++; + + return &kvm->arch.idreg_debugfs_iter; + } + + return NULL; +} + +static void idregs_debug_stop(struct seq_file *s, void *v) +{ + struct kvm *kvm = s->private; + + if (IS_ERR(v)) + return; + + mutex_lock(&kvm->arch.config_lock); + + kvm->arch.idreg_debugfs_iter = 
~0; + + mutex_unlock(&kvm->arch.config_lock); +} + +static int idregs_debug_show(struct seq_file *s, void *v) +{ + struct kvm *kvm = s->private; + const struct sys_reg_desc *desc; + + desc = first_idreg + kvm->arch.idreg_debugfs_iter; + + if (!desc->name) + return 0; + + seq_printf(s, "%20s:\t%016llx\n", + desc->name, IDREG(kvm, IDX_IDREG(kvm->arch.idreg_debugfs_iter))); + + return 0; +} + +static const struct seq_operations idregs_debug_sops = { + .start = idregs_debug_start, + .next = idregs_debug_next, + .stop = idregs_debug_stop, + .show = idregs_debug_show, +}; + +DEFINE_SEQ_ATTRIBUTE(idregs_debug); + static void kvm_reset_id_regs(struct kvm_vcpu *vcpu) { const struct sys_reg_desc *idreg = first_idreg; @@ -3441,6 +3517,11 @@ static void kvm_reset_id_regs(struct kvm_vcpu *vcpu) id = reg_to_encoding(idreg); } + kvm->arch.idreg_debugfs_iter = ~0; + + debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm, + &idregs_debug_fops); + set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); } -- cgit v1.2.3 From 75841d89f3ed51ba1ee5fd341488fe1f55f4eb06 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 3 Jan 2024 17:16:02 -0600 Subject: KVM: arm64: Fix typos Fix typos, most reported by "codespell arch/arm64". Only touches comments, no code changes. Signed-off-by: Bjorn Helgaas Cc: James Morse Cc: Suzuki K Poulose Cc: Zenghui Yu Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.linux.dev Reviewed-by: Zenghui Yu Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/20240103231605.1801364-6-helgaas@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kvm/arch_timer.c | 2 +- arch/arm64/kvm/fpsimd.c | 2 +- arch/arm64/kvm/hyp/nvhe/host.S | 2 +- arch/arm64/kvm/hyp/nvhe/mm.c | 4 ++-- arch/arm64/kvm/inject_fault.c | 2 +- arch/arm64/kvm/vgic/vgic-init.c | 2 +- arch/arm64/kvm/vgic/vgic-its.c | 4 ++-- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 145ce73fc16c..3e2a1ac0c9bb 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -70,7 +70,7 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); /* * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the * static inline can allow the compiler to out-of-line this. KVM always wants - * the macro version as its always inlined. + * the macro version as it's always inlined. */ #define __kvm_swab32(x) ___constant_swab32(x) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 9dec8c419bf4..879982b1cc73 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -745,7 +745,7 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu, WARN_ON_ONCE(ret); /* - * The virtual offset behaviour is "interresting", as it + * The virtual offset behaviour is "interesting", as it * always applies when HCR_EL2.E2H==0, but only when * accessed from EL1 when HCR_EL2.E2H==1. So make sure we * track E2H when putting the HV timer in "direct" mode. 
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 8c1d0d4853df..571cf6eef1e1 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -117,7 +117,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) } /* - * Called just before entering the guest once we are no longer preemptable + * Called just before entering the guest once we are no longer preemptible * and interrupts are disabled. If we have managed to run anything using * FP while we were preemptible (such as off the back of an interrupt), * then neither the host nor the guest own the FP hardware (and it was the diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 7693a6757cd7..135cfb294ee5 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -110,7 +110,7 @@ SYM_FUNC_END(__host_enter) * u64 elr, u64 par); */ SYM_FUNC_START(__hyp_do_panic) - /* Prepare and exit to the host's panic funciton. */ + /* Prepare and exit to the host's panic function. */ mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) msr spsr_el2, lr diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index b01a3d1078a8..8850b591d775 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -155,7 +155,7 @@ int hyp_back_vmemmap(phys_addr_t back) start = hyp_memory[i].base; start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE); /* - * The begining of the hyp_vmemmap region for the current + * The beginning of the hyp_vmemmap region for the current * memblock may already be backed by the page backing the end * the previous region, so avoid mapping it twice. */ @@ -408,7 +408,7 @@ static void *admit_host_page(void *arg) return pop_hyp_memcache(host_mc, hyp_phys_to_virt); } -/* Refill our local memcache by poping pages from the one provided by the host. */ +/* Refill our local memcache by popping pages from the one provided by the host. */ int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, struct kvm_hyp_memcache *host_mc) { diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 0bd93a5f21ce..a640e839848e 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -134,7 +134,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) if (vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE) { fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; } else { - /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + /* no need to shuffle FS[4] into DFSR[10] as it's 0 */ fsr = DFSR_FSC_EXTABT_nLPAE; } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index e949e1d0fd9f..df51157420e8 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -309,7 +309,7 @@ int vgic_init(struct kvm *kvm) vgic_lpi_translation_cache_init(kvm); /* - * If we have GICv4.1 enabled, unconditionnaly request enable the + * If we have GICv4.1 enabled, unconditionally request enable the * v4 support so that we get HW-accelerated vSGIs. Otherwise, only * enable it if we present a virtual ITS to the guest. 
 */
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index e2764d0ffa9f..c734180a5bcf 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -1342,8 +1342,8 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
 }

 /**
- * vgic_its_invall - invalidate all LPIs targetting a given vcpu
- * @vcpu: the vcpu for which the RD is targetted by an invalidation
+ * vgic_its_invall - invalidate all LPIs targeting a given vcpu
+ * @vcpu: the vcpu for which the RD is targeted by an invalidation
 *
 * Contrary to the INVALL command, this targets a RD instead of a
 * collection, and we don't need to hold the its_lock, since no ITS is
--
cgit v1.2.3

From c034ec84e8795cf379bd47cc8871445f070a0110 Mon Sep 17 00:00:00 2001
From: Ankit Agrawal
Date: Sat, 24 Feb 2024 20:35:43 +0530
Subject: KVM: arm64: Introduce new flag for non-cacheable IO memory

Currently, KVM for ARM64 maps at stage 2 memory that is considered
device (i.e. it is not RAM) with DEVICE_nGnRE memory attributes; this
setting overrides (as per the ARM architecture [1]) any device MMIO
mapping present at stage 1, resulting in a set-up whereby a guest
operating system cannot determine device MMIO memory attributes on its
own: they are always overridden by the KVM stage 2 default.

This set-up does not allow guest operating systems to select device
memory attributes independently from KVM stage-2 mappings (refer to
[1], "Combining stage 1 and stage 2 memory type attributes"), which
turns out to be an issue in that guest operating systems (e.g. Linux)
may request to map device MMIO regions with memory attributes that
guarantee better performance (e.g. the gathering attribute, which for
some devices can generate larger PCIe memory write TLPs) and allow
specific operations (e.g. unaligned transactions), such as the
Normal-NC memory type.

The default device stage 2 mapping was chosen in KVM for ARM64 since
it was considered safer (i.e. it would not allow guests to trigger
uncontained failures ultimately crashing the machine), but this turned
out to be a moot point, since such failures are asynchronous (SError)
anyway, defeating the purpose. Failure containability is a property of
the platform and is independent of the memory type used for MMIO
device memory mappings.

Actually, the DEVICE_nGnRE memory type is even more problematic than
the Normal-NC memory type in terms of fault containability, in that
e.g. aborts triggered on DEVICE_nGnRE loads cannot, architecturally,
be made synchronous (that would imply the processor issues at most one
load transaction at a time - it cannot pipeline them - otherwise the
synchronous abort semantics would break the no-speculation attribute
attached to DEVICE_XXX memory).

This means that, regardless of the combined stage1+stage2 mappings, a
platform is safe if and only if device transactions cannot trigger
uncontained failures; that in turn relies on platform capabilities and
on the device type being assigned (i.e. PCIe AER/DPC error containment
and the RAS architecture [3]). Therefore the default KVM device stage 2
memory attributes play no role in making device assignment safer for a
given platform (provided the platform design adheres to the guidelines
outlined in [3]) and can be relaxed.

For all these reasons, relax the KVM stage 2 device memory attributes
from DEVICE_nGnRE to Normal-NC. Normal-NC was chosen over a different
Normal memory type default at stage 2 (e.g. Normal Write-through) to
avoid cache allocation/snooping.
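
[Editorial note: to make the attribute selection concrete before the diffs
further down, here is a minimal user-space sketch of the decision this patch
encodes. It mirrors the switch added to stage2_set_prot_attr() in
arch/arm64/kvm/hyp/pgtable.c below; every identifier suffixed _sketch is
hypothetical, and only the MT_S2_* encodings and the mutual exclusion of the
two prot flags are taken from the patch itself.]

/*
 * Sketch only: user-space model of the stage-2 memattr selection.
 * The real code lives in arch/arm64/kvm/hyp/pgtable.c.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define PROT_DEVICE_SKETCH	(1u << 3)	/* models KVM_PGTABLE_PROT_DEVICE */
#define PROT_NORMAL_NC_SKETCH	(1u << 4)	/* models KVM_PGTABLE_PROT_NORMAL_NC */

#define MT_S2_NORMAL_SKETCH		0xf	/* Normal Write-Back (RAM) */
#define MT_S2_NORMAL_NC_SKETCH		0x5	/* Normal Non-Cacheable */
#define MT_S2_DEVICE_nGnRE_SKETCH	0x1	/* Device-nGnRE */

static int s2_memattr_sketch(uint32_t prot, unsigned int *attr)
{
	switch (prot & (PROT_DEVICE_SKETCH | PROT_NORMAL_NC_SKETCH)) {
	case PROT_DEVICE_SKETCH | PROT_NORMAL_NC_SKETCH:
		return -EINVAL;			/* flags are mutually exclusive */
	case PROT_DEVICE_SKETCH:
		*attr = MT_S2_DEVICE_nGnRE_SKETCH;
		break;
	case PROT_NORMAL_NC_SKETCH:
		*attr = MT_S2_NORMAL_NC_SKETCH;	/* the new choice for MMIO */
		break;
	default:
		*attr = MT_S2_NORMAL_SKETCH;	/* neither flag: normal RAM */
	}
	return 0;
}

int main(void)
{
	unsigned int attr;

	if (!s2_memattr_sketch(PROT_NORMAL_NC_SKETCH, &attr))
		printf("stage-2 memattr: 0x%x\n", attr);	/* prints 0x5 */
	return 0;
}

With FWB the encodings differ (see the MT_S2_FWB_* additions in the memory.h
hunk below), but the selection logic is the same.
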
Relaxing S2 KVM device MMIO mappings to Normal-NC is not expected to
trigger any issue on guest device reclaim use cases either (i.e. device
MMIO unmap followed by a device reset), at least for PCIe devices: in
PCIe, a device reset is architected and carried out through PCI config
space transactions, which are naturally ordered with respect to MMIO
transactions according to the PCI ordering rules.

Having a Normal-NC S2 default puts guests in control (thanks to the
combined stage1+stage2 memory attribute rules [1]) of device MMIO
region memory mappings, according to the rules described in [1] and
summarized here (S1 - stage 1, S2 - stage 2):

  S1         |  S2         |  Result
  NORMAL-WB  |  NORMAL-NC  |  NORMAL-NC
  NORMAL-WT  |  NORMAL-NC  |  NORMAL-NC
  NORMAL-NC  |  NORMAL-NC  |  NORMAL-NC
  DEVICE     |  NORMAL-NC  |  DEVICE

It is worth noting that currently, to map device MMIO space to user
space in a device pass-through use case, the VFIO framework applies
memory attributes derived from pgprot_noncached() settings applied to
VMAs, which result in Device-nGnRnE memory attributes for the stage-1
VMM mappings.

This means that a userspace mapping for device MMIO space carried out
with the current VFIO framework and a guest OS mapping for the same
MMIO space may result in a mismatched alias as described in [2].

Defaulting KVM device stage-2 mappings to Normal-NC attributes does not
change anything in this respect, in that the mismatched aliases would
only affect (refer to [2] for a detailed explanation) ordering between
the streams of transactions resulting from the userspace and guest OS
mappings (i.e. it does not cause a loss of properties for either stream
of transactions on its own), which is harmless given that userspace and
guest OS access to the device is carried out through independent
transaction streams.

A Normal-NC flag is not present today, so add a new kvm_pgtable_prot
flag (KVM_PGTABLE_PROT_NORMAL_NC) for it, along with its corresponding
PTE value 0x5 (0b101), determined from [1].

Lastly, adapt the stage2 PTE property setter function
(stage2_set_prot_attr) to handle the Normal-NC attribute.

The entire discussion leading to this patch series may be followed
through the following links.
Link: https://lore.kernel.org/all/20230907181459.18145-3-ankita@nvidia.com
Link: https://lore.kernel.org/r/20231205033015.10044-1-ankita@nvidia.com

[1] section D8.5.5 - DDI0487J_a_a-profile_architecture_reference_manual.pdf
[2] section B2.8 - DDI0487J_a_a-profile_architecture_reference_manual.pdf
[3] sections 1.7.7.3/1.8.5.2/appendix C - DEN0029H_SBSA_7.1.pdf

Suggested-by: Jason Gunthorpe
Acked-by: Catalin Marinas
Acked-by: Will Deacon
Reviewed-by: Marc Zyngier
Signed-off-by: Ankit Agrawal
Link: https://lore.kernel.org/r/20240224150546.368-2-ankita@nvidia.com
Signed-off-by: Oliver Upton
---
 arch/arm64/include/asm/kvm_pgtable.h |  2 ++
 arch/arm64/include/asm/memory.h      |  2 ++
 arch/arm64/kvm/hyp/pgtable.c         | 24 +++++++++++++++++++-----
 3 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index cfdf40f734b1..19278dfe7978 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -197,6 +197,7 @@ enum kvm_pgtable_stage2_flags {
  * @KVM_PGTABLE_PROT_W: Write permission.
  * @KVM_PGTABLE_PROT_R: Read permission.
  * @KVM_PGTABLE_PROT_DEVICE: Device attributes.
+ * @KVM_PGTABLE_PROT_NORMAL_NC: Normal noncacheable attributes.
  * @KVM_PGTABLE_PROT_SW0: Software bit 0.
* @KVM_PGTABLE_PROT_SW1: Software bit 1. * @KVM_PGTABLE_PROT_SW2: Software bit 2. @@ -208,6 +209,7 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_R = BIT(2), KVM_PGTABLE_PROT_DEVICE = BIT(3), + KVM_PGTABLE_PROT_NORMAL_NC = BIT(4), KVM_PGTABLE_PROT_SW0 = BIT(55), KVM_PGTABLE_PROT_SW1 = BIT(56), diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index d82305ab420f..449ca2ff1df6 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -173,6 +173,7 @@ * Memory types for Stage-2 translation */ #define MT_S2_NORMAL 0xf +#define MT_S2_NORMAL_NC 0x5 #define MT_S2_DEVICE_nGnRE 0x1 /* @@ -180,6 +181,7 @@ * Stage-2 enforces Normal-WB and Device-nGnRE */ #define MT_S2_FWB_NORMAL 6 +#define MT_S2_FWB_NORMAL_NC 5 #define MT_S2_FWB_DEVICE_nGnRE 1 #ifdef CONFIG_ARM64_4K_PAGES diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c651df904fe3..e2982a8922c3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -717,15 +717,29 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, kvm_pte_t *ptep) { - bool device = prot & KVM_PGTABLE_PROT_DEVICE; - kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) : - KVM_S2_MEMATTR(pgt, NORMAL); + kvm_pte_t attr; u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS; + switch (prot & (KVM_PGTABLE_PROT_DEVICE | + KVM_PGTABLE_PROT_NORMAL_NC)) { + case KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC: + return -EINVAL; + case KVM_PGTABLE_PROT_DEVICE: + if (prot & KVM_PGTABLE_PROT_X) + return -EINVAL; + attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE); + break; + case KVM_PGTABLE_PROT_NORMAL_NC: + if (prot & KVM_PGTABLE_PROT_X) + return -EINVAL; + attr = KVM_S2_MEMATTR(pgt, NORMAL_NC); + break; + default: + attr = KVM_S2_MEMATTR(pgt, NORMAL); + } + if (!(prot & KVM_PGTABLE_PROT_X)) attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; - else if (device) - return -EINVAL; if (prot & KVM_PGTABLE_PROT_R) attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; -- cgit v1.2.3 From 5c1ebe9ada19f95e5582f80b37e0b5003dc79ddb Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 27 Feb 2024 09:41:15 +0000 Subject: KVM: arm64: Don't initialize idreg debugfs w/ preemption disabled Testing KVM with DEBUG_ATOMIC_SLEEP enabled doesn't get far before hitting the first splat: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1578 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 13062, name: vgic_lpi_stress preempt_count: 1, expected: 0 2 locks held by vgic_lpi_stress/13062: #0: ffff080084553240 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0xc0/0x13f0 #1: ffff800080485f08 (&kvm->arch.config_lock){+.+.}-{3:3}, at: kvm_arch_vcpu_ioctl+0xd60/0x1788 CPU: 19 PID: 13062 Comm: vgic_lpi_stress Tainted: G W O 6.8.0-dbg-DEV #1 Call trace: dump_backtrace+0xf8/0x148 show_stack+0x20/0x38 dump_stack_lvl+0xb4/0xf8 dump_stack+0x18/0x40 __might_resched+0x248/0x2a0 __might_sleep+0x50/0x88 down_write+0x30/0x150 start_creating+0x90/0x1a0 __debugfs_create_file+0x5c/0x1b0 debugfs_create_file+0x34/0x48 kvm_reset_sys_regs+0x120/0x1e8 kvm_reset_vcpu+0x148/0x270 kvm_arch_vcpu_ioctl+0xddc/0x1788 kvm_vcpu_ioctl+0xb6c/0x13f0 __arm64_sys_ioctl+0x98/0xd8 invoke_syscall+0x48/0x108 el0_svc_common+0xb4/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x54/0x128 el0t_64_sync_handler+0x68/0xc0 el0t_64_sync+0x1a8/0x1b0 kvm_reset_vcpu() disables preemption as it needs to unload vCPU state from the CPU to twiddle with it, which subsequently explodes when taking the 
parent inode's rwsem while creating the idreg debugfs file. Fix it by moving the initialization to kvm_arch_create_vm_debugfs(). Fixes: 891766581dea ("KVM: arm64: Add debugfs file for guest's ID registers") Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240227094115.1723330-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 5 +++++ arch/arm64/kvm/sys_regs.c | 13 ++++++++----- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 181fef12e8e8..6883963bbc3a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1102,6 +1102,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); int kvm_handle_cp10_id(struct kvm_vcpu *vcpu); +void kvm_sys_regs_create_debugfs(struct kvm *kvm); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); int __init kvm_sys_reg_table_init(void); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9f806c9b7d5d..9b04dbd3aef1 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -190,6 +190,11 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +int kvm_arch_create_vm_debugfs(struct kvm *kvm) +{ + kvm_sys_regs_create_debugfs(kvm); + return 0; +} /** * kvm_arch_destroy_vm - destroy the VM data structure diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e9319d90d66f..5d2b13953141 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3499,6 +3499,14 @@ static const struct seq_operations idregs_debug_sops = { DEFINE_SEQ_ATTRIBUTE(idregs_debug); +void kvm_sys_regs_create_debugfs(struct kvm *kvm) +{ + kvm->arch.idreg_debugfs_iter = ~0; + + debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm, + &idregs_debug_fops); +} + static void kvm_reset_id_regs(struct kvm_vcpu *vcpu) { const struct sys_reg_desc *idreg = first_idreg; @@ -3518,11 +3526,6 @@ static void kvm_reset_id_regs(struct kvm_vcpu *vcpu) id = reg_to_encoding(idreg); } - kvm->arch.idreg_debugfs_iter = ~0; - - debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm, - &idregs_debug_fops); - set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); } -- cgit v1.2.3
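
[Editorial note: as a closing illustration of the pattern this fix converges
on (VM-scoped debugfs entries created only from a sleepable context), here is
a minimal, self-contained module sketch. It is not part of the series; the
module, directory, and demo_* identifiers are hypothetical, and only the
debugfs/seq_file APIs and the 0444 "idregs" file shape are taken from the
patches above.]

/*
 * Sketch only: the debugfs + seq_file pattern used by the "idregs"
 * file, kept in a sleepable context (module init here, the
 * kvm_arch_create_vm_debugfs() hook in the fix above), because
 * debugfs_create_file() takes the parent inode's rwsem and may sleep.
 */
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static struct dentry *demo_dir;		/* hypothetical demo directory */

static int demo_idregs_show(struct seq_file *s, void *v)
{
	/* The real file prints one guest ID register per iterator step. */
	seq_printf(s, "%20s:\t%016llx\n", "ID_DEMO_EL1", 0x1122334455667788ULL);
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(demo_idregs);	/* generates demo_idregs_fops */

static int __init demo_init(void)
{
	might_sleep();	/* the constraint the fix restores: never atomic here */
	demo_dir = debugfs_create_dir("idregs-demo", NULL);
	debugfs_create_file("idregs", 0444, demo_dir, NULL, &demo_idregs_fops);
	return 0;
}
module_init(demo_init);

static void __exit demo_exit(void)
{
	debugfs_remove_recursive(demo_dir);
}
module_exit(demo_exit);

MODULE_DESCRIPTION("Sketch of VM-scoped debugfs file creation");
MODULE_LICENSE("GPL");
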