From f5fe0adeed6019df495497a64cb57d563ead2296 Mon Sep 17 00:00:00 2001 From: Wujie Duan Date: Mon, 18 Mar 2024 17:47:35 +0800 Subject: KVM: arm64: Fix out-of-IPA space translation fault handling Commit 11e5ea5242e3 ("KVM: arm64: Use helpers to classify exception types reported via ESR") tried to abstract the translation fault check when handling an out-of IPA space condition, but incorrectly replaced it with a permission fault check. Restore the previous translation fault check. Fixes: 11e5ea5242e3 ("KVM: arm64: Use helpers to classify exception types reported via ESR") Acked-by: Ard Biesheuvel Reviewed-by: Marc Zyngier Signed-off-by: Wujie Duan Cc: stable@vger.kernel.org Link: https://lore.kernel.org/kvmarm/864jd3269g.wl-maz@kernel.org/ Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 18680771cdb0..dc04bc767865 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); - if (esr_fsc_is_permission_fault(esr)) { + if (esr_fsc_is_translation_fault(esr)) { /* Beyond sanitised PARange (which is the IPA limit) */ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { kvm_inject_size_fault(vcpu); -- cgit v1.2.3 From f62d4c3eb687d87b616b4279acec7862553bda77 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 27 Mar 2024 12:48:50 +0000 Subject: KVM: arm64: Don't defer TLB invalidation when zapping table entries Commit 7657ea920c54 ("KVM: arm64: Use TLBI range-based instructions for unmap") introduced deferred TLB invalidation for the stage-2 page-table so that range-based invalidation can be used for the accumulated addresses. This works fine if the structure of the page-tables remains unchanged, but if entire tables are zapped and subsequently freed then we transiently leave the hardware page-table walker with a reference to freed memory thanks to the translation walk caches. For example, stage2_unmap_walker() will free page-table pages: if (childp) mm_ops->put_page(childp); and issue the TLB invalidation later in kvm_pgtable_stage2_unmap(): if (stage2_unmap_defer_tlb_flush(pgt)) /* Perform the deferred TLB invalidations */ kvm_tlb_flush_vmid_range(pgt->mmu, addr, size); For now, take the conservative approach and invalidate the TLB eagerly when we clear a table entry. Note, however, that the existing level hint passed to __kvm_tlb_flush_vmid_ipa() is incorrect and will be fixed in a subsequent patch. Cc: Raghavendra Rao Ananta Cc: Shaoqin Huang Cc: Marc Zyngier Cc: Oliver Upton Signed-off-by: Will Deacon Reviewed-by: Shaoqin Huang Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240327124853.11206-2-will@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/pgtable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 3fae5830f8d2..de0b667ba296 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -896,9 +896,11 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); - if (!stage2_unmap_defer_tlb_flush(pgt)) + if (!stage2_unmap_defer_tlb_flush(pgt) || + kvm_pte_table(ctx->old, ctx->level)) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + } } mm_ops->put_page(ctx->ptep); -- cgit v1.2.3 From 36e008323926036650299cfbb2dca704c7aba849 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 27 Mar 2024 12:48:51 +0000 Subject: KVM: arm64: Don't pass a TLBI level hint when zapping table entries The TLBI level hints are for leaf entries only, so take care not to pass them incorrectly after clearing a table entry. Cc: Gavin Shan Cc: Marc Zyngier Cc: Quentin Perret Fixes: 82bb02445de5 ("KVM: arm64: Implement kvm_pgtable_hyp_unmap() at EL2") Fixes: 6d9d2115c480 ("KVM: arm64: Add support for stage-2 map()/unmap() in generic page-table") Signed-off-by: Will Deacon Reviewed-by: Shaoqin Huang Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240327124853.11206-3-will@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/pgtable.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index de0b667ba296..a40dafc43bb6 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); + __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN); } else { if (ctx->end - ctx->addr < granule) return -EINVAL; @@ -896,10 +896,12 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); - if (!stage2_unmap_defer_tlb_flush(pgt) || - kvm_pte_table(ctx->old, ctx->level)) { - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, - ctx->addr, ctx->level); + if (kvm_pte_table(ctx->old, ctx->level)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + TLBI_TTL_UNKNOWN); + } else if (!stage2_unmap_defer_tlb_flush(pgt)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + ctx->level); } } -- cgit v1.2.3 From 0f0ff097bf77663b8d2692e33d56119947611bb0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 27 Mar 2024 12:48:52 +0000 Subject: KVM: arm64: Use TLBI_TTL_UNKNOWN in __kvm_tlb_flush_vmid_range() Commit c910f2b65518 ("arm64/mm: Update tlb invalidation routines for FEAT_LPA2") updated the __tlbi_level() macro to take the target level as an argument, with TLBI_TTL_UNKNOWN (rather than 0) indicating that the caller cannot provide level information. Unfortunately, the two implementations of __kvm_tlb_flush_vmid_range() were not updated and so now ask for an level 0 invalidation if FEAT_LPA2 is implemented. Fix the problem by passing TLBI_TTL_UNKNOWN instead of 0 as the level argument to __flush_s2_tlb_range_op() in __kvm_tlb_flush_vmid_range(). Cc: Catalin Marinas Cc: Oliver Upton Cc: Marc Zyngier Reviewed-by: Ryan Roberts Fixes: c910f2b65518 ("arm64/mm: Update tlb invalidation routines for FEAT_LPA2") Signed-off-by: Will Deacon Reviewed-by: Shaoqin Huang Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240327124853.11206-4-will@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/tlb.c | 3 ++- arch/arm64/kvm/hyp/vhe/tlb.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index a60fb13e2192..2fc68da4036d 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt, false); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index b32e2940df7d..1a60b95381e8 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); -- cgit v1.2.3 From 4c36a156738887c1edd78589fe192d757989bcde Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 27 Mar 2024 12:48:53 +0000 Subject: KVM: arm64: Ensure target address is granule-aligned for range TLBI When zapping a table entry in stage2_try_break_pte(), we issue range TLB invalidation for the region that was mapped by the table. However, we neglect to align the base address down to the granule size and so if we ended up reaching the table entry via a misaligned address then we will accidentally skip invalidation for some prefix of the affected address range. Align 'ctx->addr' down to the granule size when performing TLB invalidation for an unmapped table in stage2_try_break_pte(). Cc: Raghavendra Rao Ananta Cc: Gavin Shan Cc: Shaoqin Huang Cc: Quentin Perret Fixes: defc8cc7abf0 ("KVM: arm64: Invalidate the table entries upon a range") Signed-off-by: Will Deacon Reviewed-by: Shaoqin Huang Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240327124853.11206-5-will@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/pgtable.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index a40dafc43bb6..5a59ef88b646 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, * Perform the appropriate TLB invalidation based on the * evicted pte value (if any). */ - if (kvm_pte_table(ctx->old, ctx->level)) - kvm_tlb_flush_vmid_range(mmu, ctx->addr, - kvm_granule_size(ctx->level)); - else if (kvm_pte_valid(ctx->old)) + if (kvm_pte_table(ctx->old, ctx->level)) { + u64 size = kvm_granule_size(ctx->level); + u64 addr = ALIGN_DOWN(ctx->addr, size); + + kvm_tlb_flush_vmid_range(mmu, addr, size); + } else if (kvm_pte_valid(ctx->old)) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + } } if (stage2_pte_is_counted(ctx->old)) -- cgit v1.2.3 From b3320142f3db9b3f2a23460abd3e22292e1530a5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Mar 2024 11:54:14 +0000 Subject: arm64: Fix early handling of FEAT_E2H0 not being implemented Commit 3944382fa6f2 introduced checks for the FEAT_E2H0 not being implemented. However, the check is absolutely wrong and makes a point it testing a bit that is guaranteed to be zero. On top of that, the detection happens way too late, after the init_el2_state has done its job. This went undetected because the HW this was tested on has E2H being RAO/WI, and not RES1. However, the bug shows up when run as a nested guest, where HCR_EL2.E2H is not necessarily set to 1. As a result, booting the kernel in hVHE mode fails with timer accesses being cought in a trap loop (which was fun to debug). Fix the check for ID_AA64MMFR4_EL1.E2H0, and set the HCR_EL2.E2H bit early so that it can be checked by the rest of the init sequence. With this, hVHE works again in a NV environment that doesn't have FEAT_E2H0. Fixes: 3944382fa6f2 ("arm64: Treat HCR_EL2.E2H as RES1 when ID_AA64MMFR4_EL1.E2H0 is negative") Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20240321115414.3169115-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kernel/head.S | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ce08b744aaab..06234c3a15f3 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -291,6 +291,21 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) blr x2 0: mov_q x0, HCR_HOST_NVHE_FLAGS + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be + * RES1 in that case. Publish the E2H bit early so that + * it can be picked up by the init_el2_state macro. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f + + orr x0, x0, #HCR_E2H +1: msr hcr_el2, x0 isb @@ -303,22 +318,10 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) mov_q x1, INIT_SCTLR_EL1_MMU_OFF - /* - * Compliant CPUs advertise their VHE-onlyness with - * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be - * RES1 in that case. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but - * don't advertise it (they predate this relaxation). - */ - mrs_s x0, SYS_ID_AA64MMFR4_EL1 - ubfx x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH - tbnz x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f - mrs x0, hcr_el2 and x0, x0, #HCR_E2H cbz x0, 2f -1: + /* Set a sane SCTLR_EL1, the VHE way */ pre_disable_mmu_workaround msr_s SYS_SCTLR_EL12, x1 -- cgit v1.2.3 From d96c66ab9fb3ad8b243669cf6b41e68d0f7f9ecd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Mar 2024 17:37:06 +0000 Subject: KVM: arm64: Rationalise KVM banner output We are not very consistent when it comes to displaying which mode we're in (VHE, {n,h}VHE, protected or not). For example, booting in protected mode with hVHE results in: [ 0.969545] kvm [1]: Protected nVHE mode initialized successfully which is mildly amusing considering that the machine is VHE only. We already cleaned this up a bit with commit 1f3ca7023fe6 ("KVM: arm64: print Hyp mode"), but that's still unsatisfactory. Unify the three strings into one and use a mess of conditional statements to sort it out (yes, it's a slow day). Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240321173706.3280796-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/arm.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3dee5490eea9..c4a0a35e02c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void) if (err) goto out_hyp; - if (is_protected_kvm_enabled()) { - kvm_info("Protected nVHE mode initialized successfully\n"); - } else if (in_hyp_mode) { - kvm_info("VHE mode initialized successfully\n"); - } else { - char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n'; - kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode); - } + kvm_info("%s%sVHE mode initialized successfully\n", + in_hyp_mode ? "" : (is_protected_kvm_enabled() ? + "Protected " : "Hyp "), + in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ? + "h" : "n")); /* * FIXME: Do something reasonable if kvm_init() fails after pKVM -- cgit v1.2.3