From 5a169bf04cd2bfdbac967d12eb5b70915b29d7ee Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 19 Oct 2020 15:55:56 +0100 Subject: x86/kvm: Reserve KVM_FEATURE_MSI_EXT_DEST_ID No functional change; just reserve the feature bit for now so that VMMs can start to implement it. This will allow the host to indicate that MSI emulation supports 15-bit destination IDs, allowing up to 32768 CPUs without interrupt remapping. cf. https://patchwork.kernel.org/patch/11816693/ for qemu Signed-off-by: David Woodhouse Acked-by: Paolo Bonzini Message-Id: <4cd59bed05f4b7410d3d1ffd1e997ab53683874d.camel@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm_para.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 812e9b4c1114..950afebfba88 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -32,6 +32,7 @@ #define KVM_FEATURE_POLL_CONTROL 12 #define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_FEATURE_ASYNC_PF_INT 14 +#define KVM_FEATURE_MSI_EXT_DEST_ID 15 #define KVM_HINTS_REALTIME 0 -- cgit v1.2.3 From 28e81c6270b3d0e9faadd565f314ad9ac8256620 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Oct 2020 09:51:09 +0000 Subject: KVM: arm64: Don't corrupt tpidr_el2 on failed HVC call The hyp-init code starts by stashing a register in TPIDR_EL2 in in order to free a register. This happens no matter if the HVC call is legal or not. Although nothing wrong seems to come out of it, it feels odd to alter the EL2 state for something that eventually returns an error. Instead, use the fact that we know exactly which bits of the __kvm_hyp_init call are non-zero to perform the check with a series of EOR/ROR instructions, combined with a build-time check that the value is the one we expect. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201026095116.72051-2-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 47224dc62c51..b11a9d7db677 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -57,16 +57,25 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc - /* Set tpidr_el2 for use by HYP to free a register */ - msr tpidr_el2, x2 - - mov x2, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) - cmp x0, x2 - b.eq 1f + // We only actively check bits [24:31], and everything + // else has to be zero, which we check at build time. +#if (KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) & 0xFFFFFFFF00FFFFFF) +#error Unexpected __KVM_HOST_SMCCC_FUNC___kvm_hyp_init value +#endif + + ror x0, x0, #24 + eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 24) & 0xF) + ror x0, x0, #4 + eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 28) & 0xF) + cbz x0, 1f mov x0, #SMCCC_RET_NOT_SUPPORTED eret -1: phys_to_ttbr x0, x1 +1: + /* Set tpidr_el2 for use by HYP to free a register */ + msr tpidr_el2, x2 + + phys_to_ttbr x0, x1 alternative_if ARM64_HAS_CNP orr x0, x0, #TTBR_CNP_BIT alternative_else_nop_endif -- cgit v1.2.3 From b6d6db4de86f5b1be9bbe59c73302538301915c7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Oct 2020 09:51:10 +0000 Subject: KVM: arm64: Remove leftover kern_hyp_va() in nVHE TLB invalidation The new calling convention says that pointers coming from the SMCCC interface are turned into their HYP version in the host HVC handler. However, there is still a stray kern_hyp_va() in the TLB invalidation code, which could result in a corrupted pointer. Drop the spurious conversion. Fixes: a071261d9318 ("KVM: arm64: nVHE: Fix pointers during SMCCC convertion") Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201026095116.72051-3-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/tlb.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 544bca3072b7..ad212d8fa417 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -121,7 +121,6 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - mmu = kern_hyp_va(mmu); __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); -- cgit v1.2.3 From d2782505fbe3055f983c772a448ac5cb419f9df7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Oct 2020 09:51:11 +0000 Subject: KVM: arm64: Drop useless PAN setting on host EL1 to EL2 transition Setting PSTATE.PAN when entering EL2 on nVHE doesn't make much sense as this bit only means something for translation regimes that include EL0. This obviously isn't the case in the nVHE case, so let's drop this setting. Signed-off-by: Marc Zyngier Reviewed-by: Vladimir Murzin Link: https://lore.kernel.org/r/20201026095116.72051-4-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/host.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index ff9a0f547b9f..ed27f06a31ba 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -17,8 +17,6 @@ SYM_FUNC_START(__host_exit) get_host_ctxt x0, x1 - ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) - /* Store the host regs x2 and x3 */ stp x2, x3, [x0, #CPU_XREG_OFFSET(2)] -- cgit v1.2.3 From 7efe8ef274024ef1d5c495c79dfcbbff38c5f366 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 26 Oct 2020 14:44:23 +0000 Subject: KVM: arm64: Allocate stage-2 pgd pages with GFP_KERNEL_ACCOUNT For consistency with the rest of the stage-2 page-table page allocations (performing using a kvm_mmu_memory_cache), ensure that __GFP_ACCOUNT is included in the GFP flags for the PGD pages. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Reviewed-by: Gavin Shan Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201026144423.24683-1-will@kernel.org --- arch/arm64/kvm/hyp/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0cdf6e461cbd..95141b0d6088 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -846,7 +846,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm) u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; - pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO); + pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!pgt->pgd) return -ENOMEM; -- cgit v1.2.3 From 4a1c2c7f63c52ccb11770b5ae25920a6b79d3548 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:24:09 +0000 Subject: KVM: arm64: Fix AArch32 handling of DBGD{CCINT,SCRext} and DBGVCR The DBGD{CCINT,SCRext} and DBGVCR register entries in the cp14 array are missing their target register, resulting in all accesses being targetted at the guard sysreg (indexed by __INVALID_SYSREG__). Point the emulation code at the actual register entries. Fixes: bdfb4b389c8d ("arm64: KVM: add trap handlers for AArch32 debug registers") Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201029172409.2768336-1-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/sys_regs.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0aecbab6a7fb..781d029b8aa8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -239,6 +239,7 @@ enum vcpu_sysreg { #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) +#define cp14_DBGVCR (DBGVCR32_EL2 * 2) #define NR_COPRO_REGS (NR_SYS_REGS * 2) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3c203cb8c103..983994f01a63 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1881,9 +1881,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1898,7 +1898,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), -- cgit v1.2.3 From e2fc6a9f686d037cbd9b08b9fb657685b4a722d3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 29 Oct 2020 14:47:16 +0000 Subject: KVM: arm64: Fix masks in stage2_pte_cacheable() stage2_pte_cacheable() tries to figure out whether the mapping installed in its 'pte' parameter is cacheable or not. Unfortunately, it fails miserably because it extracts the memory attributes from the entry using FIELD_GET(), which returns the attributes shifted down to bit 0, but then compares this with the unshifted value generated by the PAGE_S2_MEMATTR() macro. A direct consequence of this bug is that cache maintenance is silently skipped, which in turn causes 32-bit guests to crash early on when their set/way maintenance is trapped but not emulated correctly. Fix the broken masks by avoiding the use of FIELD_GET() altogether. Fixes: 6d9d2115c480 ("KVM: arm64: Add support for stage-2 map()/unmap() in generic page-table") Reported-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20201029144716.30476-1-will@kernel.org --- arch/arm64/kvm/hyp/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 95141b0d6088..0271b4a3b9fe 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -635,7 +635,7 @@ static void stage2_flush_dcache(void *addr, u64 size) static bool stage2_pte_cacheable(kvm_pte_t pte) { - u64 memattr = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR, pte); + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; return memattr == PAGE_S2_MEMATTR(NORMAL); } -- cgit v1.2.3 From 2f40c46021bbb3ecd5c5f05764ecccbc276bc690 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 26 Oct 2020 10:06:26 +1100 Subject: KVM: arm64: Use fallback mapping sizes for contiguous huge page sizes Although huge pages can be created out of multiple contiguous PMDs or PTEs, the corresponding sizes are not supported at Stage-2 yet. Instead of failing the mapping, fall back to the nearer supported mapping size (CONT_PMD to PMD and CONT_PTE to PTE respectively). Suggested-by: Marc Zyngier Signed-off-by: Gavin Shan [maz: rewritten commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201025230626.18501-1-gshan@redhat.com --- arch/arm64/kvm/mmu.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index a816cb8e619b..e431d2d8e368 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -787,14 +787,26 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = PAGE_SHIFT; } - if (vma_shift == PUD_SHIFT && - !fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) - vma_shift = PMD_SHIFT; - - if (vma_shift == PMD_SHIFT && - !fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { - force_pte = true; + switch (vma_shift) { + case PUD_SHIFT: + if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) + break; + fallthrough; + case CONT_PMD_SHIFT: + vma_shift = PMD_SHIFT; + fallthrough; + case PMD_SHIFT: + if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) + break; + fallthrough; + case CONT_PTE_SHIFT: vma_shift = PAGE_SHIFT; + force_pte = true; + fallthrough; + case PAGE_SHIFT: + break; + default: + WARN_ONCE(1, "Unknown vma_shift %d", vma_shift); } vma_pagesize = 1UL << vma_shift; -- cgit v1.2.3 From 91a2c34b7d6fadc9c5d9433c620ea4c32ee7cae8 Mon Sep 17 00:00:00 2001 From: Santosh Shukla Date: Mon, 26 Oct 2020 16:54:07 +0530 Subject: KVM: arm64: Force PTE mapping on fault resulting in a device mapping VFIO allows a device driver to resolve a fault by mapping a MMIO range. This can be subsequently result in user_mem_abort() to try and compute a huge mapping based on the MMIO pfn, which is a sure recipe for things to go wrong. Instead, force a PTE mapping when the pfn faulted in has a device mapping. Fixes: 6d674e28f642 ("KVM: arm/arm64: Properly handle faulting of device mappings") Suggested-by: Marc Zyngier Signed-off-by: Santosh Shukla [maz: rewritten commit message] Signed-off-by: Marc Zyngier Reviewed-by: Gavin Shan Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1603711447-11998-2-git-send-email-sashukla@nvidia.com --- arch/arm64/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index e431d2d8e368..c7c6df6309d5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -851,6 +851,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (kvm_is_device_pfn(pfn)) { device = true; + force_pte = true; } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write -- cgit v1.2.3 From e9a33caec90e05673e2f7fb7c80f172031964d25 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 26 Oct 2020 13:49:29 +0000 Subject: KVM: arm64: Factor out is_{vhe,nvhe}_hyp_code() Currently has_vhe() detects whether it is being compiled for VHE/NVHE hyp code based on preprocessor definitions, and uses this knowledge to avoid redundant runtime checks. There are other cases where we'd like to use this knowledge, so let's factor the preprocessor checks out into separate helpers. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Acked-by: Will Deacon Cc: David Brazdil Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20201026134931.28246-2-mark.rutland@arm.com --- arch/arm64/include/asm/virt.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 09977acc007d..300be14ba77b 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -83,16 +83,27 @@ static inline bool is_kernel_in_hyp_mode(void) return read_sysreg(CurrentEL) == CurrentEL_EL2; } +static __always_inline bool is_vhe_hyp_code(void) +{ + /* Only defined for code run in VHE hyp context */ + return __is_defined(__KVM_VHE_HYPERVISOR__); +} + +static __always_inline bool is_nvhe_hyp_code(void) +{ + /* Only defined for code run in NVHE hyp context */ + return __is_defined(__KVM_NVHE_HYPERVISOR__); +} + static __always_inline bool has_vhe(void) { /* - * The following macros are defined for code specic to VHE/nVHE. - * If has_vhe() is inlined into those compilation units, it can - * be determined statically. Otherwise fall back to caps. + * Code only run in VHE/NVHE hyp context can assume VHE is present or + * absent. Otherwise fall back to caps. */ - if (__is_defined(__KVM_VHE_HYPERVISOR__)) + if (is_vhe_hyp_code()) return true; - else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) + else if (is_nvhe_hyp_code()) return false; else return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); -- cgit v1.2.3 From dfc4e3f08903ed8fe0b66cc25b64524a82654166 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 26 Oct 2020 13:49:30 +0000 Subject: arm64: cpufeature: reorder cpus_have_{const, final}_cap() In a subsequent patch we'll modify cpus_have_const_cap() to call cpus_have_final_cap(), and hence we need to define cpus_have_final_cap() first. To make subsequent changes easier to follow, this patch reorders the two without making any other changes. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Acked-by: Will Deacon Cc: David Brazdil Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20201026134931.28246-3-mark.rutland@arm.com --- arch/arm64/include/asm/cpufeature.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index fba6700b457b..9f671aa0419b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -428,35 +428,35 @@ static __always_inline bool __cpus_have_const_cap(int num) } /* - * Test for a capability, possibly with a runtime check. + * Test for a capability without a runtime check. * - * Before capabilities are finalized, this behaves as cpus_have_cap(). + * Before capabilities are finalized, this will BUG(). * After capabilities are finalized, this is patched to avoid a runtime check. * * @num must be a compile-time constant. */ -static __always_inline bool cpus_have_const_cap(int num) +static __always_inline bool cpus_have_final_cap(int num) { if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else - return cpus_have_cap(num); + BUG(); } /* - * Test for a capability without a runtime check. + * Test for a capability, possibly with a runtime check. * - * Before capabilities are finalized, this will BUG(). + * Before capabilities are finalized, this behaves as cpus_have_cap(). * After capabilities are finalized, this is patched to avoid a runtime check. * * @num must be a compile-time constant. */ -static __always_inline bool cpus_have_final_cap(int num) +static __always_inline bool cpus_have_const_cap(int num) { if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else - BUG(); + return cpus_have_cap(num); } static inline void cpus_set_cap(unsigned int num) -- cgit v1.2.3 From d86de40decaa14e6613af1b2783bf4d589d0f38b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 26 Oct 2020 13:49:31 +0000 Subject: arm64: cpufeature: upgrade hyp caps to final We finalize caps before initializing kvm hyp code, and any use of cpus_have_const_cap() in kvm hyp code generates redundant and potentially unsound code to read the cpu_hwcaps array. A number of helper functions used in both hyp context and regular kernel context use cpus_have_const_cap(), as some regular kernel code runs before the capabilities are finalized. It's tedious and error-prone to write separate copies of these for hyp and non-hyp code. So that we can avoid the redundant code, let's automatically upgrade cpus_have_const_cap() to cpus_have_final_cap() when used in hyp context. With this change, there's never a reason to access to cpu_hwcaps array from hyp code, and we don't need to create an NVHE alias for this. This should have no effect on non-hyp code. Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Acked-by: Will Deacon Cc: David Brazdil Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20201026134931.28246-4-mark.rutland@arm.com --- arch/arm64/include/asm/cpufeature.h | 26 ++++++++++++++++++++++++-- arch/arm64/include/asm/virt.h | 12 ------------ arch/arm64/kernel/image-vars.h | 1 - 3 files changed, 24 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9f671aa0419b..79d6a0371c78 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -375,6 +375,23 @@ cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, return false; } +static __always_inline bool is_vhe_hyp_code(void) +{ + /* Only defined for code run in VHE hyp context */ + return __is_defined(__KVM_VHE_HYPERVISOR__); +} + +static __always_inline bool is_nvhe_hyp_code(void) +{ + /* Only defined for code run in NVHE hyp context */ + return __is_defined(__KVM_NVHE_HYPERVISOR__); +} + +static __always_inline bool is_hyp_code(void) +{ + return is_vhe_hyp_code() || is_nvhe_hyp_code(); +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; @@ -444,8 +461,11 @@ static __always_inline bool cpus_have_final_cap(int num) } /* - * Test for a capability, possibly with a runtime check. + * Test for a capability, possibly with a runtime check for non-hyp code. * + * For hyp code, this behaves the same as cpus_have_final_cap(). + * + * For non-hyp code: * Before capabilities are finalized, this behaves as cpus_have_cap(). * After capabilities are finalized, this is patched to avoid a runtime check. * @@ -453,7 +473,9 @@ static __always_inline bool cpus_have_final_cap(int num) */ static __always_inline bool cpus_have_const_cap(int num) { - if (system_capabilities_finalized()) + if (is_hyp_code()) + return cpus_have_final_cap(num); + else if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else return cpus_have_cap(num); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 300be14ba77b..6069be50baf9 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -83,18 +83,6 @@ static inline bool is_kernel_in_hyp_mode(void) return read_sysreg(CurrentEL) == CurrentEL_EL2; } -static __always_inline bool is_vhe_hyp_code(void) -{ - /* Only defined for code run in VHE hyp context */ - return __is_defined(__KVM_VHE_HYPERVISOR__); -} - -static __always_inline bool is_nvhe_hyp_code(void) -{ - /* Only defined for code run in NVHE hyp context */ - return __is_defined(__KVM_NVHE_HYPERVISOR__); -} - static __always_inline bool has_vhe(void) { /* diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index fbd4b6b1fde5..ad8432251733 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -87,7 +87,6 @@ KVM_NVHE_ALIAS(__icache_flags); /* Kernel symbols needed for cpus_have_final/const_caps checks. */ KVM_NVHE_ALIAS(arm64_const_caps_ready); KVM_NVHE_ALIAS(cpu_hwcap_keys); -KVM_NVHE_ALIAS(cpu_hwcaps); /* Static keys which are set if a vGIC trap should be handled in hyp. */ KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); -- cgit v1.2.3 From 22f553842b14a1289c088a79a67fb479d3fa2a4e Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 27 Oct 2020 21:51:13 +0000 Subject: KVM: arm64: Handle Asymmetric AArch32 systems On a system without uniform support for AArch32 at EL0, it is possible for the guest to force run AArch32 at EL0 and potentially cause an illegal exception if running on a core without AArch32. Add an extra check so that if we catch the guest doing that, then we prevent it from running again by resetting vcpu->arch.target and return ARM_EXCEPTION_IL. We try to catch this misbehaviour as early as possible and not rely on an illegal exception occuring to signal the problem. Attempting to run a 32bit app in the guest will produce an error from QEMU if the guest exits while running in AArch32 EL0. Tested on Juno by instrumenting the host to fake asym aarch32 and instrumenting KVM to make the asymmetry visible to the guest. [will: Incorporated feedback from Marc] Signed-off-by: Qais Yousef Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: James Morse Cc: Marc Zyngier Link: https://lore.kernel.org/r/20201021104611.2744565-2-qais.yousef@arm.com Link: https://lore.kernel.org/r/20201027215118.27003-2-will@kernel.org --- arch/arm64/kvm/arm.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f56122eedffc..a3b32df1afb0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -808,6 +808,25 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) preempt_enable(); + /* + * The ARMv8 architecture doesn't give the hypervisor + * a mechanism to prevent a guest from dropping to AArch32 EL0 + * if implemented by the CPU. If we spot the guest in such + * state and that we decided it wasn't supposed to do so (like + * with the asymmetric AArch32 case), return to userspace with + * a fatal error. + */ + if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) { + /* + * As we have caught the guest red-handed, decide that + * it isn't fit for purpose anymore by making the vcpu + * invalid. The VMM can try and fix it by issuing a + * KVM_ARM_VCPU_INIT if it really wants to. + */ + vcpu->arch.target = -1; + ret = ARM_EXCEPTION_IL; + } + ret = handle_exit(vcpu, ret); } -- cgit v1.2.3 From 8a967d655e406c8a63744a60b221071fad9a736b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 30 Oct 2020 13:39:55 -0400 Subject: KVM: x86: replace static const variables with macros Even though the compiler is able to replace static const variables with their value, it will warn about them being unused when Linux is built with W=1. Use good old macros instead, this is not C++. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 10 +++++----- arch/x86/kvm/mmu/spte.c | 16 ++++++++-------- arch/x86/kvm/mmu/spte.h | 16 ++++++++-------- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 17587f496ec7..1f96adff8dc4 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -225,7 +225,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte) { u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; - gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) + gpa |= (spte >> SHADOW_NONPRESENT_OR_RSVD_MASK_LEN) & shadow_nonpresent_or_rsvd_mask; return gpa >> PAGE_SHIFT; @@ -591,15 +591,15 @@ static u64 mmu_spte_get_lockless(u64 *sptep) static u64 restore_acc_track_spte(u64 spte) { u64 new_spte = spte; - u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) - & shadow_acc_track_saved_bits_mask; + u64 saved_bits = (spte >> SHADOW_ACC_TRACK_SAVED_BITS_SHIFT) + & SHADOW_ACC_TRACK_SAVED_BITS_MASK; WARN_ON_ONCE(spte_ad_enabled(spte)); WARN_ON_ONCE(!is_access_track_spte(spte)); new_spte &= ~shadow_acc_track_mask; - new_spte &= ~(shadow_acc_track_saved_bits_mask << - shadow_acc_track_saved_bits_shift); + new_spte &= ~(SHADOW_ACC_TRACK_SAVED_BITS_MASK << + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT); new_spte |= saved_bits; return new_spte; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index d9c5665a55e9..fcac2cac78fe 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -55,7 +55,7 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access) mask |= shadow_mmio_value | access; mask |= gpa | shadow_nonpresent_or_rsvd_mask; mask |= (gpa & shadow_nonpresent_or_rsvd_mask) - << shadow_nonpresent_or_rsvd_mask_len; + << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; return mask; } @@ -231,12 +231,12 @@ u64 mark_spte_for_access_track(u64 spte) !spte_can_locklessly_be_made_writable(spte), "kvm: Writable SPTE is not locklessly dirty-trackable\n"); - WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask << - shadow_acc_track_saved_bits_shift), + WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK << + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT), "kvm: Access Tracking saved bit locations are not zero\n"); - spte |= (spte & shadow_acc_track_saved_bits_mask) << - shadow_acc_track_saved_bits_shift; + spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) << + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT; spte &= ~shadow_acc_track_mask; return spte; @@ -245,7 +245,7 @@ u64 mark_spte_for_access_track(u64 spte) void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); - WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len)); + WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)); WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; shadow_mmio_access_mask = access_mask; @@ -306,9 +306,9 @@ void kvm_mmu_reset_all_pte_masks(void) low_phys_bits = boot_cpu_data.x86_phys_bits; if (boot_cpu_has_bug(X86_BUG_L1TF) && !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >= - 52 - shadow_nonpresent_or_rsvd_mask_len)) { + 52 - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)) { low_phys_bits = boot_cpu_data.x86_cache_bits - - shadow_nonpresent_or_rsvd_mask_len; + - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; shadow_nonpresent_or_rsvd_mask = rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1); } diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 4ecf40e0b8fe..5c75a451c000 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -104,20 +104,20 @@ extern u64 __read_mostly shadow_acc_track_mask; */ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; +/* + * The number of high-order 1 bits to use in the mask above. + */ +#define SHADOW_NONPRESENT_OR_RSVD_MASK_LEN 5 + /* * The mask/shift to use for saving the original R/X bits when marking the PTE * as not-present for access tracking purposes. We do not save the W bit as the * PTEs being access tracked also need to be dirty tracked, so the W bit will be * restored only when a write is attempted to the page. */ -static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | - PT64_EPT_EXECUTABLE_MASK; -static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; - -/* - * The number of high-order 1 bits to use in the mask above. - */ -static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +#define SHADOW_ACC_TRACK_SAVED_BITS_MASK (PT64_EPT_READABLE_MASK | \ + PT64_EPT_EXECUTABLE_MASK) +#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT PT64_SECOND_AVAIL_BITS_SHIFT /* * In some cases, we need to preserve the GFN of a non-present or reserved -- cgit v1.2.3 From d383b3146d805a743658225c8973f5d38c6fedf4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 Oct 2020 16:14:14 +0100 Subject: KVM: x86: Fix NULL dereference at kvm_msr_ignored_check() The newly introduced kvm_msr_ignored_check() tries to print error or debug messages via vcpu_*() macros, but those may cause Oops when NULL vcpu is passed for KVM_GET_MSRS ioctl. Fix it by replacing the print calls with kvm_*() macros. (Note that this will leave vcpu argument completely unused in the function, but I didn't touch it to make the fix as small as possible. A clean up may be applied later.) Fixes: 12bc2132b15e ("KVM: X86: Do the same ignore_msrs check for feature msrs") BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1178280 Cc: Signed-off-by: Takashi Iwai Message-Id: <20201030151414.20165-1-tiwai@suse.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 397f599b20e5..f5ede41bf9e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -265,13 +265,13 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, if (ignore_msrs) { if (report_ignored_msrs) - vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n", - op, msr, data); + kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", + op, msr, data); /* Mask the error */ return 0; } else { - vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n", - op, msr, data); + kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", + op, msr, data); return -ENOENT; } } -- cgit v1.2.3 From 064eedf2c50f692088e1418c553084bf9c1432f8 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Oct 2020 16:33:46 +0200 Subject: KVM: VMX: eVMCS: make evmcs_sanitize_exec_ctrls() work again It was noticed that evmcs_sanitize_exec_ctrls() is not being executed nowadays despite the code checking 'enable_evmcs' static key looking correct. Turns out, static key magic doesn't work in '__init' section (and it is unclear when things changed) but setup_vmcs_config() is called only once per CPU so we don't really need it to. Switch to checking 'enlightened_vmcs' instead, it is supposed to be in sync with 'enable_evmcs'. Opportunistically make evmcs_sanitize_exec_ctrls '__init' and drop unneeded extra newline from it. Reported-by: Yang Weijiang Signed-off-by: Vitaly Kuznetsov Message-Id: <20201014143346.2430936-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/evmcs.c | 3 +-- arch/x86/kvm/vmx/evmcs.h | 3 +-- arch/x86/kvm/vmx/vmx.c | 4 +++- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index e5325bd0f304..f3199bb02f22 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -297,14 +297,13 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { }; const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); -void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) +__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) { vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC; vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; - } #endif diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index e5f7a7ebf27d..bd41d9462355 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -185,7 +185,7 @@ static inline void evmcs_load(u64 phys_addr) vp_ap->enlighten_vmentry = 1; } -void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); +__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); #else /* !IS_ENABLED(CONFIG_HYPERV) */ static inline void evmcs_write64(unsigned long field, u64 value) {} static inline void evmcs_write32(unsigned long field, u32 value) {} @@ -194,7 +194,6 @@ static inline u64 evmcs_read64(unsigned long field) { return 0; } static inline u32 evmcs_read32(unsigned long field) { return 0; } static inline u16 evmcs_read16(unsigned long field) { return 0; } static inline void evmcs_load(u64 phys_addr) {} -static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} static inline void evmcs_touch_msr_bitmap(void) {} #endif /* IS_ENABLED(CONFIG_HYPERV) */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d14c94d0aff1..61d1cecb6f12 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2560,8 +2560,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, vmcs_conf->vmexit_ctrl = _vmexit_control; vmcs_conf->vmentry_ctrl = _vmentry_control; - if (static_branch_unlikely(&enable_evmcs)) +#if IS_ENABLED(CONFIG_HYPERV) + if (enlightened_vmcs) evmcs_sanitize_exec_ctrls(vmcs_conf); +#endif return 0; } -- cgit v1.2.3 From 9478dec3b5e79a1431e2e2b911e32e52a11c6320 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 31 Oct 2020 11:38:09 -0400 Subject: KVM: vmx: remove unused variable Reported-by: kernel test robot Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 61d1cecb6f12..47b8357b9751 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6836,7 +6836,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx; - unsigned long *msr_bitmap; int i, cpu, err; BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); @@ -6896,7 +6895,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS); bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS); - msr_bitmap = vmx->vmcs01.msr_bitmap; vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R); vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW); vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW); -- cgit v1.2.3