From 529c4b05a3cb2f324aac347042ee6d641478e946 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 13 Dec 2017 17:07:18 +0000 Subject: arm64: handle 52-bit addresses in TTBR The top 4 bits of a 52-bit physical address are positioned at bits 2..5 in the TTBR registers. Introduce a couple of macros to move the bits there, and change all TTBR writers to use them. Leave TTBR0 PAN code unchanged, to avoid complicating it. A system with 52-bit PA will have PAN anyway (because it's ARMv8.1 or later), and a system without 52-bit PA can only use up to 48-bit PAs. A later patch in this series will add a kconfig dependency to ensure PAN is configured. In addition, when using 52-bit PA there is a special alignment requirement on the top-level table. We don't currently have any VA_BITS configuration that would violate the requirement, but one could be added in the future, so add a compile-time BUG_ON to check for it. Tested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Marc Zyngier Tested-by: Bob Picco Reviewed-by: Bob Picco Signed-off-by: Kristina Martsenko [catalin.marinas@arm.com: added TTBR_BADD_MASK_52 comment] Signed-off-by: Catalin Marinas --- virt/kvm/arm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 6b60c98a6e22..c8d49879307f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -509,7 +509,7 @@ static void update_vttbr(struct kvm *kvm) pgd_phys = virt_to_phys(kvm->arch.pgd); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); - kvm->arch.vttbr = pgd_phys | vmid; + kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; spin_unlock(&kvm_vmid_lock); } -- cgit v1.2.3 From fa2a8445b1d3810c52f2a6b3a006456bd1aacb7e Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 13 Dec 2017 17:07:24 +0000 Subject: arm64: allow ID map to be extended to 52 bits Currently, when using VA_BITS < 48, if the ID map text happens to be placed in physical memory above VA_BITS, we increase the VA size (up to 48) and create a new table level, in order to map in the ID map text. This is okay because the system always supports 48 bits of VA. This patch extends the code such that if the system supports 52 bits of VA, and the ID map text is placed that high up, then we increase the VA size accordingly, up to 52. One difference from the current implementation is that so far the condition of VA_BITS < 48 has meant that the top level table is always "full", with the maximum number of entries, and an extra table level is always needed. Now, when VA_BITS = 48 (and using 64k pages), the top level table is not full, and we simply need to increase the number of entries in it, instead of creating a new table level. Tested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Marc Zyngier Tested-by: Bob Picco Reviewed-by: Bob Picco Signed-off-by: Kristina Martsenko [catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()] [catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()] Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_mmu.h | 5 +++ arch/arm64/include/asm/assembler.h | 2 - arch/arm64/include/asm/kvm_mmu.h | 7 +++- arch/arm64/include/asm/mmu_context.h | 10 +++++ arch/arm64/kernel/head.S | 76 +++++++++++++++++++++--------------- arch/arm64/kvm/hyp-init.S | 17 ++++---- arch/arm64/mm/mmu.c | 1 + virt/kvm/arm/mmu.c | 10 ++++- 8 files changed, 83 insertions(+), 45 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 8dbec683638b..8c5643e2eea4 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -211,6 +211,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void) return false; } +static inline unsigned long __kvm_idmap_ptrs_per_pgd(void) +{ + return PTRS_PER_PGD; +} + static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *hyp_pgd, pgd_t *merged_hyp_pgd, diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 49ea3def4bd1..942fdb5ef0ad 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -344,10 +344,8 @@ alternative_endif * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map */ .macro tcr_set_idmap_t0sz, valreg, tmpreg -#ifndef CONFIG_ARM64_VA_BITS_48 ldr_l \tmpreg, idmap_t0sz bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH -#endif .endm /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b3f7b68b042d..b33bdb5eeb3d 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -273,7 +273,12 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); static inline bool __kvm_cpu_uses_extended_idmap(void) { - return __cpu_uses_extended_idmap(); + return __cpu_uses_extended_idmap_level(); +} + +static inline unsigned long __kvm_idmap_ptrs_per_pgd(void) +{ + return idmap_ptrs_per_pgd; } /* diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index accc2ff32a0e..88451d47036b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -63,6 +63,7 @@ static inline void cpu_set_reserved_ttbr0(void) * physical memory, in which case it will be smaller. */ extern u64 idmap_t0sz; +extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { @@ -70,6 +71,15 @@ static inline bool __cpu_uses_extended_idmap(void) unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); } +/* + * True if the extended ID map requires an extra level of translation table + * to be configured. + */ +static inline bool __cpu_uses_extended_idmap_level(void) +{ + return ARM64_HW_PGTABLE_LEVELS((64 - idmap_t0sz)) > CONFIG_PGTABLE_LEVELS; +} + /* * Set TCR.T0SZ to its default value (based on VA_BITS) */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index eeec0001e204..66f01869e97c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -176,7 +176,7 @@ ENDPROC(preserve_boot_args) * ptrs: #imm pointers per table page * * Preserves: virt - * Corrupts: tmp1, tmp2 + * Corrupts: ptrs, tmp1, tmp2 * Returns: tbl -> next level table page address */ .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 @@ -184,7 +184,8 @@ ENDPROC(preserve_boot_args) phys_to_pte \tmp1, \tmp2 orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type lsr \tmp1, \virt, #\shift - and \tmp1, \tmp1, #\ptrs - 1 // table index + sub \ptrs, \ptrs, #1 + and \tmp1, \tmp1, \ptrs // table index str \tmp2, [\tbl, \tmp1, lsl #3] add \tbl, \tbl, #PAGE_SIZE // next level table page .endm @@ -194,15 +195,17 @@ ENDPROC(preserve_boot_args) * block entry in the next level (tbl) for the given virtual address. * * Preserves: tbl, next, virt - * Corrupts: tmp1, tmp2 + * Corrupts: ptrs_per_pgd, tmp1, tmp2 */ - .macro create_pgd_entry, tbl, virt, tmp1, tmp2 - create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 + .macro create_pgd_entry, tbl, virt, ptrs_per_pgd, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 #if SWAPPER_PGTABLE_LEVELS > 3 - create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2 + mov \ptrs_per_pgd, PTRS_PER_PUD + create_table_entry \tbl, \virt, PUD_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 #endif #if SWAPPER_PGTABLE_LEVELS > 2 - create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 + mov \ptrs_per_pgd, PTRS_PER_PTE + create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 #endif .endm @@ -266,26 +269,13 @@ __create_page_tables: adrp x0, idmap_pg_dir adrp x3, __idmap_text_start // __pa(__idmap_text_start) -#ifndef CONFIG_ARM64_VA_BITS_48 -#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) -#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT)) - - /* - * If VA_BITS < 48, it may be too small to allow for an ID mapping to be - * created that covers system RAM if that is located sufficiently high - * in the physical address space. So for the ID map, use an extended - * virtual range in that case, by configuring an additional translation - * level. - * First, we have to verify our assumption that the current value of - * VA_BITS was chosen such that all translation levels are fully - * utilised, and that lowering T0SZ will always result in an additional - * translation level to be configured. - */ -#if VA_BITS != EXTRA_SHIFT -#error "Mismatch between VA_BITS and page size/number of translation levels" -#endif - /* + * VA_BITS may be too small to allow for an ID mapping to be created + * that covers system RAM if that is located sufficiently high in the + * physical address space. So for the ID map, use an extended virtual + * range in that case, and configure an additional translation level + * if needed. + * * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the * entire ID map region can be mapped. As T0SZ == (64 - #bits used), * this number conveniently equals the number of leading zeroes in @@ -294,18 +284,41 @@ __create_page_tables: adrp x5, __idmap_text_end clz x5, x5 cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? - b.ge 1f // .. then skip additional level + b.ge 1f // .. then skip VA range extension adr_l x6, idmap_t0sz str x5, [x6] dmb sy dc ivac, x6 // Invalidate potentially stale cache line - create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 -1: +#if (VA_BITS < 48) +#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT)) + + /* + * If VA_BITS < 48, we have to configure an additional table level. + * First, we have to verify our assumption that the current value of + * VA_BITS was chosen such that all translation levels are fully + * utilised, and that lowering T0SZ will always result in an additional + * translation level to be configured. + */ +#if VA_BITS != EXTRA_SHIFT +#error "Mismatch between VA_BITS and page size/number of translation levels" #endif - create_pgd_entry x0, x3, x5, x6 + mov x4, EXTRA_PTRS + create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 +#else + /* + * If VA_BITS == 48, we don't have to configure an additional + * translation level, but the top-level table has more entries. + */ + mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) + str_l x4, idmap_ptrs_per_pgd, x5 +#endif +1: + ldr_l x4, idmap_ptrs_per_pgd + create_pgd_entry x0, x3, x4, x5, x6 mov x5, x3 // __pa(__idmap_text_start) adr_l x6, __idmap_text_end // __pa(__idmap_text_end) create_block_map x0, x7, x3, x5, x6, x4 @@ -316,7 +329,8 @@ __create_page_tables: adrp x0, swapper_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement - create_pgd_entry x0, x5, x3, x6 + mov x4, PTRS_PER_PGD + create_pgd_entry x0, x5, x4, x3, x6 adrp x6, _end // runtime __pa(_end) adrp x3, _text // runtime __pa(_text) sub x6, x6, x3 // _end - _text diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index f9681cc00973..33c40b3eea01 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -72,24 +72,23 @@ __do_hyp_init: mov x5, #TCR_EL2_RES1 orr x4, x4, x5 -#ifndef CONFIG_ARM64_VA_BITS_48 /* - * If we are running with VA_BITS < 48, we may be running with an extra - * level of translation in the ID map. This is only the case if system - * RAM is out of range for the currently configured page size and number - * of translation levels, in which case we will also need the extra - * level for the HYP ID map, or we won't be able to enable the EL2 MMU. + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. * * However, at EL2, there is only one TTBR register, and we can't switch * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need the extra level in *both* our translation - * tables. + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. * * So use the same T0SZ value we use for the ID map. */ ldr_l x5, idmap_t0sz bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH -#endif + /* * Set the PS bits in TCR_EL2. */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c631a17ae1d..baa34418c3bf 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -50,6 +50,7 @@ #define NO_CONT_MAPPINGS BIT(1) u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b36945d49986..761787befd3b 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -629,14 +629,20 @@ static int __create_hyp_mappings(pgd_t *pgdp, { pgd_t *pgd; pud_t *pud; - unsigned long addr, next; + unsigned long addr, next, ptrs_per_pgd = PTRS_PER_PGD; int err = 0; + /* + * If it's not the hyp_pgd, fall back to the kvm idmap layout. + */ + if (pgdp != hyp_pgd) + ptrs_per_pgd = __kvm_idmap_ptrs_per_pgd(); + mutex_lock(&kvm_hyp_pgd_mutex); addr = start & PAGE_MASK; end = PAGE_ALIGN(end); do { - pgd = pgdp + pgd_index(addr); + pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1)); if (pgd_none(*pgd)) { pud = pud_alloc_one(NULL, addr); -- cgit v1.2.3 From 6840bdd73d07216ab4bc46f5a8768c37ea519038 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Jan 2018 16:38:35 +0000 Subject: arm64: KVM: Use per-CPU vector when BP hardening is enabled Now that we have per-CPU vectors, let's plug then in the KVM/arm64 code. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_mmu.h | 10 ++++++++++ arch/arm64/include/asm/kvm_mmu.h | 38 ++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/switch.c | 2 +- virt/kvm/arm/arm.c | 8 +++++++- 4 files changed, 56 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 8c5643e2eea4..a2d176a308bd 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -226,6 +226,16 @@ static inline unsigned int kvm_get_vmid_bits(void) return 8; } +static inline void *kvm_get_hyp_vector(void) +{ + return kvm_ksym_ref(__kvm_hyp_vector); +} + +static inline int kvm_map_vectors(void) +{ + return 0; +} + #define kvm_phys_to_vttbr(addr) (addr) #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b33bdb5eeb3d..72e279dbae5f 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -322,6 +322,44 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +#include + +static inline void *kvm_get_hyp_vector(void) +{ + struct bp_hardening_data *data = arm64_get_bp_hardening_data(); + void *vect = kvm_ksym_ref(__kvm_hyp_vector); + + if (data->fn) { + vect = __bp_harden_hyp_vecs_start + + data->hyp_vectors_slot * SZ_2K; + + if (!has_vhe()) + vect = lm_alias(vect); + } + + return vect; +} + +static inline int kvm_map_vectors(void) +{ + return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start), + kvm_ksym_ref(__bp_harden_hyp_vecs_end), + PAGE_HYP_EXEC); +} + +#else +static inline void *kvm_get_hyp_vector(void) +{ + return kvm_ksym_ref(__kvm_hyp_vector); +} + +static inline int kvm_map_vectors(void) +{ + return 0; +} +#endif + #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index f7c651f3a8c0..8d4f3c9d6dc4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -52,7 +52,7 @@ static void __hyp_text __activate_traps_vhe(void) val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN); write_sysreg(val, cpacr_el1); - write_sysreg(__kvm_hyp_vector, vbar_el1); + write_sysreg(kvm_get_hyp_vector(), vbar_el1); } static void __hyp_text __activate_traps_nvhe(void) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c8d49879307f..2df6a5c42f77 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1158,7 +1158,7 @@ static void cpu_init_hyp_mode(void *dummy) pgd_ptr = kvm_mmu_get_httbr(); stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; - vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); + vector_ptr = (unsigned long)kvm_get_hyp_vector(); __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); @@ -1403,6 +1403,12 @@ static int init_hyp_mode(void) goto out_err; } + err = kvm_map_vectors(); + if (err) { + kvm_err("Cannot map vectors\n"); + goto out_err; + } + /* * Map the Hyp stack pages */ -- cgit v1.2.3 From 36989e7fd386a9a5822c48691473863f8fbb404d Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 8 Jan 2018 15:38:04 +0000 Subject: KVM: arm/arm64: Convert kvm_host_cpu_state to a static per-cpu allocation kvm_host_cpu_state is a per-cpu allocation made from kvm_arch_init() used to store the host EL1 registers when KVM switches to a guest. Make it easier for ASM to generate pointers into this per-cpu memory by making it a static allocation. Signed-off-by: James Morse Acked-by: Christoffer Dall Signed-off-by: Catalin Marinas --- virt/kvm/arm/arm.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 2df6a5c42f77..2fc6009a766c 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -53,8 +53,8 @@ __asm__(".arch_extension virt"); #endif +DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -static kvm_cpu_context_t __percpu *kvm_host_cpu_state; /* Per-CPU variable containing the currently running vcpu. */ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); @@ -354,7 +354,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } vcpu->cpu = cpu; - vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); + vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state); kvm_arm_set_running_vcpu(vcpu); kvm_vgic_load(vcpu); @@ -1272,19 +1272,8 @@ static inline void hyp_cpu_pm_exit(void) } #endif -static void teardown_common_resources(void) -{ - free_percpu(kvm_host_cpu_state); -} - static int init_common_resources(void) { - kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); - if (!kvm_host_cpu_state) { - kvm_err("Cannot allocate host CPU state\n"); - return -ENOMEM; - } - /* set size of VMID supported by CPU */ kvm_vmid_bits = kvm_get_vmid_bits(); kvm_info("%d-bit VMID\n", kvm_vmid_bits); @@ -1426,7 +1415,7 @@ static int init_hyp_mode(void) for_each_possible_cpu(cpu) { kvm_cpu_context_t *cpu_ctxt; - cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); + cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); if (err) { @@ -1550,7 +1539,6 @@ out_hyp: if (!in_hyp_mode) teardown_hyp_mode(); out_err: - teardown_common_resources(); return err; } -- cgit v1.2.3 From 98732d1b189b626a7593afd02dc3d2dc3f6c545a Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Mon, 15 Jan 2018 15:23:49 +0000 Subject: KVM: arm/arm64: fix HYP ID map extension to 52 bits Commit fa2a8445b1d3 incorrectly masks the index of the HYP ID map pgd entry, causing a non-VHE kernel to hang during boot. This happens when VA_BITS=48 and the ID map text is in 52-bit physical memory. In this case we don't need an extra table level but need more entries in the top-level table, so we need to map into hyp_pgd and need to use __kvm_idmap_ptrs_per_pgd to mask in the extra bits. However, __create_hyp_mappings currently masks by PTRS_PER_PGD instead. Fix it so that we always use __kvm_idmap_ptrs_per_pgd for the HYP ID map. This ensures that we use the larger mask for the top-level ID map table when it has more entries. In all other cases, PTRS_PER_PGD is used as normal. Fixes: fa2a8445b1d3 ("arm64: allow ID map to be extended to 52 bits") Acked-by: Marc Zyngier Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas --- virt/kvm/arm/mmu.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 761787befd3b..876caf531d32 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -623,21 +623,15 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start, return 0; } -static int __create_hyp_mappings(pgd_t *pgdp, +static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd, unsigned long start, unsigned long end, unsigned long pfn, pgprot_t prot) { pgd_t *pgd; pud_t *pud; - unsigned long addr, next, ptrs_per_pgd = PTRS_PER_PGD; + unsigned long addr, next; int err = 0; - /* - * If it's not the hyp_pgd, fall back to the kvm idmap layout. - */ - if (pgdp != hyp_pgd) - ptrs_per_pgd = __kvm_idmap_ptrs_per_pgd(); - mutex_lock(&kvm_hyp_pgd_mutex); addr = start & PAGE_MASK; end = PAGE_ALIGN(end); @@ -705,8 +699,8 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot) int err; phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); - err = __create_hyp_mappings(hyp_pgd, virt_addr, - virt_addr + PAGE_SIZE, + err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, + virt_addr, virt_addr + PAGE_SIZE, __phys_to_pfn(phys_addr), prot); if (err) @@ -737,7 +731,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) return -EINVAL; - return __create_hyp_mappings(hyp_pgd, start, end, + return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } @@ -1743,7 +1737,7 @@ static int kvm_map_idmap_text(pgd_t *pgd) int err; /* Create the idmap in the boot page tables */ - err = __create_hyp_mappings(pgd, + err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), hyp_idmap_start, hyp_idmap_end, __phys_to_pfn(hyp_idmap_start), PAGE_HYP_EXEC); -- cgit v1.2.3 From 4f5abad9e826bd579b0661efa32682d9c9bc3fa8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 15 Jan 2018 19:39:00 +0000 Subject: KVM: arm/arm64: mask/unmask daif around VHE guests Non-VHE systems take an exception to EL2 in order to world-switch into the guest. When returning from the guest KVM implicitly restores the DAIF flags when it returns to the kernel at EL1. With VHE none of this exception-level jumping happens, so KVMs world-switch code is exposed to the host kernel's DAIF values, and KVM spills the guest-exit DAIF values back into the host kernel. On entry to a guest we have Debug and SError exceptions unmasked, KVM has switched VBAR but isn't prepared to handle these. On guest exit Debug exceptions are left disabled once we return to the host and will stay this way until we enter user space. Add a helper to mask/unmask DAIF around VHE guests. The unmask can only happen after the hosts VBAR value has been synchronised by the isb in __vhe_hyp_call (via kvm_call_hyp()). Masking could be as late as setting KVMs VBAR value, but is kept here for symmetry. Acked-by: Marc Zyngier Signed-off-by: James Morse Reviewed-by: Christoffer Dall Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_host.h | 2 ++ arch/arm64/include/asm/kvm_host.h | 10 ++++++++++ virt/kvm/arm/arm.c | 4 ++++ 3 files changed, 16 insertions(+) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index a9f7d3f47134..b86fc4162539 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -301,4 +301,6 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, /* All host FP/SIMD state is restored on guest exit, so nothing to save: */ static inline void kvm_fpsimd_flush_cpu_state(void) {} +static inline void kvm_arm_vhe_guest_enter(void) {} +static inline void kvm_arm_vhe_guest_exit(void) {} #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7ee72b402907..dcdd08edf5a5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -398,4 +399,13 @@ static inline void kvm_fpsimd_flush_cpu_state(void) sve_flush_cpu_state(); } +static inline void kvm_arm_vhe_guest_enter(void) +{ + local_daif_mask(); +} + +static inline void kvm_arm_vhe_guest_exit(void) +{ + local_daif_restore(DAIF_PROCCTX_NOIRQ); +} #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 2fc6009a766c..38e81631fc91 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -704,9 +704,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); + if (has_vhe()) + kvm_arm_vhe_guest_enter(); ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); + if (has_vhe()) + kvm_arm_vhe_guest_exit(); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; /* -- cgit v1.2.3 From 3368bd809764d3ef0810e16c1e1531fec32e8d8e Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 15 Jan 2018 19:39:04 +0000 Subject: KVM: arm64: Handle RAS SErrors from EL1 on guest exit We expect to have firmware-first handling of RAS SErrors, with errors notified via an APEI method. For systems without firmware-first, add some minimal handling to KVM. There are two ways KVM can take an SError due to a guest, either may be a RAS error: we exit the guest due to an SError routed to EL2 by HCR_EL2.AMO, or we take an SError from EL2 when we unmask PSTATE.A from __guest_exit. For SError that interrupt a guest and are routed to EL2 the existing behaviour is to inject an impdef SError into the guest. Add code to handle RAS SError based on the ESR. For uncontained and uncategorized errors arm64_is_fatal_ras_serror() will panic(), these errors compromise the host too. All other error types are contained: For the fatal errors the vCPU can't make progress, so we inject a virtual SError. We ignore contained errors where we can make progress as if we're lucky, we may not hit them again. If only some of the CPUs support RAS the guest will see the cpufeature sanitised version of the id registers, but we may still take RAS SError on this CPU. Move the SError handling out of handle_exit() into a new handler that runs before we can be preempted. This allows us to use this_cpu_has_cap(), via arm64_is_ras_serror(). Acked-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/handle_exit.c | 18 +++++++++++++++++- virt/kvm/arm/arm.c | 3 +++ 4 files changed, 25 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index b86fc4162539..acbf9ec7b396 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -238,6 +238,9 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); +static inline void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index) {} + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 84fcb2a896a1..abcfd164e690 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -347,6 +347,8 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); +void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index); int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 304203fa9e33..6a5a5db4292f 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -29,12 +29,19 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); +static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr) +{ + if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr)) + kvm_inject_vabt(vcpu); +} + static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { int ret; @@ -252,7 +259,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, case ARM_EXCEPTION_IRQ: return 1; case ARM_EXCEPTION_EL1_SERROR: - kvm_inject_vabt(vcpu); /* We may still need to return for single-step */ if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS) && kvm_arm_handle_step_debug(vcpu, run)) @@ -275,3 +281,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, return 0; } } + +/* For exit types that need handling before we can be preempted */ +void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index) +{ + exception_index = ARM_EXCEPTION_CODE(exception_index); + + if (exception_index == ARM_EXCEPTION_EL1_SERROR) + kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu)); +} diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 38e81631fc91..15bf026eb182 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -763,6 +763,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) guest_exit(); trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + /* Exit types that need handling before we can be preempted */ + handle_exit_early(vcpu, run, ret); + preempt_enable(); ret = handle_exit(vcpu, run, ret); -- cgit v1.2.3