From cf87ac739e488055a6046a410caa8f4da108948f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:08 +0800 Subject: KVM: x86: Introduce KVM_REQ_DIRTY_RING_SOFT_FULL The VCPU isn't expected to be runnable when the dirty ring becomes soft full, until the dirty pages are harvested and the dirty ring is reset from userspace. So there is a check in each guest's entrace to see if the dirty ring is soft full or not. The VCPU is stopped from running if its dirty ring has been soft full. The similar check will be needed when the feature is going to be supported on ARM64. As Marc Zyngier suggested, a new event will avoid pointless overhead to check the size of the dirty ring ('vcpu->kvm->dirty_ring_size') in each guest's entrance. Add KVM_REQ_DIRTY_RING_SOFT_FULL. The event is raised when the dirty ring becomes soft full in kvm_dirty_ring_push(). The event is only cleared in the check, done in the newly added helper kvm_dirty_ring_check_request(). Since the VCPU is not runnable when the dirty ring becomes soft full, the KVM_REQ_DIRTY_RING_SOFT_FULL event is always set to prevent the VCPU from running until the dirty pages are harvested and the dirty ring is reset by userspace. kvm_dirty_ring_soft_full() becomes a private function with the newly added helper kvm_dirty_ring_check_request(). The alignment for the various event definitions in kvm_host.h is changed to tab character by the way. In order to avoid using 'container_of()', the argument @ring is replaced by @vcpu in kvm_dirty_ring_push(). Link: https://lore.kernel.org/kvmarm/87lerkwtm5.wl-maz@kernel.org Suggested-by: Marc Zyngier Signed-off-by: Gavin Shan Reviewed-by: Peter Xu Reviewed-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-2-gshan@redhat.com --- arch/x86/kvm/x86.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9cf1ba865562..d0d32e67ebf3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10499,20 +10499,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; - /* Forbid vmenter if vcpu dirty ring is soft-full */ - if (unlikely(vcpu->kvm->dirty_ring_size && - kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) { - vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL; - trace_kvm_dirty_ring_exit(vcpu); - r = 0; - goto out; - } - if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) { r = -EIO; goto out; } + + if (kvm_dirty_ring_check_request(vcpu)) { + r = 0; + goto out; + } + if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; -- cgit v1.2.3 From e8a18565e59303ac12c626a161d72bd890bd2062 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:09 +0800 Subject: KVM: Move declaration of kvm_cpu_dirty_log_size() to kvm_dirty_ring.h Not all architectures like ARM64 need to override the function. Move its declaration to kvm_dirty_ring.h to avoid the following compiling warning on ARM64 when the feature is enabled. arch/arm64/kvm/../../../virt/kvm/dirty_ring.c:14:12: \ warning: no previous prototype for 'kvm_cpu_dirty_log_size' \ [-Wmissing-prototypes] \ int __weak kvm_cpu_dirty_log_size(void) Reported-by: kernel test robot Signed-off-by: Gavin Shan Reviewed-by: Peter Xu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-3-gshan@redhat.com --- arch/x86/include/asm/kvm_host.h | 2 -- include/linux/kvm_dirty_ring.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7551b6f9c31c..b4dbde7d9eb1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2090,8 +2090,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define GET_SMSTATE(type, buf, offset) \ (*(type *)((buf) + (offset) - 0x7e00)) -int kvm_cpu_dirty_log_size(void); - int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); #define KVM_CLOCK_VALID_FLAGS \ diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 9c13c4c3d30c..199ead37b104 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -66,6 +66,7 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) #else /* CONFIG_HAVE_KVM_DIRTY_RING */ +int kvm_cpu_dirty_log_size(void); u32 kvm_dirty_ring_get_rsvd_entries(void); int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); -- cgit v1.2.3 From 9cb1096f8590bc590326087bea65db932b53c3b5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:11 +0800 Subject: KVM: arm64: Enable ring-based dirty memory tracking Enable ring-based dirty memory tracking on ARM64: - Enable CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL. - Enable CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP. - Set KVM_DIRTY_LOG_PAGE_OFFSET for the ring buffer's physical page offset. - Add ARM64 specific kvm_arch_allow_write_without_running_vcpu() to keep the site of saving vgic/its tables out of the no-running-vcpu radar. Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-5-gshan@redhat.com --- Documentation/virt/kvm/api.rst | 2 +- arch/arm64/include/uapi/asm/kvm.h | 1 + arch/arm64/kvm/Kconfig | 2 ++ arch/arm64/kvm/arm.c | 3 +++ arch/arm64/kvm/vgic/vgic-its.c | 20 ++++++++++++++++++++ include/kvm/arm_vgic.h | 1 + 6 files changed, 28 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1f1b09aa6db4..773e4b202f47 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7921,7 +7921,7 @@ regardless of what has actually been exposed through the CPUID leaf. 8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL ---------------------------------------------------------- -:Architectures: x86 +:Architectures: x86, arm64 :Parameters: args[0] - size of the dirty log ring KVM is capable of tracking dirty memory using ring buffers that are diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 316917b98707..a7a857f1784d 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -43,6 +43,7 @@ #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 815cc118c675..05da3c8f7e88 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -32,6 +32,8 @@ menuconfig KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD + select HAVE_KVM_DIRTY_RING_ACQ_REL + select NEED_KVM_DIRTY_RING_WITH_BITMAP select HAVE_KVM_MSI select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..6b097605e38c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -746,6 +746,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) return kvm_vcpu_suspend(vcpu); + + if (kvm_dirty_ring_check_request(vcpu)) + return 0; } return 1; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 733b53055f97..94a666dd1443 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2743,6 +2743,7 @@ static int vgic_its_has_attr(struct kvm_device *dev, static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); + struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */ @@ -2762,7 +2763,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) vgic_its_reset(kvm, its); break; case KVM_DEV_ARM_ITS_SAVE_TABLES: + dist->save_its_tables_in_progress = true; ret = abi->save_tables(its); + dist->save_its_tables_in_progress = false; break; case KVM_DEV_ARM_ITS_RESTORE_TABLES: ret = abi->restore_tables(its); @@ -2775,6 +2778,23 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) return ret; } +/* + * kvm_arch_allow_write_without_running_vcpu - allow writing guest memory + * without the running VCPU when dirty ring is enabled. + * + * The running VCPU is required to track dirty guest pages when dirty ring + * is enabled. Otherwise, the backup bitmap should be used to track the + * dirty guest pages. When vgic/its tables are being saved, the backup + * bitmap is used to track the dirty guest pages due to the missed running + * VCPU in the period. + */ +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + return dist->save_its_tables_in_progress; +} + static int vgic_its_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4df9e73a8bb5..9270cd87da3f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -263,6 +263,7 @@ struct vgic_dist { struct vgic_io_device dist_iodev; bool has_its; + bool save_its_tables_in_progress; /* * Contains the attributes and gpa of the LPI configuration table. -- cgit v1.2.3 From dfc7a7769ab7f2a2f629c673717ef1fa7b63aa42 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:31 +0000 Subject: KVM: arm64: Combine visitor arguments into a context structure Passing new arguments by value to the visitor callbacks is extremely inflexible for stuffing new parameters used by only some of the visitors. Use a context structure instead and pass the pointer through to the visitor callback. While at it, redefine the 'flags' parameter to the visitor to contain the bit indicating the phase of the walk. Pass the entire set of flags through the context structure such that the walker can communicate additional state to the visitor callback. No functional change intended. Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Reviewed-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-2-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 15 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 10 +- arch/arm64/kvm/hyp/nvhe/setup.c | 16 +- arch/arm64/kvm/hyp/pgtable.c | 269 +++++++++++++++++----------------- 4 files changed, 154 insertions(+), 156 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 3252eb50ecfe..607f9bb8aab4 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -199,10 +199,17 @@ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_TABLE_POST = BIT(2), }; -typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg); +struct kvm_pgtable_visit_ctx { + kvm_pte_t *ptep; + void *arg; + u64 addr; + u64 end; + u32 level; + enum kvm_pgtable_walk_flags flags; +}; + +typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit); /** * struct kvm_pgtable_walker - Hook into a page-table walk. diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 1e78acf9662e..8f5b6a36a039 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -417,13 +417,11 @@ struct check_walk_data { enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); }; -static int __check_page_state_visitor(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct check_walk_data *d = arg; - kvm_pte_t pte = *ptep; + struct check_walk_data *d = ctx->arg; + kvm_pte_t pte = *ctx->ptep; if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte))) return -EINVAL; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index e8d4ea2fcfa0..a293cf5eba1b 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -186,15 +186,13 @@ static void hpool_put_page(void *addr) hyp_put_page(&hpool, addr); } -static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; enum kvm_pgtable_prot prot; enum pkvm_page_state state; - kvm_pte_t pte = *ptep; + kvm_pte_t pte = *ctx->ptep; phys_addr_t phys; if (!kvm_pte_valid(pte)) @@ -205,11 +203,11 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, * was unable to access the hyp_vmemmap and so the buddy allocator has * initialised the refcount to '1'. */ - mm_ops->get_page(ptep); - if (flag != KVM_PGTABLE_WALK_LEAF) + mm_ops->get_page(ctx->ptep); + if (visit != KVM_PGTABLE_WALK_LEAF) return 0; - if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) + if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; phys = kvm_pte_to_phys(pte); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cdf8e76b0be1..900c8b9c0cfc 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -64,20 +64,20 @@ static bool kvm_phys_is_valid(u64 phys) return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); } -static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) +static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys) { - u64 granule = kvm_granule_size(level); + u64 granule = kvm_granule_size(ctx->level); - if (!kvm_level_supports_block_mapping(level)) + if (!kvm_level_supports_block_mapping(ctx->level)) return false; - if (granule > (end - addr)) + if (granule > (ctx->end - ctx->addr)) return false; if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule)) return false; - return IS_ALIGNED(addr, granule); + return IS_ALIGNED(ctx->addr, granule); } static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) @@ -172,12 +172,12 @@ static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id) return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); } -static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr, - u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag) +static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, + const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_walker *walker = data->walker; - return walker->cb(addr, data->end, level, ptep, flag, walker->arg); + return walker->cb(ctx, visit); } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, @@ -186,20 +186,24 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, kvm_pte_t *ptep, u32 level) { + enum kvm_pgtable_walk_flags flags = data->walker->flags; + struct kvm_pgtable_visit_ctx ctx = { + .ptep = ptep, + .arg = data->walker->arg, + .addr = data->addr, + .end = data->end, + .level = level, + .flags = flags, + }; int ret = 0; - u64 addr = data->addr; kvm_pte_t *childp, pte = *ptep; bool table = kvm_pte_table(pte, level); - enum kvm_pgtable_walk_flags flags = data->walker->flags; - if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) { - ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, - KVM_PGTABLE_WALK_TABLE_PRE); - } + if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) + ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); - if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) { - ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, - KVM_PGTABLE_WALK_LEAF); + if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { + ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); pte = *ptep; table = kvm_pte_table(pte, level); } @@ -218,10 +222,8 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, if (ret) goto out; - if (flags & KVM_PGTABLE_WALK_TABLE_POST) { - ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, - KVM_PGTABLE_WALK_TABLE_POST); - } + if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST) + ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST); out: return ret; @@ -292,13 +294,13 @@ struct leaf_walk_data { u32 level; }; -static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct leaf_walk_data *data = arg; + struct leaf_walk_data *data = ctx->arg; - data->pte = *ptep; - data->level = level; + data->pte = *ctx->ptep; + data->level = ctx->level; return 0; } @@ -383,47 +385,47 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) return prot; } -static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, struct hyp_map_data *data) +static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, + struct hyp_map_data *data) { - kvm_pte_t new, old = *ptep; - u64 granule = kvm_granule_size(level), phys = data->phys; + kvm_pte_t new, old = *ctx->ptep; + u64 granule = kvm_granule_size(ctx->level), phys = data->phys; - if (!kvm_block_mapping_supported(addr, end, phys, level)) + if (!kvm_block_mapping_supported(ctx, phys)) return false; data->phys += granule; - new = kvm_init_valid_leaf_pte(phys, data->attr, level); + new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); if (old == new) return true; if (!kvm_pte_valid(old)) - data->mm_ops->get_page(ptep); + data->mm_ops->get_page(ctx->ptep); else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) return false; - smp_store_release(ptep, new); + smp_store_release(ctx->ptep, new); return true; } -static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { kvm_pte_t *childp; - struct hyp_map_data *data = arg; + struct hyp_map_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg)) + if (hyp_map_walker_try_leaf(ctx, data)) return 0; - if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL); if (!childp) return -ENOMEM; - kvm_set_table_pte(ptep, childp, mm_ops); - mm_ops->get_page(ptep); + kvm_set_table_pte(ctx->ptep, childp, mm_ops); + mm_ops->get_page(ctx->ptep); return 0; } @@ -456,39 +458,39 @@ struct hyp_unmap_data { struct kvm_pgtable_mm_ops *mm_ops; }; -static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ptep, *childp = NULL; - u64 granule = kvm_granule_size(level); - struct hyp_unmap_data *data = arg; + kvm_pte_t pte = *ctx->ptep, *childp = NULL; + u64 granule = kvm_granule_size(ctx->level); + struct hyp_unmap_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_pte_valid(pte)) return -EINVAL; - if (kvm_pte_table(pte, level)) { + if (kvm_pte_table(pte, ctx->level)) { childp = kvm_pte_follow(pte, mm_ops); if (mm_ops->page_count(childp) != 1) return 0; - kvm_clear_pte(ptep); + kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level); + __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); } else { - if (end - addr < granule) + if (ctx->end - ctx->addr < granule) return -EINVAL; - kvm_clear_pte(ptep); + kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); + __tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); data->unmapped += granule; } dsb(ish); isb(); - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); if (childp) mm_ops->put_page(childp); @@ -532,18 +534,18 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, return 0; } -static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; - kvm_pte_t pte = *ptep; + struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + kvm_pte_t pte = *ctx->ptep; if (!kvm_pte_valid(pte)) return 0; - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, level)) + if (kvm_pte_table(pte, ctx->level)) mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); return 0; @@ -682,19 +684,19 @@ static bool stage2_pte_is_counted(kvm_pte_t pte) return !!pte; } -static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, - u32 level, struct kvm_pgtable_mm_ops *mm_ops) +static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops) { /* * Clear the existing PTE, and perform break-before-make with * TLB maintenance if it was valid. */ - if (kvm_pte_valid(*ptep)) { - kvm_clear_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); + if (kvm_pte_valid(*ctx->ptep)) { + kvm_clear_pte(ctx->ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); } - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); } static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) @@ -708,29 +710,28 @@ static bool stage2_pte_executable(kvm_pte_t pte) return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } -static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level, +static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1))) + if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1))) return false; - return kvm_block_mapping_supported(addr, end, data->phys, level); + return kvm_block_mapping_supported(ctx, data->phys); } -static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, +static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - kvm_pte_t new, old = *ptep; - u64 granule = kvm_granule_size(level), phys = data->phys; + kvm_pte_t new, old = *ctx->ptep; + u64 granule = kvm_granule_size(ctx->level), phys = data->phys; struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (!stage2_leaf_mapping_allowed(addr, end, level, data)) + if (!stage2_leaf_mapping_allowed(ctx, data)) return -E2BIG; if (kvm_phys_is_valid(phys)) - new = kvm_init_valid_leaf_pte(phys, data->attr, level); + new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); else new = kvm_init_invalid_leaf_owner(data->owner_id); @@ -744,7 +745,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!stage2_pte_needs_update(old, new)) return -EAGAIN; - stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); + stage2_put_pte(ctx, data->mmu, mm_ops); } /* Perform CMOs before installation of the guest stage-2 PTE */ @@ -755,26 +756,25 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); - smp_store_release(ptep, new); + smp_store_release(ctx->ptep, new); if (stage2_pte_is_counted(new)) - mm_ops->get_page(ptep); + mm_ops->get_page(ctx->ptep); if (kvm_phys_is_valid(phys)) data->phys += granule; return 0; } -static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, +static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { if (data->anchor) return 0; - if (!stage2_leaf_mapping_allowed(addr, end, level, data)) + if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - data->childp = kvm_pte_follow(*ptep, data->mm_ops); - kvm_clear_pte(ptep); + data->childp = kvm_pte_follow(*ctx->ptep, data->mm_ops); + kvm_clear_pte(ctx->ptep); /* * Invalidate the whole stage-2, as we may have numerous leaf @@ -782,29 +782,29 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, * individually. */ kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); - data->anchor = ptep; + data->anchor = ctx->ptep; return 0; } -static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, +static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - kvm_pte_t *childp, pte = *ptep; + kvm_pte_t *childp, pte = *ctx->ptep; int ret; if (data->anchor) { if (stage2_pte_is_counted(pte)) - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); return 0; } - ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data); + ret = stage2_map_walker_try_leaf(ctx, data); if (ret != -E2BIG) return ret; - if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; if (!data->memcache) @@ -820,16 +820,15 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * will be mapped lazily. */ if (stage2_pte_is_counted(pte)) - stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); + stage2_put_pte(ctx, data->mmu, mm_ops); - kvm_set_table_pte(ptep, childp, mm_ops); - mm_ops->get_page(ptep); + kvm_set_table_pte(ctx->ptep, childp, mm_ops); + mm_ops->get_page(ctx->ptep); return 0; } -static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, +static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; @@ -839,17 +838,17 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, if (!data->anchor) return 0; - if (data->anchor == ptep) { + if (data->anchor == ctx->ptep) { childp = data->childp; data->anchor = NULL; data->childp = NULL; - ret = stage2_map_walk_leaf(addr, end, level, ptep, data); + ret = stage2_map_walk_leaf(ctx, data); } else { - childp = kvm_pte_follow(*ptep, mm_ops); + childp = kvm_pte_follow(*ctx->ptep, mm_ops); } mm_ops->put_page(childp); - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); return ret; } @@ -873,18 +872,18 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, * the page-table, installing the block entry when it revisits the anchor * pointer and clearing the anchor to NULL. */ -static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct stage2_map_data *data = arg; + struct stage2_map_data *data = ctx->arg; - switch (flag) { + switch (visit) { case KVM_PGTABLE_WALK_TABLE_PRE: - return stage2_map_walk_table_pre(addr, end, level, ptep, data); + return stage2_map_walk_table_pre(ctx, data); case KVM_PGTABLE_WALK_LEAF: - return stage2_map_walk_leaf(addr, end, level, ptep, data); + return stage2_map_walk_leaf(ctx, data); case KVM_PGTABLE_WALK_TABLE_POST: - return stage2_map_walk_table_post(addr, end, level, ptep, data); + return stage2_map_walk_table_post(ctx, data); } return -EINVAL; @@ -949,25 +948,24 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable *pgt = arg; + struct kvm_pgtable *pgt = ctx->arg; struct kvm_s2_mmu *mmu = pgt->mmu; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ptep, *childp = NULL; + kvm_pte_t pte = *ctx->ptep, *childp = NULL; bool need_flush = false; if (!kvm_pte_valid(pte)) { if (stage2_pte_is_counted(pte)) { - kvm_clear_pte(ptep); - mm_ops->put_page(ptep); + kvm_clear_pte(ctx->ptep); + mm_ops->put_page(ctx->ptep); } return 0; } - if (kvm_pte_table(pte, level)) { + if (kvm_pte_table(pte, ctx->level)) { childp = kvm_pte_follow(pte, mm_ops); if (mm_ops->page_count(childp) != 1) @@ -981,11 +979,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * block entry and rely on the remaining portions being faulted * back lazily. */ - stage2_put_pte(ptep, mmu, addr, level, mm_ops); + stage2_put_pte(ctx, mmu, mm_ops); if (need_flush && mm_ops->dcache_clean_inval_poc) mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + kvm_granule_size(ctx->level)); if (childp) mm_ops->put_page(childp); @@ -1012,18 +1010,17 @@ struct stage2_attr_data { struct kvm_pgtable_mm_ops *mm_ops; }; -static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ptep; - struct stage2_attr_data *data = arg; + kvm_pte_t pte = *ctx->ptep; + struct stage2_attr_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_pte_valid(pte)) return 0; - data->level = level; + data->level = ctx->level; data->pte = pte; pte &= ~data->attr_clr; pte |= data->attr_set; @@ -1039,10 +1036,10 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * stage-2 PTE if we are going to add executable permission. */ if (mm_ops->icache_inval_pou && - stage2_pte_executable(pte) && !stage2_pte_executable(*ptep)) + stage2_pte_executable(pte) && !stage2_pte_executable(*ctx->ptep)) mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); - WRITE_ONCE(*ptep, pte); + kvm_granule_size(ctx->level)); + WRITE_ONCE(*ctx->ptep, pte); } return 0; @@ -1140,20 +1137,19 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, return ret; } -static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable *pgt = arg; + struct kvm_pgtable *pgt = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ptep; + kvm_pte_t pte = *ctx->ptep; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; if (mm_ops->dcache_clean_inval_poc) mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + kvm_granule_size(ctx->level)); return 0; } @@ -1200,19 +1196,18 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, return 0; } -static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; - kvm_pte_t pte = *ptep; + struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + kvm_pte_t pte = *ctx->ptep; if (!stage2_pte_is_counted(pte)) return 0; - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, level)) + if (kvm_pte_table(pte, ctx->level)) mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); return 0; -- cgit v1.2.3 From 83844a2317ecad935f6735abd854e4bf3f757040 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:32 +0000 Subject: KVM: arm64: Stash observed pte value in visitor context Rather than reading the ptep all over the shop, read the ptep once from __kvm_pgtable_visit() and stick it in the visitor context. Reread the ptep after visiting a leaf in case the callback installed a new table underneath. No functional change intended. Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Reviewed-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-3-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 1 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 5 +- arch/arm64/kvm/hyp/nvhe/setup.c | 7 ++- arch/arm64/kvm/hyp/pgtable.c | 86 +++++++++++++++++------------------ 4 files changed, 48 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 607f9bb8aab4..14d4b68a1e92 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -201,6 +201,7 @@ enum kvm_pgtable_walk_flags { struct kvm_pgtable_visit_ctx { kvm_pte_t *ptep; + kvm_pte_t old; void *arg; u64 addr; u64 end; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8f5b6a36a039..d21d1b08a055 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -421,12 +421,11 @@ static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { struct check_walk_data *d = ctx->arg; - kvm_pte_t pte = *ctx->ptep; - if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte))) + if (kvm_pte_valid(ctx->old) && !addr_is_memory(kvm_pte_to_phys(ctx->old))) return -EINVAL; - return d->get_page_state(pte) == d->desired ? 0 : -EPERM; + return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM; } static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index a293cf5eba1b..6af443c9d78e 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -192,10 +192,9 @@ static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; enum kvm_pgtable_prot prot; enum pkvm_page_state state; - kvm_pte_t pte = *ctx->ptep; phys_addr_t phys; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; /* @@ -210,7 +209,7 @@ static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; - phys = kvm_pte_to_phys(pte); + phys = kvm_pte_to_phys(ctx->old); if (!addr_is_memory(phys)) return -EINVAL; @@ -218,7 +217,7 @@ static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx * Adjust the host stage-2 mappings to match the ownership attributes * configured in the hypervisor stage-1. */ - state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); + state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old)); switch (state) { case PKVM_PAGE_OWNED: return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 900c8b9c0cfc..fb3696b3a997 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -189,6 +189,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, enum kvm_pgtable_walk_flags flags = data->walker->flags; struct kvm_pgtable_visit_ctx ctx = { .ptep = ptep, + .old = READ_ONCE(*ptep), .arg = data->walker->arg, .addr = data->addr, .end = data->end, @@ -196,16 +197,16 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, .flags = flags, }; int ret = 0; - kvm_pte_t *childp, pte = *ptep; - bool table = kvm_pte_table(pte, level); + kvm_pte_t *childp; + bool table = kvm_pte_table(ctx.old, level); if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); - pte = *ptep; - table = kvm_pte_table(pte, level); + ctx.old = READ_ONCE(*ptep); + table = kvm_pte_table(ctx.old, level); } if (ret) @@ -217,7 +218,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; } - childp = kvm_pte_follow(pte, data->pgt->mm_ops); + childp = kvm_pte_follow(ctx.old, data->pgt->mm_ops); ret = __kvm_pgtable_walk(data, childp, level + 1); if (ret) goto out; @@ -299,7 +300,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, { struct leaf_walk_data *data = ctx->arg; - data->pte = *ctx->ptep; + data->pte = ctx->old; data->level = ctx->level; return 0; @@ -388,7 +389,7 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct hyp_map_data *data) { - kvm_pte_t new, old = *ctx->ptep; + kvm_pte_t new; u64 granule = kvm_granule_size(ctx->level), phys = data->phys; if (!kvm_block_mapping_supported(ctx, phys)) @@ -396,11 +397,11 @@ static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, data->phys += granule; new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); - if (old == new) + if (ctx->old == new) return true; - if (!kvm_pte_valid(old)) + if (!kvm_pte_valid(ctx->old)) data->mm_ops->get_page(ctx->ptep); - else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) + else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) return false; smp_store_release(ctx->ptep, new); @@ -461,16 +462,16 @@ struct hyp_unmap_data { static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ctx->ptep, *childp = NULL; + kvm_pte_t *childp = NULL; u64 granule = kvm_granule_size(ctx->level); struct hyp_unmap_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return -EINVAL; - if (kvm_pte_table(pte, ctx->level)) { - childp = kvm_pte_follow(pte, mm_ops); + if (kvm_pte_table(ctx->old, ctx->level)) { + childp = kvm_pte_follow(ctx->old, mm_ops); if (mm_ops->page_count(childp) != 1) return 0; @@ -538,15 +539,14 @@ static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; - kvm_pte_t pte = *ctx->ptep; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, ctx->level)) - mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); + if (kvm_pte_table(ctx->old, ctx->level)) + mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); return 0; } @@ -691,7 +691,7 @@ static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s * Clear the existing PTE, and perform break-before-make with * TLB maintenance if it was valid. */ - if (kvm_pte_valid(*ctx->ptep)) { + if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); } @@ -722,7 +722,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - kvm_pte_t new, old = *ctx->ptep; + kvm_pte_t new; u64 granule = kvm_granule_size(ctx->level), phys = data->phys; struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; @@ -735,14 +735,14 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, else new = kvm_init_invalid_leaf_owner(data->owner_id); - if (stage2_pte_is_counted(old)) { + if (stage2_pte_is_counted(ctx->old)) { /* * Skip updating the PTE if we are trying to recreate the exact * same mapping or only change the access permissions. Instead, * the vCPU will exit one more time from guest if still needed * and then go through the path of relaxing permissions. */ - if (!stage2_pte_needs_update(old, new)) + if (!stage2_pte_needs_update(ctx->old, new)) return -EAGAIN; stage2_put_pte(ctx, data->mmu, mm_ops); @@ -773,7 +773,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - data->childp = kvm_pte_follow(*ctx->ptep, data->mm_ops); + data->childp = kvm_pte_follow(ctx->old, data->mm_ops); kvm_clear_pte(ctx->ptep); /* @@ -790,11 +790,11 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - kvm_pte_t *childp, pte = *ctx->ptep; + kvm_pte_t *childp; int ret; if (data->anchor) { - if (stage2_pte_is_counted(pte)) + if (stage2_pte_is_counted(ctx->old)) mm_ops->put_page(ctx->ptep); return 0; @@ -819,7 +819,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, * a table. Accesses beyond 'end' that fall within the new table * will be mapped lazily. */ - if (stage2_pte_is_counted(pte)) + if (stage2_pte_is_counted(ctx->old)) stage2_put_pte(ctx, data->mmu, mm_ops); kvm_set_table_pte(ctx->ptep, childp, mm_ops); @@ -844,7 +844,7 @@ static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx, data->childp = NULL; ret = stage2_map_walk_leaf(ctx, data); } else { - childp = kvm_pte_follow(*ctx->ptep, mm_ops); + childp = kvm_pte_follow(ctx->old, mm_ops); } mm_ops->put_page(childp); @@ -954,23 +954,23 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_pgtable *pgt = ctx->arg; struct kvm_s2_mmu *mmu = pgt->mmu; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ctx->ptep, *childp = NULL; + kvm_pte_t *childp = NULL; bool need_flush = false; - if (!kvm_pte_valid(pte)) { - if (stage2_pte_is_counted(pte)) { + if (!kvm_pte_valid(ctx->old)) { + if (stage2_pte_is_counted(ctx->old)) { kvm_clear_pte(ctx->ptep); mm_ops->put_page(ctx->ptep); } return 0; } - if (kvm_pte_table(pte, ctx->level)) { - childp = kvm_pte_follow(pte, mm_ops); + if (kvm_pte_table(ctx->old, ctx->level)) { + childp = kvm_pte_follow(ctx->old, mm_ops); if (mm_ops->page_count(childp) != 1) return 0; - } else if (stage2_pte_cacheable(pgt, pte)) { + } else if (stage2_pte_cacheable(pgt, ctx->old)) { need_flush = !stage2_has_fwb(pgt); } @@ -982,7 +982,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, stage2_put_pte(ctx, mmu, mm_ops); if (need_flush && mm_ops->dcache_clean_inval_poc) - mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), kvm_granule_size(ctx->level)); if (childp) @@ -1013,11 +1013,11 @@ struct stage2_attr_data { static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ctx->ptep; + kvm_pte_t pte = ctx->old; struct stage2_attr_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; data->level = ctx->level; @@ -1036,7 +1036,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, * stage-2 PTE if we are going to add executable permission. */ if (mm_ops->icache_inval_pou && - stage2_pte_executable(pte) && !stage2_pte_executable(*ctx->ptep)) + stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old)) mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), kvm_granule_size(ctx->level)); WRITE_ONCE(*ctx->ptep, pte); @@ -1142,13 +1142,12 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx, { struct kvm_pgtable *pgt = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ctx->ptep; - if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) + if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old)) return 0; if (mm_ops->dcache_clean_inval_poc) - mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), kvm_granule_size(ctx->level)); return 0; } @@ -1200,15 +1199,14 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; - kvm_pte_t pte = *ctx->ptep; - if (!stage2_pte_is_counted(pte)) + if (!stage2_pte_is_counted(ctx->old)) return 0; mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, ctx->level)) - mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); + if (kvm_pte_table(ctx->old, ctx->level)) + mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); return 0; } -- cgit v1.2.3 From 2a611c7f87f26cca405da63a57f06d0e4dc14240 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:33 +0000 Subject: KVM: arm64: Pass mm_ops through the visitor context As a prerequisite for getting visitors off of struct kvm_pgtable, pass mm_ops through the visitor context. No functional change intended. Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Reviewed-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-4-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 1 + arch/arm64/kvm/hyp/nvhe/setup.c | 3 +- arch/arm64/kvm/hyp/pgtable.c | 63 ++++++++++++++---------------------- 3 files changed, 26 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 14d4b68a1e92..a752793482cb 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -203,6 +203,7 @@ struct kvm_pgtable_visit_ctx { kvm_pte_t *ptep; kvm_pte_t old; void *arg; + struct kvm_pgtable_mm_ops *mm_ops; u64 addr; u64 end; u32 level; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 6af443c9d78e..1068338d77f3 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -189,7 +189,7 @@ static void hpool_put_page(void *addr) static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; enum kvm_pgtable_prot prot; enum pkvm_page_state state; phys_addr_t phys; @@ -239,7 +239,6 @@ static int finalize_host_mappings(void) struct kvm_pgtable_walker walker = { .cb = finalize_host_mappings_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pkvm_pgtable.mm_ops, }; int i, ret; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index fb3696b3a997..db25e81a9890 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -181,9 +181,10 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - kvm_pte_t *pgtable, u32 level); + struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level); static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, + struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *ptep, u32 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; @@ -191,6 +192,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, .ptep = ptep, .old = READ_ONCE(*ptep), .arg = data->walker->arg, + .mm_ops = mm_ops, .addr = data->addr, .end = data->end, .level = level, @@ -218,8 +220,8 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; } - childp = kvm_pte_follow(ctx.old, data->pgt->mm_ops); - ret = __kvm_pgtable_walk(data, childp, level + 1); + childp = kvm_pte_follow(ctx.old, mm_ops); + ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1); if (ret) goto out; @@ -231,7 +233,7 @@ out: } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - kvm_pte_t *pgtable, u32 level) + struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level) { u32 idx; int ret = 0; @@ -245,7 +247,7 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, if (data->addr >= data->end) break; - ret = __kvm_pgtable_visit(data, ptep, level); + ret = __kvm_pgtable_visit(data, mm_ops, ptep, level); if (ret) break; } @@ -269,7 +271,7 @@ static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) { kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE]; - ret = __kvm_pgtable_walk(data, ptep, pgt->start_level); + ret = __kvm_pgtable_walk(data, pgt->mm_ops, ptep, pgt->start_level); if (ret) break; } @@ -332,7 +334,6 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, struct hyp_map_data { u64 phys; kvm_pte_t attr; - struct kvm_pgtable_mm_ops *mm_ops; }; static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) @@ -400,7 +401,7 @@ static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (ctx->old == new) return true; if (!kvm_pte_valid(ctx->old)) - data->mm_ops->get_page(ctx->ptep); + ctx->mm_ops->get_page(ctx->ptep); else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) return false; @@ -413,7 +414,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, { kvm_pte_t *childp; struct hyp_map_data *data = ctx->arg; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (hyp_map_walker_try_leaf(ctx, data)) return 0; @@ -436,7 +437,6 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, int ret; struct hyp_map_data map_data = { .phys = ALIGN_DOWN(phys, PAGE_SIZE), - .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = hyp_map_walker, @@ -454,18 +454,13 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, return ret; } -struct hyp_unmap_data { - u64 unmapped; - struct kvm_pgtable_mm_ops *mm_ops; -}; - static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { kvm_pte_t *childp = NULL; u64 granule = kvm_granule_size(ctx->level); - struct hyp_unmap_data *data = ctx->arg; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + u64 *unmapped = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!kvm_pte_valid(ctx->old)) return -EINVAL; @@ -486,7 +481,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, kvm_clear_pte(ctx->ptep); dsb(ishst); __tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); - data->unmapped += granule; + *unmapped += granule; } dsb(ish); @@ -501,12 +496,10 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) { - struct hyp_unmap_data unmap_data = { - .mm_ops = pgt->mm_ops, - }; + u64 unmapped = 0; struct kvm_pgtable_walker walker = { .cb = hyp_unmap_walker, - .arg = &unmap_data, + .arg = &unmapped, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; @@ -514,7 +507,7 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) return 0; kvm_pgtable_walk(pgt, addr, size, &walker); - return unmap_data.unmapped; + return unmapped; } int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, @@ -538,7 +531,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!kvm_pte_valid(ctx->old)) return 0; @@ -556,7 +549,6 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) struct kvm_pgtable_walker walker = { .cb = hyp_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pgt->mm_ops, }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); @@ -575,8 +567,6 @@ struct stage2_map_data { struct kvm_s2_mmu *mmu; void *memcache; - struct kvm_pgtable_mm_ops *mm_ops; - /* Force mappings to page granularity */ bool force_pte; }; @@ -725,7 +715,7 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new; u64 granule = kvm_granule_size(ctx->level), phys = data->phys; struct kvm_pgtable *pgt = data->mmu->pgt; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!stage2_leaf_mapping_allowed(ctx, data)) return -E2BIG; @@ -773,7 +763,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - data->childp = kvm_pte_follow(ctx->old, data->mm_ops); + data->childp = kvm_pte_follow(ctx->old, ctx->mm_ops); kvm_clear_pte(ctx->ptep); /* @@ -789,7 +779,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; kvm_pte_t *childp; int ret; @@ -831,7 +821,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; kvm_pte_t *childp; int ret = 0; @@ -898,7 +888,6 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, .phys = ALIGN_DOWN(phys, PAGE_SIZE), .mmu = pgt->mmu, .memcache = mc, - .mm_ops = pgt->mm_ops, .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot), }; struct kvm_pgtable_walker walker = { @@ -929,7 +918,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, .phys = KVM_PHYS_INVALID, .mmu = pgt->mmu, .memcache = mc, - .mm_ops = pgt->mm_ops, .owner_id = owner_id, .force_pte = true, }; @@ -953,7 +941,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, { struct kvm_pgtable *pgt = ctx->arg; struct kvm_s2_mmu *mmu = pgt->mmu; - struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; kvm_pte_t *childp = NULL; bool need_flush = false; @@ -1007,7 +995,6 @@ struct stage2_attr_data { kvm_pte_t attr_clr; kvm_pte_t pte; u32 level; - struct kvm_pgtable_mm_ops *mm_ops; }; static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, @@ -1015,7 +1002,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, { kvm_pte_t pte = ctx->old; struct stage2_attr_data *data = ctx->arg; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!kvm_pte_valid(ctx->old)) return 0; @@ -1055,7 +1042,6 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, struct stage2_attr_data data = { .attr_set = attr_set & attr_mask, .attr_clr = attr_clr & attr_mask, - .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = stage2_attr_walker, @@ -1198,7 +1184,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!stage2_pte_is_counted(ctx->old)) return 0; @@ -1218,7 +1204,6 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pgt->mm_ops, }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); -- cgit v1.2.3 From fa002e8e79b3f980455ba585c1f47b26680de5b9 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:34 +0000 Subject: KVM: arm64: Don't pass kvm_pgtable through kvm_pgtable_walk_data In order to tear down page tables from outside the context of kvm_pgtable (such as an RCU callback), stop passing a pointer through kvm_pgtable_walk_data. No functional change intended. Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Reviewed-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-5-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index db25e81a9890..93989b750a26 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -50,7 +50,6 @@ #define KVM_MAX_OWNER_ID 1 struct kvm_pgtable_walk_data { - struct kvm_pgtable *pgt; struct kvm_pgtable_walker *walker; u64 addr; @@ -88,7 +87,7 @@ static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) return (data->addr >> shift) & mask; } -static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) +static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) { u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */ u64 mask = BIT(pgt->ia_bits) - 1; @@ -96,11 +95,6 @@ static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) return (addr & mask) >> shift; } -static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data) -{ - return __kvm_pgd_page_idx(data->pgt, data->addr); -} - static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) { struct kvm_pgtable pgt = { @@ -108,7 +102,7 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) .start_level = start_level, }; - return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; + return kvm_pgd_page_idx(&pgt, -1ULL) + 1; } static bool kvm_pte_table(kvm_pte_t pte, u32 level) @@ -255,11 +249,10 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, return ret; } -static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) +static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_data *data) { u32 idx; int ret = 0; - struct kvm_pgtable *pgt = data->pgt; u64 limit = BIT(pgt->ia_bits); if (data->addr > limit || data->end > limit) @@ -268,7 +261,7 @@ static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) if (!pgt->pgd) return -EINVAL; - for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) { + for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) { kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE]; ret = __kvm_pgtable_walk(data, pgt->mm_ops, ptep, pgt->start_level); @@ -283,13 +276,12 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker *walker) { struct kvm_pgtable_walk_data walk_data = { - .pgt = pgt, .addr = ALIGN_DOWN(addr, PAGE_SIZE), .end = PAGE_ALIGN(walk_data.addr + size), .walker = walker, }; - return _kvm_pgtable_walk(&walk_data); + return _kvm_pgtable_walk(pgt, &walk_data); } struct leaf_walk_data { -- cgit v1.2.3 From 8e94e1252cc054bb31fd3e9a15235cd831970ec1 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:35 +0000 Subject: KVM: arm64: Add a helper to tear down unlinked stage-2 subtrees A subsequent change to KVM will move the tear down of an unlinked stage-2 subtree out of the critical path of the break-before-make sequence. Introduce a new helper for tearing down unlinked stage-2 subtrees. Leverage the existing stage-2 free walkers to do so, with a deep call into __kvm_pgtable_walk() as the subtree is no longer reachable from the root. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-6-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 11 +++++++++++ arch/arm64/kvm/hyp/pgtable.c | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index a752793482cb..93b1feeaebab 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -333,6 +333,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, */ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); +/** + * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure. + * @mm_ops: Memory management callbacks. + * @pgtable: Unlinked stage-2 paging structure to be freed. + * @level: Level of the stage-2 paging structure to be freed. + * + * The page-table is assumed to be unreachable by any hardware walkers prior to + * freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level); + /** * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 93989b750a26..363a5cce7e1a 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1203,3 +1203,26 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); pgt->pgd = NULL; } + +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level) +{ + kvm_pte_t *ptep = (kvm_pte_t *)pgtable; + struct kvm_pgtable_walker walker = { + .cb = stage2_free_walker, + .flags = KVM_PGTABLE_WALK_LEAF | + KVM_PGTABLE_WALK_TABLE_POST, + }; + struct kvm_pgtable_walk_data data = { + .walker = &walker, + + /* + * At this point the IPA really doesn't matter, as the page + * table being traversed has already been removed from the stage + * 2. Set an appropriate range to cover the entire page table. + */ + .addr = 0, + .end = kvm_granule_size(level), + }; + + WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level)); +} -- cgit v1.2.3 From 6b91b8f95cadd3441c056182daf9024475ac4a91 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:36 +0000 Subject: KVM: arm64: Use an opaque type for pteps Use an opaque type for pteps and require visitors explicitly dereference the pointer before using. Protecting page table memory with RCU requires that KVM dereferences RCU-annotated pointers before using. However, RCU is not available for use in the nVHE hypervisor and the opaque type can be conditionally annotated with RCU for the stage-2 MMU. Call the type a 'pteref' to avoid a naming collision with raw pteps. No functional change intended. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-7-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 9 ++++++++- arch/arm64/kvm/hyp/pgtable.c | 27 ++++++++++++++------------- arch/arm64/kvm/mmu.c | 2 +- 3 files changed, 23 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 93b1feeaebab..cbd2851eefc1 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; +typedef kvm_pte_t *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) +{ + return pteref; +} + #define KVM_PTE_VALID BIT(0) #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) @@ -175,7 +182,7 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, struct kvm_pgtable { u32 ia_bits; u32 start_level; - kvm_pte_t *pgd; + kvm_pteref_t pgd; struct kvm_pgtable_mm_ops *mm_ops; /* Stage-2 only */ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 363a5cce7e1a..7511494537e5 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -175,13 +175,14 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level); + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level); static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, struct kvm_pgtable_mm_ops *mm_ops, - kvm_pte_t *ptep, u32 level) + kvm_pteref_t pteref, u32 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; + kvm_pte_t *ptep = kvm_dereference_pteref(pteref, false); struct kvm_pgtable_visit_ctx ctx = { .ptep = ptep, .old = READ_ONCE(*ptep), @@ -193,7 +194,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, .flags = flags, }; int ret = 0; - kvm_pte_t *childp; + kvm_pteref_t childp; bool table = kvm_pte_table(ctx.old, level); if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) @@ -214,7 +215,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; } - childp = kvm_pte_follow(ctx.old, mm_ops); + childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops); ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1); if (ret) goto out; @@ -227,7 +228,7 @@ out: } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level) + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level) { u32 idx; int ret = 0; @@ -236,12 +237,12 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, return -EINVAL; for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) { - kvm_pte_t *ptep = &pgtable[idx]; + kvm_pteref_t pteref = &pgtable[idx]; if (data->addr >= data->end) break; - ret = __kvm_pgtable_visit(data, mm_ops, ptep, level); + ret = __kvm_pgtable_visit(data, mm_ops, pteref, level); if (ret) break; } @@ -262,9 +263,9 @@ static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_da return -EINVAL; for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) { - kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE]; + kvm_pteref_t pteref = &pgt->pgd[idx * PTRS_PER_PTE]; - ret = __kvm_pgtable_walk(data, pgt->mm_ops, ptep, pgt->start_level); + ret = __kvm_pgtable_walk(data, pgt->mm_ops, pteref, pgt->start_level); if (ret) break; } @@ -507,7 +508,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, { u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); - pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL); + pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL); if (!pgt->pgd) return -ENOMEM; @@ -544,7 +545,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); - pgt->mm_ops->put_page(pgt->pgd); + pgt->mm_ops->put_page(kvm_dereference_pteref(pgt->pgd, false)); pgt->pgd = NULL; } @@ -1157,7 +1158,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; - pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz); + pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz); if (!pgt->pgd) return -ENOMEM; @@ -1200,7 +1201,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(pgt->pgd, false), pgd_sz); pgt->pgd = NULL; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8..5e197ae190ef 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -640,7 +640,7 @@ static struct kvm_pgtable_mm_ops kvm_user_mm_ops = { static int get_user_mapping_size(struct kvm *kvm, u64 addr) { struct kvm_pgtable pgt = { - .pgd = (kvm_pte_t *)kvm->mm->pgd, + .pgd = (kvm_pteref_t)kvm->mm->pgd, .ia_bits = VA_BITS, .start_level = (KVM_PGTABLE_MAX_LEVELS - CONFIG_PGTABLE_LEVELS), -- cgit v1.2.3 From 5c359cca1faf6d7671537fe1c240e8668467864d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:37 +0000 Subject: KVM: arm64: Tear down unlinked stage-2 subtree after break-before-make The break-before-make sequence is a bit annoying as it opens a window wherein memory is unmapped from the guest. KVM should replace the PTE as quickly as possible and avoid unnecessary work in between. Presently, the stage-2 map walker tears down a removed table before installing a block mapping when coalescing a table into a block. As the removed table is no longer visible to hardware walkers after the DSB+TLBI, it is possible to move the remaining cleanup to happen after installing the new PTE. Reshuffle the stage-2 map walker to install the new block entry in the pre-order callback. Unwire all of the teardown logic and replace it with a call to kvm_pgtable_stage2_free_removed() after fixing the PTE. The post-order visitor is now completely unnecessary, so drop it. Finally, touch up the comments to better represent the now simplified map walker. Note that the call to tear down the unlinked stage-2 is indirected as a subsequent change will use an RCU callback to trigger tear down. RCU is not available to pKVM, so there is a need to use different implementations on pKVM and non-pKVM VMs. Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-8-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 3 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 6 +++ arch/arm64/kvm/hyp/pgtable.c | 85 +++++++++-------------------------- arch/arm64/kvm/mmu.c | 8 ++++ 4 files changed, 39 insertions(+), 63 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index cbd2851eefc1..e70cf57b719e 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -92,6 +92,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level) * allocation is physically contiguous. * @free_pages_exact: Free an exact number of memory pages previously * allocated by zalloc_pages_exact. + * @free_removed_table: Free a removed paging structure by unlinking and + * dropping references. * @get_page: Increment the refcount on a page. * @put_page: Decrement the refcount on a page. When the * refcount reaches 0 the page is automatically @@ -110,6 +112,7 @@ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); void* (*zalloc_pages_exact)(size_t size); void (*free_pages_exact)(void *addr, size_t size); + void (*free_removed_table)(void *addr, u32 level); void (*get_page)(void *addr); void (*put_page)(void *addr); int (*page_count)(void *addr); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d21d1b08a055..735769886b55 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -79,6 +79,11 @@ static void host_s2_put_page(void *addr) hyp_put_page(&host_s2_pool, addr); } +static void host_s2_free_removed_table(void *addr, u32 level) +{ + kvm_pgtable_stage2_free_removed(&host_kvm.mm_ops, addr, level); +} + static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; @@ -93,6 +98,7 @@ static int prepare_s2_pool(void *pgt_pool_base) host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_pages_exact = host_s2_zalloc_pages_exact, .zalloc_page = host_s2_zalloc_page, + .free_removed_table = host_s2_free_removed_table, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .page_count = hyp_page_count, diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 7511494537e5..7c9782347570 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -750,13 +750,13 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - if (data->anchor) - return 0; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops); + int ret; if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - data->childp = kvm_pte_follow(ctx->old, ctx->mm_ops); kvm_clear_pte(ctx->ptep); /* @@ -765,8 +765,13 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, * individually. */ kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); - data->anchor = ctx->ptep; - return 0; + + ret = stage2_map_walker_try_leaf(ctx, data); + + mm_ops->put_page(ctx->ptep); + mm_ops->free_removed_table(childp, ctx->level); + + return ret; } static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, @@ -776,13 +781,6 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t *childp; int ret; - if (data->anchor) { - if (stage2_pte_is_counted(ctx->old)) - mm_ops->put_page(ctx->ptep); - - return 0; - } - ret = stage2_map_walker_try_leaf(ctx, data); if (ret != -E2BIG) return ret; @@ -811,49 +809,14 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, return 0; } -static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx, - struct stage2_map_data *data) -{ - struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - kvm_pte_t *childp; - int ret = 0; - - if (!data->anchor) - return 0; - - if (data->anchor == ctx->ptep) { - childp = data->childp; - data->anchor = NULL; - data->childp = NULL; - ret = stage2_map_walk_leaf(ctx, data); - } else { - childp = kvm_pte_follow(ctx->old, mm_ops); - } - - mm_ops->put_page(childp); - mm_ops->put_page(ctx->ptep); - - return ret; -} - /* - * This is a little fiddly, as we use all three of the walk flags. The idea - * is that the TABLE_PRE callback runs for table entries on the way down, - * looking for table entries which we could conceivably replace with a - * block entry for this mapping. If it finds one, then it sets the 'anchor' - * field in 'struct stage2_map_data' to point at the table entry, before - * clearing the entry to zero and descending into the now detached table. - * - * The behaviour of the LEAF callback then depends on whether or not the - * anchor has been set. If not, then we're not using a block mapping higher - * up the table and we perform the mapping at the existing leaves instead. - * If, on the other hand, the anchor _is_ set, then we drop references to - * all valid leaves so that the pages beneath the anchor can be freed. + * The TABLE_PRE callback runs for table entries on the way down, looking + * for table entries which we could conceivably replace with a block entry + * for this mapping. If it finds one it replaces the entry and calls + * kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table. * - * Finally, the TABLE_POST callback does nothing if the anchor has not - * been set, but otherwise frees the page-table pages while walking back up - * the page-table, installing the block entry when it revisits the anchor - * pointer and clearing the anchor to NULL. + * Otherwise, the LEAF callback performs the mapping at the existing leaves + * instead. */ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) @@ -865,11 +828,9 @@ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, return stage2_map_walk_table_pre(ctx, data); case KVM_PGTABLE_WALK_LEAF: return stage2_map_walk_leaf(ctx, data); - case KVM_PGTABLE_WALK_TABLE_POST: - return stage2_map_walk_table_post(ctx, data); + default: + return -EINVAL; } - - return -EINVAL; } int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, @@ -886,8 +847,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, .flags = KVM_PGTABLE_WALK_TABLE_PRE | - KVM_PGTABLE_WALK_LEAF | - KVM_PGTABLE_WALK_TABLE_POST, + KVM_PGTABLE_WALK_LEAF, .arg = &map_data, }; @@ -917,8 +877,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, .flags = KVM_PGTABLE_WALK_TABLE_PRE | - KVM_PGTABLE_WALK_LEAF | - KVM_PGTABLE_WALK_TABLE_POST, + KVM_PGTABLE_WALK_LEAF, .arg = &map_data, }; @@ -1207,7 +1166,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level) { - kvm_pte_t *ptep = (kvm_pte_t *)pgtable; + kvm_pteref_t ptep = (kvm_pteref_t)pgtable; struct kvm_pgtable_walker walker = { .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | @@ -1225,5 +1184,5 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg .end = kvm_granule_size(level), }; - WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level)); + WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1)); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 5e197ae190ef..73ae908eb5d9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -128,6 +128,13 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size) free_pages_exact(virt, size); } +static struct kvm_pgtable_mm_ops kvm_s2_mm_ops; + +static void stage2_free_removed_table(void *addr, u32 level) +{ + kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level); +} + static void kvm_host_get_page(void *addr) { get_page(virt_to_page(addr)); @@ -662,6 +669,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .zalloc_page = stage2_memcache_zalloc_page, .zalloc_pages_exact = kvm_s2_zalloc_pages_exact, .free_pages_exact = kvm_s2_free_pages_exact, + .free_removed_table = stage2_free_removed_table, .get_page = kvm_host_get_page, .put_page = kvm_s2_put_page, .page_count = kvm_host_page_count, -- cgit v1.2.3 From c3119ae45dfb6038ca458ab5ba7a9fba2810845b Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:38 +0000 Subject: KVM: arm64: Protect stage-2 traversal with RCU Use RCU to safely walk the stage-2 page tables in parallel. Acquire and release the RCU read lock when traversing the page tables. Defer the freeing of table memory to an RCU callback. Indirect the calls into RCU and provide stubs for hypervisor code, as RCU is not available in such a context. The RCU protection doesn't amount to much at the moment, as readers are already protected by the read-write lock (all walkers that free table memory take the write lock). Nonetheless, a subsequent change will futher relax the locking requirements around the stage-2 MMU, thereby depending on RCU. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-9-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 49 ++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 10 +++++++- arch/arm64/kvm/mmu.c | 14 ++++++++++- 3 files changed, 71 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index e70cf57b719e..7634b6964779 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; +/* + * RCU cannot be used in a non-kernel context such as the hyp. As such, page + * table walkers used in hyp do not call into RCU and instead use other + * synchronization mechanisms (such as a spinlock). + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + typedef kvm_pte_t *kvm_pteref_t; static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) @@ -44,6 +51,40 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared return pteref; } +static inline void kvm_pgtable_walk_begin(void) {} +static inline void kvm_pgtable_walk_end(void) {} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return true; +} + +#else + +typedef kvm_pte_t __rcu *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) +{ + return rcu_dereference_check(pteref, !shared); +} + +static inline void kvm_pgtable_walk_begin(void) +{ + rcu_read_lock(); +} + +static inline void kvm_pgtable_walk_end(void) +{ + rcu_read_unlock(); +} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return rcu_read_lock_held(); +} + +#endif + #define KVM_PTE_VALID BIT(0) #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) @@ -202,11 +243,14 @@ struct kvm_pgtable { * children. * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their * children. + * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared + * with other software walkers. */ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_LEAF = BIT(0), KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), KVM_PGTABLE_WALK_TABLE_POST = BIT(2), + KVM_PGTABLE_WALK_SHARED = BIT(3), }; struct kvm_pgtable_visit_ctx { @@ -223,6 +267,11 @@ struct kvm_pgtable_visit_ctx { typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit); +static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx) +{ + return ctx->flags & KVM_PGTABLE_WALK_SHARED; +} + /** * struct kvm_pgtable_walker - Hook into a page-table walk. * @cb: Callback function to invoke during the walk. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 7c9782347570..d8d963521d4e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -171,6 +171,9 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_walker *walker = data->walker; + + /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */ + WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held()); return walker->cb(ctx, visit); } @@ -281,8 +284,13 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, .end = PAGE_ALIGN(walk_data.addr + size), .walker = walker, }; + int r; + + kvm_pgtable_walk_begin(); + r = _kvm_pgtable_walk(pgt, &walk_data); + kvm_pgtable_walk_end(); - return _kvm_pgtable_walk(pgt, &walk_data); + return r; } struct leaf_walk_data { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 73ae908eb5d9..52e042399ba5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size) static struct kvm_pgtable_mm_ops kvm_s2_mm_ops; +static void stage2_free_removed_table_rcu_cb(struct rcu_head *head) +{ + struct page *page = container_of(head, struct page, rcu_head); + void *pgtable = page_to_virt(page); + u32 level = page_private(page); + + kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level); +} + static void stage2_free_removed_table(void *addr, u32 level) { - kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level); + struct page *page = virt_to_page(addr); + + set_page_private(page, (unsigned long)level); + call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb); } static void kvm_host_get_page(void *addr) -- cgit v1.2.3 From ca5de2448c3b4c018fe3d6223df8b59068be1cd7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:39 +0000 Subject: KVM: arm64: Atomically update stage 2 leaf attributes in parallel walks The stage2 attr walker is already used for parallel walks. Since commit f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation during dirty logging"), KVM acquires the read lock when write-unprotecting a PTE. However, the walker only uses a simple store to update the PTE. This is safe as the only possible race is with hardware updates to the access flag, which is benign. However, a subsequent change to KVM will allow more changes to the stage 2 page tables to be done in parallel. Prepare the stage 2 attribute walker by performing atomic updates to the PTE when walking in parallel. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-10-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index d8d963521d4e..a34e2050f931 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -185,7 +185,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, kvm_pteref_t pteref, u32 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; - kvm_pte_t *ptep = kvm_dereference_pteref(pteref, false); + kvm_pte_t *ptep = kvm_dereference_pteref(pteref, flags & KVM_PGTABLE_WALK_SHARED); struct kvm_pgtable_visit_ctx ctx = { .ptep = ptep, .old = READ_ONCE(*ptep), @@ -675,6 +675,16 @@ static bool stage2_pte_is_counted(kvm_pte_t pte) return !!pte; } +static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) +{ + if (!kvm_pgtable_walk_shared(ctx)) { + WRITE_ONCE(*ctx->ptep, new); + return true; + } + + return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old; +} + static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops) { @@ -986,7 +996,9 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old)) mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), kvm_granule_size(ctx->level)); - WRITE_ONCE(*ctx->ptep, pte); + + if (!stage2_try_set_pte(ctx, pte)) + return -EAGAIN; } return 0; @@ -995,7 +1007,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, u64 size, kvm_pte_t attr_set, kvm_pte_t attr_clr, kvm_pte_t *orig_pte, - u32 *level) + u32 *level, enum kvm_pgtable_walk_flags flags) { int ret; kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; @@ -1006,7 +1018,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, struct kvm_pgtable_walker walker = { .cb = stage2_attr_walker, .arg = &data, - .flags = KVM_PGTABLE_WALK_LEAF, + .flags = flags | KVM_PGTABLE_WALK_LEAF, }; ret = kvm_pgtable_walk(pgt, addr, size, &walker); @@ -1025,14 +1037,14 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) { return stage2_update_leaf_attrs(pgt, addr, size, 0, KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, - NULL, NULL); + NULL, NULL, 0); } kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, - &pte, NULL); + &pte, NULL, 0); dsb(ishst); return pte; } @@ -1041,7 +1053,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF, - &pte, NULL); + &pte, NULL, 0); /* * "But where's the TLBI?!", you scream. * "Over in the core code", I sigh. @@ -1054,7 +1066,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; - stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL); + stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0); return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF; } @@ -1077,7 +1089,8 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, if (prot & KVM_PGTABLE_PROT_X) clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; - ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level); + ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, + KVM_PGTABLE_WALK_SHARED); if (!ret) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level); return ret; -- cgit v1.2.3 From 331aa3a0547d1c794587e0df374d13b16645e832 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:40 +0000 Subject: KVM: arm64: Split init and set for table PTE Create a helper to initialize a table and directly call smp_store_release() to install it (for now). Prepare for a subsequent change that generalizes PTE writes with a helper. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-11-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index a34e2050f931..f4dd77c6c97d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -136,16 +136,13 @@ static void kvm_clear_pte(kvm_pte_t *ptep) WRITE_ONCE(*ptep, 0); } -static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp, - struct kvm_pgtable_mm_ops *mm_ops) +static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops *mm_ops) { - kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp)); + kvm_pte_t pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp)); pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE); pte |= KVM_PTE_VALID; - - WARN_ON(kvm_pte_valid(old)); - smp_store_release(ptep, pte); + return pte; } static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) @@ -413,7 +410,7 @@ static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - kvm_pte_t *childp; + kvm_pte_t *childp, new; struct hyp_map_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; @@ -427,8 +424,10 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, if (!childp) return -ENOMEM; - kvm_set_table_pte(ctx->ptep, childp, mm_ops); + new = kvm_init_table_pte(childp, mm_ops); mm_ops->get_page(ctx->ptep); + smp_store_release(ctx->ptep, new); + return 0; } @@ -796,7 +795,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - kvm_pte_t *childp; + kvm_pte_t *childp, new; int ret; ret = stage2_map_walker_try_leaf(ctx, data); @@ -821,8 +820,9 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (stage2_pte_is_counted(ctx->old)) stage2_put_pte(ctx, data->mmu, mm_ops); - kvm_set_table_pte(ctx->ptep, childp, mm_ops); + new = kvm_init_table_pte(childp, mm_ops); mm_ops->get_page(ctx->ptep); + smp_store_release(ctx->ptep, new); return 0; } -- cgit v1.2.3 From 0ab12f3574db6cb432917a667f9392a88e8f0dfc Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:58:55 +0000 Subject: KVM: arm64: Make block->table PTE changes parallel-aware In order to service stage-2 faults in parallel, stage-2 table walkers must take exclusive ownership of the PTE being worked on. An additional requirement of the architecture is that software must perform a 'break-before-make' operation when changing the block size used for mapping memory. Roll these two concepts together into helpers for performing a 'break-before-make' sequence. Use a special PTE value to indicate a PTE has been locked by a software walker. Additionally, use an atomic compare-exchange to 'break' the PTE when the stage-2 page tables are possibly shared with another software walker. Elide the DSB + TLBI if the evicted PTE was invalid (and thus not subject to break-before-make). All of the atomics do nothing for now, as the stage-2 walker isn't fully ready to perform parallel walks. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215855.1895367-1-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 80 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f4dd77c6c97d..b9f0d792b8d9 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -49,6 +49,12 @@ #define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) #define KVM_MAX_OWNER_ID 1 +/* + * Used to indicate a pte for which a 'break-before-make' sequence is in + * progress. + */ +#define KVM_INVALID_PTE_LOCKED BIT(10) + struct kvm_pgtable_walk_data { struct kvm_pgtable_walker *walker; @@ -674,6 +680,11 @@ static bool stage2_pte_is_counted(kvm_pte_t pte) return !!pte; } +static bool stage2_pte_is_locked(kvm_pte_t pte) +{ + return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED); +} + static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) { if (!kvm_pgtable_walk_shared(ctx)) { @@ -684,6 +695,64 @@ static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_ return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old; } +/** + * stage2_try_break_pte() - Invalidates a pte according to the + * 'break-before-make' requirements of the + * architecture. + * + * @ctx: context of the visited pte. + * @mmu: stage-2 mmu + * + * Returns: true if the pte was successfully broken. + * + * If the removed pte was valid, performs the necessary serialization and TLB + * invalidation for the old value. For counted ptes, drops the reference count + * on the containing table page. + */ +static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, + struct kvm_s2_mmu *mmu) +{ + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + + if (stage2_pte_is_locked(ctx->old)) { + /* + * Should never occur if this walker has exclusive access to the + * page tables. + */ + WARN_ON(!kvm_pgtable_walk_shared(ctx)); + return false; + } + + if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED)) + return false; + + /* + * Perform the appropriate TLB invalidation based on the evicted pte + * value (if any). + */ + if (kvm_pte_table(ctx->old, ctx->level)) + kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); + else if (kvm_pte_valid(ctx->old)) + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + + if (stage2_pte_is_counted(ctx->old)) + mm_ops->put_page(ctx->ptep); + + return true; +} + +static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) +{ + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + + WARN_ON(!stage2_pte_is_locked(*ctx->ptep)); + + if (stage2_pte_is_counted(new)) + mm_ops->get_page(ctx->ptep); + + smp_store_release(ctx->ptep, new); +} + static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops) { @@ -812,17 +881,18 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (!childp) return -ENOMEM; + if (!stage2_try_break_pte(ctx, data->mmu)) { + mm_ops->put_page(childp); + return -EAGAIN; + } + /* * If we've run into an existing block mapping then replace it with * a table. Accesses beyond 'end' that fall within the new table * will be mapped lazily. */ - if (stage2_pte_is_counted(ctx->old)) - stage2_put_pte(ctx, data->mmu, mm_ops); - new = kvm_init_table_pte(childp, mm_ops); - mm_ops->get_page(ctx->ptep); - smp_store_release(ctx->ptep, new); + stage2_make_pte(ctx, new); return 0; } -- cgit v1.2.3 From 946fbfdf336b811479e024136c7cabc00157b6b9 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:59:34 +0000 Subject: KVM: arm64: Make leaf->leaf PTE changes parallel-aware Convert stage2_map_walker_try_leaf() to use the new break-before-make helpers, thereby making the handler parallel-aware. As before, avoid the break-before-make if recreating the existing mapping. Additionally, retry execution if another vCPU thread is modifying the same PTE. Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215934.1895478-1-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index b9f0d792b8d9..238f29389617 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -804,18 +804,17 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, else new = kvm_init_invalid_leaf_owner(data->owner_id); - if (stage2_pte_is_counted(ctx->old)) { - /* - * Skip updating the PTE if we are trying to recreate the exact - * same mapping or only change the access permissions. Instead, - * the vCPU will exit one more time from guest if still needed - * and then go through the path of relaxing permissions. - */ - if (!stage2_pte_needs_update(ctx->old, new)) - return -EAGAIN; + /* + * Skip updating the PTE if we are trying to recreate the exact + * same mapping or only change the access permissions. Instead, + * the vCPU will exit one more time from guest if still needed + * and then go through the path of relaxing permissions. + */ + if (!stage2_pte_needs_update(ctx->old, new)) + return -EAGAIN; - stage2_put_pte(ctx, data->mmu, mm_ops); - } + if (!stage2_try_break_pte(ctx, data->mmu)) + return -EAGAIN; /* Perform CMOs before installation of the guest stage-2 PTE */ if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) @@ -825,9 +824,8 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); - smp_store_release(ctx->ptep, new); - if (stage2_pte_is_counted(new)) - mm_ops->get_page(ctx->ptep); + stage2_make_pte(ctx, new); + if (kvm_phys_is_valid(phys)) data->phys += granule; return 0; -- cgit v1.2.3 From af87fc03cfdf6893011df419588d27acdfb9c197 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 22:00:06 +0000 Subject: KVM: arm64: Make table->block changes parallel-aware stage2_map_walker_try_leaf() and friends now handle stage-2 PTEs generically, and perform the correct flush when a table PTE is removed. Additionally, they've been made parallel-aware, using an atomic break to take ownership of the PTE. Stop clearing the PTE in the pre-order callback and instead let stage2_map_walker_try_leaf() deal with it. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107220006.1895572-1-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 238f29389617..f814422ef795 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -841,21 +841,12 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - kvm_clear_pte(ctx->ptep); - - /* - * Invalidate the whole stage-2, as we may have numerous leaf - * entries below us which would otherwise need invalidating - * individually. - */ - kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); - ret = stage2_map_walker_try_leaf(ctx, data); + if (ret) + return ret; - mm_ops->put_page(ctx->ptep); mm_ops->free_removed_table(childp, ctx->level); - - return ret; + return 0; } static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, -- cgit v1.2.3 From 1577cb5823cefdff4416f272a88143ee933d97f5 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 22:00:33 +0000 Subject: KVM: arm64: Handle stage-2 faults in parallel The stage-2 map walker has been made parallel-aware, and as such can be called while only holding the read side of the MMU lock. Rip out the conditional locking in user_mem_abort() and instead grab the read lock. Continue to take the write lock from other callsites to kvm_pgtable_stage2_map(). Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107220033.1895655-1-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 3 ++- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- arch/arm64/kvm/hyp/pgtable.c | 5 +++-- arch/arm64/kvm/mmu.c | 31 +++++++------------------------ 4 files changed, 13 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 7634b6964779..a874ce0ce7b5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -412,6 +412,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg * @prot: Permissions and attributes for the mapping. * @mc: Cache of pre-allocated and zeroed memory from which to allocate * page-table pages. + * @flags: Flags to control the page-table walk (ex. a shared walk) * * The offset of @addr within a page is ignored, @size is rounded-up to * the next page boundary and @phys is rounded-down to the previous page @@ -433,7 +434,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg */ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - void *mc); + void *mc, enum kvm_pgtable_walk_flags flags); /** * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 735769886b55..f6d82bf33ce1 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -257,7 +257,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end, enum kvm_pgtable_prot prot) { return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, - prot, &host_s2_pool); + prot, &host_s2_pool, 0); } /* diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f814422ef795..5bca9610d040 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -912,7 +912,7 @@ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - void *mc) + void *mc, enum kvm_pgtable_walk_flags flags) { int ret; struct stage2_map_data map_data = { @@ -923,7 +923,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, - .flags = KVM_PGTABLE_WALK_TABLE_PRE | + .flags = flags | + KVM_PGTABLE_WALK_TABLE_PRE | KVM_PGTABLE_WALK_LEAF, .arg = &map_data, }; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 52e042399ba5..410c2a37fe32 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -861,7 +861,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, write_lock(&kvm->mmu_lock); ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, - &cache); + &cache, 0); write_unlock(&kvm->mmu_lock); if (ret) break; @@ -1156,7 +1156,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); - bool use_read_lock = false; unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); unsigned long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; @@ -1191,8 +1190,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; - use_read_lock = (fault_status == FSC_PERM && write_fault && - fault_granule == PAGE_SIZE); } else { vma_shift = get_vma_page_shift(vma, hva); } @@ -1291,15 +1288,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && device) return -ENOEXEC; - /* - * To reduce MMU contentions and enhance concurrency during dirty - * logging dirty logging, only acquire read lock for permission - * relaxation. - */ - if (use_read_lock) - read_lock(&kvm->mmu_lock); - else - write_lock(&kvm->mmu_lock); + read_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; @@ -1343,15 +1332,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * permissions only if vma_pagesize equals fault_granule. Otherwise, * kvm_pgtable_stage2_map() should be called to change block size. */ - if (fault_status == FSC_PERM && vma_pagesize == fault_granule) { + if (fault_status == FSC_PERM && vma_pagesize == fault_granule) ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); - } else { - WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n"); - + else ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, - memcache); - } + memcache, KVM_PGTABLE_WALK_SHARED); /* Mark the page dirty only if the fault is handled successfully */ if (writable && !ret) { @@ -1360,10 +1346,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } out_unlock: - if (use_read_lock) - read_unlock(&kvm->mmu_lock); - else - write_unlock(&kvm->mmu_lock); + read_unlock(&kvm->mmu_lock); kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret != -EAGAIN ? ret : 0; @@ -1569,7 +1552,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) */ kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, PAGE_SIZE, __pfn_to_phys(pfn), - KVM_PGTABLE_PROT_R, NULL); + KVM_PGTABLE_PROT_R, NULL, 0); return false; } -- cgit v1.2.3 From 579d7ebe90a332cc5b6c02db9250fd0816a64f63 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 3 Nov 2022 15:05:06 +0000 Subject: KVM: arm64: Fix kvm init failure when mode!=vhe and VA_BITS=52. For nvhe and protected modes, the hyp stage 1 page-tables were previously configured to have the same number of VA bits as the kernel's idmap. However, for kernel configs with VA_BITS=52 and where the kernel is loaded in physical memory below 48 bits, the idmap VA bits is actually smaller than the kernel's normal stage 1 VA bits. This can lead to kernel addresses that can't be mapped into the hypervisor, leading to kvm initialization failure during boot: kvm [1]: IPA Size Limit: 48 bits kvm [1]: Cannot map world-switch code kvm [1]: error initializing Hyp mode: -34 Fix this by ensuring that the hyp stage 1 VA size is the maximum of what's used for the idmap and the regular kernel stage 1. At the same time, refactor the code so that the hyp VA bits is only calculated in one place. Prior to 7ba8f2b2d652, the idmap was always 52 bits for a 52 VA bits kernel and therefore the hyp stage1 was also always 52 bits. Fixes: 7ba8f2b2d652 ("arm64: mm: use a 48-bit ID map when possible on 52-bit VA builds") Signed-off-by: Ryan Roberts [maz: commit message fixes] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221103150507.32948-2-ryan.roberts@arm.com --- arch/arm64/kvm/arm.c | 20 +++----------------- arch/arm64/kvm/mmu.c | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..803055da3ee3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1518,7 +1518,7 @@ static int kvm_init_vector_slots(void) return 0; } -static void cpu_prepare_hyp_mode(int cpu) +static void cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); unsigned long tcr; @@ -1534,23 +1534,9 @@ static void cpu_prepare_hyp_mode(int cpu) params->mair_el2 = read_sysreg(mair_el1); - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. - */ tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1; tcr &= ~TCR_T0SZ_MASK; - tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; + tcr |= TCR_T0SZ(hyp_va_bits); params->tcr_el2 = tcr; params->pgd_pa = kvm_mmu_get_httbr(); @@ -2054,7 +2040,7 @@ static int init_hyp_mode(void) } /* Prepare the CPU initialization parameters */ - cpu_prepare_hyp_mode(cpu); + cpu_prepare_hyp_mode(cpu, hyp_va_bits); } if (is_protected_kvm_enabled()) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8..4efb983cff43 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1618,6 +1618,8 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = { int kvm_mmu_init(u32 *hyp_va_bits) { int err; + u32 idmap_bits; + u32 kernel_bits; hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); @@ -1631,7 +1633,31 @@ int kvm_mmu_init(u32 *hyp_va_bits) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - *hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); + /* + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS_MIN, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, in some cases the ID map may be configured for fewer than + * the number of VA bits used by the regular kernel stage 1. This + * happens when VA_BITS=52 and the kernel image is placed in PA space + * below 48 bits. + * + * At EL2, there is only one TTBR register, and we can't switch between + * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom + * line: we need to use the extended range with *both* our translation + * tables. + * + * So use the maximum of the idmap VA bits and the regular kernel stage + * 1 VA bits to assure that the hypervisor can both ID map its code page + * and map any kernel memory. + */ + idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); + kernel_bits = vabits_actual; + *hyp_va_bits = max(idmap_bits, kernel_bits); + kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits); kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); kvm_debug("HYP VA range: %lx:%lx\n", -- cgit v1.2.3 From a0d37784bfd7f699986ba3a64cfeb68a03cb7fd0 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 3 Nov 2022 15:05:07 +0000 Subject: KVM: arm64: Fix PAR_TO_HPFAR() to work independently of PA_BITS. Kernel configs with PAGE_SIZE=64KB and PA_BITS=48 still advertise 52 bit IPA space on HW that implements LPA. This is by design (admitedly this is a very unlikely configuration in the real world). However on such a config, attempting to create a vm with the guest kernel placed above 48 bits in IPA space results in misbehaviour due to the hypervisor incorrectly interpretting a faulting IPA. Fix up PAR_TO_HPFAR() to always take 52 bits out of the PAR rather than masking to CONFIG_ARM64_PA_BITS. If the system has a smaller implemented PARange this should be safe because the bits are res0. A more robust approach would be to discover the IPA size in use by the page-table and mask based on that, to avoid relying on res0 reading back as zero. But this information is difficult to access safely from the code's location, so take the easy way out. Fixes: bc1d7de8c550 ("kvm: arm64: Add 52bit support for PAR to HPFAR conversoin") Signed-off-by: Ryan Roberts [maz: commit message fixes] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221103150507.32948-3-ryan.roberts@arm.com --- arch/arm64/include/asm/kvm_arm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8aa8492dafc0..a82f2493a72b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -340,9 +340,13 @@ * We have * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12] * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12] + * + * Always assume 52 bit PA since at this point, we don't know how many PA bits + * the page table has been set up for. This should be safe since unused address + * bits in PAR are res0. */ #define PAR_TO_HPFAR(par) \ - (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8) + (((par) & GENMASK_ULL(52 - 1, 12)) >> 8) #define ECN(x) { ESR_ELx_EC_##x, #x } -- cgit v1.2.3 From e1b3253340029b06f5f648d8390807cba4e4ec23 Mon Sep 17 00:00:00 2001 From: Zhiyuan Dai Date: Sun, 6 Nov 2022 20:30:40 +0800 Subject: KVM: arm64: Fix typo in comment Fix typo in comment (nVHE/VHE). Signed-off-by: Zhiyuan Dai Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1667737840-702-1-git-send-email-daizhiyuan@phytium.com.cn --- arch/arm64/kvm/hyp/vhe/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 96bec0ecf9dd..3b9e5464b5b3 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# Makefile for Kernel-based Virtual Machine module, HYP/VHE part # asflags-y := -D__KVM_VHE_HYPERVISOR__ -- cgit v1.2.3 From 0f4f7ae10ee4e6403659b2d9ddf05424eecde45b Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:34 +0000 Subject: KVM: arm64: Move hyp refcount manipulation helpers to common header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will soon need to manipulate 'struct hyp_page' refcounts from outside page_alloc.c, so move the helpers to a common header file to allow them to be reused easily. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Oliver Upton Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-2-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/memory.h | 22 ++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/page_alloc.c | 19 ------------------- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 592b7edb3edb..9422900e5c6a 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -38,6 +38,10 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr) #define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page)) #define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool) +/* + * Refcounting for 'struct hyp_page'. + * hyp_pool::lock must be held if atomic access to the refcount is required. + */ static inline int hyp_page_count(void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); @@ -45,4 +49,22 @@ static inline int hyp_page_count(void *addr) return p->refcount; } +static inline void hyp_page_ref_inc(struct hyp_page *p) +{ + BUG_ON(p->refcount == USHRT_MAX); + p->refcount++; +} + +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + BUG_ON(!p->refcount); + p->refcount--; + return (p->refcount == 0); +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + BUG_ON(p->refcount); + p->refcount = 1; +} #endif /* __KVM_HYP_MEMORY_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index d40f0b30b534..1ded09fc9b10 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -144,25 +144,6 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, return p; } -static inline void hyp_page_ref_inc(struct hyp_page *p) -{ - BUG_ON(p->refcount == USHRT_MAX); - p->refcount++; -} - -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) -{ - BUG_ON(!p->refcount); - p->refcount--; - return (p->refcount == 0); -} - -static inline void hyp_set_page_refcounted(struct hyp_page *p) -{ - BUG_ON(p->refcount); - p->refcount = 1; -} - static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) { if (hyp_page_ref_dec_and_test(p)) -- cgit v1.2.3 From 72a5bc0f153ce8ca80e9abbd1d9adec7d586915a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:35 +0000 Subject: KVM: arm64: Allow attaching of non-coalescable pages to a hyp pool All the contiguous pages used to initialize a 'struct hyp_pool' are considered coalescable, which means that the hyp page allocator will actively try to merge them with their buddies on the hyp_put_page() path. However, using hyp_put_page() on a page that is not part of the inital memory range given to a hyp_pool() is currently unsupported. In order to allow dynamically extending hyp pools at run-time, add a check to __hyp_attach_page() to allow inserting 'external' pages into the free-list of order 0. This will be necessary to allow lazy donation of pages from the host to the hypervisor when allocating guest stage-2 page-table pages at EL2. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-3-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 1ded09fc9b10..dad88e203598 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -93,11 +93,16 @@ static inline struct hyp_page *node_to_page(struct list_head *node) static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { + phys_addr_t phys = hyp_page_to_phys(p); unsigned short order = p->order; struct hyp_page *buddy; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); + /* Skip coalescing for 'external' pages being freed into the pool. */ + if (phys < pool->range_start || phys >= pool->range_end) + goto insert; + /* * Only the first struct hyp_page of a high-order page (otherwise known * as the 'head') should have p->order set. The non-head pages should @@ -116,6 +121,7 @@ static void __hyp_attach_page(struct hyp_pool *pool, p = min(p, buddy); } +insert: /* Mark the new head, and insert it */ p->order = order; page_add_to_list(p, &pool->free_area[order]); -- cgit v1.2.3 From 8e6bcc3a4502a0d8d065466efd888b6b59b85789 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:36 +0000 Subject: KVM: arm64: Back the hypervisor 'struct hyp_page' array for all memory The EL2 'vmemmap' array in nVHE Protected mode is currently very sparse: only memory pages owned by the hypervisor itself have a matching 'struct hyp_page'. However, as the size of this struct has been reduced significantly since its introduction, it appears that we can now afford to back the vmemmap for all of memory. Having an easily accessible 'struct hyp_page' for every physical page in memory provides the hypervisor with a simple mechanism to store metadata (e.g. a refcount) that wouldn't otherwise fit in the very limited number of software bits available in the host stage-2 page-table entries. This will be used in subsequent patches when pinning host memory pages for use by the hypervisor at EL2. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-4-will@kernel.org --- arch/arm64/include/asm/kvm_pkvm.h | 26 ++++++++++++++++++++++++++ arch/arm64/kvm/hyp/include/nvhe/mm.h | 14 +------------- arch/arm64/kvm/hyp/nvhe/mm.c | 31 +++++++++++++++++++++++++++---- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 4 +--- arch/arm64/kvm/hyp/nvhe/setup.c | 7 +++---- arch/arm64/kvm/pkvm.c | 18 ++---------------- 6 files changed, 60 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 9f4ad2a8df59..8f7b8a2314bb 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -14,6 +14,32 @@ extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); +static inline unsigned long +hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size) +{ + unsigned long nr_pages = reg->size >> PAGE_SHIFT; + unsigned long start, end; + + start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size; + end = start + nr_pages * vmemmap_entry_size; + start = ALIGN_DOWN(start, PAGE_SIZE); + end = ALIGN(end, PAGE_SIZE); + + return end - start; +} + +static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size) +{ + unsigned long res = 0, i; + + for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { + res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i], + vmemmap_entry_size); + } + + return res >> PAGE_SHIFT; +} + static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { unsigned long total = 0, i; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 42d8eb9bfe72..b2ee6d5df55b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -15,7 +15,7 @@ extern hyp_spinlock_t pkvm_pgd_lock; int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); -int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); +int hyp_back_vmemmap(phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); @@ -24,16 +24,4 @@ int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, unsigned long *haddr); int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr); -static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size, - unsigned long *start, unsigned long *end) -{ - unsigned long nr_pages = size >> PAGE_SHIFT; - struct hyp_page *p = hyp_phys_to_page(phys); - - *start = (unsigned long)p; - *end = *start + nr_pages * sizeof(struct hyp_page); - *start = ALIGN_DOWN(*start, PAGE_SIZE); - *end = ALIGN(*end, PAGE_SIZE); -} - #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 96193cb31a39..d3a3b47181de 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -129,13 +129,36 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return ret; } -int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back) +int hyp_back_vmemmap(phys_addr_t back) { - unsigned long start, end; + unsigned long i, start, size, end = 0; + int ret; - hyp_vmemmap_range(phys, size, &start, &end); + for (i = 0; i < hyp_memblock_nr; i++) { + start = hyp_memory[i].base; + start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE); + /* + * The begining of the hyp_vmemmap region for the current + * memblock may already be backed by the page backing the end + * the previous region, so avoid mapping it twice. + */ + start = max(start, end); + + end = hyp_memory[i].base + hyp_memory[i].size; + end = PAGE_ALIGN((u64)hyp_phys_to_page(end)); + if (start >= end) + continue; + + size = end - start; + ret = __pkvm_create_mappings(start, size, back, PAGE_HYP); + if (ret) + return ret; + + memset(hyp_phys_to_virt(back), 0, size); + back += size; + } - return __pkvm_create_mappings(start, end - start, back, PAGE_HYP); + return 0; } static void *__hyp_bp_vect_base; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index dad88e203598..803ba3222e75 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -236,10 +236,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - for (i = 0; i < nr_pages; i++) { - p[i].order = 0; + for (i = 0; i < nr_pages; i++) hyp_set_page_refcounted(&p[i]); - } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index e8d4ea2fcfa0..579eb4f73476 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -31,12 +31,11 @@ static struct hyp_pool hpool; static int divide_memory_pool(void *virt, unsigned long size) { - unsigned long vstart, vend, nr_pages; + unsigned long nr_pages; hyp_early_alloc_init(virt, size); - hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend); - nr_pages = (vend - vstart) >> PAGE_SHIFT; + nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page)); vmemmap_base = hyp_early_alloc_contig(nr_pages); if (!vmemmap_base) return -ENOMEM; @@ -78,7 +77,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base)); + ret = hyp_back_vmemmap(hyp_virt_to_phys(vmemmap_base)); if (ret) return ret; diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index ebecb7c045f4..34229425b25d 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -53,7 +53,7 @@ static int __init register_memblock_regions(void) void __init kvm_hyp_reserve(void) { - u64 nr_pages, prev, hyp_mem_pages = 0; + u64 hyp_mem_pages = 0; int ret; if (!is_hyp_mode_available() || is_kernel_in_hyp_mode()) @@ -71,21 +71,7 @@ void __init kvm_hyp_reserve(void) hyp_mem_pages += hyp_s1_pgtable_pages(); hyp_mem_pages += host_s2_pgtable_pages(); - - /* - * The hyp_vmemmap needs to be backed by pages, but these pages - * themselves need to be present in the vmemmap, so compute the number - * of pages needed by looking for a fixed point. - */ - nr_pages = 0; - do { - prev = nr_pages; - nr_pages = hyp_mem_pages + prev; - nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE, - PAGE_SIZE); - nr_pages += __hyp_pgtable_max_pages(nr_pages); - } while (nr_pages != prev); - hyp_mem_pages += nr_pages; + hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE); /* * Try to allocate a PMD-aligned region to reduce TLB pressure once -- cgit v1.2.3 From 0d16d12eb26ef85602ef8a678d94825a66772774 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:37 +0000 Subject: KVM: arm64: Fix-up hyp stage-1 refcounts for all pages mapped at EL2 In order to allow unmapping arbitrary memory pages from the hypervisor stage-1 page-table, fix-up the initial refcount for pages that have been mapped before the 'vmemmap' array was up and running so that it accurately accounts for all existing hypervisor mappings. This is achieved by traversing the entire hypervisor stage-1 page-table during initialisation of EL2 and updating the corresponding 'struct hyp_page' for each valid mapping. Reviewed-by: Oliver Upton Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-5-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/setup.c | 62 ++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 579eb4f73476..8f2726d7e201 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -185,12 +185,11 @@ static void hpool_put_page(void *addr) hyp_put_page(&hpool, addr); } -static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int fix_host_ownership_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) { - struct kvm_pgtable_mm_ops *mm_ops = arg; enum kvm_pgtable_prot prot; enum pkvm_page_state state; kvm_pte_t pte = *ptep; @@ -199,15 +198,6 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, if (!kvm_pte_valid(pte)) return 0; - /* - * Fix-up the refcount for the page-table pages as the early allocator - * was unable to access the hyp_vmemmap and so the buddy allocator has - * initialised the refcount to '1'. - */ - mm_ops->get_page(ptep); - if (flag != KVM_PGTABLE_WALK_LEAF) - return 0; - if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; @@ -236,12 +226,30 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); } -static int finalize_host_mappings(void) +static int fix_hyp_pgtable_refcnt_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + struct kvm_pgtable_mm_ops *mm_ops = arg; + kvm_pte_t pte = *ptep; + + /* + * Fix-up the refcount for the page-table pages as the early allocator + * was unable to access the hyp_vmemmap and so the buddy allocator has + * initialised the refcount to '1'. + */ + if (kvm_pte_valid(pte)) + mm_ops->get_page(ptep); + + return 0; +} + +static int fix_host_ownership(void) { struct kvm_pgtable_walker walker = { - .cb = finalize_host_mappings_walker, - .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pkvm_pgtable.mm_ops, + .cb = fix_host_ownership_walker, + .flags = KVM_PGTABLE_WALK_LEAF, }; int i, ret; @@ -257,6 +265,18 @@ static int finalize_host_mappings(void) return 0; } +static int fix_hyp_pgtable_refcnt(void) +{ + struct kvm_pgtable_walker walker = { + .cb = fix_hyp_pgtable_refcnt_walker, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + .arg = pkvm_pgtable.mm_ops, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), + &walker); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -286,7 +306,11 @@ void __noreturn __pkvm_init_finalise(void) }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; - ret = finalize_host_mappings(); + ret = fix_host_ownership(); + if (ret) + goto out; + + ret = fix_hyp_pgtable_refcnt(); if (ret) goto out; -- cgit v1.2.3 From 33bc332d4061e95db55594893c4f80105b1dd813 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:38 +0000 Subject: KVM: arm64: Unify identifiers used to distinguish host and hypervisor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'pkvm_component_id' enum type provides constants to refer to the host and the hypervisor, yet this information is duplicated by the 'pkvm_hyp_id' constant. Remove the definition of 'pkvm_hyp_id' and move the 'pkvm_component_id' type definition to 'mem_protect.h' so that it can be used outside of the memory protection code, for example when initialising the owner for hypervisor-owned pages. Reviewed-by: Oliver Upton Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-6-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 6 +++++- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 8 -------- arch/arm64/kvm/hyp/nvhe/setup.c | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 80e99836eac7..f5705a1e972f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -51,7 +51,11 @@ struct host_kvm { }; extern struct host_kvm host_kvm; -extern const u8 pkvm_hyp_id; +/* This corresponds to page-table locking order */ +enum pkvm_component_id { + PKVM_ID_HOST, + PKVM_ID_HYP, +}; int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 1e78acf9662e..ff86f5bd230f 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -26,8 +26,6 @@ struct host_kvm host_kvm; static struct hyp_pool host_s2_pool; -const u8 pkvm_hyp_id = 1; - static void host_lock_component(void) { hyp_spin_lock(&host_kvm.lock); @@ -380,12 +378,6 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) BUG_ON(ret && ret != -EAGAIN); } -/* This corresponds to locking order */ -enum pkvm_component_id { - PKVM_ID_HOST, - PKVM_ID_HYP, -}; - struct pkvm_mem_transition { u64 nr_pages; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 8f2726d7e201..0312c9c74a5a 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -212,7 +212,7 @@ static int fix_host_ownership_walker(u64 addr, u64 end, u32 level, state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); switch (state) { case PKVM_PAGE_OWNED: - return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id); + return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); case PKVM_PAGE_SHARED_OWNED: prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); break; -- cgit v1.2.3 From 1ed5c24c26f48ff61dc5d97c655769821f36a622 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:39 +0000 Subject: KVM: arm64: Implement do_donate() helper for donating memory Transferring ownership information of a memory region from one component to another can be achieved using a "donate" operation, which results in the previous owner losing access to the underlying pages entirely and the new owner having exclusive access to the page. Implement a do_donate() helper, along the same lines as do_{un,}share, and provide this functionality for the host-{to,from}-hyp cases as this will later be used to donate/reclaim memory pages to store VM metadata at EL2. In a similar manner to the sharing transitions, permission checks are performed by the hypervisor to ensure that the component initiating the transition really is the owner of the page and also that the completer does not currently have a page mapped at the target address. Tested-by: Vincent Donnefort Co-developed-by: Quentin Perret Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-7-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 239 ++++++++++++++++++++++++++ 2 files changed, 241 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index f5705a1e972f..c87b19b2d468 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -60,6 +60,8 @@ enum pkvm_component_id { int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); int __pkvm_host_unshare_hyp(u64 pfn); +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages); +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ff86f5bd230f..10069cd32787 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -391,6 +391,9 @@ struct pkvm_mem_transition { /* Address in the completer's address space */ u64 completer_addr; } host; + struct { + u64 completer_addr; + } hyp; }; } initiator; @@ -404,6 +407,10 @@ struct pkvm_mem_share { const enum kvm_pgtable_prot completer_prot; }; +struct pkvm_mem_donation { + const struct pkvm_mem_transition tx; +}; + struct check_walk_data { enum pkvm_page_state desired; enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); @@ -503,6 +510,46 @@ static int host_initiate_unshare(u64 *completer_addr, return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED); } +static int host_initiate_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u8 owner_id = tx->completer.id; + u64 size = tx->nr_pages * PAGE_SIZE; + + *completer_addr = tx->initiator.host.completer_addr; + return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id); +} + +static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) +{ + return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || + tx->initiator.id != PKVM_ID_HYP); +} + +static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx, + enum pkvm_page_state state) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__host_ack_skip_pgtable_check(tx)) + return 0; + + return __host_check_page_state_range(addr, size, state); +} + +static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + return __host_ack_transition(addr, tx, PKVM_NOPAGE); +} + +static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u8 host_id = tx->completer.id; + + return host_stage2_set_owner_locked(addr, size, host_id); +} + static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) { if (!kvm_pte_valid(pte)) @@ -523,6 +570,27 @@ static int __hyp_check_page_state_range(u64 addr, u64 size, return check_page_state_range(&pkvm_pgtable, addr, size, &d); } +static int hyp_request_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.hyp.completer_addr; + return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + +static int hyp_initiate_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + int ret; + + *completer_addr = tx->initiator.hyp.completer_addr; + ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size); + return (ret != size) ? -EFAULT : 0; +} + static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) { return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || @@ -554,6 +622,16 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) PKVM_PAGE_SHARED_BORROWED); } +static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); +} + static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, enum kvm_pgtable_prot perms) { @@ -572,6 +650,15 @@ static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx) return (ret != size) ? -EFAULT : 0; } +static int hyp_complete_donation(u64 addr, + const struct pkvm_mem_transition *tx) +{ + void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); + enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); + + return pkvm_create_mappings_locked(start, end, prot); +} + static int check_share(struct pkvm_mem_share *share) { const struct pkvm_mem_transition *tx = &share->tx; @@ -724,6 +811,94 @@ static int do_unshare(struct pkvm_mem_share *share) return WARN_ON(__do_unshare(share)); } +static int check_donation(struct pkvm_mem_donation *donation) +{ + const struct pkvm_mem_transition *tx = &donation->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_owned_transition(&completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_request_donation(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_ack_donation(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_ack_donation(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int __do_donate(struct pkvm_mem_donation *donation) +{ + const struct pkvm_mem_transition *tx = &donation->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_donation(&completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_initiate_donation(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_complete_donation(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_complete_donation(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/* + * do_donate(): + * + * The page owner transfers ownership to another component, losing access + * as a consequence. + * + * Initiator: OWNED => NOPAGE + * Completer: NOPAGE => OWNED + */ +static int do_donate(struct pkvm_mem_donation *donation) +{ + int ret; + + ret = check_donation(donation); + if (ret) + return ret; + + return WARN_ON(__do_donate(donation)); +} + int __pkvm_host_share_hyp(u64 pfn) { int ret; @@ -789,3 +964,67 @@ int __pkvm_host_unshare_hyp(u64 pfn) return ret; } + +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_donation donation = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_donate(&donation); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_donation donation = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HYP, + .addr = hyp_addr, + .hyp = { + .completer_addr = host_addr, + }, + }, + .completer = { + .id = PKVM_ID_HOST, + }, + }, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_donate(&donation); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} -- cgit v1.2.3 From 43c1ff8b75011bc3e3e923adf31ba815864a2494 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:40 +0000 Subject: KVM: arm64: Prevent the donation of no-map pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory regions marked as "no-map" in the host device-tree routinely include TrustZone carev-outs and DMA pools. Although donating such pages to the hypervisor may not breach confidentiality, it could be used to corrupt its state in uncontrollable ways. To prevent this, let's block host-initiated memory transitions targeting "no-map" pages altogether in nVHE protected mode as there should be no valid reason to do this in current operation. Thankfully, the pKVM EL2 hypervisor has a full copy of the host's list of memblock regions, so we can easily check for the presence of the MEMBLOCK_NOMAP flag on a region containing pages being donated from the host. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-8-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 10069cd32787..f7e3afaf9f11 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -193,7 +193,7 @@ struct kvm_mem_range { u64 end; }; -static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) +static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) { int cur, left = 0, right = hyp_memblock_nr; struct memblock_region *reg; @@ -216,18 +216,28 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) } else { range->start = reg->base; range->end = end; - return true; + return reg; } } - return false; + return NULL; } bool addr_is_memory(phys_addr_t phys) { struct kvm_mem_range range; - return find_mem_range(phys, &range); + return !!find_mem_range(phys, &range); +} + +static bool addr_is_allowed_memory(phys_addr_t phys) +{ + struct memblock_region *reg; + struct kvm_mem_range range; + + reg = find_mem_range(phys, &range); + + return reg && !(reg->flags & MEMBLOCK_NOMAP); } static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) @@ -346,7 +356,7 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr static int host_stage2_idmap(u64 addr) { struct kvm_mem_range range; - bool is_memory = find_mem_range(addr, &range); + bool is_memory = !!find_mem_range(addr, &range); enum kvm_pgtable_prot prot; int ret; @@ -424,7 +434,7 @@ static int __check_page_state_visitor(u64 addr, u64 end, u32 level, struct check_walk_data *d = arg; kvm_pte_t pte = *ptep; - if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte))) + if (kvm_pte_valid(pte) && !addr_is_allowed_memory(kvm_pte_to_phys(pte))) return -EINVAL; return d->get_page_state(pte) == d->desired ? 0 : -EPERM; -- cgit v1.2.3 From 9926cfce8dcb880255f30ab9ac930add787e1ead Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:41 +0000 Subject: KVM: arm64: Add helpers to pin memory shared with the hypervisor at EL2 Add helpers allowing the hypervisor to check whether a range of pages are currently shared by the host, and 'pin' them if so by blocking host unshare operations until the memory has been unpinned. This will allow the hypervisor to take references on host-provided data-structures (e.g. 'struct kvm') with the guarantee that these pages will remain in a stable state until the hypervisor decides to release them, for example during guest teardown. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-9-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 3 ++ arch/arm64/kvm/hyp/include/nvhe/memory.h | 7 +++- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 48 +++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index c87b19b2d468..998bf165af71 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -69,6 +69,9 @@ int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); +int hyp_pin_shared_mem(void *from, void *to); +void hyp_unpin_shared_mem(void *from, void *to); + static __always_inline void __load_host_stage2(void) { if (static_branch_likely(&kvm_protected_mode_initialized)) diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 9422900e5c6a..ab205c4d6774 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -55,10 +55,15 @@ static inline void hyp_page_ref_inc(struct hyp_page *p) p->refcount++; } -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +static inline void hyp_page_ref_dec(struct hyp_page *p) { BUG_ON(!p->refcount); p->refcount--; +} + +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + hyp_page_ref_dec(p); return (p->refcount == 0); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index f7e3afaf9f11..83c2f67e1b58 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -625,6 +625,9 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) { u64 size = tx->nr_pages * PAGE_SIZE; + if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr)) + return -EBUSY; + if (__hyp_ack_skip_pgtable_check(tx)) return 0; @@ -1038,3 +1041,48 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) return ret; } + +int hyp_pin_shared_mem(void *from, void *to) +{ + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); + u64 size = end - start; + int ret; + + host_lock_component(); + hyp_lock_component(); + + ret = __host_check_page_state_range(__hyp_pa(start), size, + PKVM_PAGE_SHARED_OWNED); + if (ret) + goto unlock; + + ret = __hyp_check_page_state_range(start, size, + PKVM_PAGE_SHARED_BORROWED); + if (ret) + goto unlock; + + for (cur = start; cur < end; cur += PAGE_SIZE) + hyp_page_ref_inc(hyp_virt_to_page(cur)); + +unlock: + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +void hyp_unpin_shared_mem(void *from, void *to) +{ + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); + + host_lock_component(); + hyp_lock_component(); + + for (cur = start; cur < end; cur += PAGE_SIZE) + hyp_page_ref_dec(hyp_virt_to_page(cur)); + + hyp_unlock_component(); + host_unlock_component(); +} -- cgit v1.2.3 From 4d968b12e6bbe4440f4f220c41d779e02df8af1a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:42 +0000 Subject: KVM: arm64: Include asm/kvm_mmu.h in nvhe/mem_protect.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nvhe/mem_protect.h refers to __load_stage2() in the definition of __load_host_stage2() but doesn't include the relevant header. Include asm/kvm_mmu.h in nvhe/mem_protect.h so that users of the latter don't have to do this themselves. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-10-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 998bf165af71..3bea816296dc 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -8,6 +8,7 @@ #define __KVM_NVHE_MEM_PROTECT__ #include #include +#include #include #include #include -- cgit v1.2.3 From 1c80002e3264552d8b9c0e162e09aa4087403716 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 10 Nov 2022 19:02:43 +0000 Subject: KVM: arm64: Add hyp_spinlock_t static initializer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a static initializer macro for 'hyp_spinlock_t' so that it is straightforward to instantiate global locks at EL2. This will be later utilised for locking the VM table in the hypervisor. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Fuad Tabba Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-11-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h index 4652fd04bdbe..7c7ea8c55405 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h +++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h @@ -28,9 +28,17 @@ typedef union hyp_spinlock { }; } hyp_spinlock_t; +#define __HYP_SPIN_LOCK_INITIALIZER \ + { .__val = 0 } + +#define __HYP_SPIN_LOCK_UNLOCKED \ + ((hyp_spinlock_t) __HYP_SPIN_LOCK_INITIALIZER) + +#define DEFINE_HYP_SPINLOCK(x) hyp_spinlock_t x = __HYP_SPIN_LOCK_UNLOCKED + #define hyp_spin_lock_init(l) \ do { \ - *(l) = (hyp_spinlock_t){ .__val = 0 }; \ + *(l) = __HYP_SPIN_LOCK_UNLOCKED; \ } while (0) static inline void hyp_spin_lock(hyp_spinlock_t *lock) -- cgit v1.2.3 From 5304002dc3754a5663d75c977bfa2d9e3c08906d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:44 +0000 Subject: KVM: arm64: Rename 'host_kvm' to 'host_mmu' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for introducing VM and vCPU state at EL2, rename the existing 'struct host_kvm' and its singleton 'host_kvm' instance to 'host_mmu' so as to avoid confusion between the structure tracking the host stage-2 MMU state and the host instance of a 'struct kvm' for a protected guest. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-12-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 6 ++-- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 46 +++++++++++++-------------- 2 files changed, 26 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 3bea816296dc..0a6d3e7f2a43 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -44,13 +44,13 @@ static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) return prot & PKVM_PAGE_STATE_PROT_MASK; } -struct host_kvm { +struct host_mmu { struct kvm_arch arch; struct kvm_pgtable pgt; struct kvm_pgtable_mm_ops mm_ops; hyp_spinlock_t lock; }; -extern struct host_kvm host_kvm; +extern struct host_mmu host_mmu; /* This corresponds to page-table locking order */ enum pkvm_component_id { @@ -76,7 +76,7 @@ void hyp_unpin_shared_mem(void *from, void *to); static __always_inline void __load_host_stage2(void) { if (static_branch_likely(&kvm_protected_mode_initialized)) - __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); + __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); else write_sysreg(0, vttbr_el2); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 83c2f67e1b58..06c6a24c0eae 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -22,18 +22,18 @@ #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) extern unsigned long hyp_nr_cpus; -struct host_kvm host_kvm; +struct host_mmu host_mmu; static struct hyp_pool host_s2_pool; static void host_lock_component(void) { - hyp_spin_lock(&host_kvm.lock); + hyp_spin_lock(&host_mmu.lock); } static void host_unlock_component(void) { - hyp_spin_unlock(&host_kvm.lock); + hyp_spin_unlock(&host_mmu.lock); } static void hyp_lock_component(void) @@ -88,7 +88,7 @@ static int prepare_s2_pool(void *pgt_pool_base) if (ret) return ret; - host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) { + host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_pages_exact = host_s2_zalloc_pages_exact, .zalloc_page = host_s2_zalloc_page, .phys_to_virt = hyp_phys_to_virt, @@ -109,7 +109,7 @@ static void prepare_host_vtcr(void) parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val); phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); - host_kvm.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, + host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, id_aa64mmfr1_el1_sys_val, phys_shift); } @@ -117,25 +117,25 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr int kvm_host_prepare_stage2(void *pgt_pool_base) { - struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; + struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; int ret; prepare_host_vtcr(); - hyp_spin_lock_init(&host_kvm.lock); - mmu->arch = &host_kvm.arch; + hyp_spin_lock_init(&host_mmu.lock); + mmu->arch = &host_mmu.arch; ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; - ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu, - &host_kvm.mm_ops, KVM_HOST_S2_FLAGS, + ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu, + &host_mmu.mm_ops, KVM_HOST_S2_FLAGS, host_stage2_force_pte_cb); if (ret) return ret; - mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); - mmu->pgt = &host_kvm.pgt; + mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd); + mmu->pgt = &host_mmu.pgt; atomic64_set(&mmu->vmid.id, 0); return 0; @@ -143,19 +143,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) int __pkvm_prot_finalize(void) { - struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; + struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); if (params->hcr_el2 & HCR_VM) return -EPERM; params->vttbr = kvm_get_vttbr(mmu); - params->vtcr = host_kvm.arch.vtcr; + params->vtcr = host_mmu.arch.vtcr; params->hcr_el2 |= HCR_VM; kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); - __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); + __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); /* * Make sure to have an ISB before the TLB maintenance below but only @@ -173,7 +173,7 @@ int __pkvm_prot_finalize(void) static int host_stage2_unmap_dev_all(void) { - struct kvm_pgtable *pgt = &host_kvm.pgt; + struct kvm_pgtable *pgt = &host_mmu.pgt; struct memblock_region *reg; u64 addr = 0; int i, ret; @@ -258,7 +258,7 @@ static bool range_is_memory(u64 start, u64 end) static inline int __host_stage2_idmap(u64 start, u64 end, enum kvm_pgtable_prot prot) { - return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, + return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start, prot, &host_s2_pool); } @@ -271,7 +271,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end, #define host_stage2_try(fn, ...) \ ({ \ int __ret; \ - hyp_assert_lock_held(&host_kvm.lock); \ + hyp_assert_lock_held(&host_mmu.lock); \ __ret = fn(__VA_ARGS__); \ if (__ret == -ENOMEM) { \ __ret = host_stage2_unmap_dev_all(); \ @@ -294,8 +294,8 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) u32 level; int ret; - hyp_assert_lock_held(&host_kvm.lock); - ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level); + hyp_assert_lock_held(&host_mmu.lock); + ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level); if (ret) return ret; @@ -327,7 +327,7 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size, int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, addr, size, &host_s2_pool, owner_id); } @@ -468,8 +468,8 @@ static int __host_check_page_state_range(u64 addr, u64 size, .get_page_state = host_get_page_state, }; - hyp_assert_lock_held(&host_kvm.lock); - return check_page_state_range(&host_kvm.pgt, addr, size, &d); + hyp_assert_lock_held(&host_mmu.lock); + return check_page_state_range(&host_mmu.pgt, addr, size, &d); } static int __host_set_page_state_range(u64 addr, u64 size, -- cgit v1.2.3 From a1ec5c70d3f63d8a143fb83cd7f53bd8ff2f72c8 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 10 Nov 2022 19:02:45 +0000 Subject: KVM: arm64: Add infrastructure to create and track pKVM instances at EL2 Introduce a global table (and lock) to track pKVM instances at EL2, and provide hypercalls that can be used by the untrusted host to create and destroy pKVM VMs and their vCPUs. pKVM VM/vCPU state is directly accessible only by the trusted hypervisor (EL2). Each pKVM VM is directly associated with an untrusted host KVM instance, and is referenced by the host using an opaque handle. Future patches will provide hypercalls to allow the host to initialize/set/get pKVM VM/vCPU state using the opaque handle. Tested-by: Vincent Donnefort Signed-off-by: Fuad Tabba Co-developed-by: Will Deacon Signed-off-by: Will Deacon [maz: silence warning on unmap_donated_memory_noclear()] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-13-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 3 + arch/arm64/include/asm/kvm_host.h | 8 + arch/arm64/include/asm/kvm_pgtable.h | 8 + arch/arm64/include/asm/kvm_pkvm.h | 8 + arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 3 + arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 58 ++++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 31 +++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 14 + arch/arm64/kvm/hyp/nvhe/pkvm.c | 380 ++++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/setup.c | 8 + arch/arm64/kvm/hyp/pgtable.c | 9 + arch/arm64/kvm/pkvm.c | 1 + 12 files changed, 531 insertions(+) create mode 100644 arch/arm64/kvm/hyp/include/nvhe/pkvm.h (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 53035763e48e..de52ba775d48 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -76,6 +76,9 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, + __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 45e2136322ba..d3dd7ab9c79e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -115,6 +115,8 @@ struct kvm_smccc_features { unsigned long vendor_hyp_bmap; }; +typedef unsigned int pkvm_handle_t; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -166,6 +168,12 @@ struct kvm_arch { /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; + + /* + * For an untrusted host VM, 'pkvm_handle' is used to lookup + * the associated pKVM instance in the hypervisor. + */ + pkvm_handle_t pkvm_handle; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 3252eb50ecfe..15c389db1931 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -296,6 +296,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); */ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); +/** + * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD + * @vtcr: Content of the VTCR register. + * + * Return: the size (in bytes) of the stage-2 PGD + */ +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr); + /** * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 8f7b8a2314bb..f4e3133d6550 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -9,6 +9,9 @@ #include #include +/* Maximum number of VMs that can co-exist under pKVM. */ +#define KVM_MAX_PVMS 255 + #define HYP_MEMBLOCK_REGIONS 128 extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; @@ -40,6 +43,11 @@ static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size) return res >> PAGE_SHIFT; } +static inline unsigned long hyp_vm_table_pages(void) +{ + return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT; +} + static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { unsigned long total = 0, i; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0a6d3e7f2a43..ce9a796a85ee 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -11,6 +11,7 @@ #include #include #include +#include #include /* @@ -68,10 +69,12 @@ bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); +int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); void hyp_unpin_shared_mem(void *from, void *to); +void reclaim_guest_pages(struct pkvm_hyp_vm *vm); static __always_inline void __load_host_stage2(void) { diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h new file mode 100644 index 000000000000..8c653a3b9501 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_NVHE_PKVM_H__ +#define __ARM64_KVM_NVHE_PKVM_H__ + +#include + +/* + * Holds the relevant data for maintaining the vcpu state completely at hyp. + */ +struct pkvm_hyp_vcpu { + struct kvm_vcpu vcpu; + + /* Backpointer to the host's (untrusted) vCPU instance. */ + struct kvm_vcpu *host_vcpu; +}; + +/* + * Holds the relevant data for running a protected vm. + */ +struct pkvm_hyp_vm { + struct kvm kvm; + + /* Backpointer to the host's (untrusted) KVM instance. */ + struct kvm *host_kvm; + + /* The guest's stage-2 page-table managed by the hypervisor. */ + struct kvm_pgtable pgt; + + /* + * The number of vcpus initialized and ready to run. + * Modifying this is protected by 'vm_table_lock'. + */ + unsigned int nr_vcpus; + + /* Array of the hyp vCPU structures for this VM. */ + struct pkvm_hyp_vcpu *vcpus[]; +}; + +static inline struct pkvm_hyp_vm * +pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm); +} + +void pkvm_hyp_vm_table_init(void *tbl); + +int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, + unsigned long pgd_hva); +int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, + unsigned long vcpu_hva); +int __pkvm_teardown_vm(pkvm_handle_t handle); + +#endif /* __ARM64_KVM_NVHE_PKVM_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 3cea4b6ac23e..b5f3fcfe9135 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -15,6 +15,7 @@ #include #include +#include #include DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); @@ -191,6 +192,33 @@ static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt) __pkvm_vcpu_init_traps(kern_hyp_va(vcpu)); } +static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); + DECLARE_REG(unsigned long, vm_hva, host_ctxt, 2); + DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 3); + + host_kvm = kern_hyp_va(host_kvm); + cpu_reg(host_ctxt, 1) = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva); +} + +static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2); + DECLARE_REG(unsigned long, vcpu_hva, host_ctxt, 3); + + host_vcpu = kern_hyp_va(host_vcpu); + cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva); +} + +static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle); +} + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -220,6 +248,9 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__vgic_v3_save_aprs), HANDLE_FUNC(__vgic_v3_restore_aprs), HANDLE_FUNC(__pkvm_vcpu_init_traps), + HANDLE_FUNC(__pkvm_init_vm), + HANDLE_FUNC(__pkvm_init_vcpu), + HANDLE_FUNC(__pkvm_teardown_vm), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 06c6a24c0eae..459957b3082e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -141,6 +141,20 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) return 0; } +int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) +{ + vm->pgt.pgd = pgd; + return 0; +} + +void reclaim_guest_pages(struct pkvm_hyp_vm *vm) +{ + unsigned long nr_pages; + + nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(vm->pgt.pgd), nr_pages)); +} + int __pkvm_prot_finalize(void) { struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 85d3b7ae720f..135c9a095eca 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include #include /* @@ -183,3 +186,380 @@ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) pvm_init_traps_aa64mmfr0(vcpu); pvm_init_traps_aa64mmfr1(vcpu); } + +/* + * Start the VM table handle at the offset defined instead of at 0. + * Mainly for sanity checking and debugging. + */ +#define HANDLE_OFFSET 0x1000 + +static unsigned int vm_handle_to_idx(pkvm_handle_t handle) +{ + return handle - HANDLE_OFFSET; +} + +static pkvm_handle_t idx_to_vm_handle(unsigned int idx) +{ + return idx + HANDLE_OFFSET; +} + +/* + * Spinlock for protecting state related to the VM table. Protects writes + * to 'vm_table' and 'nr_table_entries' as well as reads and writes to + * 'last_hyp_vcpu_lookup'. + */ +static DEFINE_HYP_SPINLOCK(vm_table_lock); + +/* + * The table of VM entries for protected VMs in hyp. + * Allocated at hyp initialization and setup. + */ +static struct pkvm_hyp_vm **vm_table; + +void pkvm_hyp_vm_table_init(void *tbl) +{ + WARN_ON(vm_table); + vm_table = tbl; +} + +/* + * Return the hyp vm structure corresponding to the handle. + */ +static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle) +{ + unsigned int idx = vm_handle_to_idx(handle); + + if (unlikely(idx >= KVM_MAX_PVMS)) + return NULL; + + return vm_table[idx]; +} + +static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu) +{ + if (host_vcpu) + hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1); +} + +static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[], + unsigned int nr_vcpus) +{ + int i; + + for (i = 0; i < nr_vcpus; i++) + unpin_host_vcpu(hyp_vcpus[i]->host_vcpu); +} + +static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, + unsigned int nr_vcpus) +{ + hyp_vm->host_kvm = host_kvm; + hyp_vm->kvm.created_vcpus = nr_vcpus; + hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr; +} + +static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, + struct pkvm_hyp_vm *hyp_vm, + struct kvm_vcpu *host_vcpu, + unsigned int vcpu_idx) +{ + int ret = 0; + + if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1)) + return -EBUSY; + + if (host_vcpu->vcpu_idx != vcpu_idx) { + ret = -EINVAL; + goto done; + } + + hyp_vcpu->host_vcpu = host_vcpu; + + hyp_vcpu->vcpu.kvm = &hyp_vm->kvm; + hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id); + hyp_vcpu->vcpu.vcpu_idx = vcpu_idx; + + hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu; +done: + if (ret) + unpin_host_vcpu(host_vcpu); + return ret; +} + +static int find_free_vm_table_entry(struct kvm *host_kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_PVMS; ++i) { + if (!vm_table[i]) + return i; + } + + return -ENOMEM; +} + +/* + * Allocate a VM table entry and insert a pointer to the new vm. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. + */ +static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, + struct pkvm_hyp_vm *hyp_vm) +{ + struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; + int idx; + + hyp_assert_lock_held(&vm_table_lock); + + /* + * Initializing protected state might have failed, yet a malicious + * host could trigger this function. Thus, ensure that 'vm_table' + * exists. + */ + if (unlikely(!vm_table)) + return -EINVAL; + + idx = find_free_vm_table_entry(host_kvm); + if (idx < 0) + return idx; + + hyp_vm->kvm.arch.pkvm_handle = idx_to_vm_handle(idx); + + /* VMID 0 is reserved for the host */ + atomic64_set(&mmu->vmid.id, idx + 1); + + mmu->arch = &hyp_vm->kvm.arch; + mmu->pgt = &hyp_vm->pgt; + + vm_table[idx] = hyp_vm; + return hyp_vm->kvm.arch.pkvm_handle; +} + +/* + * Deallocate and remove the VM table entry corresponding to the handle. + */ +static void remove_vm_table_entry(pkvm_handle_t handle) +{ + hyp_assert_lock_held(&vm_table_lock); + vm_table[vm_handle_to_idx(handle)] = NULL; +} + +static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus) +{ + return size_add(sizeof(struct pkvm_hyp_vm), + size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus)); +} + +static void *map_donated_memory_noclear(unsigned long host_va, size_t size) +{ + void *va = (void *)kern_hyp_va(host_va); + + if (!PAGE_ALIGNED(va)) + return NULL; + + if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va), + PAGE_ALIGN(size) >> PAGE_SHIFT)) + return NULL; + + return va; +} + +static void *map_donated_memory(unsigned long host_va, size_t size) +{ + void *va = map_donated_memory_noclear(host_va, size); + + if (va) + memset(va, 0, size); + + return va; +} + +static void __unmap_donated_memory(void *va, size_t size) +{ + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va), + PAGE_ALIGN(size) >> PAGE_SHIFT)); +} + +static void unmap_donated_memory(void *va, size_t size) +{ + if (!va) + return; + + memset(va, 0, size); + __unmap_donated_memory(va, size); +} + +static void __maybe_unused unmap_donated_memory_noclear(void *va, size_t size) +{ + if (!va) + return; + + __unmap_donated_memory(va, size); +} + +/* + * Initialize the hypervisor copy of the protected VM state using the + * memory donated by the host. + * + * Unmaps the donated memory from the host at stage 2. + * + * host_kvm: A pointer to the host's struct kvm. + * vm_hva: The host va of the area being donated for the VM state. + * Must be page aligned. + * pgd_hva: The host va of the area being donated for the stage-2 PGD for + * the VM. Must be page aligned. Its size is implied by the VM's + * VTCR. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. + */ +int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, + unsigned long pgd_hva) +{ + struct pkvm_hyp_vm *hyp_vm = NULL; + size_t vm_size, pgd_size; + unsigned int nr_vcpus; + void *pgd = NULL; + int ret; + + ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1); + if (ret) + return ret; + + nr_vcpus = READ_ONCE(host_kvm->created_vcpus); + if (nr_vcpus < 1) { + ret = -EINVAL; + goto err_unpin_kvm; + } + + vm_size = pkvm_get_hyp_vm_size(nr_vcpus); + pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr); + + ret = -ENOMEM; + + hyp_vm = map_donated_memory(vm_hva, vm_size); + if (!hyp_vm) + goto err_remove_mappings; + + pgd = map_donated_memory_noclear(pgd_hva, pgd_size); + if (!pgd) + goto err_remove_mappings; + + init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus); + + hyp_spin_lock(&vm_table_lock); + ret = insert_vm_table_entry(host_kvm, hyp_vm); + if (ret < 0) + goto err_unlock; + + ret = kvm_guest_prepare_stage2(hyp_vm, pgd); + if (ret) + goto err_remove_vm_table_entry; + hyp_spin_unlock(&vm_table_lock); + + return hyp_vm->kvm.arch.pkvm_handle; + +err_remove_vm_table_entry: + remove_vm_table_entry(hyp_vm->kvm.arch.pkvm_handle); +err_unlock: + hyp_spin_unlock(&vm_table_lock); +err_remove_mappings: + unmap_donated_memory(hyp_vm, vm_size); + unmap_donated_memory(pgd, pgd_size); +err_unpin_kvm: + hyp_unpin_shared_mem(host_kvm, host_kvm + 1); + return ret; +} + +/* + * Initialize the hypervisor copy of the protected vCPU state using the + * memory donated by the host. + * + * handle: The handle for the protected vm. + * host_vcpu: A pointer to the corresponding host vcpu. + * vcpu_hva: The host va of the area being donated for the vcpu state. + * Must be page aligned. The size of the area must be equal to + * the page-aligned size of 'struct pkvm_hyp_vcpu'. + * Return 0 on success, negative error code on failure. + */ +int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, + unsigned long vcpu_hva) +{ + struct pkvm_hyp_vcpu *hyp_vcpu; + struct pkvm_hyp_vm *hyp_vm; + unsigned int idx; + int ret; + + hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu)); + if (!hyp_vcpu) + return -ENOMEM; + + hyp_spin_lock(&vm_table_lock); + + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm) { + ret = -ENOENT; + goto unlock; + } + + idx = hyp_vm->nr_vcpus; + if (idx >= hyp_vm->kvm.created_vcpus) { + ret = -EINVAL; + goto unlock; + } + + ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx); + if (ret) + goto unlock; + + hyp_vm->vcpus[idx] = hyp_vcpu; + hyp_vm->nr_vcpus++; +unlock: + hyp_spin_unlock(&vm_table_lock); + + if (ret) + unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + + return ret; +} + +int __pkvm_teardown_vm(pkvm_handle_t handle) +{ + struct pkvm_hyp_vm *hyp_vm; + struct kvm *host_kvm; + size_t vm_size; + int err; + + hyp_spin_lock(&vm_table_lock); + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm) { + err = -ENOENT; + goto err_unlock; + } + + if (WARN_ON(hyp_page_count(hyp_vm))) { + err = -EBUSY; + goto err_unlock; + } + + /* Ensure the VMID is clean before it can be reallocated */ + __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu); + remove_vm_table_entry(handle); + hyp_spin_unlock(&vm_table_lock); + + /* Reclaim guest pages (including page-table pages) */ + reclaim_guest_pages(hyp_vm); + unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus); + + /* Return the metadata pages to the host */ + host_kvm = hyp_vm->host_kvm; + vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus); + unmap_donated_memory(hyp_vm, vm_size); + hyp_unpin_shared_mem(host_kvm, host_kvm + 1); + return 0; + +err_unlock: + hyp_spin_unlock(&vm_table_lock); + return err; +} diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0312c9c74a5a..2be72fbe7279 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include unsigned long hyp_nr_cpus; @@ -24,6 +25,7 @@ unsigned long hyp_nr_cpus; (unsigned long)__per_cpu_start) static void *vmemmap_base; +static void *vm_table_base; static void *hyp_pgt_base; static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; @@ -40,6 +42,11 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!vmemmap_base) return -ENOMEM; + nr_pages = hyp_vm_table_pages(); + vm_table_base = hyp_early_alloc_contig(nr_pages); + if (!vm_table_base) + return -ENOMEM; + nr_pages = hyp_s1_pgtable_pages(); hyp_pgt_base = hyp_early_alloc_contig(nr_pages); if (!hyp_pgt_base) @@ -314,6 +321,7 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + pkvm_hyp_vm_table_init(vm_table_base); out: /* * We tail-called to here from handle___pkvm_init() and will not return, diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cdf8e76b0be1..a1a27f88a312 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1200,6 +1200,15 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, return 0; } +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) +{ + u32 ia_bits = VTCR_EL2_IPA(vtcr); + u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); + u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + + return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; +} + static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 34229425b25d..71493136e59c 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -71,6 +71,7 @@ void __init kvm_hyp_reserve(void) hyp_mem_pages += hyp_s1_pgtable_pages(); hyp_mem_pages += host_s2_pgtable_pages(); + hyp_mem_pages += hyp_vm_table_pages(); hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE); /* -- cgit v1.2.3 From 9d0c063a4d1d10ef8e6288899b8524413e40cfa0 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 10 Nov 2022 19:02:46 +0000 Subject: KVM: arm64: Instantiate pKVM hypervisor VM and vCPU structures from EL1 With the pKVM hypervisor at EL2 now offering hypercalls to the host for creating and destroying VM and vCPU structures, plumb these in to the existing arm64 KVM backend to ensure that the hypervisor data structures are allocated and initialised on first vCPU run for a pKVM guest. In the host, 'struct kvm_protected_vm' is introduced to hold the handle of the pKVM VM instance as well as to track references to the memory donated to the hypervisor so that it can be freed back to the host allocator following VM teardown. The stage-2 page-table, hypervisor VM and vCPU structures are allocated separately so as to avoid the need for a large physically-contiguous allocation in the host at run-time. Tested-by: Vincent Donnefort Signed-off-by: Fuad Tabba Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-14-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 14 +++- arch/arm64/include/asm/kvm_pkvm.h | 4 ++ arch/arm64/kvm/arm.c | 14 ++++ arch/arm64/kvm/hyp/hyp-constants.c | 3 + arch/arm64/kvm/hyp/nvhe/pkvm.c | 15 ++-- arch/arm64/kvm/pkvm.c | 138 +++++++++++++++++++++++++++++++++++++ 6 files changed, 182 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d3dd7ab9c79e..467393e7331f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -117,6 +117,16 @@ struct kvm_smccc_features { typedef unsigned int pkvm_handle_t; +struct kvm_protected_vm { + pkvm_handle_t handle; + + struct { + void *pgd; + void *vm; + void *vcpus[KVM_MAX_VCPUS]; + } hyp_donations; +}; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -170,10 +180,10 @@ struct kvm_arch { struct kvm_smccc_features smccc_feat; /* - * For an untrusted host VM, 'pkvm_handle' is used to lookup + * For an untrusted host VM, 'pkvm.handle' is used to lookup * the associated pKVM instance in the hypervisor. */ - pkvm_handle_t pkvm_handle; + struct kvm_protected_vm pkvm; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index f4e3133d6550..01129b0d4c68 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -14,6 +14,10 @@ #define HYP_MEMBLOCK_REGIONS 128 +int pkvm_init_host_vm(struct kvm *kvm); +int pkvm_create_hyp_vm(struct kvm *kvm); +void pkvm_destroy_hyp_vm(struct kvm *kvm); + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..30d6fc5d3a93 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -150,6 +151,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + ret = pkvm_init_host_vm(kvm); + if (ret) + goto out_free_stage2_pgd; + if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { ret = -ENOMEM; goto out_free_stage2_pgd; @@ -187,6 +192,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_vgic_destroy(kvm); + if (is_protected_kvm_enabled()) + pkvm_destroy_hyp_vm(kvm); + kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); @@ -569,6 +577,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (ret) return ret; + if (is_protected_kvm_enabled()) { + ret = pkvm_create_hyp_vm(kvm); + if (ret) + return ret; + } + if (!irqchip_in_kernel(kvm)) { /* * Tell the rest of the code that there are userspace irqchip diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c index b3742a6691e8..b257a3b4bfc5 100644 --- a/arch/arm64/kvm/hyp/hyp-constants.c +++ b/arch/arm64/kvm/hyp/hyp-constants.c @@ -2,9 +2,12 @@ #include #include +#include int main(void) { DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page)); + DEFINE(PKVM_HYP_VM_SIZE, sizeof(struct pkvm_hyp_vm)); + DEFINE(PKVM_HYP_VCPU_SIZE, sizeof(struct pkvm_hyp_vcpu)); return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 135c9a095eca..2c73c4640e4d 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -324,7 +324,7 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, if (idx < 0) return idx; - hyp_vm->kvm.arch.pkvm_handle = idx_to_vm_handle(idx); + hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx); /* VMID 0 is reserved for the host */ atomic64_set(&mmu->vmid.id, idx + 1); @@ -333,7 +333,7 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, mmu->pgt = &hyp_vm->pgt; vm_table[idx] = hyp_vm; - return hyp_vm->kvm.arch.pkvm_handle; + return hyp_vm->kvm.arch.pkvm.handle; } /* @@ -458,10 +458,10 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, goto err_remove_vm_table_entry; hyp_spin_unlock(&vm_table_lock); - return hyp_vm->kvm.arch.pkvm_handle; + return hyp_vm->kvm.arch.pkvm.handle; err_remove_vm_table_entry: - remove_vm_table_entry(hyp_vm->kvm.arch.pkvm_handle); + remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle); err_unlock: hyp_spin_unlock(&vm_table_lock); err_remove_mappings: @@ -528,6 +528,7 @@ int __pkvm_teardown_vm(pkvm_handle_t handle) { struct pkvm_hyp_vm *hyp_vm; struct kvm *host_kvm; + unsigned int idx; size_t vm_size; int err; @@ -553,6 +554,12 @@ int __pkvm_teardown_vm(pkvm_handle_t handle) unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus); /* Return the metadata pages to the host */ + for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) { + struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx]; + + unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + } + host_kvm = hyp_vm->host_kvm; vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus); unmap_donated_memory(hyp_vm, vm_size); diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 71493136e59c..8c443b915e43 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -94,3 +95,140 @@ void __init kvm_hyp_reserve(void) kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, hyp_mem_base); } + +/* + * Allocates and donates memory for hypervisor VM structs at EL2. + * + * Allocates space for the VM state, which includes the hyp vm as well as + * the hyp vcpus. + * + * Stores an opaque handler in the kvm struct for future reference. + * + * Return 0 on success, negative error code on failure. + */ +static int __pkvm_create_hyp_vm(struct kvm *host_kvm) +{ + size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz; + struct kvm_vcpu *host_vcpu; + pkvm_handle_t handle; + void *pgd, *hyp_vm; + unsigned long idx; + int ret; + + if (host_kvm->created_vcpus < 1) + return -EINVAL; + + pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr); + + /* + * The PGD pages will be reclaimed using a hyp_memcache which implies + * page granularity. So, use alloc_pages_exact() to get individual + * refcounts. + */ + pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT); + if (!pgd) + return -ENOMEM; + + /* Allocate memory to donate to hyp for vm and vcpu pointers. */ + hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, + size_mul(sizeof(void *), + host_kvm->created_vcpus))); + hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT); + if (!hyp_vm) { + ret = -ENOMEM; + goto free_pgd; + } + + /* Donate the VM memory to hyp and let hyp initialize it. */ + ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd); + if (ret < 0) + goto free_vm; + + handle = ret; + + host_kvm->arch.pkvm.handle = handle; + host_kvm->arch.pkvm.hyp_donations.pgd = pgd; + host_kvm->arch.pkvm.hyp_donations.vm = hyp_vm; + + /* Donate memory for the vcpus at hyp and initialize it. */ + hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE); + kvm_for_each_vcpu(idx, host_vcpu, host_kvm) { + void *hyp_vcpu; + + /* Indexing of the vcpus to be sequential starting at 0. */ + if (WARN_ON(host_vcpu->vcpu_idx != idx)) { + ret = -EINVAL; + goto destroy_vm; + } + + hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT); + if (!hyp_vcpu) { + ret = -ENOMEM; + goto destroy_vm; + } + + host_kvm->arch.pkvm.hyp_donations.vcpus[idx] = hyp_vcpu; + + ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu, + hyp_vcpu); + if (ret) + goto destroy_vm; + } + + return 0; + +destroy_vm: + pkvm_destroy_hyp_vm(host_kvm); + return ret; +free_vm: + free_pages_exact(hyp_vm, hyp_vm_sz); +free_pgd: + free_pages_exact(pgd, pgd_sz); + return ret; +} + +int pkvm_create_hyp_vm(struct kvm *host_kvm) +{ + int ret = 0; + + mutex_lock(&host_kvm->lock); + if (!host_kvm->arch.pkvm.handle) + ret = __pkvm_create_hyp_vm(host_kvm); + mutex_unlock(&host_kvm->lock); + + return ret; +} + +void pkvm_destroy_hyp_vm(struct kvm *host_kvm) +{ + unsigned long idx, nr_vcpus = host_kvm->created_vcpus; + size_t pgd_sz, hyp_vm_sz; + + if (host_kvm->arch.pkvm.handle) + WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, + host_kvm->arch.pkvm.handle)); + + host_kvm->arch.pkvm.handle = 0; + + for (idx = 0; idx < nr_vcpus; ++idx) { + void *hyp_vcpu = host_kvm->arch.pkvm.hyp_donations.vcpus[idx]; + + if (!hyp_vcpu) + break; + + free_pages_exact(hyp_vcpu, PAGE_ALIGN(PKVM_HYP_VCPU_SIZE)); + } + + hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, + size_mul(sizeof(void *), nr_vcpus))); + pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr); + + free_pages_exact(host_kvm->arch.pkvm.hyp_donations.vm, hyp_vm_sz); + free_pages_exact(host_kvm->arch.pkvm.hyp_donations.pgd, pgd_sz); +} + +int pkvm_init_host_vm(struct kvm *host_kvm) +{ + mutex_init(&host_kvm->lock); + return 0; +} -- cgit v1.2.3 From aa6948f82f0b7060fbbac21911dc7996b144ba3c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:47 +0000 Subject: KVM: arm64: Add per-cpu fixmap infrastructure at EL2 Mapping pages in a guest page-table from within the pKVM hypervisor at EL2 may require cache maintenance to ensure that the initialised page contents is visible even to non-cacheable (e.g. MMU-off) accesses from the guest. In preparation for performing this maintenance at EL2, introduce a per-vCPU fixmap which allows the pKVM hypervisor to map guest pages temporarily into its stage-1 page-table for the purposes of cache maintenance and, in future, poisoning on the reclaim path. The use of a fixmap avoids the need for memory allocation or locking on the map() path. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Co-developed-by: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-15-will@kernel.org --- arch/arm64/include/asm/kvm_pgtable.h | 14 ++++ arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 + arch/arm64/kvm/hyp/include/nvhe/mm.h | 4 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 1 - arch/arm64/kvm/hyp/nvhe/mm.c | 104 ++++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/setup.c | 4 + arch/arm64/kvm/hyp/pgtable.c | 12 --- 7 files changed, 128 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 15c389db1931..34cb93f3c96d 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -42,6 +42,8 @@ typedef u64 kvm_pte_t; #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) +#define KVM_PHYS_INVALID (-1ULL) + static inline bool kvm_pte_valid(kvm_pte_t pte) { return pte & KVM_PTE_VALID; @@ -57,6 +59,18 @@ static inline u64 kvm_pte_to_phys(kvm_pte_t pte) return pa; } +static inline kvm_pte_t kvm_phys_to_pte(u64 pa) +{ + kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; + + if (PAGE_SHIFT == 16) { + pa &= GENMASK(51, 48); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + } + + return pte; +} + static inline u64 kvm_granule_shift(u32 level) { /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index ce9a796a85ee..ef31a1872c93 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -59,6 +59,8 @@ enum pkvm_component_id { PKVM_ID_HYP, }; +extern unsigned long hyp_nr_cpus; + int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); int __pkvm_host_unshare_hyp(u64 pfn); diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index b2ee6d5df55b..d5ec972b5c1e 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -13,6 +13,10 @@ extern struct kvm_pgtable pkvm_pgtable; extern hyp_spinlock_t pkvm_pgd_lock; +int hyp_create_pcpu_fixmap(void); +void *hyp_fixmap_map(phys_addr_t phys); +void hyp_fixmap_unmap(void); + int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); int hyp_back_vmemmap(phys_addr_t back); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 459957b3082e..8b4d3f0aa7a0 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -21,7 +21,6 @@ #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) -extern unsigned long hyp_nr_cpus; struct host_mmu host_mmu; static struct hyp_pool host_s2_pool; diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index d3a3b47181de..5648ac21e62d 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,12 @@ unsigned int hyp_memblock_nr; static u64 __io_map_base; +struct hyp_fixmap_slot { + u64 addr; + kvm_pte_t *ptep; +}; +static DEFINE_PER_CPU(struct hyp_fixmap_slot, fixmap_slots); + static int __pkvm_create_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot) { @@ -212,6 +219,103 @@ int hyp_map_vectors(void) return 0; } +void *hyp_fixmap_map(phys_addr_t phys) +{ + struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots); + kvm_pte_t pte, *ptep = slot->ptep; + + pte = *ptep; + pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID); + pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID; + WRITE_ONCE(*ptep, pte); + dsb(ishst); + + return (void *)slot->addr; +} + +static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) +{ + kvm_pte_t *ptep = slot->ptep; + u64 addr = slot->addr; + + WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID); + + /* + * Irritatingly, the architecture requires that we use inner-shareable + * broadcast TLB invalidation here in case another CPU speculates + * through our fixmap and decides to create an "amalagamation of the + * values held in the TLB" due to the apparent lack of a + * break-before-make sequence. + * + * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03 + */ + dsb(ishst); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1)); + dsb(ish); + isb(); +} + +void hyp_fixmap_unmap(void) +{ + fixmap_clear_slot(this_cpu_ptr(&fixmap_slots)); +} + +static int __create_fixmap_slot_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)arg); + + if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1) + return -EINVAL; + + slot->addr = addr; + slot->ptep = ptep; + + /* + * Clear the PTE, but keep the page-table page refcount elevated to + * prevent it from ever being freed. This lets us manipulate the PTEs + * by hand safely without ever needing to allocate memory. + */ + fixmap_clear_slot(slot); + + return 0; +} + +static int create_fixmap_slot(u64 addr, u64 cpu) +{ + struct kvm_pgtable_walker walker = { + .cb = __create_fixmap_slot_cb, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = (void *)cpu, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker); +} + +int hyp_create_pcpu_fixmap(void) +{ + unsigned long addr, i; + int ret; + + for (i = 0; i < hyp_nr_cpus; i++) { + ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr); + if (ret) + return ret; + + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE, + __hyp_pa(__hyp_bss_start), PAGE_HYP); + if (ret) + return ret; + + ret = create_fixmap_slot(addr, i); + if (ret) + return ret; + } + + return 0; +} + int hyp_create_idmap(u32 hyp_va_bits) { unsigned long start, end; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 2be72fbe7279..0f69c1393416 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -321,6 +321,10 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + ret = hyp_create_pcpu_fixmap(); + if (ret) + goto out; + pkvm_hyp_vm_table_init(vm_table_base); out: /* diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index a1a27f88a312..2bcb2d5903ba 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -57,8 +57,6 @@ struct kvm_pgtable_walk_data { u64 end; }; -#define KVM_PHYS_INVALID (-1ULL) - static bool kvm_phys_is_valid(u64 phys) { return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); @@ -122,16 +120,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level) return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; } -static kvm_pte_t kvm_phys_to_pte(u64 pa) -{ - kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) - pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); - - return pte; -} - static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops) { return mm_ops->phys_to_virt(kvm_pte_to_phys(pte)); -- cgit v1.2.3 From 6c165223e9a6384aa1e934b90f2650e71adb972a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:48 +0000 Subject: KVM: arm64: Initialise hypervisor copies of host symbols unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nVHE object at EL2 maintains its own copies of some host variables so that, when pKVM is enabled, the host cannot directly modify the hypervisor state. When running in normal nVHE mode, however, these variables are still mirrored at EL2 but are not initialised. Initialise the hypervisor symbols from the host copies regardless of pKVM, ensuring that any reference to this data at EL2 with normal nVHE will return a sensibly initialised value. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-16-will@kernel.org --- arch/arm64/kvm/arm.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 30d6fc5d3a93..584626e11797 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1884,11 +1884,8 @@ static int do_pkvm_init(u32 hyp_va_bits) return ret; } -static int kvm_hyp_init_protection(u32 hyp_va_bits) +static void kvm_hyp_init_symbols(void) { - void *addr = phys_to_virt(hyp_mem_base); - int ret; - kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); @@ -1897,6 +1894,12 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); +} + +static int kvm_hyp_init_protection(u32 hyp_va_bits) +{ + void *addr = phys_to_virt(hyp_mem_base); + int ret; ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); if (ret) @@ -2071,6 +2074,8 @@ static int init_hyp_mode(void) cpu_prepare_hyp_mode(cpu); } + kvm_hyp_init_symbols(); + if (is_protected_kvm_enabled()) { init_cpu_logical_map(); @@ -2078,9 +2083,7 @@ static int init_hyp_mode(void) err = -ENODEV; goto out_err; } - } - if (is_protected_kvm_enabled()) { err = kvm_hyp_init_protection(hyp_va_bits); if (err) { kvm_err("Failed to init hyp memory protection\n"); -- cgit v1.2.3 From 13e248aab73d2f1c27b458ef09d38b44f3e5bf2e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:49 +0000 Subject: KVM: arm64: Provide I-cache invalidation by virtual address at EL2 In preparation for handling cache maintenance of guest pages from within the pKVM hypervisor at EL2, introduce an EL2 copy of icache_inval_pou() which will later be plumbed into the stage-2 page-table cache maintenance callbacks, ensuring that the initial contents of pages mapped as executable into the guest stage-2 page-table is visible to the instruction fetcher. Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-17-will@kernel.org --- arch/arm64/include/asm/kvm_hyp.h | 1 + arch/arm64/kernel/image-vars.h | 3 --- arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/hyp/nvhe/cache.S | 11 +++++++++++ arch/arm64/kvm/hyp/nvhe/pkvm.c | 3 +++ 5 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index aa7fa2a08f06..fd99cf09972d 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -123,4 +123,5 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); +extern unsigned long kvm_nvhe_sym(__icache_flags); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 8151412653de..7f4e43bfaade 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -71,9 +71,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler); /* Vectors installed by hyp-init on reset HVC. */ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* Kernel symbol used by icache_is_vpipt(). */ -KVM_NVHE_ALIAS(__icache_flags); - /* VMID bits set by the KVM VMID allocator */ KVM_NVHE_ALIAS(kvm_arm_vmid_bits); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 584626e11797..d99e93e6ddf7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1894,6 +1894,7 @@ static void kvm_hyp_init_symbols(void) kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); + kvm_nvhe_sym(__icache_flags) = __icache_flags; } static int kvm_hyp_init_protection(u32 hyp_va_bits) diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 0c367eb5f4e2..85936c17ae40 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -12,3 +12,14 @@ SYM_FUNC_START(__pi_dcache_clean_inval_poc) ret SYM_FUNC_END(__pi_dcache_clean_inval_poc) SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) + +SYM_FUNC_START(__pi_icache_inval_pou) +alternative_if ARM64_HAS_CACHE_DIC + isb + ret +alternative_else_nop_endif + + invalidate_icache_by_line x0, x1, x2, x3 + ret +SYM_FUNC_END(__pi_icache_inval_pou) +SYM_FUNC_ALIAS(icache_inval_pou, __pi_icache_inval_pou) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 2c73c4640e4d..0768307566d4 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -12,6 +12,9 @@ #include #include +/* Used by icache_is_vpipt(). */ +unsigned long __icache_flags; + /* * Set trap register values based on features in ID_AA64PFR0. */ -- cgit v1.2.3 From 717a7eebac106a5cc5d5493f8eef9cf4ae6edf19 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:50 +0000 Subject: KVM: arm64: Add generic hyp_memcache helpers The host at EL1 and the pKVM hypervisor at EL2 will soon need to exchange memory pages dynamically for creating and destroying VM state. Indeed, the hypervisor will rely on the host to donate memory pages it can use to create guest stage-2 page-tables and to store VM and vCPU metadata. In order to ease this process, introduce a 'struct hyp_memcache' which is essentially a linked list of available pages, indexed by physical addresses so that it can be passed meaningfully between the different virtual address spaces configured at EL1 and EL2. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-18-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 57 +++++++++++++++++++++++++++ arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 + arch/arm64/kvm/hyp/nvhe/mm.c | 33 ++++++++++++++++ arch/arm64/kvm/mmu.c | 26 ++++++++++++ 4 files changed, 118 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 467393e7331f..835987e0f868 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -73,6 +73,63 @@ u32 __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); +struct kvm_hyp_memcache { + phys_addr_t head; + unsigned long nr_pages; +}; + +static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, + phys_addr_t *p, + phys_addr_t (*to_pa)(void *virt)) +{ + *p = mc->head; + mc->head = to_pa(p); + mc->nr_pages++; +} + +static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc, + void *(*to_va)(phys_addr_t phys)) +{ + phys_addr_t *p = to_va(mc->head); + + if (!mc->nr_pages) + return NULL; + + mc->head = *p; + mc->nr_pages--; + + return p; +} + +static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc, + unsigned long min_pages, + void *(*alloc_fn)(void *arg), + phys_addr_t (*to_pa)(void *virt), + void *arg) +{ + while (mc->nr_pages < min_pages) { + phys_addr_t *p = alloc_fn(arg); + + if (!p) + return -ENOMEM; + push_hyp_memcache(mc, p, to_pa); + } + + return 0; +} + +static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc, + void (*free_fn)(void *virt, void *arg), + void *(*to_va)(phys_addr_t phys), + void *arg) +{ + while (mc->nr_pages) + free_fn(pop_hyp_memcache(mc, to_va), arg); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc); +int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages); + struct kvm_vmid { atomic64_t id; }; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index ef31a1872c93..420b87e755a4 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -77,6 +77,8 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); void hyp_unpin_shared_mem(void *from, void *to); void reclaim_guest_pages(struct pkvm_hyp_vm *vm); +int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, + struct kvm_hyp_memcache *host_mc); static __always_inline void __load_host_stage2(void) { diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 5648ac21e62d..c80b2c007619 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -340,3 +340,36 @@ int hyp_create_idmap(u32 hyp_va_bits) return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); } + +static void *admit_host_page(void *arg) +{ + struct kvm_hyp_memcache *host_mc = arg; + + if (!host_mc->nr_pages) + return NULL; + + /* + * The host still owns the pages in its memcache, so we need to go + * through a full host-to-hyp donation cycle to change it. Fortunately, + * __pkvm_host_donate_hyp() takes care of races for us, so if it + * succeeds we're good to go. + */ + if (__pkvm_host_donate_hyp(hyp_phys_to_pfn(host_mc->head), 1)) + return NULL; + + return pop_hyp_memcache(host_mc, hyp_phys_to_virt); +} + +/* Refill our local memcache by poping pages from the one provided by the host. */ +int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, + struct kvm_hyp_memcache *host_mc) +{ + struct kvm_hyp_memcache tmp = *host_mc; + int ret; + + ret = __topup_hyp_memcache(mc, min_pages, admit_host_page, + hyp_virt_to_phys, &tmp); + *host_mc = tmp; + + return ret; +} diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8..18061163c607 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -807,6 +807,32 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) } } +static void hyp_mc_free_fn(void *addr, void *unused) +{ + free_page((unsigned long)addr); +} + +static void *hyp_mc_alloc_fn(void *unused) +{ + return (void *)__get_free_page(GFP_KERNEL_ACCOUNT); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc) +{ + if (is_protected_kvm_enabled()) + __free_hyp_memcache(mc, hyp_mc_free_fn, + kvm_host_va, NULL); +} + +int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages) +{ + if (!is_protected_kvm_enabled()) + return 0; + + return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn, + kvm_host_pa, NULL); +} + /** * kvm_phys_addr_ioremap - map a device range to guest IPA * -- cgit v1.2.3 From 315775ff7c6de497dd07c3f6eff499fb538783eb Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:51 +0000 Subject: KVM: arm64: Consolidate stage-2 initialisation into a single function The initialisation of guest stage-2 page-tables is currently split across two functions: kvm_init_stage2_mmu() and kvm_arm_setup_stage2(). That is presumably for historical reasons as kvm_arm_setup_stage2() originates from the (now defunct) KVM port for 32-bit Arm. Simplify this code path by merging both functions into one, taking care to map the 'struct kvm' into the hypervisor stage-1 early on in order to simplify the failure path. Tested-by: Vincent Donnefort Co-developed-by: Fuad Tabba Signed-off-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-19-will@kernel.org --- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 -- arch/arm64/include/asm/kvm_mmu.h | 2 +- arch/arm64/kvm/arm.c | 27 +++++++++++++-------------- arch/arm64/kvm/mmu.c | 27 ++++++++++++++++++++++++++- arch/arm64/kvm/reset.c | 29 ----------------------------- 6 files changed, 41 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8aa8492dafc0..89e63585dae4 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -135,7 +135,7 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * - * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2(). + * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu. * * Note that when using 4K pages, we concatenate two first level page tables * together. With 16K pages, we concatenate 16 first level page tables. diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 835987e0f868..57218f0c449e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -990,8 +990,6 @@ int kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); - static inline bool kvm_vm_is_protected(struct kvm *kvm) { return false; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7784081088e7..e4a7e6369499 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -166,7 +166,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d99e93e6ddf7..f78eefa02f6b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -139,28 +139,24 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret; - ret = kvm_arm_setup_stage2(kvm, type); - if (ret) - return ret; - - ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); - if (ret) - return ret; - ret = kvm_share_hyp(kvm, kvm + 1); if (ret) - goto out_free_stage2_pgd; + return ret; ret = pkvm_init_host_vm(kvm); if (ret) - goto out_free_stage2_pgd; + goto err_unshare_kvm; if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { ret = -ENOMEM; - goto out_free_stage2_pgd; + goto err_unshare_kvm; } cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); + ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type); + if (ret) + goto err_free_cpumask; + kvm_vgic_early_init(kvm); /* The maximum number of VCPUs is limited by the host's GIC model */ @@ -169,9 +165,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_default_spectre(kvm); kvm_arm_init_hypercalls(kvm); - return ret; -out_free_stage2_pgd: - kvm_free_stage2_pgd(&kvm->arch.mmu); + return 0; + +err_free_cpumask: + free_cpumask_var(kvm->arch.supported_cpus); +err_unshare_kvm: + kvm_unshare_hyp(kvm, kvm + 1); return ret; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 18061163c607..3e56c6393cae 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -675,15 +675,40 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { * kvm_init_stage2_mmu - Initialise a S2 MMU structure * @kvm: The pointer to the KVM structure * @mmu: The pointer to the s2 MMU structure + * @type: The machine type of the virtual machine * * Allocates only the stage-2 HW PGD level table(s). * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type) { + u32 kvm_ipa_limit = get_kvm_ipa_limit(); int cpu, err; struct kvm_pgtable *pgt; + u64 mmfr0, mmfr1; + u32 phys_shift; + + if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) + return -EINVAL; + + phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); + if (phys_shift) { + if (phys_shift > kvm_ipa_limit || + phys_shift < ARM64_MIN_PARANGE_BITS) + return -EINVAL; + } else { + phys_shift = KVM_PHYS_SHIFT; + if (phys_shift > kvm_ipa_limit) { + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", + current->comm); + return -EINVAL; + } + } + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); if (mmu->pgt != NULL) { kvm_err("kvm_arch already initialized?\n"); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ae18472205a..e0267f672b8a 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -395,32 +395,3 @@ int kvm_set_ipa_limit(void) return 0; } - -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) -{ - u64 mmfr0, mmfr1; - u32 phys_shift; - - if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) - return -EINVAL; - - phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); - if (phys_shift) { - if (phys_shift > kvm_ipa_limit || - phys_shift < ARM64_MIN_PARANGE_BITS) - return -EINVAL; - } else { - phys_shift = KVM_PHYS_SHIFT; - if (phys_shift > kvm_ipa_limit) { - pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", - current->comm); - return -EINVAL; - } - } - - mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); - - return 0; -} -- cgit v1.2.3 From 60dfe093ec13b056856c672e1daa35134be38283 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:52 +0000 Subject: KVM: arm64: Instantiate guest stage-2 page-tables at EL2 Extend the initialisation of guest data structures within the pKVM hypervisor at EL2 so that we instantiate a memory pool and a full 'struct kvm_s2_mmu' structure for each VM, with a stage-2 page-table entirely independent from the one managed by the host at EL1. The 'struct kvm_pgtable_mm_ops' used by the page-table code is populated with a set of callbacks that can manage guest pages in the hypervisor without any direct intervention from the host, allocating page-table pages from the provided pool and returning these to the host on VM teardown. To keep things simple, the stage-2 MMU for the guest is configured identically to the host stage-2 in the VTCR register and so the IPA size of the guest must match the PA size of the host. For now, the new page-table is unused as there is no way for the host to map anything into it. Yet. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-20-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 6 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 125 ++++++++++++++++++++++++++++++++- arch/arm64/kvm/mmu.c | 4 +- 3 files changed, 132 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 8c653a3b9501..d14dfbcb7da1 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -9,6 +9,9 @@ #include +#include +#include + /* * Holds the relevant data for maintaining the vcpu state completely at hyp. */ @@ -30,6 +33,9 @@ struct pkvm_hyp_vm { /* The guest's stage-2 page-table managed by the hypervisor. */ struct kvm_pgtable pgt; + struct kvm_pgtable_mm_ops mm_ops; + struct hyp_pool pool; + hyp_spinlock_t lock; /* * The number of vcpus initialized and ready to run. diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8b4d3f0aa7a0..0162afba6dc4 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -25,6 +25,21 @@ struct host_mmu host_mmu; static struct hyp_pool host_s2_pool; +static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm); +#define current_vm (*this_cpu_ptr(&__current_vm)) + +static void guest_lock_component(struct pkvm_hyp_vm *vm) +{ + hyp_spin_lock(&vm->lock); + current_vm = vm; +} + +static void guest_unlock_component(struct pkvm_hyp_vm *vm) +{ + current_vm = NULL; + hyp_spin_unlock(&vm->lock); +} + static void host_lock_component(void) { hyp_spin_lock(&host_mmu.lock); @@ -140,18 +155,124 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) return 0; } +static bool guest_stage2_force_pte_cb(u64 addr, u64 end, + enum kvm_pgtable_prot prot) +{ + return true; +} + +static void *guest_s2_zalloc_pages_exact(size_t size) +{ + void *addr = hyp_alloc_pages(¤t_vm->pool, get_order(size)); + + WARN_ON(size != (PAGE_SIZE << get_order(size))); + hyp_split_page(hyp_virt_to_page(addr)); + + return addr; +} + +static void guest_s2_free_pages_exact(void *addr, unsigned long size) +{ + u8 order = get_order(size); + unsigned int i; + + for (i = 0; i < (1 << order); i++) + hyp_put_page(¤t_vm->pool, addr + (i * PAGE_SIZE)); +} + +static void *guest_s2_zalloc_page(void *mc) +{ + struct hyp_page *p; + void *addr; + + addr = hyp_alloc_pages(¤t_vm->pool, 0); + if (addr) + return addr; + + addr = pop_hyp_memcache(mc, hyp_phys_to_virt); + if (!addr) + return addr; + + memset(addr, 0, PAGE_SIZE); + p = hyp_virt_to_page(addr); + memset(p, 0, sizeof(*p)); + p->refcount = 1; + + return addr; +} + +static void guest_s2_get_page(void *addr) +{ + hyp_get_page(¤t_vm->pool, addr); +} + +static void guest_s2_put_page(void *addr) +{ + hyp_put_page(¤t_vm->pool, addr); +} + +static void clean_dcache_guest_page(void *va, size_t size) +{ + __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + +static void invalidate_icache_guest_page(void *va, size_t size) +{ + __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) { - vm->pgt.pgd = pgd; + struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu; + unsigned long nr_pages; + int ret; + + nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); + if (ret) + return ret; + + hyp_spin_lock_init(&vm->lock); + vm->mm_ops = (struct kvm_pgtable_mm_ops) { + .zalloc_pages_exact = guest_s2_zalloc_pages_exact, + .free_pages_exact = guest_s2_free_pages_exact, + .zalloc_page = guest_s2_zalloc_page, + .phys_to_virt = hyp_phys_to_virt, + .virt_to_phys = hyp_virt_to_phys, + .page_count = hyp_page_count, + .get_page = guest_s2_get_page, + .put_page = guest_s2_put_page, + .dcache_clean_inval_poc = clean_dcache_guest_page, + .icache_inval_pou = invalidate_icache_guest_page, + }; + + guest_lock_component(vm); + ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, + guest_stage2_force_pte_cb); + guest_unlock_component(vm); + if (ret) + return ret; + + vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd); + return 0; } void reclaim_guest_pages(struct pkvm_hyp_vm *vm) { + void *pgd = vm->pgt.pgd; unsigned long nr_pages; nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; - WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(vm->pgt.pgd), nr_pages)); + + guest_lock_component(vm); + kvm_pgtable_stage2_destroy(&vm->pgt); + vm->kvm.arch.mmu.pgd_phys = 0ULL; + guest_unlock_component(vm); + + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pages)); } int __pkvm_prot_finalize(void) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3e56c6393cae..962f4472601b 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -693,7 +693,9 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return -EINVAL; phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); - if (phys_shift) { + if (is_protected_kvm_enabled()) { + phys_shift = kvm_ipa_limit; + } else if (phys_shift) { if (phys_shift > kvm_ipa_limit || phys_shift < ARM64_MIN_PARANGE_BITS) return -EINVAL; -- cgit v1.2.3 From f41dff4efb918db68923a826e966ca62c7c8e929 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:53 +0000 Subject: KVM: arm64: Return guest memory from EL2 via dedicated teardown memcache Rather than relying on the host to free the previously-donated pKVM hypervisor VM pages explicitly on teardown, introduce a dedicated teardown memcache which allows the host to reclaim guest memory resources without having to keep track of all of the allocations made by the pKVM hypervisor at EL2. Tested-by: Vincent Donnefort Co-developed-by: Fuad Tabba Signed-off-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Will Deacon [maz: dropped __maybe_unused from unmap_donated_memory_noclear()] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-21-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 7 +----- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 17 +++++++++------ arch/arm64/kvm/hyp/nvhe/pkvm.c | 27 +++++++++++++++++------ arch/arm64/kvm/pkvm.c | 31 ++++++--------------------- 5 files changed, 40 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 57218f0c449e..63307e7dc9c5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -176,12 +176,7 @@ typedef unsigned int pkvm_handle_t; struct kvm_protected_vm { pkvm_handle_t handle; - - struct { - void *pgd; - void *vm; - void *vcpus[KVM_MAX_VCPUS]; - } hyp_donations; + struct kvm_hyp_memcache teardown_mc; }; struct kvm_arch { diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 420b87e755a4..b7bdbe63deed 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -76,7 +76,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); void hyp_unpin_shared_mem(void *from, void *to); -void reclaim_guest_pages(struct pkvm_hyp_vm *vm); +void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc); int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, struct kvm_hyp_memcache *host_mc); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 0162afba6dc4..94cd48f7850e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -260,19 +260,24 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) return 0; } -void reclaim_guest_pages(struct pkvm_hyp_vm *vm) +void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) { - void *pgd = vm->pgt.pgd; - unsigned long nr_pages; - - nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + void *addr; + /* Dump all pgtable pages in the hyp_pool */ guest_lock_component(vm); kvm_pgtable_stage2_destroy(&vm->pgt); vm->kvm.arch.mmu.pgd_phys = 0ULL; guest_unlock_component(vm); - WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pages)); + /* Drain the hyp_pool into the memcache */ + addr = hyp_alloc_pages(&vm->pool, 0); + while (addr) { + memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page)); + push_hyp_memcache(mc, addr, hyp_virt_to_phys); + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1)); + addr = hyp_alloc_pages(&vm->pool, 0); + } } int __pkvm_prot_finalize(void) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 0768307566d4..81835c2f4c5a 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -393,7 +393,7 @@ static void unmap_donated_memory(void *va, size_t size) __unmap_donated_memory(va, size); } -static void __maybe_unused unmap_donated_memory_noclear(void *va, size_t size) +static void unmap_donated_memory_noclear(void *va, size_t size) { if (!va) return; @@ -527,8 +527,21 @@ unlock: return ret; } +static void +teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size) +{ + size = PAGE_ALIGN(size); + memset(addr, 0, size); + + for (void *start = addr; start < addr + size; start += PAGE_SIZE) + push_hyp_memcache(mc, start, hyp_virt_to_phys); + + unmap_donated_memory_noclear(addr, size); +} + int __pkvm_teardown_vm(pkvm_handle_t handle) { + struct kvm_hyp_memcache *mc; struct pkvm_hyp_vm *hyp_vm; struct kvm *host_kvm; unsigned int idx; @@ -547,25 +560,27 @@ int __pkvm_teardown_vm(pkvm_handle_t handle) goto err_unlock; } + host_kvm = hyp_vm->host_kvm; + /* Ensure the VMID is clean before it can be reallocated */ __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu); remove_vm_table_entry(handle); hyp_spin_unlock(&vm_table_lock); /* Reclaim guest pages (including page-table pages) */ - reclaim_guest_pages(hyp_vm); + mc = &host_kvm->arch.pkvm.teardown_mc; + reclaim_guest_pages(hyp_vm, mc); unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus); - /* Return the metadata pages to the host */ + /* Push the metadata pages to the teardown memcache */ for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) { struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx]; - unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu)); } - host_kvm = hyp_vm->host_kvm; vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus); - unmap_donated_memory(hyp_vm, vm_size); + teardown_donated_memory(mc, hyp_vm, vm_size); hyp_unpin_shared_mem(host_kvm, host_kvm + 1); return 0; diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 8c443b915e43..cf56958b1492 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -147,8 +147,6 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) handle = ret; host_kvm->arch.pkvm.handle = handle; - host_kvm->arch.pkvm.hyp_donations.pgd = pgd; - host_kvm->arch.pkvm.hyp_donations.vm = hyp_vm; /* Donate memory for the vcpus at hyp and initialize it. */ hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE); @@ -167,12 +165,12 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) goto destroy_vm; } - host_kvm->arch.pkvm.hyp_donations.vcpus[idx] = hyp_vcpu; - ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu, hyp_vcpu); - if (ret) + if (ret) { + free_pages_exact(hyp_vcpu, hyp_vcpu_sz); goto destroy_vm; + } } return 0; @@ -201,30 +199,13 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm) void pkvm_destroy_hyp_vm(struct kvm *host_kvm) { - unsigned long idx, nr_vcpus = host_kvm->created_vcpus; - size_t pgd_sz, hyp_vm_sz; - - if (host_kvm->arch.pkvm.handle) + if (host_kvm->arch.pkvm.handle) { WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, host_kvm->arch.pkvm.handle)); - - host_kvm->arch.pkvm.handle = 0; - - for (idx = 0; idx < nr_vcpus; ++idx) { - void *hyp_vcpu = host_kvm->arch.pkvm.hyp_donations.vcpus[idx]; - - if (!hyp_vcpu) - break; - - free_pages_exact(hyp_vcpu, PAGE_ALIGN(PKVM_HYP_VCPU_SIZE)); } - hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, - size_mul(sizeof(void *), nr_vcpus))); - pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr); - - free_pages_exact(host_kvm->arch.pkvm.hyp_donations.vm, hyp_vm_sz); - free_pages_exact(host_kvm->arch.pkvm.hyp_donations.pgd, pgd_sz); + host_kvm->arch.pkvm.handle = 0; + free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); } int pkvm_init_host_vm(struct kvm *host_kvm) -- cgit v1.2.3 From fe41a7f8c0ee3ee2f682f8c28c7e1c5ff2be8a79 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:54 +0000 Subject: KVM: arm64: Unmap 'kvm_arm_hyp_percpu_base' from the host When pKVM is enabled, the hypervisor at EL2 does not trust the host at EL1 and must therefore prevent it from having unrestricted access to internal hypervisor state. The 'kvm_arm_hyp_percpu_base' array holds the offsets for hypervisor per-cpu allocations, so move this this into the nVHE code where it cannot be modified by the untrusted host at EL1. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-22-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 4 ++-- arch/arm64/kernel/image-vars.h | 3 --- arch/arm64/kvm/arm.c | 9 ++++----- arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 2 ++ 4 files changed, 8 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index de52ba775d48..43c3bc0f9544 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -109,7 +109,7 @@ enum __kvm_host_smccc_func { #define per_cpu_ptr_nvhe_sym(sym, cpu) \ ({ \ unsigned long base, off; \ - base = kvm_arm_hyp_percpu_base[cpu]; \ + base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \ off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \ (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \ base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \ @@ -214,7 +214,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 7f4e43bfaade..ae8f37f4aa8c 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -89,9 +89,6 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities); KVM_NVHE_ALIAS(__start___kvm_ex_table); KVM_NVHE_ALIAS(__stop___kvm_ex_table); -/* Array containing bases of nVHE per-CPU memory regions. */ -KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); - /* PMU available static key */ #ifdef CONFIG_HW_PERF_EVENTS KVM_NVHE_ALIAS(kvm_arm_pmu_available); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f78eefa02f6b..25467f24803d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -51,7 +51,6 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); static bool vgic_present; @@ -1857,13 +1856,13 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); - free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order()); + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); } } static int do_pkvm_init(u32 hyp_va_bits) { - void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base); + void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)); int ret; preempt_disable(); @@ -1967,7 +1966,7 @@ static int init_hyp_mode(void) page_addr = page_address(page); memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size()); - kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr; + kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr; } /* @@ -2060,7 +2059,7 @@ static int init_hyp_mode(void) } for_each_possible_cpu(cpu) { - char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu]; + char *percpu_begin = (char *)kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; char *percpu_end = percpu_begin + nvhe_percpu_size(); /* Map Hyp percpu pages */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 9f54833af400..04d194583f1e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -23,6 +23,8 @@ u64 cpu_logical_map(unsigned int cpu) return hyp_cpu_logical_map[cpu]; } +unsigned long __ro_after_init kvm_arm_hyp_percpu_base[NR_CPUS]; + unsigned long __hyp_per_cpu_offset(unsigned int cpu) { unsigned long *cpu_base_array; -- cgit v1.2.3 From 73f38ef2ae531b180685173e0923225551434fcb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:55 +0000 Subject: KVM: arm64: Maintain a copy of 'kvm_arm_vmid_bits' at EL2 Sharing 'kvm_arm_vmid_bits' between EL1 and EL2 allows the host to modify the variable arbitrarily, potentially leading to all sorts of shenanians as this is used to configure the VTTBR register for the guest stage-2. In preparation for unmapping host sections entirely from EL2, maintain a copy of 'kvm_arm_vmid_bits' in the pKVM hypervisor and initialise it from the host value while it is still trusted. Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-23-will@kernel.org --- arch/arm64/include/asm/kvm_hyp.h | 2 ++ arch/arm64/kernel/image-vars.h | 3 --- arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/hyp/nvhe/pkvm.c | 3 +++ 4 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index fd99cf09972d..6797eafe7890 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -124,4 +124,6 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); extern unsigned long kvm_nvhe_sym(__icache_flags); +extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index ae8f37f4aa8c..31ad75da4d58 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -71,9 +71,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler); /* Vectors installed by hyp-init on reset HVC. */ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* VMID bits set by the KVM VMID allocator */ -KVM_NVHE_ALIAS(kvm_arm_vmid_bits); - /* Static keys which are set if a vGIC trap should be handled in hyp. */ KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 25467f24803d..1d4b8122d010 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1893,6 +1893,7 @@ static void kvm_hyp_init_symbols(void) kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); kvm_nvhe_sym(__icache_flags) = __icache_flags; + kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; } static int kvm_hyp_init_protection(u32 hyp_va_bits) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 81835c2f4c5a..ed6ceac1e854 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -15,6 +15,9 @@ /* Used by icache_is_vpipt(). */ unsigned long __icache_flags; +/* Used by kvm_get_vttbr(). */ +unsigned int kvm_arm_vmid_bits; + /* * Set trap register values based on features in ID_AA64PFR0. */ -- cgit v1.2.3 From 27eb26bfff5d358d42911d04bbecc62e659ec32b Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:56 +0000 Subject: KVM: arm64: Explicitly map 'kvm_vgic_global_state' at EL2 The pkvm hypervisor at EL2 may need to read the 'kvm_vgic_global_state' variable from the host, for example when saving and restoring the state of the virtual GIC. Explicitly map 'kvm_vgic_global_state' in the stage-1 page-table of the pKVM hypervisor rather than relying on mapping all of the host '.rodata' section. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-24-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/setup.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0f69c1393416..5a371ab236db 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -161,6 +161,11 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; + ret = pkvm_create_mappings(&kvm_vgic_global_state, + &kvm_vgic_global_state + 1, prot); + if (ret) + return ret; + return 0; } -- cgit v1.2.3 From 169cd0f8238f2598b85d2db2e15828e8f8da18e5 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:57 +0000 Subject: KVM: arm64: Don't unnecessarily map host kernel sections at EL2 We no longer need to map the host's '.rodata' and '.bss' sections in the stage-1 page-table of the pKVM hypervisor at EL2, so remove those mappings and avoid creating any future dependencies at EL2 on host-controlled data structures. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-25-will@kernel.org --- arch/arm64/kernel/image-vars.h | 6 ------ arch/arm64/kvm/hyp/nvhe/setup.c | 14 +++----------- 2 files changed, 3 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 31ad75da4d58..e3f88b5836a2 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -102,12 +102,6 @@ KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy); KVM_NVHE_ALIAS_HYP(__memset, __pi_memset); #endif -/* Kernel memory sections */ -KVM_NVHE_ALIAS(__start_rodata); -KVM_NVHE_ALIAS(__end_rodata); -KVM_NVHE_ALIAS(__bss_start); -KVM_NVHE_ALIAS(__bss_stop); - /* Hyp memory sections */ KVM_NVHE_ALIAS(__hyp_idmap_text_start); KVM_NVHE_ALIAS(__hyp_idmap_text_end); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 5a371ab236db..5cdf3fb09bb4 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -144,23 +144,15 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, } /* - * Map the host's .bss and .rodata sections RO in the hypervisor, but - * transfer the ownership from the host to the hypervisor itself to - * make sure it can't be donated or shared with another entity. + * Map the host sections RO in the hypervisor, but transfer the + * ownership from the host to the hypervisor itself to make sure they + * can't be donated or shared with another entity. * * The ownership transition requires matching changes in the host * stage-2. This will be done later (see finalize_host_mappings()) once * the hyp_vmemmap is addressable. */ prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED); - ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot); - if (ret) - return ret; - - ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot); - if (ret) - return ret; - ret = pkvm_create_mappings(&kvm_vgic_global_state, &kvm_vgic_global_state + 1, prot); if (ret) -- cgit v1.2.3 From be66e67f175096f283c9d5614c4991fc9e7ed975 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:59 +0000 Subject: KVM: arm64: Use the pKVM hyp vCPU structure in handle___kvm_vcpu_run() As a stepping stone towards deprivileging the host's access to the guest's vCPU structures, introduce some naive flush/sync routines to copy most of the host vCPU into the hyp vCPU on vCPU run and back again on return to EL1. This allows us to run using the pKVM hyp structures when KVM is initialised in protected mode. Tested-by: Vincent Donnefort Co-developed-by: Fuad Tabba Signed-off-by: Fuad Tabba Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-27-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 4 ++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 79 +++++++++++++++++++++++++++++++++- arch/arm64/kvm/hyp/nvhe/pkvm.c | 28 ++++++++++++ 3 files changed, 109 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index d14dfbcb7da1..82b3d62538a6 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -61,4 +61,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, unsigned long vcpu_hva); int __pkvm_teardown_vm(pkvm_handle_t handle); +struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, + unsigned int vcpu_idx); +void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu); + #endif /* __ARM64_KVM_NVHE_PKVM_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index b5f3fcfe9135..728e01d4536b 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -22,11 +22,86 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); +static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt; + + hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state); + hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl; + + hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; + + hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2; + hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; + hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2; + + hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; + hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; + + hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); + hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state; + + hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; + + hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3; +} + +static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + struct vgic_v3_cpu_if *hyp_cpu_if = &hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3; + struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3; + unsigned int i; + + host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt; + + host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2; + host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2; + + host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; + + host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; + host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; + + host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; + for (i = 0; i < hyp_cpu_if->used_lrs; ++i) + host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i]; +} + static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1); + int ret; + + host_vcpu = kern_hyp_va(host_vcpu); + + if (unlikely(is_protected_kvm_enabled())) { + struct pkvm_hyp_vcpu *hyp_vcpu; + struct kvm *host_kvm; + + host_kvm = kern_hyp_va(host_vcpu->kvm); + hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle, + host_vcpu->vcpu_idx); + if (!hyp_vcpu) { + ret = -EINVAL; + goto out; + } + + flush_hyp_vcpu(hyp_vcpu); + + ret = __kvm_vcpu_run(&hyp_vcpu->vcpu); + + sync_hyp_vcpu(hyp_vcpu); + pkvm_put_hyp_vcpu(hyp_vcpu); + } else { + /* The host is fully trusted, run its vCPU directly. */ + ret = __kvm_vcpu_run(host_vcpu); + } - cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); +out: + cpu_reg(host_ctxt, 1) = ret; } static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index ed6ceac1e854..a06ece14a6d8 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -241,6 +241,33 @@ static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle) return vm_table[idx]; } +struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, + unsigned int vcpu_idx) +{ + struct pkvm_hyp_vcpu *hyp_vcpu = NULL; + struct pkvm_hyp_vm *hyp_vm; + + hyp_spin_lock(&vm_table_lock); + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx) + goto unlock; + + hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; + hyp_page_ref_inc(hyp_virt_to_page(hyp_vm)); +unlock: + hyp_spin_unlock(&vm_table_lock); + return hyp_vcpu; +} + +void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu); + + hyp_spin_lock(&vm_table_lock); + hyp_page_ref_dec(hyp_virt_to_page(hyp_vm)); + hyp_spin_unlock(&vm_table_lock); +} + static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu) { if (host_vcpu) @@ -286,6 +313,7 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, hyp_vcpu->vcpu.vcpu_idx = vcpu_idx; hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu; + hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags); done: if (ret) unpin_host_vcpu(host_vcpu); -- cgit v1.2.3 From d017eeabd5092565c3dd1c8a7b00ba724c33c18f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:17 +0000 Subject: arm64: Add ID_DFR0_EL1.PerfMon values for PMUv3p7 and IMP_DEF Align the ID_DFR0_EL1.PerfMon values with ID_AA64DFR0_EL1.PMUver. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-2-maz@kernel.org --- arch/arm64/include/asm/sysreg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7d301700d1a9..84f59ce1dc6d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -698,6 +698,8 @@ #define ID_DFR0_PERFMON_8_1 0x4 #define ID_DFR0_PERFMON_8_4 0x5 #define ID_DFR0_PERFMON_8_5 0x6 +#define ID_DFR0_PERFMON_8_7 0x7 +#define ID_DFR0_PERFMON_IMP_DEF 0xf #define ID_ISAR4_SWP_FRAC_SHIFT 28 #define ID_ISAR4_PSR_M_SHIFT 24 -- cgit v1.2.3 From bead02204e9806807bb290137b1ccabfcb4b16fd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:18 +0000 Subject: KVM: arm64: PMU: Align chained counter implementation with architecture pseudocode Ricardo recently pointed out that the PMU chained counter emulation in KVM wasn't quite behaving like the one on actual hardware, in the sense that a chained counter would expose an overflow on both halves of a chained counter, while KVM would only expose the overflow on the top half. The difference is subtle, but significant. What does the architecture say (DDI0087 H.a): - Up to PMUv3p4, all counters but the cycle counter are 32bit - A 32bit counter that overflows generates a CHAIN event on the adjacent counter after exposing its own overflow status - The CHAIN event is accounted if the counter is correctly configured (CHAIN event selected and counter enabled) This all means that our current implementation (which uses 64bit perf events) prevents us from emulating this overflow on the lower half. How to fix this? By implementing the above, to the letter. This largely results in code deletion, removing the notions of "counter pair", "chained counters", and "canonical counter". The code is further restructured to make the CHAIN handling similar to SWINC, as the two are now extremely similar in behaviour. Reported-by: Ricardo Koller Signed-off-by: Marc Zyngier Reviewed-by: Reiji Watanabe Link: https://lore.kernel.org/r/20221113163832.3154370-3-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 320 +++++++++++++--------------------------------- include/kvm/arm_pmu.h | 2 - 2 files changed, 86 insertions(+), 236 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 0003c7d37533..57765be69bea 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -15,16 +15,14 @@ #include #include +#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) + DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); - -#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 static u32 kvm_pmu_event_mask(struct kvm *kvm) { @@ -57,6 +55,11 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); } +static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) +{ + return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX); +} + static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) { struct kvm_pmu *pmu; @@ -69,91 +72,22 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) } /** - * kvm_pmu_pmc_is_chained - determine if the pmc is chained - * @pmc: The PMU counter pointer - */ -static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc) -{ - struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); - - return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); -} - -/** - * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter - * @select_idx: The counter index - */ -static bool kvm_pmu_idx_is_high_counter(u64 select_idx) -{ - return select_idx & 0x1; -} - -/** - * kvm_pmu_get_canonical_pmc - obtain the canonical pmc - * @pmc: The PMU counter pointer - * - * When a pair of PMCs are chained together we use the low counter (canonical) - * to hold the underlying perf event. - */ -static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) -{ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(pmc->idx)) - return pmc - 1; - - return pmc; -} -static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) -{ - if (kvm_pmu_idx_is_high_counter(pmc->idx)) - return pmc - 1; - else - return pmc + 1; -} - -/** - * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain + * kvm_pmu_get_counter_value - get PMU counter value * @vcpu: The vcpu pointer * @select_idx: The counter index */ -static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx) -{ - u64 eventsel, reg; - - select_idx |= 0x1; - - if (select_idx == ARMV8_PMU_CYCLE_IDX) - return false; - - reg = PMEVTYPER0_EL0 + select_idx; - eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm); - - return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN; -} - -/** - * kvm_pmu_get_pair_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @pmc: The PMU counter pointer - */ -static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, - struct kvm_pmc *pmc) +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) { - u64 counter, counter_high, reg, enabled, running; - - if (kvm_pmu_pmc_is_chained(pmc)) { - pmc = kvm_pmu_get_canonical_pmc(pmc); - reg = PMEVCNTR0_EL0 + pmc->idx; + u64 counter, reg, enabled, running; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - counter = __vcpu_sys_reg(vcpu, reg); - counter_high = __vcpu_sys_reg(vcpu, reg + 1); + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; - counter = lower_32_bits(counter) | (counter_high << 32); - } else { - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - counter = __vcpu_sys_reg(vcpu, reg); - } + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + counter = __vcpu_sys_reg(vcpu, reg); /* * The real counter value is equal to the value of counter register plus @@ -163,29 +97,7 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - return counter; -} - -/** - * kvm_pmu_get_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - */ -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) -{ - u64 counter; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - - if (!kvm_vcpu_has_pmu(vcpu)) - return 0; - - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(select_idx)) - counter = upper_32_bits(counter); - else if (select_idx != ARMV8_PMU_CYCLE_IDX) + if (select_idx != ARMV8_PMU_CYCLE_IDX) counter = lower_32_bits(counter); return counter; @@ -218,7 +130,6 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) */ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) { - pmc = kvm_pmu_get_canonical_pmc(pmc); if (pmc->perf_event) { perf_event_disable(pmc->perf_event); perf_event_release_kernel(pmc->perf_event); @@ -236,11 +147,10 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { u64 counter, reg, val; - pmc = kvm_pmu_get_canonical_pmc(pmc); if (!pmc->perf_event) return; - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { reg = PMCCNTR_EL0; @@ -252,9 +162,6 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) __vcpu_sys_reg(vcpu, reg) = val; - if (kvm_pmu_pmc_is_chained(pmc)) - __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); - kvm_pmu_release_perf_event(pmc); } @@ -285,8 +192,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) for_each_set_bit(i, &mask, 32) kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); - - bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); } /** @@ -340,12 +245,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* A change in the enable state may affect the chain state */ - kvm_pmu_update_pmc_chained(vcpu, i); - kvm_pmu_create_perf_event(vcpu, i); - - /* At this point, pmc must be the canonical */ - if (pmc->perf_event) { + if (!pmc->perf_event) { + kvm_pmu_create_perf_event(vcpu, i); + } else { perf_event_enable(pmc->perf_event); if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) kvm_debug("fail to enable perf event\n"); @@ -375,11 +277,6 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* A change in the enable state may affect the chain state */ - kvm_pmu_update_pmc_chained(vcpu, i); - kvm_pmu_create_perf_event(vcpu, i); - - /* At this point, pmc must be the canonical */ if (pmc->perf_event) perf_event_disable(pmc->perf_event); } @@ -484,6 +381,48 @@ static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) kvm_vcpu_kick(vcpu); } +/* + * Perform an increment on any of the counters described in @mask, + * generating the overflow if required, and propagate it as a chained + * event if possible. + */ +static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, + unsigned long mask, u32 event) +{ + int i; + + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + return; + + /* Weed out disabled counters */ + mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + + for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) { + u64 type, reg; + + /* Filter on event type */ + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type &= kvm_pmu_event_mask(vcpu->kvm); + if (type != event) + continue; + + /* Increment this counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + + if (reg) /* No overflow? move on */ + continue; + + /* Mark overflow */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + + if (kvm_pmu_counter_can_chain(vcpu, i)) + kvm_pmu_counter_increment(vcpu, BIT(i + 1), + ARMV8_PMUV3_PERFCTR_CHAIN); + } +} + /** * When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -514,6 +453,10 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + if (kvm_pmu_counter_can_chain(vcpu, idx)) + kvm_pmu_counter_increment(vcpu, BIT(idx + 1), + ARMV8_PMUV3_PERFCTR_CHAIN); + if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); @@ -533,50 +476,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, */ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) { - struct kvm_pmu *pmu = &vcpu->arch.pmu; - int i; - - if (!kvm_vcpu_has_pmu(vcpu)) - return; - - if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) - return; - - /* Weed out disabled counters */ - val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { - u64 type, reg; - - if (!(val & BIT(i))) - continue; - - /* PMSWINC only applies to ... SW_INC! */ - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); - type &= kvm_pmu_event_mask(vcpu->kvm); - if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) - continue; - - /* increment this even SW_INC counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; - reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - - if (reg) /* no overflow on the low part */ - continue; - - if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { - /* increment the high counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; - reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; - if (!reg) /* mark overflow on the high counter */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); - } else { - /* mark overflow on low counter */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); - } - } + kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR); } /** @@ -625,18 +525,11 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) { struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; struct perf_event *event; struct perf_event_attr attr; u64 eventsel, counter, reg, data; - /* - * For chained counters the event type and filtering attributes are - * obtained from the low/even counter. We also use this counter to - * determine if the event is enabled/disabled. - */ - pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]); - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; data = __vcpu_sys_reg(vcpu, reg); @@ -647,8 +540,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) else eventsel = data & kvm_pmu_event_mask(vcpu->kvm); - /* Software increment event doesn't need to be backed by a perf event */ - if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR) + /* + * Neither SW increment nor chained events need to be backed + * by a perf event. + */ + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR || + eventsel == ARMV8_PMUV3_PERFCTR_CHAIN) return; /* @@ -670,30 +567,21 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) attr.exclude_host = 1; /* Don't count host events */ attr.config = eventsel; - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + counter = kvm_pmu_get_counter_value(vcpu, select_idx); - if (kvm_pmu_pmc_is_chained(pmc)) { - /** - * The initial sample period (overflow count) of an event. For - * chained counters we only support overflow interrupts on the - * high counter. - */ + /* + * If counting with a 64bit counter, advertise it to the perf + * code, carefully dealing with the initial sample period. + */ + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { + attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; attr.sample_period = (-counter) & GENMASK(63, 0); - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; - - event = perf_event_create_kernel_counter(&attr, -1, current, - kvm_pmu_perf_overflow, - pmc + 1); } else { - /* The initial sample period (overflow count) of an event. */ - if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) - attr.sample_period = (-counter) & GENMASK(63, 0); - else - attr.sample_period = (-counter) & GENMASK(31, 0); + attr.sample_period = (-counter) & GENMASK(31, 0); + } - event = perf_event_create_kernel_counter(&attr, -1, current, + event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); - } if (IS_ERR(event)) { pr_err_once("kvm: pmu event creation failed %ld\n", @@ -704,41 +592,6 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) pmc->perf_event = event; } -/** - * kvm_pmu_update_pmc_chained - update chained bitmap - * @vcpu: The vcpu pointer - * @select_idx: The number of selected counter - * - * Update the chained bitmap based on the event type written in the - * typer register and the enable state of the odd register. - */ -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) -{ - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; - bool new_state, old_state; - - old_state = kvm_pmu_pmc_is_chained(pmc); - new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && - kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); - - if (old_state == new_state) - return; - - canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); - kvm_pmu_stop_counter(vcpu, canonical_pmc); - if (new_state) { - /* - * During promotion from !chained to chained we must ensure - * the adjacent counter is stopped and its event destroyed - */ - kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); - set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); - return; - } - clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); -} - /** * kvm_pmu_set_counter_event_type - set selected counter to monitor some event * @vcpu: The vcpu pointer @@ -766,7 +619,6 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, __vcpu_sys_reg(vcpu, reg) = data & mask; - kvm_pmu_update_pmc_chained(vcpu, select_idx); kvm_pmu_create_perf_event(vcpu, select_idx); } diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index c0b868ce6a8f..96b192139a23 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -11,7 +11,6 @@ #include #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) -#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) #ifdef CONFIG_HW_PERF_EVENTS @@ -29,7 +28,6 @@ struct kvm_pmu { struct irq_work overflow_work; struct kvm_pmu_events events; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; - DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS); int irq_num; bool created; bool irq_level; -- cgit v1.2.3 From acdd8a4e13a008a83c6da88bb53eecbecda9714c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:19 +0000 Subject: KVM: arm64: PMU: Always advertise the CHAIN event Even when the underlying HW doesn't offer the CHAIN event (which happens with QEMU), we can always support it as we're in control of the counter overflow. Always advertise the event via PMCEID0_EL0. Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-4-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 57765be69bea..69b67ab3c4bf 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -701,6 +701,8 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) if (!pmceid1) { val = read_sysreg(pmceid0_el0); + /* always support CHAIN */ + val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN); base = 0; } else { val = read_sysreg(pmceid1_el0); -- cgit v1.2.3 From c82d28cbf1d4f9fe174041b4485c635cb970afa7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:20 +0000 Subject: KVM: arm64: PMU: Distinguish between 64bit counter and 64bit overflow The PMU architecture makes a subtle difference between a 64bit counter and a counter that has a 64bit overflow. This is for example the case of the cycle counter, which can generate an overflow on a 32bit boundary if PMCR_EL0.LC==0 despite the accumulation being done on 64 bits. Use this distinction in the few cases where it matters in the code, as we will reuse this with PMUv3p5 long counters. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-5-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 69b67ab3c4bf..d050143326b5 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -50,6 +50,11 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) * @select_idx: The counter index */ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +{ + return (select_idx == ARMV8_PMU_CYCLE_IDX); +} + +static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx) { return (select_idx == ARMV8_PMU_CYCLE_IDX && __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); @@ -57,7 +62,8 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) { - return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX); + return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX && + !kvm_pmu_idx_has_64bit_overflow(vcpu, idx)); } static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) @@ -97,7 +103,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - if (select_idx != ARMV8_PMU_CYCLE_IDX) + if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) counter = lower_32_bits(counter); return counter; @@ -423,6 +429,23 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, } } +/* Compute the sample period for a given counter value */ +static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter) +{ + u64 val; + + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { + if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx)) + val = -(counter & GENMASK(31, 0)); + else + val = (-counter) & GENMASK(63, 0); + } else { + val = (-counter) & GENMASK(31, 0); + } + + return val; +} + /** * When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -442,10 +465,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, * Reset the sample period to the architectural limit, * i.e. the point where the counter overflows. */ - period = -(local64_read(&perf_event->count)); - - if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) - period &= GENMASK(31, 0); + period = compute_period(vcpu, idx, local64_read(&perf_event->count)); local64_set(&perf_event->hw.period_left, 0); perf_event->attr.sample_period = period; @@ -571,14 +591,13 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) /* * If counting with a 64bit counter, advertise it to the perf - * code, carefully dealing with the initial sample period. + * code, carefully dealing with the initial sample period + * which also depends on the overflow. */ - if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; - attr.sample_period = (-counter) & GENMASK(63, 0); - } else { - attr.sample_period = (-counter) & GENMASK(31, 0); - } + + attr.sample_period = compute_period(vcpu, select_idx, counter); event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); -- cgit v1.2.3 From 001d85bd6c039d3662a4f33a5d212ef3e0438b27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:21 +0000 Subject: KVM: arm64: PMU: Narrow the overflow checking when required For 64bit counters that overflow on a 32bit boundary, make sure we only check the bottom 32bit to generate a CHAIN event. Signed-off-by: Marc Zyngier Reviewed-by: Reiji Watanabe Link: https://lore.kernel.org/r/20221113163832.3154370-6-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index d050143326b5..9e6bc7edc4de 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -417,7 +417,8 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, reg = lower_32_bits(reg); __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - if (reg) /* No overflow? move on */ + /* No overflow? move on */ + if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg)) continue; /* Mark overflow */ -- cgit v1.2.3 From 0f1e172b54f7574ca6aa46b851b332896add955f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:22 +0000 Subject: KVM: arm64: PMU: Only narrow counters that are not 64bit wide The current PMU emulation sometimes narrows counters to 32bit if the counter isn't the cycle counter. As this is going to change with PMUv3p5 where the counters are all 64bit, fix the couple of cases where this happens unconditionally. Signed-off-by: Marc Zyngier Reviewed-by: Reiji Watanabe Link: https://lore.kernel.org/r/20221113163832.3154370-7-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 9e6bc7edc4de..1fab889dbc74 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -151,20 +151,17 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) */ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { - u64 counter, reg, val; + u64 reg, val; if (!pmc->perf_event) return; - counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); + val = kvm_pmu_get_counter_value(vcpu, pmc->idx); - if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { + if (pmc->idx == ARMV8_PMU_CYCLE_IDX) reg = PMCCNTR_EL0; - val = counter; - } else { + else reg = PMEVCNTR0_EL0 + pmc->idx; - val = lower_32_bits(counter); - } __vcpu_sys_reg(vcpu, reg) = val; @@ -414,7 +411,8 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, /* Increment this counter */ reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; - reg = lower_32_bits(reg); + if (!kvm_pmu_idx_is_64bit(vcpu, i)) + reg = lower_32_bits(reg); __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; /* No overflow? move on */ -- cgit v1.2.3 From 0cb9c3c87a9d3287eaf353936e6846d885102439 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:23 +0000 Subject: KVM: arm64: PMU: Add counter_index_to_*reg() helpers In order to reduce the boilerplate code, add two helpers returning the counter register index (resp. the event register) in the vcpu register file from the counter index. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-8-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 1fab889dbc74..faab0f57a45d 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -77,6 +77,16 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) return container_of(vcpu_arch, struct kvm_vcpu, arch); } +static u32 counter_index_to_reg(u64 idx) +{ + return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx; +} + +static u32 counter_index_to_evtreg(u64 idx) +{ + return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx; +} + /** * kvm_pmu_get_counter_value - get PMU counter value * @vcpu: The vcpu pointer @@ -91,8 +101,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) if (!kvm_vcpu_has_pmu(vcpu)) return 0; - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + reg = counter_index_to_reg(select_idx); counter = __vcpu_sys_reg(vcpu, reg); /* @@ -122,8 +131,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; + reg = counter_index_to_reg(select_idx); __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); /* Recreate the perf event to reflect the updated sample_period */ @@ -158,10 +166,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) val = kvm_pmu_get_counter_value(vcpu, pmc->idx); - if (pmc->idx == ARMV8_PMU_CYCLE_IDX) - reg = PMCCNTR_EL0; - else - reg = PMEVCNTR0_EL0 + pmc->idx; + reg = counter_index_to_reg(pmc->idx); __vcpu_sys_reg(vcpu, reg) = val; @@ -404,16 +409,16 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, u64 type, reg; /* Filter on event type */ - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i)); type &= kvm_pmu_event_mask(vcpu->kvm); if (type != event) continue; /* Increment this counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; if (!kvm_pmu_idx_is_64bit(vcpu, i)) reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; /* No overflow? move on */ if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg)) @@ -549,8 +554,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) struct perf_event_attr attr; u64 eventsel, counter, reg, data; - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; + reg = counter_index_to_evtreg(select_idx); data = __vcpu_sys_reg(vcpu, reg); kvm_pmu_stop_counter(vcpu, pmc); @@ -632,8 +636,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; + reg = counter_index_to_evtreg(select_idx); __vcpu_sys_reg(vcpu, reg) = data & mask; -- cgit v1.2.3 From 9917264d74d9063341968a8e071266358496777b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:24 +0000 Subject: KVM: arm64: PMU: Simplify setting a counter to a specific value kvm_pmu_set_counter_value() is pretty odd, as it tries to update the counter value while taking into account the value that is currently held by the running perf counter. This is not only complicated, this is quite wrong. Nowhere in the architecture is it said that the counter would be offset by something that is pending. The counter should be updated with the value set by SW, and start counting from there if required. Remove the odd computation and just assign the provided value after having released the perf event (which is then restarted). Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-9-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index faab0f57a45d..ea0c8411641f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -23,6 +23,7 @@ static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); static u32 kvm_pmu_event_mask(struct kvm *kvm) { @@ -131,8 +132,10 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; + kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]); + reg = counter_index_to_reg(select_idx); - __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); + __vcpu_sys_reg(vcpu, reg) = val; /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(vcpu, select_idx); -- cgit v1.2.3 From 26d2d0594d7016dbcbce4038aa202c2858d5a944 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:25 +0000 Subject: KVM: arm64: PMU: Do not let AArch32 change the counters' top 32 bits Even when using PMUv3p5 (which implies 64bit counters), there is no way for AArch32 to write to the top 32 bits of the counters. The only way to influence these bits (other than by counting events) is by writing PMCR.P==1. Make sure we obey the architecture and preserve the top 32 bits on a counter update. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-10-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index ea0c8411641f..7a945fa6dd03 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -119,13 +119,8 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) return counter; } -/** - * kvm_pmu_set_counter_value - set PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - * @val: The counter value - */ -void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) +static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, + bool force) { u64 reg; @@ -135,12 +130,36 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]); reg = counter_index_to_reg(select_idx); + + if (vcpu_mode_is_32bit(vcpu) && select_idx != ARMV8_PMU_CYCLE_IDX && + !force) { + /* + * Even with PMUv3p5, AArch32 cannot write to the top + * 32bit of the counters. The only possible course of + * action is to use PMCR.P, which will reset them to + * 0 (the only use of the 'force' parameter). + */ + val = __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32); + val |= lower_32_bits(val); + } + __vcpu_sys_reg(vcpu, reg) = val; /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(vcpu, select_idx); } +/** + * kvm_pmu_set_counter_value - set PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + * @val: The counter value + */ +void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) +{ + kvm_pmu_set_counter(vcpu, select_idx, val, false); +} + /** * kvm_pmu_release_perf_event - remove the perf event * @pmc: The PMU counter pointer @@ -533,7 +552,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) - kvm_pmu_set_counter_value(vcpu, i, 0); + kvm_pmu_set_counter(vcpu, i, 0, true); } } -- cgit v1.2.3 From 3d0dba5764b94308b8c4257ad64e383f11ce0c92 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:26 +0000 Subject: KVM: arm64: PMU: Move the ID_AA64DFR0_EL1.PMUver limit to VM creation As further patches will enable the selection of a PMU revision from userspace, sample the supported PMU revision at VM creation time, rather than building each time the ID_AA64DFR0_EL1 register is accessed. This shouldn't result in any change in behaviour. Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-11-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 4 ++++ arch/arm64/kvm/arm.c | 6 ++++++ arch/arm64/kvm/pmu-emul.c | 11 +++++++++++ arch/arm64/kvm/sys_regs.c | 36 ++++++++++++++++++++++++++++-------- include/kvm/arm_pmu.h | 6 ++++++ 5 files changed, 55 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 45e2136322ba..cc44e3bc528d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -163,6 +163,10 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + struct { + u8 imp:4; + u8 unimp:4; + } dfr0_pmuver; /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..f956aab438c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -164,6 +164,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_default_spectre(kvm); kvm_arm_init_hypercalls(kvm); + /* + * Initialise the default PMUver before there is a chance to + * create an actual PMU. + */ + kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit(); + return ret; out_free_stage2_pgd: kvm_free_stage2_pgd(&kvm->arch.mmu); diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 7a945fa6dd03..94ca2d17a4e4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -1047,3 +1047,14 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } + +u8 kvm_arm_pmu_get_pmuver_limit(void) +{ + u64 tmp; + + tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); + tmp = cpuid_feature_cap_perfmon_field(tmp, + ID_AA64DFR0_EL1_PMUVer_SHIFT, + ID_AA64DFR0_EL1_PMUVer_V3P4); + return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp); +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f4a7c5abcbca..297b4fcbf969 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1062,6 +1062,27 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_has_pmu(vcpu)) + return vcpu->kvm->arch.dfr0_pmuver.imp; + + return vcpu->kvm->arch.dfr0_pmuver.unimp; +} + +static u8 pmuver_to_perfmon(u8 pmuver) +{ + switch (pmuver) { + case ID_AA64DFR0_EL1_PMUVer_IMP: + return ID_DFR0_PERFMON_8_0; + case ID_AA64DFR0_EL1_PMUVer_IMP_DEF: + return ID_DFR0_PERFMON_IMP_DEF; + default: + /* Anything ARMv8.1+ and NI have the same value. For now. */ + return pmuver; + } +} + /* Read a sanitised cpufeature ID register by sys_reg_desc */ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r) { @@ -1111,18 +1132,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r /* Limit debug to ARMv8.0 */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); - /* Limit guests to PMUv3 for ARMv8.4 */ - val = cpuid_feature_cap_perfmon_field(val, - ID_AA64DFR0_EL1_PMUVer_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0); + /* Set PMUver to the required version */ + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), + vcpu_pmuver(vcpu)); /* Hide SPE from guests */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); break; case SYS_ID_DFR0_EL1: - /* Limit guests to PMUv3 for ARMv8.4 */ - val = cpuid_feature_cap_perfmon_field(val, - ID_DFR0_PERFMON_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_PERFMON_8_4 : 0); + val &= ~ARM64_FEATURE_MASK(ID_DFR0_PERFMON); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_PERFMON), + pmuver_to_perfmon(vcpu_pmuver(vcpu))); break; } diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 96b192139a23..812f729c9108 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -89,6 +89,8 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) +u8 kvm_arm_pmu_get_pmuver_limit(void); + #else struct kvm_pmu { }; @@ -154,6 +156,10 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} +static inline u8 kvm_arm_pmu_get_pmuver_limit(void) +{ + return 0; +} #endif -- cgit v1.2.3 From 60e651ff1f48bfdf8fec80d35510bd89ecf8c766 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:27 +0000 Subject: KVM: arm64: PMU: Allow ID_AA64DFR0_EL1.PMUver to be set from userspace Allow userspace to write ID_AA64DFR0_EL1, on the condition that only the PMUver field can be altered and be at most the one that was initially computed for the guest. Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-12-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 297b4fcbf969..49585258ae6c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1242,6 +1242,45 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, return 0; } +static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + u8 pmuver, host_pmuver; + bool valid_pmu; + + host_pmuver = kvm_arm_pmu_get_pmuver_limit(); + + /* + * Allow AA64DFR0_EL1.PMUver to be set from userspace as long + * as it doesn't promise more than what the HW gives us. We + * allow an IMPDEF PMU though, only if no PMU is supported + * (KVM backward compatibility handling). + */ + pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), val); + if ((pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver > host_pmuver)) + return -EINVAL; + + valid_pmu = (pmuver != 0 && pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF); + + /* Make sure view register and PMU support do match */ + if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) + return -EINVAL; + + /* We can only differ with PMUver, and anything else is an error */ + val ^= read_id_reg(vcpu, rd); + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + if (val) + return -EINVAL; + + if (valid_pmu) + vcpu->kvm->arch.dfr0_pmuver.imp = pmuver; + else + vcpu->kvm->arch.dfr0_pmuver.unimp = pmuver; + + return 0; +} + /* * cpufeature ID register user accessors * @@ -1503,7 +1542,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(4,7), /* CRm=5 */ - ID_SANITISED(ID_AA64DFR0_EL1), + { SYS_DESC(SYS_ID_AA64DFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_aa64dfr0_el1, }, ID_SANITISED(ID_AA64DFR1_EL1), ID_UNALLOCATED(5,2), ID_UNALLOCATED(5,3), -- cgit v1.2.3 From d82e0dfdfda73f91e7282e1083a2cd7cd366ea87 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:28 +0000 Subject: KVM: arm64: PMU: Allow ID_DFR0_EL1.PerfMon to be set from userspace Allow userspace to write ID_DFR0_EL1, on the condition that only the PerfMon field can be altered and be something that is compatible with what was computed for the AArch64 view of the guest. Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-13-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 49585258ae6c..b8ac58723459 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1070,6 +1070,19 @@ static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) return vcpu->kvm->arch.dfr0_pmuver.unimp; } +static u8 perfmon_to_pmuver(u8 perfmon) +{ + switch (perfmon) { + case ID_DFR0_PERFMON_8_0: + return ID_AA64DFR0_EL1_PMUVer_IMP; + case ID_DFR0_PERFMON_IMP_DEF: + return ID_AA64DFR0_EL1_PMUVer_IMP_DEF; + default: + /* Anything ARMv8.1+ and NI have the same value. For now. */ + return perfmon; + } +} + static u8 pmuver_to_perfmon(u8 pmuver) { switch (pmuver) { @@ -1281,6 +1294,46 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, return 0; } +static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + u8 perfmon, host_perfmon; + bool valid_pmu; + + host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit()); + + /* + * Allow DFR0_EL1.PerfMon to be set from userspace as long as + * it doesn't promise more than what the HW gives us on the + * AArch64 side (as everything is emulated with that), and + * that this is a PMUv3. + */ + perfmon = FIELD_GET(ARM64_FEATURE_MASK(ID_DFR0_PERFMON), val); + if ((perfmon != ID_DFR0_PERFMON_IMP_DEF && perfmon > host_perfmon) || + (perfmon != 0 && perfmon < ID_DFR0_PERFMON_8_0)) + return -EINVAL; + + valid_pmu = (perfmon != 0 && perfmon != ID_DFR0_PERFMON_IMP_DEF); + + /* Make sure view register and PMU support do match */ + if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) + return -EINVAL; + + /* We can only differ with PerfMon, and anything else is an error */ + val ^= read_id_reg(vcpu, rd); + val &= ~ARM64_FEATURE_MASK(ID_DFR0_PERFMON); + if (val) + return -EINVAL; + + if (valid_pmu) + vcpu->kvm->arch.dfr0_pmuver.imp = perfmon_to_pmuver(perfmon); + else + vcpu->kvm->arch.dfr0_pmuver.unimp = perfmon_to_pmuver(perfmon); + + return 0; +} + /* * cpufeature ID register user accessors * @@ -1502,7 +1555,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* CRm=1 */ AA32_ID_SANITISED(ID_PFR0_EL1), AA32_ID_SANITISED(ID_PFR1_EL1), - AA32_ID_SANITISED(ID_DFR0_EL1), + { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_dfr0_el1, + .visibility = aa32_id_visibility, }, ID_HIDDEN(ID_AFR0_EL1), AA32_ID_SANITISED(ID_MMFR0_EL1), AA32_ID_SANITISED(ID_MMFR1_EL1), -- cgit v1.2.3 From 11af4c37165e36a6090172ded5d06acdf15206da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:29 +0000 Subject: KVM: arm64: PMU: Implement PMUv3p5 long counter support PMUv3p5 (which is mandatory with ARMv8.5) comes with some extra features: - All counters are 64bit - The overflow point is controlled by the PMCR_EL0.LP bit Add the required checks in the helpers that control counter width and overflow, as well as the sysreg handling for the LP bit. A new kvm_pmu_is_3p5() helper makes it easy to spot the PMUv3p5 specific handling. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-14-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 8 +++++--- arch/arm64/kvm/sys_regs.c | 4 ++++ include/kvm/arm_pmu.h | 7 +++++++ 3 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 94ca2d17a4e4..7e25ff73cbba 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -52,13 +52,15 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) */ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) { - return (select_idx == ARMV8_PMU_CYCLE_IDX); + return (select_idx == ARMV8_PMU_CYCLE_IDX || kvm_pmu_is_3p5(vcpu)); } static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx) { - return (select_idx == ARMV8_PMU_CYCLE_IDX && - __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); + u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0); + + return (select_idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || + (select_idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); } static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b8ac58723459..67eac0f747be 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -654,6 +654,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -703,6 +705,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val |= p->regval & ARMV8_PMU_PMCR_MASK; if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 812f729c9108..628775334d5e 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -89,6 +89,12 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) +/* + * Evaluates as true when emulating PMUv3p5, and false otherwise. + */ +#define kvm_pmu_is_3p5(vcpu) \ + (vcpu->kvm->arch.dfr0_pmuver.imp >= ID_AA64DFR0_EL1_PMUVer_V3P5) + u8 kvm_arm_pmu_get_pmuver_limit(void); #else @@ -153,6 +159,7 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) } #define kvm_vcpu_has_pmu(vcpu) ({ false; }) +#define kvm_pmu_is_3p5(vcpu) ({ false; }) static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} -- cgit v1.2.3 From 1f7c978282855d6b2abd608064004c74902e791d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:30 +0000 Subject: KVM: arm64: PMU: Allow PMUv3p5 to be exposed to the guest Now that the infrastructure is in place, bump the PMU support up to PMUv3p5. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-15-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 7e25ff73cbba..be881ae67133 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -1057,6 +1057,6 @@ u8 kvm_arm_pmu_get_pmuver_limit(void) tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); tmp = cpuid_feature_cap_perfmon_field(tmp, ID_AA64DFR0_EL1_PMUVer_SHIFT, - ID_AA64DFR0_EL1_PMUVer_V3P4); + ID_AA64DFR0_EL1_PMUVer_V3P5); return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp); } -- cgit v1.2.3 From 9bad925dd741408825590eccc495d073cc246de0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:31 +0000 Subject: KVM: arm64: PMU: Simplify vcpu computation on perf overflow notification The way we compute the target vcpu on getting an overflow is a bit odd, as we use the PMC array as an anchor for kvm_pmc_to_vcpu, while we could directly compute the correct address. Get rid of the intermediate step and directly compute the target vcpu. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-16-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index be881ae67133..49a004660497 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -405,11 +405,8 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) { struct kvm_vcpu *vcpu; - struct kvm_pmu *pmu; - - pmu = container_of(work, struct kvm_pmu, overflow_work); - vcpu = kvm_pmc_to_vcpu(pmu->pmc); + vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work); kvm_vcpu_kick(vcpu); } -- cgit v1.2.3 From d56bdce586e7fabd2b3339f476e0e4c059b24e19 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:32 +0000 Subject: KVM: arm64: PMU: Make kvm_pmc the main data structure The PMU code has historically been torn between referencing a counter as a pair vcpu+index or as the PMC pointer. Given that it is pretty easy to go from one representation to the other, standardise on the latter which, IMHO, makes the code slightly more readable. YMMV. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-17-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 174 +++++++++++++++++++++++----------------------- 1 file changed, 87 insertions(+), 87 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 49a004660497..3295dea34f4c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -22,9 +22,19 @@ DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); -static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc); static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); +static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc) +{ + return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]); +} + +static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx) +{ + return &vcpu->arch.pmu.pmc[cnt_idx]; +} + static u32 kvm_pmu_event_mask(struct kvm *kvm) { unsigned int pmuver; @@ -46,38 +56,27 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) } /** - * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter - * @vcpu: The vcpu pointer - * @select_idx: The counter index + * kvm_pmc_is_64bit - determine if counter is 64bit + * @pmc: counter context */ -static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc) { - return (select_idx == ARMV8_PMU_CYCLE_IDX || kvm_pmu_is_3p5(vcpu)); + return (pmc->idx == ARMV8_PMU_CYCLE_IDX || + kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc))); } -static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc) { - u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0); + u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0); - return (select_idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || - (select_idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); + return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || + (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); } -static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) +static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc) { - return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX && - !kvm_pmu_idx_has_64bit_overflow(vcpu, idx)); -} - -static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) -{ - struct kvm_pmu *pmu; - struct kvm_vcpu_arch *vcpu_arch; - - pmc -= pmc->idx; - pmu = container_of(pmc, struct kvm_pmu, pmc[0]); - vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); - return container_of(vcpu_arch, struct kvm_vcpu, arch); + return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX && + !kvm_pmc_has_64bit_overflow(pmc)); } static u32 counter_index_to_reg(u64 idx) @@ -90,21 +89,12 @@ static u32 counter_index_to_evtreg(u64 idx) return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx; } -/** - * kvm_pmu_get_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - */ -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) +static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); u64 counter, reg, enabled, running; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - - if (!kvm_vcpu_has_pmu(vcpu)) - return 0; - reg = counter_index_to_reg(select_idx); + reg = counter_index_to_reg(pmc->idx); counter = __vcpu_sys_reg(vcpu, reg); /* @@ -115,25 +105,35 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) + if (!kvm_pmc_is_64bit(pmc)) counter = lower_32_bits(counter); return counter; } -static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, - bool force) +/** + * kvm_pmu_get_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) { - u64 reg; - if (!kvm_vcpu_has_pmu(vcpu)) - return; + return 0; - kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]); + return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx)); +} - reg = counter_index_to_reg(select_idx); +static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) +{ + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); + u64 reg; - if (vcpu_mode_is_32bit(vcpu) && select_idx != ARMV8_PMU_CYCLE_IDX && + kvm_pmu_release_perf_event(pmc); + + reg = counter_index_to_reg(pmc->idx); + + if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX && !force) { /* * Even with PMUv3p5, AArch32 cannot write to the top @@ -148,7 +148,7 @@ static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, __vcpu_sys_reg(vcpu, reg) = val; /* Recreate the perf event to reflect the updated sample_period */ - kvm_pmu_create_perf_event(vcpu, select_idx); + kvm_pmu_create_perf_event(pmc); } /** @@ -159,7 +159,10 @@ static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, */ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { - kvm_pmu_set_counter(vcpu, select_idx, val, false); + if (!kvm_vcpu_has_pmu(vcpu)) + return; + + kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false); } /** @@ -181,14 +184,15 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) * * If this counter has been configured to monitor some event, release it here. */ -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) +static void kvm_pmu_stop_counter(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); u64 reg, val; if (!pmc->perf_event) return; - val = kvm_pmu_get_counter_value(vcpu, pmc->idx); + val = kvm_pmu_get_pmc_value(pmc); reg = counter_index_to_reg(pmc->idx); @@ -219,11 +223,10 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) { unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); - struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; for_each_set_bit(i, &mask, 32) - kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); + kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i)); } /** @@ -234,10 +237,9 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) - kvm_pmu_release_perf_event(&pmu->pmc[i]); + kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i)); irq_work_sync(&vcpu->arch.pmu.overflow_work); } @@ -262,9 +264,6 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; - if (!kvm_vcpu_has_pmu(vcpu)) return; @@ -272,13 +271,15 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + struct kvm_pmc *pmc; + if (!(val & BIT(i))) continue; - pmc = &pmu->pmc[i]; + pmc = kvm_vcpu_idx_to_pmc(vcpu, i); if (!pmc->perf_event) { - kvm_pmu_create_perf_event(vcpu, i); + kvm_pmu_create_perf_event(pmc); } else { perf_event_enable(pmc->perf_event); if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) @@ -297,17 +298,17 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; if (!kvm_vcpu_has_pmu(vcpu) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + struct kvm_pmc *pmc; + if (!(val & BIT(i))) continue; - pmc = &pmu->pmc[i]; + pmc = kvm_vcpu_idx_to_pmc(vcpu, i); if (pmc->perf_event) perf_event_disable(pmc->perf_event); @@ -427,6 +428,7 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) { + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i); u64 type, reg; /* Filter on event type */ @@ -437,30 +439,30 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, /* Increment this counter */ reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; - if (!kvm_pmu_idx_is_64bit(vcpu, i)) + if (!kvm_pmc_is_64bit(pmc)) reg = lower_32_bits(reg); __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; /* No overflow? move on */ - if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg)) + if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg)) continue; /* Mark overflow */ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); - if (kvm_pmu_counter_can_chain(vcpu, i)) + if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(i + 1), ARMV8_PMUV3_PERFCTR_CHAIN); } } /* Compute the sample period for a given counter value */ -static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter) +static u64 compute_period(struct kvm_pmc *pmc, u64 counter) { u64 val; - if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { - if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx)) + if (kvm_pmc_is_64bit(pmc)) { + if (!kvm_pmc_has_64bit_overflow(pmc)) val = -(counter & GENMASK(31, 0)); else val = (-counter) & GENMASK(63, 0); @@ -490,7 +492,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, * Reset the sample period to the architectural limit, * i.e. the point where the counter overflows. */ - period = compute_period(vcpu, idx, local64_read(&perf_event->count)); + period = compute_period(pmc, local64_read(&perf_event->count)); local64_set(&perf_event->hw.period_left, 0); perf_event->attr.sample_period = period; @@ -498,7 +500,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); - if (kvm_pmu_counter_can_chain(vcpu, idx)) + if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(idx + 1), ARMV8_PMUV3_PERFCTR_CHAIN); @@ -551,34 +553,33 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) - kvm_pmu_set_counter(vcpu, i, 0, true); + kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true); } } -static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && - (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx)); } /** * kvm_pmu_create_perf_event - create a perf event for a counter - * @vcpu: The vcpu pointer - * @select_idx: The number of selected counter + * @pmc: Counter context */ -static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) +static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; struct perf_event *event; struct perf_event_attr attr; - u64 eventsel, counter, reg, data; + u64 eventsel, reg, data; - reg = counter_index_to_evtreg(select_idx); + reg = counter_index_to_evtreg(pmc->idx); data = __vcpu_sys_reg(vcpu, reg); - kvm_pmu_stop_counter(vcpu, pmc); + kvm_pmu_stop_counter(pmc); if (pmc->idx == ARMV8_PMU_CYCLE_IDX) eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; else @@ -604,24 +605,22 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) attr.type = arm_pmu->pmu.type; attr.size = sizeof(attr); attr.pinned = 1; - attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); + attr.disabled = !kvm_pmu_counter_is_enabled(pmc); attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ attr.config = eventsel; - counter = kvm_pmu_get_counter_value(vcpu, select_idx); - /* * If counting with a 64bit counter, advertise it to the perf * code, carefully dealing with the initial sample period * which also depends on the overflow. */ - if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) + if (kvm_pmc_is_64bit(pmc)) attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; - attr.sample_period = compute_period(vcpu, select_idx, counter); + attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc)); event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); @@ -648,6 +647,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 select_idx) { + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx); u64 reg, mask; if (!kvm_vcpu_has_pmu(vcpu)) @@ -657,11 +657,11 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); - reg = counter_index_to_evtreg(select_idx); + reg = counter_index_to_evtreg(pmc->idx); __vcpu_sys_reg(vcpu, reg) = data & mask; - kvm_pmu_create_perf_event(vcpu, select_idx); + kvm_pmu_create_perf_event(pmc); } void kvm_host_pmu_init(struct arm_pmu *pmu) -- cgit v1.2.3 From 3a5154c723ba5ceb9ce374a7307e03263c03fd29 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 18 Nov 2022 18:22:20 +0000 Subject: KVM: arm64: Take a pointer to walker data in kvm_dereference_pteref() Rather than passing through the state of the KVM_PGTABLE_WALK_SHARED flag, just take a pointer to the whole walker structure instead. Move around struct kvm_pgtable and the RCU indirection such that the associated ifdeffery remains in one place while ensuring the walker + flags definitions precede their use. No functional change intended. Signed-off-by: Oliver Upton Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118182222.3932898-2-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 144 ++++++++++++++++++----------------- arch/arm64/kvm/hyp/pgtable.c | 6 +- 2 files changed, 76 insertions(+), 74 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index a874ce0ce7b5..f23af693e3c5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -37,54 +37,6 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; -/* - * RCU cannot be used in a non-kernel context such as the hyp. As such, page - * table walkers used in hyp do not call into RCU and instead use other - * synchronization mechanisms (such as a spinlock). - */ -#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) - -typedef kvm_pte_t *kvm_pteref_t; - -static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) -{ - return pteref; -} - -static inline void kvm_pgtable_walk_begin(void) {} -static inline void kvm_pgtable_walk_end(void) {} - -static inline bool kvm_pgtable_walk_lock_held(void) -{ - return true; -} - -#else - -typedef kvm_pte_t __rcu *kvm_pteref_t; - -static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) -{ - return rcu_dereference_check(pteref, !shared); -} - -static inline void kvm_pgtable_walk_begin(void) -{ - rcu_read_lock(); -} - -static inline void kvm_pgtable_walk_end(void) -{ - rcu_read_unlock(); -} - -static inline bool kvm_pgtable_walk_lock_held(void) -{ - return rcu_read_lock_held(); -} - -#endif - #define KVM_PTE_VALID BIT(0) #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) @@ -212,29 +164,6 @@ enum kvm_pgtable_prot { typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, enum kvm_pgtable_prot prot); -/** - * struct kvm_pgtable - KVM page-table. - * @ia_bits: Maximum input address size, in bits. - * @start_level: Level at which the page-table walk starts. - * @pgd: Pointer to the first top-level entry of the page-table. - * @mm_ops: Memory management callbacks. - * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. - * @flags: Stage-2 page-table flags. - * @force_pte_cb: Function that returns true if page level mappings must - * be used instead of block mappings. - */ -struct kvm_pgtable { - u32 ia_bits; - u32 start_level; - kvm_pteref_t pgd; - struct kvm_pgtable_mm_ops *mm_ops; - - /* Stage-2 only */ - struct kvm_s2_mmu *mmu; - enum kvm_pgtable_stage2_flags flags; - kvm_pgtable_force_pte_cb_t force_pte_cb; -}; - /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -285,6 +214,79 @@ struct kvm_pgtable_walker { const enum kvm_pgtable_walk_flags flags; }; +/* + * RCU cannot be used in a non-kernel context such as the hyp. As such, page + * table walkers used in hyp do not call into RCU and instead use other + * synchronization mechanisms (such as a spinlock). + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + +typedef kvm_pte_t *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return pteref; +} + +static inline void kvm_pgtable_walk_begin(void) {} +static inline void kvm_pgtable_walk_end(void) {} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return true; +} + +#else + +typedef kvm_pte_t __rcu *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); +} + +static inline void kvm_pgtable_walk_begin(void) +{ + rcu_read_lock(); +} + +static inline void kvm_pgtable_walk_end(void) +{ + rcu_read_unlock(); +} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return rcu_read_lock_held(); +} + +#endif + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + * @flags: Stage-2 page-table flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. + */ +struct kvm_pgtable { + u32 ia_bits; + u32 start_level; + kvm_pteref_t pgd; + struct kvm_pgtable_mm_ops *mm_ops; + + /* Stage-2 only */ + struct kvm_s2_mmu *mmu; + enum kvm_pgtable_stage2_flags flags; + kvm_pgtable_force_pte_cb_t force_pte_cb; +}; + /** * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. * @pgt: Uninitialised page-table structure to initialise. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 5bca9610d040..b5b91a882836 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -188,7 +188,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, kvm_pteref_t pteref, u32 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; - kvm_pte_t *ptep = kvm_dereference_pteref(pteref, flags & KVM_PGTABLE_WALK_SHARED); + kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref); struct kvm_pgtable_visit_ctx ctx = { .ptep = ptep, .old = READ_ONCE(*ptep), @@ -558,7 +558,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); - pgt->mm_ops->put_page(kvm_dereference_pteref(pgt->pgd, false)); + pgt->mm_ops->put_page(kvm_dereference_pteref(&walker, pgt->pgd)); pgt->pgd = NULL; } @@ -1241,7 +1241,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(pgt->pgd, false), pgd_sz); + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); pgt->pgd = NULL; } -- cgit v1.2.3 From b7833bf202e3068abb77c642a0843f696e9c8d38 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 18 Nov 2022 18:22:21 +0000 Subject: KVM: arm64: Don't acquire RCU read lock for exclusive table walks Marek reported a BUG resulting from the recent parallel faults changes, as the hyp stage-1 map walker attempted to allocate table memory while holding the RCU read lock: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:274 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0 preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 2 locks held by swapper/0/1: #0: ffff80000a8a44d0 (kvm_hyp_pgd_mutex){+.+.}-{3:3}, at: __create_hyp_mappings+0x80/0xc4 #1: ffff80000a927720 (rcu_read_lock){....}-{1:2}, at: kvm_pgtable_walk+0x0/0x1f4 CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc3+ #5918 Hardware name: Raspberry Pi 3 Model B (DT) Call trace: dump_backtrace.part.0+0xe4/0xf0 show_stack+0x18/0x40 dump_stack_lvl+0x8c/0xb8 dump_stack+0x18/0x34 __might_resched+0x178/0x220 __might_sleep+0x48/0xa0 prepare_alloc_pages+0x178/0x1a0 __alloc_pages+0x9c/0x109c alloc_page_interleave+0x1c/0xc4 alloc_pages+0xec/0x160 get_zeroed_page+0x1c/0x44 kvm_hyp_zalloc_page+0x14/0x20 hyp_map_walker+0xd4/0x134 kvm_pgtable_visitor_cb.isra.0+0x38/0x5c __kvm_pgtable_walk+0x1a4/0x220 kvm_pgtable_walk+0x104/0x1f4 kvm_pgtable_hyp_map+0x80/0xc4 __create_hyp_mappings+0x9c/0xc4 kvm_mmu_init+0x144/0x1cc kvm_arch_init+0xe4/0xef4 kvm_init+0x3c/0x3d0 arm_init+0x20/0x30 do_one_initcall+0x74/0x400 kernel_init_freeable+0x2e0/0x350 kernel_init+0x24/0x130 ret_from_fork+0x10/0x20 Since the hyp stage-1 table walkers are serialized by kvm_hyp_pgd_mutex, RCU protection really doesn't add anything. Don't acquire the RCU read lock for an exclusive walk. Reported-by: Marek Szyprowski Signed-off-by: Oliver Upton Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118182222.3932898-3-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 14 ++++++++------ arch/arm64/kvm/hyp/pgtable.c | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index f23af693e3c5..4b6b52ebc11c 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -229,8 +229,8 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return pteref; } -static inline void kvm_pgtable_walk_begin(void) {} -static inline void kvm_pgtable_walk_end(void) {} +static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) {} +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {} static inline bool kvm_pgtable_walk_lock_held(void) { @@ -247,14 +247,16 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); } -static inline void kvm_pgtable_walk_begin(void) +static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { - rcu_read_lock(); + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_lock(); } -static inline void kvm_pgtable_walk_end(void) +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) { - rcu_read_unlock(); + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_unlock(); } static inline bool kvm_pgtable_walk_lock_held(void) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index b5b91a882836..d6f3753cb87e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -289,9 +289,9 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, }; int r; - kvm_pgtable_walk_begin(); + kvm_pgtable_walk_begin(walker); r = _kvm_pgtable_walk(pgt, &walk_data); - kvm_pgtable_walk_end(); + kvm_pgtable_walk_end(walker); return r; } -- cgit v1.2.3 From 5e806c5812e8012a83496cf96bdba266b3aec428 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 18 Nov 2022 18:22:22 +0000 Subject: KVM: arm64: Reject shared table walks in the hyp code Exclusive table walks are the only supported table walk in the hyp, as there is no construct like RCU available in the hypervisor code. Reject any attempt to do a shared table walk by returning an error and allowing the caller to clean up the mess. Suggested-by: Will Deacon Signed-off-by: Oliver Upton Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118182222.3932898-4-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 17 +++++++++++++++-- arch/arm64/kvm/hyp/pgtable.c | 5 ++++- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 4b6b52ebc11c..d5cb01f8dc06 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -229,7 +229,18 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return pteref; } -static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) {} +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +{ + /* + * Due to the lack of RCU (or a similar protection scheme), only + * non-shared table walkers are allowed in the hypervisor. + */ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + return -EPERM; + + return 0; +} + static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {} static inline bool kvm_pgtable_walk_lock_held(void) @@ -247,10 +258,12 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); } -static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { if (walker->flags & KVM_PGTABLE_WALK_SHARED) rcu_read_lock(); + + return 0; } static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index d6f3753cb87e..58dbe0ab567f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -289,7 +289,10 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, }; int r; - kvm_pgtable_walk_begin(walker); + r = kvm_pgtable_walk_begin(walker); + if (r) + return r; + r = _kvm_pgtable_walk(pgt, &walk_data); kvm_pgtable_walk_end(walker); -- cgit v1.2.3 From 86815735aa571d493cf5768cad5fa8e6fd9c7ba8 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 28 Nov 2022 19:26:29 +0530 Subject: KVM: arm64: PMU: Replace version number '0' with ID_AA64DFR0_EL1_PMUVer_NI kvm_host_pmu_init() returns when detected PMU is either not implemented, or implementation defined. kvm_pmu_probe_armpmu() also has a similar situation. Extracted ID_AA64DFR0_EL1_PMUVer value, when PMU is not implemented is '0', which can be replaced with ID_AA64DFR0_EL1_PMUVer_NI defined as '0b0000'. Cc: Arnaldo Carvalho de Melo Cc: Marc Zyngier Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Cc: linux-perf-users@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221128135629.118346-1-anshuman.khandual@arm.com --- arch/arm64/kvm/pmu-emul.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 3295dea34f4c..bb7251e670a9 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -668,7 +668,8 @@ void kvm_host_pmu_init(struct arm_pmu *pmu) { struct arm_pmu_entry *entry; - if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) + if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || + pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) return; mutex_lock(&arm_pmus_lock); @@ -721,7 +722,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void) if (event->pmu) { pmu = to_arm_pmu(event->pmu); - if (pmu->pmuver == 0 || + if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) pmu = NULL; } -- cgit v1.2.3 From 292e8f1494764ac46dd1b7dd46fa317db691436c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Nov 2022 10:40:02 +0000 Subject: KVM: arm64: PMU: Simplify PMCR_EL0 reset handling Resetting PMCR_EL0 is a pretty involved process that includes poisoning some of the writable bits, just because we can. It makes it hard to reason about about what gets configured, and just resetting things to 0 seems like a much saner option. Reduce reset_pmcr() to just preserving PMCR_EL0.N from the host, and setting PMCR_EL0.LC if we don't support AArch32. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 67eac0f747be..eb56ad031116 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -639,24 +639,18 @@ static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 pmcr, val; + u64 pmcr; /* No PMU available, PMCR_EL0 may UNDEF... */ if (!kvm_arm_support_pmu_v3()) return; - pmcr = read_sysreg(pmcr_el0); - /* - * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN - * except PMCR.E resetting to zero. - */ - val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) - | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); + /* Only preserve PMCR_EL0.N, and reset the rest to 0 */ + pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK; if (!kvm_supports_32bit_el0()) - val |= ARMV8_PMU_PMCR_LC; - if (!kvm_pmu_is_3p5(vcpu)) - val &= ~ARMV8_PMU_PMCR_LP; - __vcpu_sys_reg(vcpu, r->reg) = val; + pmcr |= ARMV8_PMU_PMCR_LC; + + __vcpu_sys_reg(vcpu, r->reg) = pmcr; } static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) -- cgit v1.2.3 From 64d6820d64c0a206e744bd8945374d563a76c16c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Nov 2022 10:44:59 +0000 Subject: KVM: arm64: PMU: Sanitise PMCR_EL0.LP on first vcpu run Userspace can play some dirty tricks on us by selecting a given PMU version (such as PMUv3p5), restore a PMCR_EL0 value that has PMCR_EL0.LP set, and then switch the PMU version to PMUv3p1, for example. In this situation, we end-up with PMCR_EL0.LP being set and spreading havoc in the PMU emulation. This is specially hard as the first two step can be done on one vcpu and the third step on another, meaning that we need to sanitise *all* vcpus when the PMU version is changed. In orer to avoid a pretty complicated locking situation, defer the sanitisation of PMCR_EL0 to the point where the vcpu is actually run for the first tine, using the existing KVM_REQ_RELOAD_PMU request that calls into kvm_pmu_handle_pmcr(). There is still an obscure corner case where userspace could do the above trick, and then save the VM without running it. They would then observe an inconsistent state (PMUv3.1 + LP set), but that state will be fixed on the first run anyway whenever the guest gets restored on a host. Reported-by: Reiji Watanabe Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 6 ++++++ arch/arm64/kvm/sys_regs.c | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index bb7251e670a9..d8ea39943086 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -538,6 +538,12 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; + /* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */ + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; + + __vcpu_sys_reg(vcpu, PMCR_EL0) = val; + if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index eb56ad031116..528d253c571a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -693,15 +693,15 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) { - /* Only update writeable bits of PMCR */ + /* + * Only update writeable bits of PMCR (continuing into + * kvm_pmu_handle_pmcr() as well) + */ val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; - if (!kvm_pmu_is_3p5(vcpu)) - val &= ~ARMV8_PMU_PMCR_LP; - __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); } else { -- cgit v1.2.3 From b0284cd29a957e62d60c2886fd663be93c56f9c0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2022 18:10:34 -0700 Subject: mm: Do not enable PG_arch_2 for all 64-bit architectures Commit 4beba9486abd ("mm: Add PG_arch_2 page flag") introduced a new page flag for all 64-bit architectures. However, even if an architecture is 64-bit, it may still have limited spare bits in the 'flags' member of 'struct page'. This may happen if an architecture enables SPARSEMEM without SPARSEMEM_VMEMMAP as is the case with the newly added loongarch. This architecture port needs 19 more bits for the sparsemem section information and, while it is currently fine with PG_arch_2, adding any more PG_arch_* flags will trigger build-time warnings. Add a new CONFIG_ARCH_USES_PG_ARCH_X option which can be selected by architectures that need more PG_arch_* flags beyond PG_arch_1. Select it on arm64. Signed-off-by: Catalin Marinas [pcc@google.com: fix build with CONFIG_ARM64_MTE disabled] Signed-off-by: Peter Collingbourne Reported-by: kernel test robot Cc: Andrew Morton Cc: Steven Price Reviewed-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-2-pcc@google.com --- arch/arm64/Kconfig | 1 + fs/proc/page.c | 2 +- include/linux/page-flags.h | 2 +- include/trace/events/mmflags.h | 8 ++++---- mm/Kconfig | 8 ++++++++ mm/huge_memory.c | 2 +- 6 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 505c8a1ccbe0..cd93d0738425 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1965,6 +1965,7 @@ config ARM64_MTE depends on ARM64_PAN select ARCH_HAS_SUBPAGE_FAULTS select ARCH_USES_HIGH_VMA_FLAGS + select ARCH_USES_PG_ARCH_X help Memory Tagging (part of the ARMv8.5 Extensions) provides architectural support for run-time, always-on detection of diff --git a/fs/proc/page.c b/fs/proc/page.c index f2273b164535..882525c8e94c 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -219,7 +219,7 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); -#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_USES_PG_ARCH_X u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2); #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0b0ae5084e60..5dc7977edf9d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -132,7 +132,7 @@ enum pageflags { PG_young, PG_idle, #endif -#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_USES_PG_ARCH_X PG_arch_2, #endif #ifdef CONFIG_KASAN_HW_TAGS diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index e87cb2b80ed3..d9f6d35fb150 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -91,10 +91,10 @@ #define IF_HAVE_PG_IDLE(flag,string) #endif -#ifdef CONFIG_64BIT -#define IF_HAVE_PG_ARCH_2(flag,string) ,{1UL << flag, string} +#ifdef CONFIG_ARCH_USES_PG_ARCH_X +#define IF_HAVE_PG_ARCH_X(flag,string) ,{1UL << flag, string} #else -#define IF_HAVE_PG_ARCH_2(flag,string) +#define IF_HAVE_PG_ARCH_X(flag,string) #endif #ifdef CONFIG_KASAN_HW_TAGS @@ -130,7 +130,7 @@ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ IF_HAVE_PG_IDLE(PG_young, "young" ) \ IF_HAVE_PG_IDLE(PG_idle, "idle" ) \ -IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) \ +IF_HAVE_PG_ARCH_X(PG_arch_2, "arch_2" ) \ IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison") #define show_page_flags(flags) \ diff --git a/mm/Kconfig b/mm/Kconfig index 57e1d8c5b505..807bd7192f51 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1005,6 +1005,14 @@ config ARCH_USES_HIGH_VMA_FLAGS config ARCH_HAS_PKEYS bool +config ARCH_USES_PG_ARCH_X + bool + help + Enable the definition of PG_arch_x page flags with x > 1. Only + suitable for 64-bit architectures with CONFIG_FLATMEM or + CONFIG_SPARSEMEM_VMEMMAP enabled, otherwise there may not be + enough room for additional bits in page->flags. + config VM_EVENT_COUNTERS default y bool "Enable VM event counters for /proc/vmstat" if EXPERT diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 561a42567477..76e7b973919c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2444,7 +2444,7 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_workingset) | (1L << PG_locked) | (1L << PG_unevictable) | -#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_USES_PG_ARCH_X (1L << PG_arch_2) | #endif (1L << PG_dirty) | -- cgit v1.2.3 From e059853d14ca4ed0f6a190d7109487918a22a976 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2022 18:10:35 -0700 Subject: arm64: mte: Fix/clarify the PG_mte_tagged semantics Currently the PG_mte_tagged page flag mostly means the page contains valid tags and it should be set after the tags have been cleared or restored. However, in mte_sync_tags() it is set before setting the tags to avoid, in theory, a race with concurrent mprotect(PROT_MTE) for shared pages. However, a concurrent mprotect(PROT_MTE) with a copy on write in another thread can cause the new page to have stale tags. Similarly, tag reading via ptrace() can read stale tags if the PG_mte_tagged flag is set before actually clearing/restoring the tags. Fix the PG_mte_tagged semantics so that it is only set after the tags have been cleared or restored. This is safe for swap restoring into a MAP_SHARED or CoW page since the core code takes the page lock. Add two functions to test and set the PG_mte_tagged flag with acquire and release semantics. The downside is that concurrent mprotect(PROT_MTE) on a MAP_SHARED page may cause tag loss. This is already the case for KVM guests if a VMM changes the page protection while the guest triggers a user_mem_abort(). Signed-off-by: Catalin Marinas [pcc@google.com: fix build with CONFIG_ARM64_MTE disabled] Signed-off-by: Peter Collingbourne Reviewed-by: Cornelia Huck Reviewed-by: Steven Price Cc: Will Deacon Cc: Marc Zyngier Cc: Peter Collingbourne Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-3-pcc@google.com --- arch/arm64/include/asm/mte.h | 30 ++++++++++++++++++++++++++++++ arch/arm64/include/asm/pgtable.h | 2 +- arch/arm64/kernel/cpufeature.c | 4 +++- arch/arm64/kernel/elfcore.c | 2 +- arch/arm64/kernel/hibernate.c | 2 +- arch/arm64/kernel/mte.c | 17 +++++++++++------ arch/arm64/kvm/guest.c | 4 ++-- arch/arm64/kvm/mmu.c | 4 ++-- arch/arm64/mm/copypage.c | 5 +++-- arch/arm64/mm/fault.c | 2 +- arch/arm64/mm/mteswap.c | 2 +- 11 files changed, 56 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 760c62f8e22f..3f8199ba265a 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -37,6 +37,29 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 +static inline void set_page_mte_tagged(struct page *page) +{ + /* + * Ensure that the tags written prior to this function are visible + * before the page flags update. + */ + smp_wmb(); + set_bit(PG_mte_tagged, &page->flags); +} + +static inline bool page_mte_tagged(struct page *page) +{ + bool ret = test_bit(PG_mte_tagged, &page->flags); + + /* + * If the page is tagged, ensure ordering with a likely subsequent + * read of the tags. + */ + if (ret) + smp_rmb(); + return ret; +} + void mte_zero_clear_page_tags(void *addr); void mte_sync_tags(pte_t old_pte, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); @@ -56,6 +79,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size); /* unused if !CONFIG_ARM64_MTE, silence the compiler */ #define PG_mte_tagged 0 +static inline void set_page_mte_tagged(struct page *page) +{ +} +static inline bool page_mte_tagged(struct page *page) +{ + return false; +} static inline void mte_zero_clear_page_tags(void *addr) { } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 71a1af42f0e8..98b638441521 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1050,7 +1050,7 @@ static inline void arch_swap_invalidate_area(int type) static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) { if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) - set_bit(PG_mte_tagged, &folio->flags); + set_page_mte_tagged(&folio->page); } #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6062454a9067..df11cfe61fcb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2050,8 +2050,10 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) * Clear the tags in the zero page. This needs to be done via the * linear map which has the Tagged attribute. */ - if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags)) + if (!page_mte_tagged(ZERO_PAGE(0))) { mte_clear_page_tags(lm_alias(empty_zero_page)); + set_page_mte_tagged(ZERO_PAGE(0)); + } kasan_init_hw_tags_cpu(); } diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 27ef7ad3ffd2..353009d7f307 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -47,7 +47,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm, * Pages mapped in user space as !pte_access_permitted() (e.g. * PROT_EXEC only) may not have the PG_mte_tagged flag set. */ - if (!test_bit(PG_mte_tagged, &page->flags)) { + if (!page_mte_tagged(page)) { put_page(page); dump_skip(cprm, MTE_PAGE_TAG_STORAGE); continue; diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index af5df48ba915..788597a6b6a2 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -271,7 +271,7 @@ static int swsusp_mte_save_tags(void) if (!page) continue; - if (!test_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) continue; ret = save_tags(page, pfn); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 7467217c1eaf..84a085d536f8 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -41,8 +41,10 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (check_swap && is_swap_pte(old_pte)) { swp_entry_t entry = pte_to_swp_entry(old_pte); - if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) + if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) { + set_page_mte_tagged(page); return; + } } if (!pte_is_tagged) @@ -52,8 +54,10 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, * Test PG_mte_tagged again in case it was racing with another * set_pte_at(). */ - if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) { mte_clear_page_tags(page_address(page)); + set_page_mte_tagged(page); + } } void mte_sync_tags(pte_t old_pte, pte_t pte) @@ -69,9 +73,11 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { - if (!test_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) { mte_sync_page_tags(page, old_pte, check_swap, pte_is_tagged); + set_page_mte_tagged(page); + } } /* ensure the tags are visible before the PTE is set */ @@ -96,8 +102,7 @@ int memcmp_pages(struct page *page1, struct page *page2) * pages is tagged, set_pte_at() may zero or change the tags of the * other page via mte_sync_tags(). */ - if (test_bit(PG_mte_tagged, &page1->flags) || - test_bit(PG_mte_tagged, &page2->flags)) + if (page_mte_tagged(page1) || page_mte_tagged(page2)) return addr1 != addr2; return ret; @@ -454,7 +459,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, put_page(page); break; } - WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags)); + WARN_ON_ONCE(!page_mte_tagged(page)); /* limit access to the end of the page */ offset = offset_in_page(addr); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2ff13a3f8479..817fdd1ab778 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1059,7 +1059,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, maddr = page_address(page); if (!write) { - if (test_bit(PG_mte_tagged, &page->flags)) + if (page_mte_tagged(page)) num_tags = mte_copy_tags_to_user(tags, maddr, MTE_GRANULES_PER_PAGE); else @@ -1076,7 +1076,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, * completed fully */ if (num_tags == MTE_GRANULES_PER_PAGE) - set_bit(PG_mte_tagged, &page->flags); + set_page_mte_tagged(page); kvm_release_pfn_dirty(pfn); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8..2c3759f1f2c5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1110,9 +1110,9 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, return -EFAULT; for (i = 0; i < nr_pages; i++, page++) { - if (!test_bit(PG_mte_tagged, &page->flags)) { + if (!page_mte_tagged(page)) { mte_clear_page_tags(page_address(page)); - set_bit(PG_mte_tagged, &page->flags); + set_page_mte_tagged(page); } } diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 24913271e898..731d8a35701e 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -21,9 +21,10 @@ void copy_highpage(struct page *to, struct page *from) copy_page(kto, kfrom); - if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { - set_bit(PG_mte_tagged, &to->flags); + if (system_supports_mte() && page_mte_tagged(from)) { + page_kasan_tag_reset(to); mte_copy_page_tags(kto, kfrom); + set_page_mte_tagged(to); } } EXPORT_SYMBOL(copy_highpage); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 5b391490e045..629e886ceec4 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -934,5 +934,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { mte_zero_clear_page_tags(page_address(page)); - set_bit(PG_mte_tagged, &page->flags); + set_page_mte_tagged(page); } diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index bed803d8e158..70f913205db9 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -24,7 +24,7 @@ int mte_save_tags(struct page *page) { void *tag_storage, *ret; - if (!test_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) return 0; tag_storage = mte_allocate_tag_storage(); -- cgit v1.2.3 From 2dbf12ae132cc78048615cfa19c9be64baaf0ced Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2022 18:10:36 -0700 Subject: KVM: arm64: Simplify the sanitise_mte_tags() logic Currently sanitise_mte_tags() checks if it's an online page before attempting to sanitise the tags. Such detection should be done in the caller via the VM_MTE_ALLOWED vma flag. Since kvm_set_spte_gfn() does not have the vma, leave the page unmapped if not already tagged. Tag initialisation will be done on a subsequent access fault in user_mem_abort(). Signed-off-by: Catalin Marinas [pcc@google.com: fix the page initializer] Signed-off-by: Peter Collingbourne Reviewed-by: Steven Price Cc: Will Deacon Cc: Marc Zyngier Cc: Peter Collingbourne Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-4-pcc@google.com --- arch/arm64/kvm/mmu.c | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2c3759f1f2c5..e81bfb730629 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1091,23 +1091,14 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva) * - mmap_lock protects between a VM faulting a page in and the VMM performing * an mprotect() to add VM_MTE */ -static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, - unsigned long size) +static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long size) { unsigned long i, nr_pages = size >> PAGE_SHIFT; - struct page *page; + struct page *page = pfn_to_page(pfn); if (!kvm_has_mte(kvm)) - return 0; - - /* - * pfn_to_online_page() is used to reject ZONE_DEVICE pages - * that may not support tags. - */ - page = pfn_to_online_page(pfn); - - if (!page) - return -EFAULT; + return; for (i = 0; i < nr_pages; i++, page++) { if (!page_mte_tagged(page)) { @@ -1115,8 +1106,6 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, set_page_mte_tagged(page); } } - - return 0; } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, @@ -1127,7 +1116,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, bool write_fault, writable, force_pte = false; bool exec_fault; bool device = false; - bool shared; unsigned long mmu_seq; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; @@ -1177,8 +1165,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = get_vma_page_shift(vma, hva); } - shared = (vma->vm_flags & VM_SHARED); - switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: @@ -1299,12 +1285,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new VM_SHARED VMA */ - if (!shared) - ret = sanitise_mte_tags(kvm, pfn, vma_pagesize); - else + if ((vma->vm_flags & VM_MTE_ALLOWED) && + !(vma->vm_flags & VM_SHARED)) { + sanitise_mte_tags(kvm, pfn, vma_pagesize); + } else { ret = -EFAULT; - if (ret) goto out_unlock; + } } if (writable) @@ -1526,15 +1513,18 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_pfn_t pfn = pte_pfn(range->pte); - int ret; if (!kvm->arch.mmu.pgt) return false; WARN_ON(range->end - range->start != 1); - ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE); - if (ret) + /* + * If the page isn't tagged, defer to user_mem_abort() for sanitising + * the MTE tags. The S2 pte should have been unmapped by + * mmu_notifier_invalidate_range_end(). + */ + if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn))) return false; /* -- cgit v1.2.3 From d77e59a8fccde7fb5dd8c57594ed147b4291c970 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2022 18:10:38 -0700 Subject: arm64: mte: Lock a page for MTE tag initialisation Initialising the tags and setting PG_mte_tagged flag for a page can race between multiple set_pte_at() on shared pages or setting the stage 2 pte via user_mem_abort(). Introduce a new PG_mte_lock flag as PG_arch_3 and set it before attempting page initialisation. Given that PG_mte_tagged is never cleared for a page, consider setting this flag to mean page unlocked and wait on this bit with acquire semantics if the page is locked: - try_page_mte_tagging() - lock the page for tagging, return true if it can be tagged, false if already tagged. No acquire semantics if it returns true (PG_mte_tagged not set) as there is no serialisation with a previous set_page_mte_tagged(). - set_page_mte_tagged() - set PG_mte_tagged with release semantics. The two-bit locking is based on Peter Collingbourne's idea. Signed-off-by: Catalin Marinas Signed-off-by: Peter Collingbourne Reviewed-by: Steven Price Cc: Will Deacon Cc: Marc Zyngier Cc: Peter Collingbourne Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-6-pcc@google.com --- arch/arm64/include/asm/mte.h | 35 ++++++++++++++++++++++++++++++++++- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kernel/mte.c | 12 +++--------- arch/arm64/kvm/guest.c | 16 ++++++++++------ arch/arm64/kvm/mmu.c | 2 +- arch/arm64/mm/copypage.c | 2 ++ arch/arm64/mm/fault.c | 2 ++ arch/arm64/mm/mteswap.c | 14 +++++--------- 9 files changed, 60 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 3f8199ba265a..20dd06d70af5 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from, unsigned long n); int mte_save_tags(struct page *page); void mte_save_page_tags(const void *page_addr, void *tag_storage); -bool mte_restore_tags(swp_entry_t entry, struct page *page); +void mte_restore_tags(swp_entry_t entry, struct page *page); void mte_restore_page_tags(void *page_addr, const void *tag_storage); void mte_invalidate_tags(int type, pgoff_t offset); void mte_invalidate_tags_area(int type); @@ -36,6 +36,8 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 +/* simple lock to avoid multiple threads tagging the same page */ +#define PG_mte_lock PG_arch_3 static inline void set_page_mte_tagged(struct page *page) { @@ -60,6 +62,33 @@ static inline bool page_mte_tagged(struct page *page) return ret; } +/* + * Lock the page for tagging and return 'true' if the page can be tagged, + * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the + * locking only happens once for page initialisation. + * + * The page MTE lock state: + * + * Locked: PG_mte_lock && !PG_mte_tagged + * Unlocked: !PG_mte_lock || PG_mte_tagged + * + * Acquire semantics only if the page is tagged (returning 'false'). + */ +static inline bool try_page_mte_tagging(struct page *page) +{ + if (!test_and_set_bit(PG_mte_lock, &page->flags)) + return true; + + /* + * The tags are either being initialised or may have been initialised + * already. Check if the PG_mte_tagged flag has been set or wait + * otherwise. + */ + smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged)); + + return false; +} + void mte_zero_clear_page_tags(void *addr); void mte_sync_tags(pte_t old_pte, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); @@ -86,6 +115,10 @@ static inline bool page_mte_tagged(struct page *page) { return false; } +static inline bool try_page_mte_tagging(struct page *page) +{ + return false; +} static inline void mte_zero_clear_page_tags(void *addr) { } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 98b638441521..8735ac1a1e32 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1049,8 +1049,8 @@ static inline void arch_swap_invalidate_area(int type) #define __HAVE_ARCH_SWAP_RESTORE static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) { - if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) - set_page_mte_tagged(&folio->page); + if (system_supports_mte()) + mte_restore_tags(entry, &folio->page); } #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index df11cfe61fcb..afb4ffd745c3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2050,7 +2050,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) * Clear the tags in the zero page. This needs to be done via the * linear map which has the Tagged attribute. */ - if (!page_mte_tagged(ZERO_PAGE(0))) { + if (try_page_mte_tagging(ZERO_PAGE(0))) { mte_clear_page_tags(lm_alias(empty_zero_page)); set_page_mte_tagged(ZERO_PAGE(0)); } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 84a085d536f8..f5bcb0dc6267 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -41,20 +41,14 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (check_swap && is_swap_pte(old_pte)) { swp_entry_t entry = pte_to_swp_entry(old_pte); - if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) { - set_page_mte_tagged(page); - return; - } + if (!non_swap_entry(entry)) + mte_restore_tags(entry, page); } if (!pte_is_tagged) return; - /* - * Test PG_mte_tagged again in case it was racing with another - * set_pte_at(). - */ - if (!page_mte_tagged(page)) { + if (try_page_mte_tagging(page)) { mte_clear_page_tags(page_address(page)); set_page_mte_tagged(page); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 817fdd1ab778..5626ddb540ce 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1068,15 +1068,19 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, clear_user(tags, MTE_GRANULES_PER_PAGE); kvm_release_pfn_clean(pfn); } else { + /* + * Only locking to serialise with a concurrent + * set_pte_at() in the VMM but still overriding the + * tags, hence ignoring the return value. + */ + try_page_mte_tagging(page); num_tags = mte_copy_tags_from_user(maddr, tags, MTE_GRANULES_PER_PAGE); - /* - * Set the flag after checking the write - * completed fully - */ - if (num_tags == MTE_GRANULES_PER_PAGE) - set_page_mte_tagged(page); + /* uaccess failed, don't leave stale tags */ + if (num_tags != MTE_GRANULES_PER_PAGE) + mte_clear_page_tags(page); + set_page_mte_tagged(page); kvm_release_pfn_dirty(pfn); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index e81bfb730629..fa2c85b93149 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1101,7 +1101,7 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, return; for (i = 0; i < nr_pages; i++, page++) { - if (!page_mte_tagged(page)) { + if (try_page_mte_tagging(page)) { mte_clear_page_tags(page_address(page)); set_page_mte_tagged(page); } diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 731d8a35701e..8dd5a8fe64b4 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -23,6 +23,8 @@ void copy_highpage(struct page *to, struct page *from) if (system_supports_mte() && page_mte_tagged(from)) { page_kasan_tag_reset(to); + /* It's a new page, shouldn't have been tagged yet */ + WARN_ON_ONCE(!try_page_mte_tagging(to)); mte_copy_page_tags(kto, kfrom); set_page_mte_tagged(to); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 629e886ceec4..b8b299d1736a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -933,6 +933,8 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { + /* Newly allocated page, shouldn't have been tagged yet */ + WARN_ON_ONCE(!try_page_mte_tagging(page)); mte_zero_clear_page_tags(page_address(page)); set_page_mte_tagged(page); } diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index 70f913205db9..cd508ba80ab1 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -46,21 +46,17 @@ int mte_save_tags(struct page *page) return 0; } -bool mte_restore_tags(swp_entry_t entry, struct page *page) +void mte_restore_tags(swp_entry_t entry, struct page *page) { void *tags = xa_load(&mte_pages, entry.val); if (!tags) - return false; + return; - /* - * Test PG_mte_tagged again in case it was racing with another - * set_pte_at(). - */ - if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + if (try_page_mte_tagging(page)) { mte_restore_page_tags(page_address(page), tags); - - return true; + set_page_mte_tagged(page); + } } void mte_invalidate_tags(int type, pgoff_t offset) -- cgit v1.2.3 From d89585fbb30869011b326ef26c94c3137d228df9 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 3 Nov 2022 18:10:39 -0700 Subject: KVM: arm64: unify the tests for VMAs in memslots when MTE is enabled Previously we allowed creating a memslot containing a private mapping that was not VM_MTE_ALLOWED, but would later reject KVM_RUN with -EFAULT. Now we reject the memory region at memslot creation time. Since this is a minor tweak to the ABI (a VMM that created one of these memslots would fail later anyway), no VMM to my knowledge has MTE support yet, and the hardware with the necessary features is not generally available, we can probably make this ABI change at this point. Signed-off-by: Peter Collingbourne Reviewed-by: Catalin Marinas Reviewed-by: Steven Price Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-7-pcc@google.com --- arch/arm64/kvm/mmu.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index fa2c85b93149..9ff9a271cf01 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1108,6 +1108,19 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, } } +static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) +{ + /* + * VM_SHARED mappings are not allowed with MTE to avoid races + * when updating the PG_mte_tagged page flag, see + * sanitise_mte_tags for more details. + */ + if (vma->vm_flags & VM_SHARED) + return false; + + return vma->vm_flags & VM_MTE_ALLOWED; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -1284,9 +1297,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { - /* Check the VMM hasn't introduced a new VM_SHARED VMA */ - if ((vma->vm_flags & VM_MTE_ALLOWED) && - !(vma->vm_flags & VM_SHARED)) { + /* Check the VMM hasn't introduced a new disallowed VMA */ + if (kvm_vma_mte_allowed(vma)) { sanitise_mte_tags(kvm, pfn, vma_pagesize); } else { ret = -EFAULT; @@ -1730,12 +1742,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vma) break; - /* - * VM_SHARED mappings are not allowed with MTE to avoid races - * when updating the PG_mte_tagged page flag, see - * sanitise_mte_tags for more details. - */ - if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) { + if (kvm_has_mte(kvm) && !kvm_vma_mte_allowed(vma)) { ret = -EINVAL; break; } -- cgit v1.2.3 From c911f0d4687947915f04024aa01803247fcf7f1a Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 3 Nov 2022 18:10:40 -0700 Subject: KVM: arm64: permit all VM_MTE_ALLOWED mappings with MTE enabled Certain VMMs such as crosvm have features (e.g. sandboxing) that depend on being able to map guest memory as MAP_SHARED. The current restriction on sharing MAP_SHARED pages with the guest is preventing the use of those features with MTE. Now that the races between tasks concurrently clearing tags on the same page have been fixed, remove this restriction. Note that this is a relaxation of the ABI. Signed-off-by: Peter Collingbourne Reviewed-by: Catalin Marinas Reviewed-by: Steven Price Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-8-pcc@google.com --- arch/arm64/kvm/mmu.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 9ff9a271cf01..b9402d8b5a90 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1110,14 +1110,6 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) { - /* - * VM_SHARED mappings are not allowed with MTE to avoid races - * when updating the PG_mte_tagged page flag, see - * sanitise_mte_tags for more details. - */ - if (vma->vm_flags & VM_SHARED) - return false; - return vma->vm_flags & VM_MTE_ALLOWED; } -- cgit v1.2.3 From 37622bae3db306ee472acb02fa55c48f14ddd5a1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:00 +0000 Subject: arm64/sysreg: Standardise naming for ID_MMFR0_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. The scripts would like to follow exactly what is in the arm-arm, which uses lower case for some of these feature names. Ensure symbols for the ID_MMFR0_EL1 register have an _EL1 suffix, and use lower case in feature names where the arm-arm does the same. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-2-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 16 ++++++++-------- arch/arm64/kernel/cpufeature.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7d301700d1a9..d757e518b08c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -733,14 +733,14 @@ #define ID_ISAR6_DP_SHIFT 4 #define ID_ISAR6_JSCVT_SHIFT 0 -#define ID_MMFR0_INNERSHR_SHIFT 28 -#define ID_MMFR0_FCSE_SHIFT 24 -#define ID_MMFR0_AUXREG_SHIFT 20 -#define ID_MMFR0_TCM_SHIFT 16 -#define ID_MMFR0_SHARELVL_SHIFT 12 -#define ID_MMFR0_OUTERSHR_SHIFT 8 -#define ID_MMFR0_PMSA_SHIFT 4 -#define ID_MMFR0_VMSA_SHIFT 0 +#define ID_MMFR0_EL1_InnerShr_SHIFT 28 +#define ID_MMFR0_EL1_FCSE_SHIFT 24 +#define ID_MMFR0_EL1_AuxReg_SHIFT 20 +#define ID_MMFR0_EL1_TCM_SHIFT 16 +#define ID_MMFR0_EL1_ShareLvl_SHIFT 12 +#define ID_MMFR0_EL1_OuterShr_SHIFT 8 +#define ID_MMFR0_EL1_PMSA_SHIFT 4 +#define ID_MMFR0_EL1_VMSA_SHIFT 0 #define ID_MMFR4_EVT_SHIFT 28 #define ID_MMFR4_CCIDX_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b3f37e2209ad..e42466c9aa0b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -402,14 +402,14 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_INNERSHR_SHIFT, 4, 0xf), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_FCSE_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_AUXREG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_TCM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_SHARELVL_SHIFT, 4, 0), - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_OUTERSHR_SHIFT, 4, 0xf), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_PMSA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_VMSA_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_InnerShr_SHIFT, 4, 0xf), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_FCSE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_AuxReg_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_TCM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_ShareLvl_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_OuterShr_SHIFT, 4, 0xf), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_PMSA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_VMSA_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.2.3 From 5ea1534ec320ffaca144136cca3880877738e6d2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:01 +0000 Subject: arm64/sysreg: Standardise naming for ID_MMFR4_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_MMFR4_EL1 register have an _EL1 suffix, and use lower case in feature names where the arm-arm does the same. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-3-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 16 ++++++++-------- arch/arm64/kernel/cpufeature.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d757e518b08c..f7d003b975c5 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -742,14 +742,14 @@ #define ID_MMFR0_EL1_PMSA_SHIFT 4 #define ID_MMFR0_EL1_VMSA_SHIFT 0 -#define ID_MMFR4_EVT_SHIFT 28 -#define ID_MMFR4_CCIDX_SHIFT 24 -#define ID_MMFR4_LSM_SHIFT 20 -#define ID_MMFR4_HPDS_SHIFT 16 -#define ID_MMFR4_CNP_SHIFT 12 -#define ID_MMFR4_XNX_SHIFT 8 -#define ID_MMFR4_AC2_SHIFT 4 -#define ID_MMFR4_SPECSEI_SHIFT 0 +#define ID_MMFR4_EL1_EVT_SHIFT 28 +#define ID_MMFR4_EL1_CCIDX_SHIFT 24 +#define ID_MMFR4_EL1_LSM_SHIFT 20 +#define ID_MMFR4_EL1_HPDS_SHIFT 16 +#define ID_MMFR4_EL1_CnP_SHIFT 12 +#define ID_MMFR4_EL1_XNX_SHIFT 8 +#define ID_MMFR4_EL1_AC2_SHIFT 4 +#define ID_MMFR4_EL1_SpecSEI_SHIFT 0 #define ID_MMFR5_ETS_SHIFT 0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e42466c9aa0b..b04500da4379 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -491,13 +491,13 @@ static const struct arm64_ftr_bits ftr_id_isar5[] = { }; static const struct arm64_ftr_bits ftr_id_mmfr4[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EVT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CCIDX_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_LSM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_XNX_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_AC2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_EVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_CCIDX_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_LSM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_HPDS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_CnP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_XNX_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_AC2_SHIFT, 4, 0), /* * SpecSEI = 1 indicates that the PE might generate an SError on an @@ -505,7 +505,7 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = { * SError might be generated than it will not be. Hence it has been * classified as FTR_HIGHER_SAFE. */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_SPECSEI_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_EL1_SpecSEI_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.2.3 From 7b24177c631de0d75fd7473ee75666498faf225e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:02 +0000 Subject: arm64/sysreg: Standardise naming for ID_MMFR5_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_MMFR5_EL1 register have an _EL1 suffix. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-4-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/cpufeature.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f7d003b975c5..7a719c9804fc 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -751,7 +751,7 @@ #define ID_MMFR4_EL1_AC2_SHIFT 4 #define ID_MMFR4_EL1_SpecSEI_SHIFT 0 -#define ID_MMFR5_ETS_SHIFT 0 +#define ID_MMFR5_EL1_ETS_SHIFT 0 #define ID_PFR0_DIT_SHIFT 24 #define ID_PFR0_CSV2_SHIFT 16 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b04500da4379..29239ea0f139 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -522,7 +522,7 @@ static const struct arm64_ftr_bits ftr_id_isar4[] = { }; static const struct arm64_ftr_bits ftr_id_mmfr5[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_ETS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_EL1_ETS_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.2.3 From 52b3dc559a4c240c33c58affdf8247bab485d422 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:03 +0000 Subject: arm64/sysreg: Standardise naming for ID_ISAR0_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_ISAR0_EL1 register have an _EL1 suffix, and use lower-case for feature names where the arm-arm does the same. To functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-5-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 14 +++++++------- arch/arm64/kernel/cpufeature.c | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7a719c9804fc..4dc80e00a467 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -710,13 +710,13 @@ #define ID_DFR1_MTPMU_SHIFT 0 -#define ID_ISAR0_DIVIDE_SHIFT 24 -#define ID_ISAR0_DEBUG_SHIFT 20 -#define ID_ISAR0_COPROC_SHIFT 16 -#define ID_ISAR0_CMPBRANCH_SHIFT 12 -#define ID_ISAR0_BITFIELD_SHIFT 8 -#define ID_ISAR0_BITCOUNT_SHIFT 4 -#define ID_ISAR0_SWAP_SHIFT 0 +#define ID_ISAR0_EL1_Divide_SHIFT 24 +#define ID_ISAR0_EL1_Debug_SHIFT 20 +#define ID_ISAR0_EL1_Coproc_SHIFT 16 +#define ID_ISAR0_EL1_CmpBranch_SHIFT 12 +#define ID_ISAR0_EL1_BitField_SHIFT 8 +#define ID_ISAR0_EL1_BitCount_SHIFT 4 +#define ID_ISAR0_EL1_Swap_SHIFT 0 #define ID_ISAR5_RDM_SHIFT 24 #define ID_ISAR5_CRC32_SHIFT 16 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 29239ea0f139..122c6e317660 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -470,13 +470,13 @@ static const struct arm64_ftr_bits ftr_gmid[] = { }; static const struct arm64_ftr_bits ftr_id_isar0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_COPROC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_CMPBRANCH_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITFIELD_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITCOUNT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_SWAP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Divide_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Debug_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Coproc_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_CmpBranch_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_BitField_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_BitCount_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Swap_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.2.3 From 3f08e378f00e1b1c14763fe225bf553b4733db67 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:04 +0000 Subject: arm64/sysreg: Standardise naming for ID_ISAR4_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_ISAR4_EL1 register have an _EL1 suffix, and use lower-case for feature names where the arm-arm does the same. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-6-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 16 ++++++++-------- arch/arm64/kernel/cpufeature.c | 18 +++++++++--------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 4dc80e00a467..259756bad5fe 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -699,14 +699,14 @@ #define ID_DFR0_PERFMON_8_4 0x5 #define ID_DFR0_PERFMON_8_5 0x6 -#define ID_ISAR4_SWP_FRAC_SHIFT 28 -#define ID_ISAR4_PSR_M_SHIFT 24 -#define ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20 -#define ID_ISAR4_BARRIER_SHIFT 16 -#define ID_ISAR4_SMC_SHIFT 12 -#define ID_ISAR4_WRITEBACK_SHIFT 8 -#define ID_ISAR4_WITHSHIFTS_SHIFT 4 -#define ID_ISAR4_UNPRIV_SHIFT 0 +#define ID_ISAR4_EL1_SWP_frac_SHIFT 28 +#define ID_ISAR4_EL1_PSR_M_SHIFT 24 +#define ID_ISAR4_EL1_SynchPrim_frac_SHIFT 20 +#define ID_ISAR4_EL1_Barrier_SHIFT 16 +#define ID_ISAR4_EL1_SMC_SHIFT 12 +#define ID_ISAR4_EL1_Writeback_SHIFT 8 +#define ID_ISAR4_EL1_WithShifts_SHIFT 4 +#define ID_ISAR4_EL1_Unpriv_SHIFT 0 #define ID_DFR1_MTPMU_SHIFT 0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 122c6e317660..9d0424aed30a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -510,14 +510,14 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = { }; static const struct arm64_ftr_bits ftr_id_isar4[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SWP_FRAC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_PSR_M_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_BARRIER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SMC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WRITEBACK_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WITHSHIFTS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_UNPRIV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SWP_frac_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_PSR_M_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SynchPrim_frac_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Barrier_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SMC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Writeback_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_WithShifts_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Unpriv_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1119,7 +1119,7 @@ static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info, * EL1-dependent register fields to avoid spurious sanity check fails. */ if (!id_aa64pfr0_32bit_el1(pfr0)) { - relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_SMC_SHIFT); + relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_EL1_SMC_SHIFT); relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRT_FRAC_SHIFT); relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SEC_FRAC_SHIFT); relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRTUALIZATION_SHIFT); -- cgit v1.2.3 From 816c8638d8c66991dd6dacd32e578293d10393f4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:05 +0000 Subject: arm64/sysreg: Standardise naming for ID_ISAR5_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_ISAR5_EL1 register have an _EL1 suffix. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-7-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 12 ++++++------ arch/arm64/kernel/cpufeature.c | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 259756bad5fe..dbd376174223 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -718,12 +718,12 @@ #define ID_ISAR0_EL1_BitCount_SHIFT 4 #define ID_ISAR0_EL1_Swap_SHIFT 0 -#define ID_ISAR5_RDM_SHIFT 24 -#define ID_ISAR5_CRC32_SHIFT 16 -#define ID_ISAR5_SHA2_SHIFT 12 -#define ID_ISAR5_SHA1_SHIFT 8 -#define ID_ISAR5_AES_SHIFT 4 -#define ID_ISAR5_SEVL_SHIFT 0 +#define ID_ISAR5_EL1_RDM_SHIFT 24 +#define ID_ISAR5_EL1_CRC32_SHIFT 16 +#define ID_ISAR5_EL1_SHA2_SHIFT 12 +#define ID_ISAR5_EL1_SHA1_SHIFT 8 +#define ID_ISAR5_EL1_AES_SHIFT 4 +#define ID_ISAR5_EL1_SEVL_SHIFT 0 #define ID_ISAR6_I8MM_SHIFT 24 #define ID_ISAR6_BF16_SHIFT 20 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9d0424aed30a..efa3fcece38c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -481,12 +481,12 @@ static const struct arm64_ftr_bits ftr_id_isar0[] = { }; static const struct arm64_ftr_bits ftr_id_isar5[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SEVL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SEVL_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2842,11 +2842,11 @@ static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), #endif {}, }; -- cgit v1.2.3 From eef4344f779f6aee9ec9d49a03ec2317a85a744e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:06 +0000 Subject: arm64/sysreg: Standardise naming for ID_ISAR6_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_ISAR6_EL1 register have an _EL1 suffix. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-8-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 14 +++++++------- arch/arm64/kernel/cpufeature.c | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index dbd376174223..10a00b219851 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -725,13 +725,13 @@ #define ID_ISAR5_EL1_AES_SHIFT 4 #define ID_ISAR5_EL1_SEVL_SHIFT 0 -#define ID_ISAR6_I8MM_SHIFT 24 -#define ID_ISAR6_BF16_SHIFT 20 -#define ID_ISAR6_SPECRES_SHIFT 16 -#define ID_ISAR6_SB_SHIFT 12 -#define ID_ISAR6_FHM_SHIFT 8 -#define ID_ISAR6_DP_SHIFT 4 -#define ID_ISAR6_JSCVT_SHIFT 0 +#define ID_ISAR6_EL1_I8MM_SHIFT 24 +#define ID_ISAR6_EL1_BF16_SHIFT 20 +#define ID_ISAR6_EL1_SPECRES_SHIFT 16 +#define ID_ISAR6_EL1_SB_SHIFT 12 +#define ID_ISAR6_EL1_FHM_SHIFT 8 +#define ID_ISAR6_EL1_DP_SHIFT 4 +#define ID_ISAR6_EL1_JSCVT_SHIFT 0 #define ID_MMFR0_EL1_InnerShr_SHIFT 28 #define ID_MMFR0_EL1_FCSE_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index efa3fcece38c..63ecad8f1730 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -527,13 +527,13 @@ static const struct arm64_ftr_bits ftr_id_mmfr5[] = { }; static const struct arm64_ftr_bits ftr_id_isar6[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_I8MM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_BF16_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SPECRES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_FHM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_DP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_JSCVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SPECRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_FHM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_DP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_JSCVT_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.2.3 From e0bf98fef3fd0f934deee3ebc3a03b88aec5b501 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:07 +0000 Subject: arm64/sysreg: Standardise naming for ID_PFR0_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_PFR0_EL1 register have an _EL1 suffix, and use lower case in feature names where the arm-arm does the same. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-9-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 12 ++++++------ arch/arm64/kernel/cpufeature.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 10a00b219851..6b4975132db9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -753,12 +753,12 @@ #define ID_MMFR5_EL1_ETS_SHIFT 0 -#define ID_PFR0_DIT_SHIFT 24 -#define ID_PFR0_CSV2_SHIFT 16 -#define ID_PFR0_STATE3_SHIFT 12 -#define ID_PFR0_STATE2_SHIFT 8 -#define ID_PFR0_STATE1_SHIFT 4 -#define ID_PFR0_STATE0_SHIFT 0 +#define ID_PFR0_EL1_DIT_SHIFT 24 +#define ID_PFR0_EL1_CSV2_SHIFT 16 +#define ID_PFR0_EL1_State3_SHIFT 12 +#define ID_PFR0_EL1_State2_SHIFT 8 +#define ID_PFR0_EL1_State1_SHIFT 4 +#define ID_PFR0_EL1_State0_SHIFT 0 #define ID_DFR0_PERFMON_SHIFT 24 #define ID_DFR0_MPROFDBG_SHIFT 20 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 63ecad8f1730..11e84a4f8b97 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -538,12 +538,12 @@ static const struct arm64_ftr_bits ftr_id_isar6[] = { }; static const struct arm64_ftr_bits ftr_id_pfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE0_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_DIT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_CSV2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State0_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.2.3 From 0a648056d68d4049dfb5d1c36b79ad3eba0090f0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:08 +0000 Subject: arm64/sysreg: Standardise naming for ID_PFR1_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_PFR1_EL1 register have an _EL1 suffix, and use lower case in feature names where the arm-arm does the same. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-10-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 16 ++++++++-------- arch/arm64/kernel/cpufeature.c | 26 +++++++++++++------------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6b4975132db9..f93f68ebdccc 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -789,14 +789,14 @@ #define MVFR1_FPDNAN_SHIFT 4 #define MVFR1_FPFTZ_SHIFT 0 -#define ID_PFR1_GIC_SHIFT 28 -#define ID_PFR1_VIRT_FRAC_SHIFT 24 -#define ID_PFR1_SEC_FRAC_SHIFT 20 -#define ID_PFR1_GENTIMER_SHIFT 16 -#define ID_PFR1_VIRTUALIZATION_SHIFT 12 -#define ID_PFR1_MPROGMOD_SHIFT 8 -#define ID_PFR1_SECURITY_SHIFT 4 -#define ID_PFR1_PROGMOD_SHIFT 0 +#define ID_PFR1_EL1_GIC_SHIFT 28 +#define ID_PFR1_EL1_Virt_frac_SHIFT 24 +#define ID_PFR1_EL1_Sec_frac_SHIFT 20 +#define ID_PFR1_EL1_GenTimer_SHIFT 16 +#define ID_PFR1_EL1_Virtualization_SHIFT 12 +#define ID_PFR1_EL1_MProgMod_SHIFT 8 +#define ID_PFR1_EL1_Security_SHIFT 4 +#define ID_PFR1_EL1_ProgMod_SHIFT 0 #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 11e84a4f8b97..271a82dd59d4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -548,14 +548,14 @@ static const struct arm64_ftr_bits ftr_id_pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_pfr1[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GIC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRT_FRAC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SEC_FRAC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GENTIMER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRTUALIZATION_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_MPROGMOD_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SECURITY_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_PROGMOD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_GIC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Virt_frac_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Sec_frac_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_GenTimer_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Virtualization_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_MProgMod_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Security_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_ProgMod_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1120,11 +1120,11 @@ static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info, */ if (!id_aa64pfr0_32bit_el1(pfr0)) { relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_EL1_SMC_SHIFT); - relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRT_FRAC_SHIFT); - relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SEC_FRAC_SHIFT); - relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRTUALIZATION_SHIFT); - relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SECURITY_SHIFT); - relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_PROGMOD_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Virt_frac_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Sec_frac_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Virtualization_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Security_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_ProgMod_SHIFT); } taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, -- cgit v1.2.3 From 1ecf3dcb136320359ffd79d1ca5908e078443b6b Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:09 +0000 Subject: arm64/sysreg: Standardise naming for ID_PFR2_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_PFR2_EL1 register have an _EL1 suffix. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-11-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f93f68ebdccc..155cb298c897 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -768,8 +768,8 @@ #define ID_DFR0_COPSDBG_SHIFT 4 #define ID_DFR0_COPDBG_SHIFT 0 -#define ID_PFR2_SSBS_SHIFT 4 -#define ID_PFR2_CSV3_SHIFT 0 +#define ID_PFR2_EL1_SSBS_SHIFT 4 +#define ID_PFR2_EL1_CSV3_SHIFT 0 #define MVFR0_FPROUND_SHIFT 28 #define MVFR0_FPSHVEC_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 271a82dd59d4..8009fc2e4b5e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -560,8 +560,8 @@ static const struct arm64_ftr_bits ftr_id_pfr1[] = { }; static const struct arm64_ftr_bits ftr_id_pfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_CSV3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_SSBS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_CSV3_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.2.3 From f4f5969e354235bbe729ff3881089fc902c0daff Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:10 +0000 Subject: arm64/sysreg: Standardise naming for ID_DFR0_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_DFR0_EL1 register have an _EL1 suffix, and use lower-case for feature names where the arm-arm does the same. The arm-arm has feature names for some of the ID_DFR0_EL1.PerMon encodings. Use these feature names in preference to the '8_4' indication of the architecture version they were introduced in. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-12-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 24 +++++++++++------------- arch/arm64/kernel/cpufeature.c | 14 +++++++------- arch/arm64/kvm/sys_regs.c | 4 ++-- 3 files changed, 20 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 155cb298c897..835b279f7f20 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -692,12 +692,10 @@ #define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48 #endif -#define ID_DFR0_PERFMON_SHIFT 24 - -#define ID_DFR0_PERFMON_8_0 0x3 -#define ID_DFR0_PERFMON_8_1 0x4 -#define ID_DFR0_PERFMON_8_4 0x5 -#define ID_DFR0_PERFMON_8_5 0x6 +#define ID_DFR0_EL1_PerfMon_PMUv3 0x3 +#define ID_DFR0_EL1_PerfMon_PMUv3p1 0x4 +#define ID_DFR0_EL1_PerfMon_PMUv3p4 0x5 +#define ID_DFR0_EL1_PerfMon_PMUv3p5 0x6 #define ID_ISAR4_EL1_SWP_frac_SHIFT 28 #define ID_ISAR4_EL1_PSR_M_SHIFT 24 @@ -760,13 +758,13 @@ #define ID_PFR0_EL1_State1_SHIFT 4 #define ID_PFR0_EL1_State0_SHIFT 0 -#define ID_DFR0_PERFMON_SHIFT 24 -#define ID_DFR0_MPROFDBG_SHIFT 20 -#define ID_DFR0_MMAPTRC_SHIFT 16 -#define ID_DFR0_COPTRC_SHIFT 12 -#define ID_DFR0_MMAPDBG_SHIFT 8 -#define ID_DFR0_COPSDBG_SHIFT 4 -#define ID_DFR0_COPDBG_SHIFT 0 +#define ID_DFR0_EL1_PerfMon_SHIFT 24 +#define ID_DFR0_EL1_MProfDbg_SHIFT 20 +#define ID_DFR0_EL1_MMapTrc_SHIFT 16 +#define ID_DFR0_EL1_CopTrc_SHIFT 12 +#define ID_DFR0_EL1_MMapDbg_SHIFT 8 +#define ID_DFR0_EL1_CopSDbg_SHIFT 4 +#define ID_DFR0_EL1_CopDbg_SHIFT 0 #define ID_PFR2_EL1_SSBS_SHIFT 4 #define ID_PFR2_EL1_CSV3_SHIFT 0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 8009fc2e4b5e..77b65a75ba07 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -567,13 +567,13 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = { static const struct arm64_ftr_bits ftr_id_dfr0[] = { /* [31:28] TraceFilt */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_PERFMON_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPDBG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPSDBG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPDBG_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_EL1_PerfMon_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MProfDbg_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MMapTrc_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopTrc_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MMapDbg_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopSDbg_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopDbg_SHIFT, 4, 0), ARM64_FTR_END, }; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f4a7c5abcbca..608e4f25161d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1121,8 +1121,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r case SYS_ID_DFR0_EL1: /* Limit guests to PMUv3 for ARMv8.4 */ val = cpuid_feature_cap_perfmon_field(val, - ID_DFR0_PERFMON_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_PERFMON_8_4 : 0); + ID_DFR0_EL1_PerfMon_SHIFT, + kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_EL1_PerfMon_PMUv3p4 : 0); break; } -- cgit v1.2.3 From d092106d73539905b5deadfa86be04ff141bcaa8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:11 +0000 Subject: arm64/sysreg: Standardise naming for ID_DFR1_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the ID_DFR1_EL1 register have an _EL1 suffix. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-13-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/cpufeature.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 835b279f7f20..29d93a36eac9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -706,7 +706,7 @@ #define ID_ISAR4_EL1_WithShifts_SHIFT 4 #define ID_ISAR4_EL1_Unpriv_SHIFT 0 -#define ID_DFR1_MTPMU_SHIFT 0 +#define ID_DFR1_EL1_MTPMU_SHIFT 0 #define ID_ISAR0_EL1_Divide_SHIFT 24 #define ID_ISAR0_EL1_Debug_SHIFT 20 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 77b65a75ba07..84b3dc994a70 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -578,7 +578,7 @@ static const struct arm64_ftr_bits ftr_id_dfr0[] = { }; static const struct arm64_ftr_bits ftr_id_dfr1[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_MTPMU_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_EL1_MTPMU_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.2.3 From a3aab94801dee86cc9d7d43a611c71688a483908 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:12 +0000 Subject: arm64/sysreg: Standardise naming for MVFR0_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the MVFR0_EL1 register use lower-case for feature names where the arm-arm does the same. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-14-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 16 ++++++++-------- arch/arm64/kernel/cpufeature.c | 20 ++++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 29d93a36eac9..0d4ab1c78d22 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -769,14 +769,14 @@ #define ID_PFR2_EL1_SSBS_SHIFT 4 #define ID_PFR2_EL1_CSV3_SHIFT 0 -#define MVFR0_FPROUND_SHIFT 28 -#define MVFR0_FPSHVEC_SHIFT 24 -#define MVFR0_FPSQRT_SHIFT 20 -#define MVFR0_FPDIVIDE_SHIFT 16 -#define MVFR0_FPTRAP_SHIFT 12 -#define MVFR0_FPDP_SHIFT 8 -#define MVFR0_FPSP_SHIFT 4 -#define MVFR0_SIMD_SHIFT 0 +#define MVFR0_EL1_FPRound_SHIFT 28 +#define MVFR0_EL1_FPShVec_SHIFT 24 +#define MVFR0_EL1_FPSqrt_SHIFT 20 +#define MVFR0_EL1_FPDivide_SHIFT 16 +#define MVFR0_EL1_FPTrap_SHIFT 12 +#define MVFR0_EL1_FPDP_SHIFT 8 +#define MVFR0_EL1_FPSP_SHIFT 4 +#define MVFR0_EL1_SIMDReg_SHIFT 0 #define MVFR1_SIMDFMAC_SHIFT 28 #define MVFR1_FPHP_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 84b3dc994a70..7ffe076ce7a6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -429,14 +429,14 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { }; static const struct arm64_ftr_bits ftr_mvfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPROUND_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSHVEC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSQRT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPDIVIDE_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPTRAP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPDP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_SIMD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPRound_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPShVec_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPSqrt_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPDivide_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPTrap_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPDP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPSP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_SIMDReg_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2840,8 +2840,8 @@ static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_EL1_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_EL1_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), -- cgit v1.2.3 From d3e1aa85b1b27b0e6d6f8d8b0bf0a9a04ed1d40a Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:13 +0000 Subject: arm64/sysreg: Standardise naming for MVFR1_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the MVFR1_EL1 register use lower-case for feature names where the arm-arm does the same. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-15-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 16 ++++++++-------- arch/arm64/kernel/cpufeature.c | 24 ++++++++++++------------ 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 0d4ab1c78d22..5f2dfbc4a347 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -778,14 +778,14 @@ #define MVFR0_EL1_FPSP_SHIFT 4 #define MVFR0_EL1_SIMDReg_SHIFT 0 -#define MVFR1_SIMDFMAC_SHIFT 28 -#define MVFR1_FPHP_SHIFT 24 -#define MVFR1_SIMDHP_SHIFT 20 -#define MVFR1_SIMDSP_SHIFT 16 -#define MVFR1_SIMDINT_SHIFT 12 -#define MVFR1_SIMDLS_SHIFT 8 -#define MVFR1_FPDNAN_SHIFT 4 -#define MVFR1_FPFTZ_SHIFT 0 +#define MVFR1_EL1_SIMDFMAC_SHIFT 28 +#define MVFR1_EL1_FPHP_SHIFT 24 +#define MVFR1_EL1_SIMDHP_SHIFT 20 +#define MVFR1_EL1_SIMDSP_SHIFT 16 +#define MVFR1_EL1_SIMDInt_SHIFT 12 +#define MVFR1_EL1_SIMDLS_SHIFT 8 +#define MVFR1_EL1_FPDNaN_SHIFT 4 +#define MVFR1_EL1_FPFtZ_SHIFT 0 #define ID_PFR1_EL1_GIC_SHIFT 28 #define ID_PFR1_EL1_Virt_frac_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7ffe076ce7a6..c56339066304 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -441,14 +441,14 @@ static const struct arm64_ftr_bits ftr_mvfr0[] = { }; static const struct arm64_ftr_bits ftr_mvfr1[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDFMAC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPHP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDHP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDSP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDINT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDLS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPDNAN_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPFTZ_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDFMAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPHP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDHP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDSP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDInt_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDLS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPDNaN_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPFtZ_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2829,16 +2829,16 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) else mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); - return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && - cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && - cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDInt_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDLS_SHIFT); } #endif static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), - HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_EL1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_EL1_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_EL1_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), -- cgit v1.2.3 From c6e155e8e561dafcba9faf86598d7ec69942ab86 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:14 +0000 Subject: arm64/sysreg: Standardise naming for MVFR2_EL1 To convert the 32bit id registers to use the sysreg generation, they must first have a regular pattern, to match the symbols the script generates. Ensure symbols for the MVFR2_EL1 register use lower-case for feature names where the arm-arm does the same. No functional change. Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-16-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5f2dfbc4a347..fb9f5db0b936 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -813,8 +813,8 @@ #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT #endif -#define MVFR2_FPMISC_SHIFT 4 -#define MVFR2_SIMDMISC_SHIFT 0 +#define MVFR2_EL1_FPMisc_SHIFT 4 +#define MVFR2_EL1_SIMDMisc_SHIFT 0 #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c56339066304..935579e5517a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -453,8 +453,8 @@ static const struct arm64_ftr_bits ftr_mvfr1[] = { }; static const struct arm64_ftr_bits ftr_mvfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_FPMISC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_SIMDMISC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_EL1_FPMisc_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_EL1_SIMDMisc_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.2.3 From 7587cdef55928ef70d4e945003646711ad39e405 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:15 +0000 Subject: arm64/sysreg: Extend the maximum width of a register and symbol name 32bit has multiple values for its id registers, as extra properties were added to the CPUs. Some of these end up having long names, which exceed the fixed 48 character column that the sysreg awk script generates. For example, the ID_MMFR1_EL1.L1Hvd field has an encoding whose natural name would be 'invalidate Iside only'. Using this causes compile errors as the script generates the following: #define ID_MMFR1_EL1_L1Hvd_INVALIDATE_ISIDE_ONLYUL(0b0001) Add a few extra characters. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-17-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/tools/gen-sysreg.awk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index db461921d256..c350164a3955 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -33,7 +33,7 @@ function expect_fields(nf) { # Print a CPP macro definition, padded with spaces so that the macro bodies # line up in a column function define(name, val) { - printf "%-48s%s\n", "#define " name, val + printf "%-56s%s\n", "#define " name, val } # Print standard BITMASK/SHIFT/WIDTH CPP definitions for a field -- cgit v1.2.3 From 8893df290e3684e1900db52700d77a8ed78e2904 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:16 +0000 Subject: arm64/sysreg: Convert ID_MMFR0_EL1 to automatic generation Convert ID_MMFR0_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-18-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 10 --------- arch/arm64/tools/sysreg | 47 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fb9f5db0b936..f856fbf8bfdf 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -171,7 +171,6 @@ #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) -#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) #define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) #define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) #define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) @@ -731,15 +730,6 @@ #define ID_ISAR6_EL1_DP_SHIFT 4 #define ID_ISAR6_EL1_JSCVT_SHIFT 0 -#define ID_MMFR0_EL1_InnerShr_SHIFT 28 -#define ID_MMFR0_EL1_FCSE_SHIFT 24 -#define ID_MMFR0_EL1_AuxReg_SHIFT 20 -#define ID_MMFR0_EL1_TCM_SHIFT 16 -#define ID_MMFR0_EL1_ShareLvl_SHIFT 12 -#define ID_MMFR0_EL1_OuterShr_SHIFT 8 -#define ID_MMFR0_EL1_PMSA_SHIFT 4 -#define ID_MMFR0_EL1_VMSA_SHIFT 0 - #define ID_MMFR4_EL1_EVT_SHIFT 28 #define ID_MMFR4_EL1_CCIDX_SHIFT 24 #define ID_MMFR4_EL1_LSM_SHIFT 20 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 384757a7eda9..5f2273768173 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -46,6 +46,53 @@ # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration # item ACCDATA) though it may be more taseful to do something else. +Sysreg ID_MMFR0_EL1 3 0 0 1 4 +Res0 63:32 +Enum 31:28 InnerShr + 0b0000 NC + 0b0001 HW + 0b1111 IGNORED +EndEnum +Enum 27:24 FCSE + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 AuxReg + 0b0000 NI + 0b0001 ACTLR + 0b0010 AIFSR +EndEnum +Enum 19:16 TCM + 0b0000 NI + 0b0001 IMPDEF + 0b0010 TCM + 0b0011 TCM_DMA +EndEnum +Enum 15:12 ShareLvl + 0b0000 ONE + 0b0001 TWO +EndEnum +Enum 11:8 OuterShr + 0b0000 NC + 0b0001 HW + 0b1111 IGNORED +EndEnum +Enum 7:4 PMSA + 0b0000 NI + 0b0001 IMPDEF + 0b0010 PMSAv6 + 0b0011 PMSAv7 +EndEnum +Enum 3:0 VMSA + 0b0000 NI + 0b0001 IMPDEF + 0b0010 VMSAv6 + 0b0011 VMSAv7 + 0b0100 VMSAv7_PXN + 0b0101 VMSAv7_LONG +EndEnum +EndSysreg + Sysreg ID_AA64PFR0_EL1 3 0 0 4 0 Enum 63:60 CSV3 0b0000 NI -- cgit v1.2.3 From 7e2f00bea3dbbe082cf2ff33ca8711235d098207 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:17 +0000 Subject: arm64/sysreg: Convert ID_MMFR1_EL1 to automatic generation Convert ID_MMFR1_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-19-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 49 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f856fbf8bfdf..e7e8400fc61f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -171,7 +171,6 @@ #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) -#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) #define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) #define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) #define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 5f2273768173..2f1d0077afed 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -93,6 +93,55 @@ Enum 3:0 VMSA EndEnum EndSysreg +Sysreg ID_MMFR1_EL1 3 0 0 1 5 +Res0 63:32 +Enum 31:28 BPred + 0b0000 NI + 0b0001 BP_SW_MANGED + 0b0010 BP_ASID_AWARE + 0b0011 BP_NOSNOOP + 0b0100 BP_INVISIBLE +EndEnum +Enum 27:24 L1TstCln + 0b0000 NI + 0b0001 NOINVALIDATE + 0b0010 INVALIDATE +EndEnum +Enum 23:20 L1Uni + 0b0000 NI + 0b0001 INVALIDATE + 0b0010 CLEAN_AND_INVALIDATE +EndEnum +Enum 19:16 L1Hvd + 0b0000 NI + 0b0001 INVALIDATE_ISIDE_ONLY + 0b0010 INVALIDATE + 0b0011 CLEAN_AND_INVALIDATE +EndEnum +Enum 15:12 L1UniSW + 0b0000 NI + 0b0001 CLEAN + 0b0010 CLEAN_AND_INVALIDATE + 0b0011 INVALIDATE +EndEnum +Enum 11:8 L1HvdSW + 0b0000 NI + 0b0001 CLEAN_AND_INVALIDATE + 0b0010 INVALIDATE_DSIDE_ONLY + 0b0011 INVALIDATE +EndEnum +Enum 7:4 L1UniVA + 0b0000 NI + 0b0001 CLEAN_AND_INVALIDATE + 0b0010 INVALIDATE_BP +EndEnum +Enum 3:0 L1HvdVA + 0b0000 NI + 0b0001 CLEAN_AND_INVALIDATE + 0b0010 INVALIDATE_BP +EndEnum +EndSysreg + Sysreg ID_AA64PFR0_EL1 3 0 0 4 0 Enum 63:60 CSV3 0b0000 NI -- cgit v1.2.3 From fbfba88b6ae136a6ae02dddd36bcf8f66f811128 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:18 +0000 Subject: arm64/sysreg: Convert ID_MMFR2_EL1 to automatic generation Convert ID_MMFR2_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-20-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e7e8400fc61f..7e7e647a895c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -171,7 +171,6 @@ #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) -#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) #define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) #define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 2f1d0077afed..df8b234eb7e9 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -142,6 +142,47 @@ Enum 3:0 L1HvdVA EndEnum EndSysreg +Sysreg ID_MMFR2_EL1 3 0 0 1 6 +Res0 63:32 +Enum 31:28 HWAccFlg + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 WFIStall + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 MemBarr + 0b0000 NI + 0b0001 DSB_ONLY + 0b0010 IMP +EndEnum +Enum 19:16 UniTLB + 0b0000 NI + 0b0001 BY_VA + 0b0010 BY_MATCH_ASID + 0b0011 BY_ALL_ASID + 0b0100 OTHER_TLBS + 0b0101 BROADCAST + 0b0110 BY_IPA +EndEnum +Enum 15:12 HvdTLB + 0b0000 NI +EndEnum +Enum 11:8 L1HvdRng + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 L1HvdBG + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 L1HvdFG + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_AA64PFR0_EL1 3 0 0 4 0 Enum 63:60 CSV3 0b0000 NI -- cgit v1.2.3 From 8fe2a9c578b07c9a631fbf7cd9e278ffa6619869 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:19 +0000 Subject: arm64/sysreg: Convert ID_MMFR3_EL1 to automatic generation Convert ID_MMFR3_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-21-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7e7e647a895c..be4015d22a30 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -171,7 +171,6 @@ #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) -#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) #define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index df8b234eb7e9..7573312ef249 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -183,6 +183,46 @@ Enum 3:0 L1HvdFG EndEnum EndSysreg +Sysreg ID_MMFR3_EL1 3 0 0 1 7 +Res0 63:32 +Enum 31:28 Supersec + 0b0000 IMP + 0b1111 NI +EndEnum +Enum 27:24 CMemSz + 0b0000 4GB + 0b0001 64GB + 0b0010 1TB +EndEnum +Enum 23:20 CohWalk + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 PAN + 0b0000 NI + 0b0001 PAN + 0b0010 PAN2 +EndEnum +Enum 15:12 MaintBcst + 0b0000 NI + 0b0001 NO_TLB + 0b0010 ALL +EndEnum +Enum 11:8 BPMaint + 0b0000 NI + 0b0001 ALL + 0b0010 BY_VA +EndEnum +Enum 7:4 CMaintSW + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 CMaintVA + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_AA64PFR0_EL1 3 0 0 4 0 Enum 63:60 CSV3 0b0000 NI -- cgit v1.2.3 From 5b380ae0e2b3f354bd1274008286b10ee8585015 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:20 +0000 Subject: arm64/sysreg: Convert ID_MMFR4_EL1 to automatic generation Convert ID_MMFR4_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-22-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 10 ---------- arch/arm64/tools/sysreg | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index be4015d22a30..b9fdb1e0a451 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -171,7 +171,6 @@ #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) -#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) #define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) @@ -727,15 +726,6 @@ #define ID_ISAR6_EL1_DP_SHIFT 4 #define ID_ISAR6_EL1_JSCVT_SHIFT 0 -#define ID_MMFR4_EL1_EVT_SHIFT 28 -#define ID_MMFR4_EL1_CCIDX_SHIFT 24 -#define ID_MMFR4_EL1_LSM_SHIFT 20 -#define ID_MMFR4_EL1_HPDS_SHIFT 16 -#define ID_MMFR4_EL1_CnP_SHIFT 12 -#define ID_MMFR4_EL1_XNX_SHIFT 8 -#define ID_MMFR4_EL1_AC2_SHIFT 4 -#define ID_MMFR4_EL1_SpecSEI_SHIFT 0 - #define ID_MMFR5_EL1_ETS_SHIFT 0 #define ID_PFR0_EL1_DIT_SHIFT 24 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 7573312ef249..7cd258ee7a0f 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -223,6 +223,44 @@ Enum 3:0 CMaintVA EndEnum EndSysreg +Sysreg ID_MMFR4_EL1 3 0 0 2 6 +Res0 63:32 +Enum 31:28 EVT + 0b0000 NI + 0b0001 NO_TLBIS + 0b0010 TLBIS +EndEnum +Enum 27:24 CCIDX + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 LSM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 HPDS + 0b0000 NI + 0b0001 AA32HPD + 0b0010 HPDS2 +EndEnum +Enum 15:12 CnP + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 XNX + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 AC2 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 SpecSEI + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_AA64PFR0_EL1 3 0 0 4 0 Enum 63:60 CSV3 0b0000 NI -- cgit v1.2.3 From 258a96b25a9d2ee54171653b801637edc3cc0b7f Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:21 +0000 Subject: arm64/sysreg: Convert ID_ISAR0_EL1 to automatic generation Convert ID_ISAR0_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-23-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 9 --------- arch/arm64/tools/sysreg | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b9fdb1e0a451..758f1c139c25 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -173,7 +173,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) #define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) #define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) #define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) @@ -703,14 +702,6 @@ #define ID_DFR1_EL1_MTPMU_SHIFT 0 -#define ID_ISAR0_EL1_Divide_SHIFT 24 -#define ID_ISAR0_EL1_Debug_SHIFT 20 -#define ID_ISAR0_EL1_Coproc_SHIFT 16 -#define ID_ISAR0_EL1_CmpBranch_SHIFT 12 -#define ID_ISAR0_EL1_BitField_SHIFT 8 -#define ID_ISAR0_EL1_BitCount_SHIFT 4 -#define ID_ISAR0_EL1_Swap_SHIFT 0 - #define ID_ISAR5_EL1_RDM_SHIFT 24 #define ID_ISAR5_EL1_CRC32_SHIFT 16 #define ID_ISAR5_EL1_SHA2_SHIFT 12 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 7cd258ee7a0f..bc1133af1746 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -223,6 +223,42 @@ Enum 3:0 CMaintVA EndEnum EndSysreg +Sysreg ID_ISAR0_EL1 3 0 0 2 0 +Res0 63:28 +Enum 27:24 Divide + 0b0000 NI + 0b0001 xDIV_T32 + 0b0010 xDIV_A32 +EndEnum +Enum 23:20 Debug + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 Coproc + 0b0000 NI + 0b0001 MRC + 0b0010 MRC2 + 0b0011 MRRC + 0b0100 MRRC2 +EndEnum +Enum 15:12 CmpBranch + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 BitField + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 BitCount + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 Swap + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_MMFR4_EL1 3 0 0 2 6 Res0 63:32 Enum 31:28 EVT -- cgit v1.2.3 From 892386a6a8073aa880ab4b89e0fe421fa429b7bb Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:22 +0000 Subject: arm64/sysreg: Convert ID_ISAR1_EL1 to automatic generation Convert ID_ISAR1_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-24-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 758f1c139c25..d15edee3b1da 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -173,7 +173,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) #define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) #define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) #define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index bc1133af1746..d7c7949be47f 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -259,6 +259,45 @@ Enum 3:0 Swap EndEnum EndSysreg +Sysreg ID_ISAR1_EL1 3 0 0 2 1 +Res0 63:32 +Enum 31:28 Jazelle + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 Interwork + 0b0000 NI + 0b0001 BX + 0b0010 BLX + 0b0011 A32_BX +EndEnum +Enum 23:20 Immediate + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 IfThen + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 Extend + 0b0000 NI + 0b0001 SXTB + 0b0010 SXTB16 +EndEnum +Enum 11:8 Except_AR + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 Except + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 Endian + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_MMFR4_EL1 3 0 0 2 6 Res0 63:32 Enum 31:28 EVT -- cgit v1.2.3 From dfa70ae8d8c2b24ecb55b8877f693f9b0a13fb95 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:23 +0000 Subject: arm64/sysreg: Convert ID_ISAR2_EL1 to automatic generation Convert ID_ISAR2_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-25-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 46 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d15edee3b1da..f62d21a00da4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -173,7 +173,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) #define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) #define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) #define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index d7c7949be47f..9a7e5d0e63eb 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -298,6 +298,52 @@ Enum 3:0 Endian EndEnum EndSysreg +Sysreg ID_ISAR2_EL1 3 0 0 2 2 +Res0 63:32 +Enum 31:28 Reversal + 0b0000 NI + 0b0001 REV + 0b0010 RBIT +EndEnum +Enum 27:24 PSR_AR + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 MultU + 0b0000 NI + 0b0001 UMULL + 0b0010 UMAAL +EndEnum +Enum 19:16 MultS + 0b0000 NI + 0b0001 SMULL + 0b0010 SMLABB + 0b0011 SMLAD +EndEnum +Enum 15:12 Mult + 0b0000 NI + 0b0001 MLA + 0b0010 MLS +EndEnum +Enum 11:8 MultiAccessInt + 0b0000 NI + 0b0001 RESTARTABLE + 0b0010 CONTINUABLE +EndEnum +Enum 7:4 MemHint + 0b0000 NI + 0b0001 PLD + 0b0010 PLD2 + 0b0011 PLI + 0b0100 PLDW +EndEnum +Enum 3:0 LoadStore + 0b0000 NI + 0b0001 DOUBLE + 0b0010 ACQUIRE +EndEnum +EndSysreg + Sysreg ID_MMFR4_EL1 3 0 0 2 6 Res0 63:32 Enum 31:28 EVT -- cgit v1.2.3 From d07016c965300d5872a751b4f45a8934f576c9ca Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:24 +0000 Subject: arm64/sysreg: Convert ID_ISAR3_EL1 to automatic generation Convert ID_ISAR3_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-26-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f62d21a00da4..bfd69abf1bf7 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -173,7 +173,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) #define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) #define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) #define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 9a7e5d0e63eb..1e2d5a3b619c 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -344,6 +344,44 @@ Enum 3:0 LoadStore EndEnum EndSysreg +Sysreg ID_ISAR3_EL1 3 0 0 2 3 +Res0 63:32 +Enum 31:28 T32EE + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 TrueNOP + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 T32Copy + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 TabBranch + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 SynchPrim + 0b0000 NI + 0b0001 EXCLUSIVE + 0b0010 DOUBLE +EndEnum +Enum 11:8 SVC + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 SIMD + 0b0000 NI + 0b0001 SSAT + 0b0011 PKHBT +EndEnum +Enum 3:0 Saturate + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_MMFR4_EL1 3 0 0 2 6 Res0 63:32 Enum 31:28 EVT -- cgit v1.2.3 From 849cc9bd9f0ef532c208d1cc01a824dc119646e3 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:25 +0000 Subject: arm64/sysreg: Convert ID_ISAR4_EL1 to automatic generation Convert ID_ISAR4_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-27-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 10 ---------- arch/arm64/tools/sysreg | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index bfd69abf1bf7..0a63c39407f2 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -173,7 +173,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) #define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) #define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) @@ -688,15 +687,6 @@ #define ID_DFR0_EL1_PerfMon_PMUv3p4 0x5 #define ID_DFR0_EL1_PerfMon_PMUv3p5 0x6 -#define ID_ISAR4_EL1_SWP_frac_SHIFT 28 -#define ID_ISAR4_EL1_PSR_M_SHIFT 24 -#define ID_ISAR4_EL1_SynchPrim_frac_SHIFT 20 -#define ID_ISAR4_EL1_Barrier_SHIFT 16 -#define ID_ISAR4_EL1_SMC_SHIFT 12 -#define ID_ISAR4_EL1_Writeback_SHIFT 8 -#define ID_ISAR4_EL1_WithShifts_SHIFT 4 -#define ID_ISAR4_EL1_Unpriv_SHIFT 0 - #define ID_DFR1_EL1_MTPMU_SHIFT 0 #define ID_ISAR5_EL1_RDM_SHIFT 24 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 1e2d5a3b619c..ed88cc1ae2a8 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -382,6 +382,45 @@ Enum 3:0 Saturate EndEnum EndSysreg +Sysreg ID_ISAR4_EL1 3 0 0 2 4 +Res0 63:32 +Enum 31:28 SWP_frac + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 PSR_M + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 SynchPrim_frac + 0b0000 NI + 0b0011 IMP +EndEnum +Enum 19:16 Barrier + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 SMC + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 Writeback + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 WithShifts + 0b0000 NI + 0b0001 LSL3 + 0b0011 LS + 0b0100 REG +EndEnum +Enum 3:0 Unpriv + 0b0000 NI + 0b0001 REG_BYTE + 0b0010 SIGNED_HALFWORD +EndEnum +EndSysreg + Sysreg ID_MMFR4_EL1 3 0 0 2 6 Res0 63:32 Enum 31:28 EVT -- cgit v1.2.3 From f4e9ce12dd88d33c25019e2053ade587d7b95969 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:26 +0000 Subject: arm64/sysreg: Convert ID_ISAR5_EL1 to automatic generation Convert ID_ISAR5_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-28-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 8 -------- arch/arm64/tools/sysreg | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 0a63c39407f2..04a6e44427a9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -173,7 +173,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) #define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) #define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) @@ -689,13 +688,6 @@ #define ID_DFR1_EL1_MTPMU_SHIFT 0 -#define ID_ISAR5_EL1_RDM_SHIFT 24 -#define ID_ISAR5_EL1_CRC32_SHIFT 16 -#define ID_ISAR5_EL1_SHA2_SHIFT 12 -#define ID_ISAR5_EL1_SHA1_SHIFT 8 -#define ID_ISAR5_EL1_AES_SHIFT 4 -#define ID_ISAR5_EL1_SEVL_SHIFT 0 - #define ID_ISAR6_EL1_I8MM_SHIFT 24 #define ID_ISAR6_EL1_BF16_SHIFT 20 #define ID_ISAR6_EL1_SPECRES_SHIFT 16 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index ed88cc1ae2a8..43e765a2c68f 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -421,6 +421,40 @@ Enum 3:0 Unpriv EndEnum EndSysreg +Sysreg ID_ISAR5_EL1 3 0 0 2 5 +Res0 63:32 +Enum 31:28 VCMA + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 RDM + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 23:20 +Enum 19:16 CRC32 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 SHA2 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 SHA1 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 AES + 0b0000 NI + 0b0001 IMP + 0b0010 VMULL +EndEnum +Enum 3:0 SEVL + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_MMFR4_EL1 3 0 0 2 6 Res0 63:32 Enum 31:28 EVT -- cgit v1.2.3 From 5ea58a1b5c7a3830322e6921f9e415968ee2af54 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:27 +0000 Subject: arm64/sysreg: Convert ID_ISAR6_EL1 to automatic generation Convert ID_ISAR6_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-29-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 10 ---------- arch/arm64/tools/sysreg | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 04a6e44427a9..03f38890cb2b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -173,8 +173,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) - #define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) #define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) #define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) @@ -688,14 +686,6 @@ #define ID_DFR1_EL1_MTPMU_SHIFT 0 -#define ID_ISAR6_EL1_I8MM_SHIFT 24 -#define ID_ISAR6_EL1_BF16_SHIFT 20 -#define ID_ISAR6_EL1_SPECRES_SHIFT 16 -#define ID_ISAR6_EL1_SB_SHIFT 12 -#define ID_ISAR6_EL1_FHM_SHIFT 8 -#define ID_ISAR6_EL1_DP_SHIFT 4 -#define ID_ISAR6_EL1_JSCVT_SHIFT 0 - #define ID_MMFR5_EL1_ETS_SHIFT 0 #define ID_PFR0_EL1_DIT_SHIFT 24 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 43e765a2c68f..aa6b3f5316f0 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -455,6 +455,38 @@ Enum 3:0 SEVL EndEnum EndSysreg +Sysreg ID_ISAR6_EL1 3 0 0 2 7 +Res0 63:28 +Enum 27:24 I8MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 BF16 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 SPECRES + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 SB + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 FHM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 DP + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 JSCVT + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_MMFR4_EL1 3 0 0 2 6 Res0 63:32 Enum 31:28 EVT -- cgit v1.2.3 From fb0b8d1a24d8707e93b2338bf233df5a904cbdf6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:28 +0000 Subject: arm64/sysreg: Convert ID_PFR0_EL1 to automatic generation Convert ID_PFR0_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Signed-off-by: James Morse Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20221130171637.718182-30-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 8 -------- arch/arm64/tools/sysreg | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 03f38890cb2b..72b32e179b46 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -165,7 +165,6 @@ #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) -#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) #define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) #define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) @@ -688,13 +687,6 @@ #define ID_MMFR5_EL1_ETS_SHIFT 0 -#define ID_PFR0_EL1_DIT_SHIFT 24 -#define ID_PFR0_EL1_CSV2_SHIFT 16 -#define ID_PFR0_EL1_State3_SHIFT 12 -#define ID_PFR0_EL1_State2_SHIFT 8 -#define ID_PFR0_EL1_State1_SHIFT 4 -#define ID_PFR0_EL1_State0_SHIFT 0 - #define ID_DFR0_EL1_PerfMon_SHIFT 24 #define ID_DFR0_EL1_MProfDbg_SHIFT 20 #define ID_DFR0_EL1_MMapTrc_SHIFT 16 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index aa6b3f5316f0..641bd7e3365c 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -46,6 +46,47 @@ # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration # item ACCDATA) though it may be more taseful to do something else. +Sysreg ID_PFR0_EL1 3 0 0 1 0 +Res0 63:32 +Enum 31:28 RAS + 0b0000 NI + 0b0001 RAS + 0b0010 RASv1p1 +EndEnum +Enum 27:24 DIT + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 AMU + 0b0000 NI + 0b0001 AMUv1 + 0b0010 AMUv1p1 +EndEnum +Enum 19:16 CSV2 + 0b0000 UNDISCLOSED + 0b0001 IMP + 0b0010 CSV2p1 +EndEnum +Enum 15:12 State3 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 State2 + 0b0000 NI + 0b0001 NO_CV + 0b0010 CV +EndEnum +Enum 7:4 State1 + 0b0000 NI + 0b0001 THUMB + 0b0010 THUMB2 +EndEnum +Enum 3:0 State0 + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_MMFR0_EL1 3 0 0 1 4 Res0 63:32 Enum 31:28 InnerShr -- cgit v1.2.3 From 1224308075f17dc42dc96043dfc031e221b87c98 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:29 +0000 Subject: arm64/sysreg: Convert ID_PFR1_EL1 to automatic generation Convert ID_PFR1_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-31-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 10 ---------- arch/arm64/tools/sysreg | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 72b32e179b46..85197dd180e0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -165,7 +165,6 @@ #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) -#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) #define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) @@ -716,15 +715,6 @@ #define MVFR1_EL1_FPDNaN_SHIFT 4 #define MVFR1_EL1_FPFtZ_SHIFT 0 -#define ID_PFR1_EL1_GIC_SHIFT 28 -#define ID_PFR1_EL1_Virt_frac_SHIFT 24 -#define ID_PFR1_EL1_Sec_frac_SHIFT 20 -#define ID_PFR1_EL1_GenTimer_SHIFT 16 -#define ID_PFR1_EL1_Virtualization_SHIFT 12 -#define ID_PFR1_EL1_MProgMod_SHIFT 8 -#define ID_PFR1_EL1_Security_SHIFT 4 -#define ID_PFR1_EL1_ProgMod_SHIFT 0 - #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 641bd7e3365c..e86193b295e4 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -87,6 +87,46 @@ Enum 3:0 State0 EndEnum EndSysreg +Sysreg ID_PFR1_EL1 3 0 0 1 1 +Res0 63:32 +Enum 31:28 GIC + 0b0000 NI + 0b0001 GICv3 + 0b0010 GICv4p1 +EndEnum +Enum 27:24 Virt_frac + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 Sec_frac + 0b0000 NI + 0b0001 WALK_DISABLE + 0b0010 SECURE_MEMORY +EndEnum +Enum 19:16 GenTimer + 0b0000 NI + 0b0001 IMP + 0b0010 ECV +EndEnum +Enum 15:12 Virtualization + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 MProgMod + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 Security + 0b0000 NI + 0b0001 EL3 + 0b0001 NSACR_RFR +EndEnum +Enum 3:0 ProgMod + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_MMFR0_EL1 3 0 0 1 4 Res0 63:32 Enum 31:28 InnerShr -- cgit v1.2.3 From 039d372305fff8aa7dc22774c80637d57775eee6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:30 +0000 Subject: arm64/sysreg: Convert ID_PFR2_EL1 to automatic generation Convert ID_PFR2_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-32-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 4 ---- arch/arm64/tools/sysreg | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 85197dd180e0..ccb64dc09a4e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -165,7 +165,6 @@ #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) -#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) @@ -694,9 +693,6 @@ #define ID_DFR0_EL1_CopSDbg_SHIFT 4 #define ID_DFR0_EL1_CopDbg_SHIFT 0 -#define ID_PFR2_EL1_SSBS_SHIFT 4 -#define ID_PFR2_EL1_CSV3_SHIFT 0 - #define MVFR0_EL1_FPRound_SHIFT 28 #define MVFR0_EL1_FPShVec_SHIFT 24 #define MVFR0_EL1_FPSqrt_SHIFT 20 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index e86193b295e4..667428a89578 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -606,6 +606,22 @@ Enum 3:0 SpecSEI EndEnum EndSysreg +Sysreg ID_PFR2_EL1 3 0 0 3 4 +Res0 63:12 +Enum 11:8 RAS_frac + 0b0000 NI + 0b0001 RASv1p1 +EndEnum +Enum 7:4 SSBS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 CSV3 + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_AA64PFR0_EL1 3 0 0 4 0 Enum 63:60 CSV3 0b0000 NI -- cgit v1.2.3 From e79c94a2a487515aeb1557b6d3e540ae5f66a67a Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:31 +0000 Subject: arm64/sysreg: Convert MVFR0_EL1 to automatic generation Convert MVFR0_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-33-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 10 ---------- arch/arm64/tools/sysreg | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ccb64dc09a4e..561968f7b66d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -170,7 +170,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) #define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) #define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) @@ -693,15 +692,6 @@ #define ID_DFR0_EL1_CopSDbg_SHIFT 4 #define ID_DFR0_EL1_CopDbg_SHIFT 0 -#define MVFR0_EL1_FPRound_SHIFT 28 -#define MVFR0_EL1_FPShVec_SHIFT 24 -#define MVFR0_EL1_FPSqrt_SHIFT 20 -#define MVFR0_EL1_FPDivide_SHIFT 16 -#define MVFR0_EL1_FPTrap_SHIFT 12 -#define MVFR0_EL1_FPDP_SHIFT 8 -#define MVFR0_EL1_FPSP_SHIFT 4 -#define MVFR0_EL1_SIMDReg_SHIFT 0 - #define MVFR1_EL1_SIMDFMAC_SHIFT 28 #define MVFR1_EL1_FPHP_SHIFT 24 #define MVFR1_EL1_SIMDHP_SHIFT 20 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 667428a89578..7a56a9a0efdf 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -606,6 +606,45 @@ Enum 3:0 SpecSEI EndEnum EndSysreg +Sysreg MVFR0_EL1 3 0 0 3 0 +Res0 63:32 +Enum 31:28 FPRound + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 FPShVec + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 FPSqrt + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 FPDivide + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 FPTrap + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 FPDP + 0b0000 NI + 0b0001 VFPv2 + 0b0001 VFPv3 +EndEnum +Enum 7:4 FPSP + 0b0000 NI + 0b0001 VFPv2 + 0b0001 VFPv3 +EndEnum +Enum 3:0 SIMDReg + 0b0000 NI + 0b0001 IMP_16x64 + 0b0001 IMP_32x64 +EndEnum +EndSysreg + Sysreg ID_PFR2_EL1 3 0 0 3 4 Res0 63:12 Enum 11:8 RAS_frac -- cgit v1.2.3 From c9b718eda706179310821daf48caca7da7918a10 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:32 +0000 Subject: arm64/sysreg: Convert MVFR1_EL1 to automatic generation Convert MVFR1_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-34-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 10 ---------- arch/arm64/tools/sysreg | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 561968f7b66d..d4b787d30fe8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -170,7 +170,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) #define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) @@ -692,15 +691,6 @@ #define ID_DFR0_EL1_CopSDbg_SHIFT 4 #define ID_DFR0_EL1_CopDbg_SHIFT 0 -#define MVFR1_EL1_SIMDFMAC_SHIFT 28 -#define MVFR1_EL1_FPHP_SHIFT 24 -#define MVFR1_EL1_SIMDHP_SHIFT 20 -#define MVFR1_EL1_SIMDSP_SHIFT 16 -#define MVFR1_EL1_SIMDInt_SHIFT 12 -#define MVFR1_EL1_SIMDLS_SHIFT 8 -#define MVFR1_EL1_FPDNaN_SHIFT 4 -#define MVFR1_EL1_FPFtZ_SHIFT 0 - #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 7a56a9a0efdf..3ca7e61265fd 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -645,6 +645,45 @@ Enum 3:0 SIMDReg EndEnum EndSysreg +Sysreg MVFR1_EL1 3 0 0 3 1 +Res0 63:32 +Enum 31:28 SIMDFMAC + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 FPHP + 0b0000 NI + 0b0001 FPHP + 0b0010 FPHP_CONV + 0b0011 FP16 +EndEnum +Enum 23:20 SIMDHP + 0b0000 NI + 0b0001 SIMDHP + 0b0001 SIMDHP_FLOAT +EndEnum +Enum 19:16 SIMDSP + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 SIMDInt + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 SIMDLS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 FPDNaN + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 FPFtZ + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_PFR2_EL1 3 0 0 3 4 Res0 63:12 Enum 11:8 RAS_frac -- cgit v1.2.3 From f70a810e01b2ad7eaad3470be1cf4220249bb251 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:33 +0000 Subject: arm64/sysreg: Convert MVFR2_EL1 to automatic generation Convert MVFR2_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-35-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 5 ----- arch/arm64/tools/sysreg | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d4b787d30fe8..aabc6db300fa 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -170,8 +170,6 @@ #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) -#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) - #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) @@ -708,9 +706,6 @@ #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT #endif -#define MVFR2_EL1_FPMisc_SHIFT 4 -#define MVFR2_EL1_SIMDMisc_SHIFT 0 - #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 3ca7e61265fd..f0481f626405 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -684,6 +684,23 @@ Enum 3:0 FPFtZ EndEnum EndSysreg +Sysreg MVFR2_EL1 3 0 0 3 2 +Res0 63:8 +Enum 7:4 FPMisc + 0b0000 NI + 0b0001 FP + 0b0010 FP_DIRECTED_ROUNDING + 0b0011 FP_ROUNDING + 0b0100 FP_MAX_MIN +EndEnum +Enum 3:0 SIMDMisc + 0b0000 NI + 0b0001 SIMD_DIRECTED_ROUNDING + 0b0010 SIMD_ROUNDING + 0b0011 SIMD_MAX_MIN +EndEnum +EndSysreg + Sysreg ID_PFR2_EL1 3 0 0 3 4 Res0 63:12 Enum 11:8 RAS_frac -- cgit v1.2.3 From 8a950efa1ff0393967d79428b16af88193591ab3 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:34 +0000 Subject: arm64/sysreg: Convert ID_MMFR5_EL1 to automatic generation Convert ID_MMFR5_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-36-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 3 --- arch/arm64/tools/sysreg | 12 ++++++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index aabc6db300fa..25e96aa84ae8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -168,7 +168,6 @@ #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) -#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) @@ -679,8 +678,6 @@ #define ID_DFR1_EL1_MTPMU_SHIFT 0 -#define ID_MMFR5_EL1_ETS_SHIFT 0 - #define ID_DFR0_EL1_PerfMon_SHIFT 24 #define ID_DFR0_EL1_MProfDbg_SHIFT 20 #define ID_DFR0_EL1_MMapTrc_SHIFT 16 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index f0481f626405..e83816c175a4 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -717,6 +717,18 @@ Enum 3:0 CSV3 EndEnum EndSysreg +Sysreg ID_MMFR5_EL1 3 0 0 3 6 +Res0 63:8 +Enum 7:4 nTLBPA + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 ETS + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_AA64PFR0_EL1 3 0 0 4 0 Enum 63:60 CSV3 0b0000 NI -- cgit v1.2.3 From 58e010516ee63b04863de7030858670d1fc93471 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:35 +0000 Subject: arm64/sysreg: Convert ID_AFR0_EL1 to automatic generation Convert ID_AFR0_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-37-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 8 ++++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 25e96aa84ae8..c4d59ea7147a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -167,7 +167,6 @@ #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) -#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index e83816c175a4..dd853ce2226d 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -127,6 +127,14 @@ Enum 3:0 ProgMod EndEnum EndSysreg +Sysreg ID_AFR0_EL1 3 0 0 1 3 +Res0 63:16 +Field 15:12 IMPDEF3 +Field 11:8 IMPDEF2 +Field 7:4 IMPDEF1 +Field 3:0 IMPDEF0 +EndSysreg + Sysreg ID_MMFR0_EL1 3 0 0 1 4 Res0 63:32 Enum 31:28 InnerShr -- cgit v1.2.3 From d044a9fbace769c0f47b52ab8bc39f69e8a6e922 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:36 +0000 Subject: arm64/sysreg: Convert ID_DFR0_EL1 to automatic generation Convert ID_DFR0_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Signed-off-by: James Morse Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20221130171637.718182-38-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 14 ------------ arch/arm64/tools/sysreg | 50 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c4d59ea7147a..a20ab575891f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -165,7 +165,6 @@ #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) -#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) @@ -670,21 +669,8 @@ #define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48 #endif -#define ID_DFR0_EL1_PerfMon_PMUv3 0x3 -#define ID_DFR0_EL1_PerfMon_PMUv3p1 0x4 -#define ID_DFR0_EL1_PerfMon_PMUv3p4 0x5 -#define ID_DFR0_EL1_PerfMon_PMUv3p5 0x6 - #define ID_DFR1_EL1_MTPMU_SHIFT 0 -#define ID_DFR0_EL1_PerfMon_SHIFT 24 -#define ID_DFR0_EL1_MProfDbg_SHIFT 20 -#define ID_DFR0_EL1_MMapTrc_SHIFT 16 -#define ID_DFR0_EL1_CopTrc_SHIFT 12 -#define ID_DFR0_EL1_MMapDbg_SHIFT 8 -#define ID_DFR0_EL1_CopSDbg_SHIFT 4 -#define ID_DFR0_EL1_CopDbg_SHIFT 0 - #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index dd853ce2226d..6ff68bc7e776 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -127,6 +127,56 @@ Enum 3:0 ProgMod EndEnum EndSysreg +Sysreg ID_DFR0_EL1 3 0 0 1 2 +Res0 63:32 +Enum 31:28 TraceFilt + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 PerfMon + 0b0000 NI + 0b0001 PMUv1 + 0b0010 PMUv2 + 0b0011 PMUv3 + 0b0100 PMUv3p1 + 0b0101 PMUv3p4 + 0b0110 PMUv3p5 + 0b0111 PMUv3p7 + 0b1000 PMUv3p8 + 0b1111 IMPDEF +EndEnum +Enum 23:20 MProfDbg + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 MMapTrc + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 CopTrc + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 MMapDbg + 0b0000 NI + 0b0100 Armv7 + 0b0101 Armv7p1 +EndEnum +Field 7:4 CopSDbg +Enum 3:0 CopDbg + 0b0000 NI + 0b0010 Armv6 + 0b0011 Armv6p1 + 0b0100 Armv7 + 0b0101 Armv7p1 + 0b0110 Armv8 + 0b0111 VHE + 0b1000 Debugv8p2 + 0b1001 Debugv8p4 + 0b1010 Debugv8p8 +EndEnum +EndSysreg + Sysreg ID_AFR0_EL1 3 0 0 1 3 Res0 63:16 Field 15:12 IMPDEF3 -- cgit v1.2.3 From fa057722978ee52f319670f4ff4f181ecfa87290 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 30 Nov 2022 17:16:37 +0000 Subject: arm64/sysreg: Convert ID_DFR1_EL1 to automatic generation Convert ID_DFR1_EL1 to be automatically generated as per DDI0487I.a, no functional changes. Reviewed-by: Mark Brown Signed-off-by: James Morse Link: https://lore.kernel.org/r/20221130171637.718182-39-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/tools/sysreg | 13 +++++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a20ab575891f..3350fcdbde08 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -165,8 +165,6 @@ #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) -#define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) - #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 6ff68bc7e776..bd5fceb26c54 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -775,6 +775,19 @@ Enum 3:0 CSV3 EndEnum EndSysreg +Sysreg ID_DFR1_EL1 3 0 0 3 5 +Res0 63:8 +Enum 7:4 HPMN0 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 MTPMU + 0b0000 IMPDEF + 0b0001 IMP + 0b1111 NI +EndEnum +EndSysreg + Sysreg ID_MMFR5_EL1 3 0 0 3 6 Res0 63:8 Enum 7:4 nTLBPA -- cgit v1.2.3 From acb3f4bc2108dca531f81233383e21f0bdc02267 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 Dec 2022 17:31:12 +0000 Subject: arm64/sysreg: Remove duplicate definitions from asm/sysreg.h With the new-fangled generation of asm/sysreg-defs.h, some definitions have ended up being duplicated between the two files. Remove these duplicate definitions, and consolidate the naming for GMID_EL1_BS_WIDTH. Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 6 ------ arch/arm64/lib/mte.S | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 3350fcdbde08..097488cee174 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -667,8 +667,6 @@ #define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48 #endif -#define ID_DFR1_EL1_MTPMU_SHIFT 0 - #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN @@ -719,10 +717,6 @@ #define SYS_RGSR_EL1_SEED_SHIFT 8 #define SYS_RGSR_EL1_SEED_MASK 0xffffUL -/* GMID_EL1 field definitions */ -#define GMID_EL1_BS_SHIFT 0 -#define GMID_EL1_BS_SIZE 4 - /* TFSR{,E0}_EL1 bit definitions */ #define SYS_TFSR_EL1_TF0_SHIFT 0 #define SYS_TFSR_EL1_TF1_SHIFT 1 diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index 1b7c93ae7e63..5018ac03b6bf 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -18,7 +18,7 @@ */ .macro multitag_transfer_size, reg, tmp mrs_s \reg, SYS_GMID_EL1 - ubfx \reg, \reg, #GMID_EL1_BS_SHIFT, #GMID_EL1_BS_SIZE + ubfx \reg, \reg, #GMID_EL1_BS_SHIFT, #GMID_EL1_BS_WIDTH mov \tmp, #4 lsl \reg, \tmp, \reg .endm -- cgit v1.2.3 From 58ff6569bc6ec369482eb2d132868870380be64c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 5 Dec 2022 12:05:51 +0000 Subject: KVM: arm64: PMU: Fix period computation for 64bit counters with 32bit overflow Fix the bogus masking when computing the period of a 64bit counter with 32bit overflow. It really should be treated like a 32bit counter for the purpose of the period. Reported-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/Y4jbosgHbUDI0WF4@google.com --- arch/arm64/kvm/pmu-emul.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index d8ea39943086..24908400e190 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -461,14 +461,10 @@ static u64 compute_period(struct kvm_pmc *pmc, u64 counter) { u64 val; - if (kvm_pmc_is_64bit(pmc)) { - if (!kvm_pmc_has_64bit_overflow(pmc)) - val = -(counter & GENMASK(31, 0)); - else - val = (-counter) & GENMASK(63, 0); - } else { + if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc)) + val = (-counter) & GENMASK(63, 0); + else val = (-counter) & GENMASK(31, 0); - } return val; } -- cgit v1.2.3 From 219072c09abde0f1d0a6ce091be375e8eb7d08f0 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 5 Dec 2022 11:40:31 +0000 Subject: KVM: arm64: Fix benign bug with incorrect use of VA_BITS get_user_mapping_size() uses kvm's pgtable library to walk a user space page table created by the kernel, and in doing so, passes metadata that the library needs, including ia_bits, which defines the size of the input address. For the case where the kernel is compiled for 52 VA bits but runs on HW that does not support LVA, it will fall back to 48 VA bits at runtime. Therefore we must use vabits_actual rather than VA_BITS to get the true address size. This is benign in the current code base because the pgtable library only uses it for error checking. Fixes: 6011cf68c885 ("KVM: arm64: Walk userspace page tables to compute the THP mapping size") Signed-off-by: Ryan Roberts Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221205114031.3972780-1-ryan.roberts@arm.com --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 4efb983cff43..1ef0704420d9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -641,7 +641,7 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr) { struct kvm_pgtable pgt = { .pgd = (kvm_pte_t *)kvm->mm->pgd, - .ia_bits = VA_BITS, + .ia_bits = vabits_actual, .start_level = (KVM_PGTABLE_MAX_LEVELS - CONFIG_PGTABLE_LEVELS), .mm_ops = &kvm_user_mm_ops, -- cgit v1.2.3