From c14d08c5adb25f397638be3d8dd7f4738fb38272 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 26 Apr 2023 17:23:19 +0000 Subject: KVM: arm64: Rename free_removed to free_unlinked Normalize on referring to tables outside of an active paging structure as 'unlinked'. A subsequent change to KVM will add support for building page tables that are not part of an active paging structure. The existing 'removed_table' terminology is quite clunky when applied in this context. Signed-off-by: Ricardo Koller Reviewed-by: Oliver Upton Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20230426172330.1439644-2-ricarkol@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm/mmu.c') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3b9d4d24c361..a0d3c773af99 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -131,21 +131,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size) static struct kvm_pgtable_mm_ops kvm_s2_mm_ops; -static void stage2_free_removed_table_rcu_cb(struct rcu_head *head) +static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head) { struct page *page = container_of(head, struct page, rcu_head); void *pgtable = page_to_virt(page); u32 level = page_private(page); - kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level); + kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level); } -static void stage2_free_removed_table(void *addr, u32 level) +static void stage2_free_unlinked_table(void *addr, u32 level) { struct page *page = virt_to_page(addr); set_page_private(page, (unsigned long)level); - call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb); + call_rcu(&page->rcu_head, stage2_free_unlinked_table_rcu_cb); } static void kvm_host_get_page(void *addr) @@ -701,7 +701,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .zalloc_page = stage2_memcache_zalloc_page, .zalloc_pages_exact = kvm_s2_zalloc_pages_exact, .free_pages_exact = kvm_s2_free_pages_exact, - .free_removed_table = stage2_free_removed_table, + .free_unlinked_table = stage2_free_unlinked_table, .get_page = kvm_host_get_page, .put_page = kvm_s2_put_page, .page_count = kvm_host_page_count, -- cgit v1.2.3 From 2f440b72e852be428540579b5813ba2b8236578d Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 26 Apr 2023 17:23:23 +0000 Subject: KVM: arm64: Add KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE Add a capability for userspace to specify the eager split chunk size. The chunk size specifies how many pages to break at a time, using a single allocation. Bigger the chunk size, more pages need to be allocated ahead of time. Suggested-by: Oliver Upton Signed-off-by: Ricardo Koller Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20230426172330.1439644-6-ricarkol@google.com Signed-off-by: Oliver Upton --- Documentation/virt/kvm/api.rst | 27 +++++++++++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++ arch/arm64/include/asm/kvm_pgtable.h | 18 ++++++++++++++++++ arch/arm64/kvm/arm.c | 28 ++++++++++++++++++++++++++++ arch/arm64/kvm/mmu.c | 4 ++++ include/uapi/linux/kvm.h | 2 ++ 6 files changed, 94 insertions(+) (limited to 'arch/arm64/kvm/mmu.c') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index add067793b90..656bd293c8f4 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8445,6 +8445,33 @@ structure. When getting the Modified Change Topology Report value, the attr->addr must point to a byte where the value will be stored or retrieved from. +8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE +--------------------------------------- + +:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE +:Architectures: arm64 +:Type: vm +:Parameters: arg[0] is the new split chunk size. +:Returns: 0 on success, -EINVAL if any memslot was already created. + +This capability sets the chunk size used in Eager Page Splitting. + +Eager Page Splitting improves the performance of dirty-logging (used +in live migrations) when guest memory is backed by huge-pages. It +avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing +it eagerly when enabling dirty logging (with the +KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using +KVM_CLEAR_DIRTY_LOG. + +The chunk size specifies how many pages to break at a time, using a +single allocation for each chunk. Bigger the chunk size, more pages +need to be allocated ahead of time. + +The chunk size needs to be a valid block size. The list of acceptable +block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a +64-bit bitmap (each bit describing a block size). The default value is +0, to disable the eager page splitting. + 9. Known KVM API problems ========================= diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7e7e19ef6993..b743198450b3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -159,6 +159,21 @@ struct kvm_s2_mmu { /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; +#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0 + /* + * Memory cache used to split + * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It + * is used to allocate stage2 page tables while splitting huge + * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE + * influences both the capacity of the split page cache, and + * how often KVM reschedules. Be wary of raising CHUNK_SIZE + * too high. + * + * Protected by kvm->slots_lock. + */ + struct kvm_mmu_memory_cache split_page_cache; + uint64_t split_page_chunk_size; + struct kvm_arch *arch; }; diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index c8e0e7d9303b..cbc6971e2cb4 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -92,6 +92,24 @@ static inline bool kvm_level_supports_block_mapping(u32 level) return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL; } +static inline u32 kvm_supported_block_sizes(void) +{ + u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL; + u32 r = 0; + + for (; level < KVM_PGTABLE_MAX_LEVELS; level++) + r |= BIT(kvm_granule_shift(level)); + + return r; +} + +static inline bool kvm_is_block_size_supported(u64 size) +{ + bool is_power_of_two = IS_ALIGNED(size, size); + + return is_power_of_two && (size & kvm_supported_block_sizes()); +} + /** * struct kvm_pgtable_mm_ops - Memory management callbacks. * @zalloc_page: Allocate a single zeroed memory page. diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 14391826241c..c605626801c4 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -65,6 +65,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { int r; + u64 new_cap; if (cap->flags) return -EINVAL; @@ -89,6 +90,24 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = 0; set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags); break; + case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: + new_cap = cap->args[0]; + + mutex_lock(&kvm->slots_lock); + /* + * To keep things simple, allow changing the chunk + * size only when no memory slots have been created. + */ + if (!kvm_are_all_memslots_empty(kvm)) { + r = -EINVAL; + } else if (new_cap && !kvm_is_block_size_supported(new_cap)) { + r = -EINVAL; + } else { + r = 0; + kvm->arch.mmu.split_page_chunk_size = new_cap; + } + mutex_unlock(&kvm->slots_lock); + break; default: r = -EINVAL; break; @@ -302,6 +321,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_PTRAUTH_GENERIC: r = system_has_full_ptr_auth(); break; + case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: + if (kvm) + r = kvm->arch.mmu.split_page_chunk_size; + else + r = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT; + break; + case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES: + r = kvm_supported_block_sizes(); + break; default: r = 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index a0d3c773af99..f2d30486f755 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -775,6 +775,10 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t for_each_possible_cpu(cpu) *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1; + /* The eager page splitting is disabled by default */ + mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT; + mmu->split_page_cache.gfp_zero = __GFP_ZERO; + mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); return 0; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 737318b1c1d9..44edee0211fb 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1190,6 +1190,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226 #define KVM_CAP_COUNTER_OFFSET 227 +#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 +#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 6bd92b9d8b02a67d67b6d7351ad5c81321a02017 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 26 Apr 2023 17:23:25 +0000 Subject: KVM: arm64: Refactor kvm_arch_commit_memory_region() Refactor kvm_arch_commit_memory_region() as a preparation for a future commit to look cleaner and more understandable. Also, it looks more like its x86 counterpart (in kvm_mmu_slot_apply_flags()). Signed-off-by: Ricardo Koller Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20230426172330.1439644-8-ricarkol@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'arch/arm64/kvm/mmu.c') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index f2d30486f755..8df15a174449 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1794,20 +1794,32 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { + bool log_dirty_pages = new && new->flags & KVM_MEM_LOG_DIRTY_PAGES; + /* * At this point memslot has been committed and there is an * allocated dirty_bitmap[], dirty pages will be tracked while the * memory slot is write protected. */ - if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) { + if (log_dirty_pages) { + + if (change == KVM_MR_DELETE) + return; + /* - * If we're with initial-all-set, we don't need to write - * protect any pages because they're all reported as dirty. - * Huge pages and normal pages will be write protect gradually. + * Pages are write-protected on either of these two + * cases: + * + * 1. with initial-all-set: gradually with CLEAR ioctls, */ - if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) { - kvm_mmu_wp_memory_region(kvm, new->id); - } + if (kvm_dirty_log_manual_protect_and_init_set(kvm)) + return; + /* + * or + * 2. without initial-all-set: all in one shot when + * enabling dirty logging. + */ + kvm_mmu_wp_memory_region(kvm, new->id); } } -- cgit v1.2.3 From ce2b60223800c801b4b519c07aff3aa9c75c2b6d Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 26 Apr 2023 17:23:26 +0000 Subject: KVM: arm64: Add kvm_uninit_stage2_mmu() Add kvm_uninit_stage2_mmu() and move kvm_free_stage2_pgd() into it. A future commit will add some more things to do inside of kvm_uninit_stage2_mmu(). Signed-off-by: Ricardo Koller Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20230426172330.1439644-9-ricarkol@google.com Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_mmu.h | 1 + arch/arm64/kvm/mmu.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm/mmu.c') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 27e63c111f78..20c50e00496d 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -172,6 +172,7 @@ void __init free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); +void kvm_uninit_stage2_mmu(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 8df15a174449..d3fb35a002f9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -790,6 +790,11 @@ out_free_pgtable: return err; } +void kvm_uninit_stage2_mmu(struct kvm *kvm) +{ + kvm_free_stage2_pgd(&kvm->arch.mmu); +} + static void stage2_unmap_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { @@ -1893,7 +1898,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) void kvm_arch_flush_shadow_all(struct kvm *kvm) { - kvm_free_stage2_pgd(&kvm->arch.mmu); + kvm_uninit_stage2_mmu(kvm); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, -- cgit v1.2.3 From e7bf7a490c68b0b64bc05aa0a4f09f6044037db1 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 26 Apr 2023 17:23:27 +0000 Subject: KVM: arm64: Split huge pages when dirty logging is enabled Split huge pages eagerly when enabling dirty logging. The goal is to avoid doing it while faulting on write-protected pages, which negatively impacts guest performance. A memslot marked for dirty logging is split in 1GB pieces at a time. This is in order to release the mmu_lock and give other kernel threads the opportunity to run, and also in order to allocate enough pages to split a 1GB range worth of huge pages (or a single 1GB huge page). Note that these page allocations can fail, so eager page splitting is best-effort. This is not a correctness issue though, as huge pages can still be split on write-faults. Eager page splitting only takes effect when the huge page mapping has been existing in the stage-2 page table. Otherwise, the huge page will be mapped to multiple non-huge pages on page fault. The benefits of eager page splitting are the same as in x86, added with commit a3fe5dbda0a4 ("KVM: x86/mmu: Split huge pages mapped by the TDP MMU when dirty logging is enabled"). For example, when running dirty_log_perf_test with 64 virtual CPUs (Ampere Altra), 1GB per vCPU, 50% reads, and 2MB HugeTLB memory, the time it takes vCPUs to access all of their memory after dirty logging is enabled decreased by 44% from 2.58s to 1.42s. Signed-off-by: Ricardo Koller Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20230426172330.1439644-10-ricarkol@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm/mmu.c') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index d3fb35a002f9..a36a01426b59 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -31,14 +31,21 @@ static phys_addr_t __ro_after_init hyp_idmap_vector; static unsigned long __ro_after_init io_map_base; -static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end) +static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end, + phys_addr_t size) { - phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL); phys_addr_t boundary = ALIGN_DOWN(addr + size, size); return (boundary - 1 < end - 1) ? boundary : end; } +static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL); + + return __stage2_range_addr_end(addr, end, size); +} + /* * Release kvm_mmu_lock periodically if the memory region is large. Otherwise, * we may see kernel panics with CONFIG_DETECT_HUNG_TASK, @@ -75,6 +82,79 @@ static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, #define stage2_apply_range_resched(mmu, addr, end, fn) \ stage2_apply_range(mmu, addr, end, fn, true) +/* + * Get the maximum number of page-tables pages needed to split a range + * of blocks into PAGE_SIZE PTEs. It assumes the range is already + * mapped at level 2, or at level 1 if allowed. + */ +static int kvm_mmu_split_nr_page_tables(u64 range) +{ + int n = 0; + + if (KVM_PGTABLE_MIN_BLOCK_LEVEL < 2) + n += DIV_ROUND_UP_ULL(range, PUD_SIZE); + n += DIV_ROUND_UP_ULL(range, PMD_SIZE); + return n; +} + +static bool need_split_memcache_topup_or_resched(struct kvm *kvm) +{ + struct kvm_mmu_memory_cache *cache; + u64 chunk_size, min; + + if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) + return true; + + chunk_size = kvm->arch.mmu.split_page_chunk_size; + min = kvm_mmu_split_nr_page_tables(chunk_size); + cache = &kvm->arch.mmu.split_page_cache; + return kvm_mmu_memory_cache_nr_free_objects(cache) < min; +} + +static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr, + phys_addr_t end) +{ + struct kvm_mmu_memory_cache *cache; + struct kvm_pgtable *pgt; + int ret, cache_capacity; + u64 next, chunk_size; + + lockdep_assert_held_write(&kvm->mmu_lock); + + chunk_size = kvm->arch.mmu.split_page_chunk_size; + cache_capacity = kvm_mmu_split_nr_page_tables(chunk_size); + + if (chunk_size == 0) + return 0; + + cache = &kvm->arch.mmu.split_page_cache; + + do { + if (need_split_memcache_topup_or_resched(kvm)) { + write_unlock(&kvm->mmu_lock); + cond_resched(); + /* Eager page splitting is best-effort. */ + ret = __kvm_mmu_topup_memory_cache(cache, + cache_capacity, + cache_capacity); + write_lock(&kvm->mmu_lock); + if (ret) + break; + } + + pgt = kvm->arch.mmu.pgt; + if (!pgt) + return -EINVAL; + + next = __stage2_range_addr_end(addr, end, chunk_size); + ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache); + if (ret) + break; + } while (addr = next, addr != end); + + return ret; +} + static bool memslot_is_logging(struct kvm_memory_slot *memslot) { return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY); @@ -793,6 +873,7 @@ out_free_pgtable: void kvm_uninit_stage2_mmu(struct kvm *kvm) { kvm_free_stage2_pgd(&kvm->arch.mmu); + kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache); } static void stage2_unmap_memslot(struct kvm *kvm, @@ -1019,6 +1100,34 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, stage2_wp_range(&kvm->arch.mmu, start, end); } +/** + * kvm_mmu_split_memory_region() - split the stage 2 blocks into PAGE_SIZE + * pages for memory slot + * @kvm: The KVM pointer + * @slot: The memory slot to split + * + * Acquires kvm->mmu_lock. Called with kvm->slots_lock mutex acquired, + * serializing operations for VM memory regions. + */ +static void kvm_mmu_split_memory_region(struct kvm *kvm, int slot) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + phys_addr_t start, end; + + lockdep_assert_held(&kvm->slots_lock); + + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, slot); + + start = memslot->base_gfn << PAGE_SHIFT; + end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + + write_lock(&kvm->mmu_lock); + kvm_mmu_split_huge_pages(kvm, start, end); + write_unlock(&kvm->mmu_lock); +} + /* * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected * dirty pages. @@ -1812,8 +1921,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, return; /* - * Pages are write-protected on either of these two - * cases: + * Huge and normal pages are write-protected and split + * on either of these two cases: * * 1. with initial-all-set: gradually with CLEAR ioctls, */ @@ -1825,6 +1934,16 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * enabling dirty logging. */ kvm_mmu_wp_memory_region(kvm, new->id); + kvm_mmu_split_memory_region(kvm, new->id); + } else { + /* + * Free any leftovers from the eager page splitting cache. Do + * this when deleting, moving, disabling dirty logging, or + * creating the memslot (a nop). Doing it for deletes makes + * sure we don't leak memory, and there's no need to keep the + * cache around for any of the other cases. + */ + kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache); } } -- cgit v1.2.3 From 3005f6f29447d4f397c2ba67119fdea222ee51d3 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 26 Apr 2023 17:23:28 +0000 Subject: KVM: arm64: Open-code kvm_mmu_write_protect_pt_masked() Move the functionality of kvm_mmu_write_protect_pt_masked() into its caller, kvm_arch_mmu_enable_log_dirty_pt_masked(). This will be used in a subsequent commit in order to share some of the code in kvm_arch_mmu_enable_log_dirty_pt_masked(). Signed-off-by: Ricardo Koller Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20230426172330.1439644-11-ricarkol@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) (limited to 'arch/arm64/kvm/mmu.c') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index a36a01426b59..272558f54101 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1078,28 +1078,6 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) kvm_flush_remote_tlbs(kvm); } -/** - * kvm_mmu_write_protect_pt_masked() - write protect dirty pages - * @kvm: The KVM pointer - * @slot: The memory slot associated with mask - * @gfn_offset: The gfn offset in memory slot - * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory - * slot to be write protected - * - * Walks bits set in mask write protects the associated pte's. Caller must - * acquire kvm_mmu_lock. - */ -static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *slot, - gfn_t gfn_offset, unsigned long mask) -{ - phys_addr_t base_gfn = slot->base_gfn + gfn_offset; - phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; - phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; - - stage2_wp_range(&kvm->arch.mmu, start, end); -} - /** * kvm_mmu_split_memory_region() - split the stage 2 blocks into PAGE_SIZE * pages for memory slot @@ -1129,17 +1107,27 @@ static void kvm_mmu_split_memory_region(struct kvm *kvm, int slot) } /* - * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected - * dirty pages. + * kvm_arch_mmu_enable_log_dirty_pt_masked() - enable dirty logging for selected pages. + * @kvm: The KVM pointer + * @slot: The memory slot associated with mask + * @gfn_offset: The gfn offset in memory slot + * @mask: The mask of pages at offset 'gfn_offset' in this memory + * slot to enable dirty logging on * - * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to - * enable dirty logging for them. + * Writes protect selected pages to enable dirty logging for them. Caller must + * acquire kvm->mmu_lock. */ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { - kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); + phys_addr_t base_gfn = slot->base_gfn + gfn_offset; + phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; + phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; + + lockdep_assert_held_write(&kvm->mmu_lock); + + stage2_wp_range(&kvm->arch.mmu, start, end); } static void kvm_send_hwpoison_signal(unsigned long address, short lsb) -- cgit v1.2.3 From 6acf51666d039857421b6f8a3660301c82649fa5 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 26 Apr 2023 17:23:29 +0000 Subject: KVM: arm64: Split huge pages during KVM_CLEAR_DIRTY_LOG This is the arm64 counterpart of commit cb00a70bd4b7 ("KVM: x86/mmu: Split huge pages mapped by the TDP MMU during KVM_CLEAR_DIRTY_LOG"), which has the benefit of splitting the cost of splitting a memslot across multiple ioctls. Split huge pages on the range specified using KVM_CLEAR_DIRTY_LOG. And do not split when enabling dirty logging if KVM_DIRTY_LOG_INITIALLY_SET is set. Signed-off-by: Ricardo Koller Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20230426172330.1439644-12-ricarkol@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm/mmu.c') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 272558f54101..3386bd28d267 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1114,8 +1114,8 @@ static void kvm_mmu_split_memory_region(struct kvm *kvm, int slot) * @mask: The mask of pages at offset 'gfn_offset' in this memory * slot to enable dirty logging on * - * Writes protect selected pages to enable dirty logging for them. Caller must - * acquire kvm->mmu_lock. + * Writes protect selected pages to enable dirty logging, and then + * splits them to PAGE_SIZE. Caller must acquire kvm->mmu_lock. */ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, @@ -1128,6 +1128,17 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, lockdep_assert_held_write(&kvm->mmu_lock); stage2_wp_range(&kvm->arch.mmu, start, end); + + /* + * Eager-splitting is done when manual-protect is set. We + * also check for initially-all-set because we can avoid + * eager-splitting if initially-all-set is false. + * Initially-all-set equal false implies that huge-pages were + * already split when enabling dirty logging: no need to do it + * again. + */ + if (kvm_dirty_log_manual_protect_and_init_set(kvm)) + kvm_mmu_split_huge_pages(kvm, start, end); } static void kvm_send_hwpoison_signal(unsigned long address, short lsb) -- cgit v1.2.3 From 14c3555f055dd0819381148bf5b569cc5ba9ddfb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 May 2023 22:23:39 +0200 Subject: arm64: kvm: avoid overflow in integer division The newly added kvm_mmu_split_nr_page_tables() function uses DIV_ROUND_DOWN_ULL() to divide 64-bit addresses, but this requires a 32-bit divisior, and PUD_SIZE may exceed that when 64KB pages are used: arch/arm64/kvm/mmu.c: In function 'kvm_mmu_split_nr_page_tables': include/linux/math.h:42:64: error: conversion from 'long unsigned int' to 'u32' {aka 'unsigned int'} changes value from '68719476736' to '0' [-Werror=overflow] 42 | DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d)) | ^~~ include/linux/math.h:39:47: note: in definition of macro 'DIV_ROUND_DOWN_ULL' 39 | #define DIV_ROUND_DOWN_ULL(ll, d) div_u64(ll, d) | ^ arch/arm64/kvm/mmu.c:95:22: note: in expansion of macro 'DIV_ROUND_UP_ULL' 95 | n += DIV_ROUND_UP_ULL(range, PUD_SIZE); | ^~~~~~~~~~~~~~~~ Since this code is only used on 64-bit targets, DIV_ROUND_UP() can deal with this more easily, as it already takes 64-bit arguments. Fixes: e7bf7a490c68 ("KVM: arm64: Split huge pages when dirty logging is enabled") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230517202352.793673-1-arnd@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm/mmu.c') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3386bd28d267..6db9ef288ec3 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -92,8 +92,8 @@ static int kvm_mmu_split_nr_page_tables(u64 range) int n = 0; if (KVM_PGTABLE_MIN_BLOCK_LEVEL < 2) - n += DIV_ROUND_UP_ULL(range, PUD_SIZE); - n += DIV_ROUND_UP_ULL(range, PMD_SIZE); + n += DIV_ROUND_UP(range, PUD_SIZE); + n += DIV_ROUND_UP(range, PMD_SIZE); return n; } -- cgit v1.2.3