From 7d3332be011e4ed061c1403b30b5e54ebccb4fa2 Mon Sep 17 00:00:00 2001
From: Björn Töpel
Date: Wed, 31 May 2023 11:38:17 +0200
Subject: riscv: mm: Pre-allocate PGD entries for vmalloc/modules area
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The RISC-V port requires that kernel PGD entries be synchronized
between MMs. This is done via the vmalloc_fault() function, which
simply copies the PGD entries from init_mm to the faulting one.

Historically, faulting in PGD entries has been a source of both bugs
[1] and poor performance.

One way to get rid of vmalloc faults is to pre-allocate the PGD
entries. Pre-allocating the entries potentially wastes 64 * 4K (65 on
SV39). The pre-allocation function is pulled from Jörg Rödel's x86
work, with the addition of 3-level page tables (PMD allocations). The
pmd_alloc() function needs the ptlock cache to be initialized (when
split page table locks are enabled), so the pre-allocation is done in
a RISC-V specific pgtable_cache_init() implementation.

Pre-allocate the kernel PGD entries for the vmalloc/modules area, but
only for 64-bit platforms.

Link: https://lore.kernel.org/lkml/20200508144043.13893-1-joro@8bytes.org/ # [1]
Signed-off-by: Björn Töpel
Reviewed-by: Alexandre Ghiti
Link: https://lore.kernel.org/r/20230531093817.665799-1-bjorn@kernel.org
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/mm/fault.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'arch/riscv/mm/fault.c')

diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 8685f85a7474..b023fb311e28 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -238,24 +238,12 @@ void handle_page_fault(struct pt_regs *regs)
 	 * only copy the information from the master page table,
 	 * nothing more.
 	 */
-	if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) {
+	if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) &&
+	    unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) {
 		vmalloc_fault(regs, code, addr);
 		return;
 	}
 
-#ifdef CONFIG_64BIT
-	/*
-	 * Modules in 64bit kernels lie in their own virtual region which is not
-	 * in the vmalloc region, but dealing with page faults in this region
-	 * or the vmalloc region amounts to doing the same thing: checking that
-	 * the mapping exists in init_mm.pgd and updating user page table, so
-	 * just use vmalloc_fault.
-	 */
-	if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
-		vmalloc_fault(regs, code, addr);
-		return;
-	}
-#endif
 	/* Enable interrupts if they were enabled in the parent context. */
 	if (!regs_irqs_disabled(regs))
 		local_irq_enable();
--
cgit v1.2.3
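Aside: the pre-allocation helper itself lives outside fault.c, so the
cgit filter above hides it. A minimal sketch of what the commit message
describes -- walking the vmalloc/modules range one PGDIR_SIZE step at a
time and allocating down to the PMD level from pgtable_cache_init() --
could look like the following; the helper name, label, and error
handling are illustrative assumptions, not the actual patch:

static void __init preallocate_pgd_pages_range(unsigned long start,
					       unsigned long end,
					       const char *area)
{
	unsigned long addr;

	/* Step through the range one PGD entry (PGDIR_SIZE) at a time. */
	for (addr = start; addr < end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		pgd_t *pgd = pgd_offset_k(addr);
		p4d_t *p4d;
		pud_t *pud;

		/*
		 * Allocating down to the PMD level populates the kernel
		 * PGD entry in init_mm once and for all, so it never has
		 * to be faulted in and copied to other MMs at run time.
		 */
		p4d = p4d_alloc(&init_mm, pgd, addr);
		if (!p4d)
			goto failed;
		pud = pud_alloc(&init_mm, p4d, addr);
		if (!pud)
			goto failed;
		if (!pmd_alloc(&init_mm, pud, addr))
			goto failed;
	}
	return;

failed:
	panic("Failed to pre-allocate PGD entries for the %s area\n", area);
}

void __init pgtable_cache_init(void)
{
	/*
	 * By the time this runs, the ptlock cache is initialized, so
	 * pmd_alloc() is safe even with split page table locks enabled.
	 */
	preallocate_pgd_pages_range(VMALLOC_START, VMALLOC_END, "vmalloc");
	preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "modules");
}

This matches the rewritten condition in the diff above: only !CONFIG_64BIT
(or !CONFIG_MMU) configurations still take vmalloc faults at all.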
From 648321fa0d970c04b4327ac1a053abf43d285931 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang
Date: Wed, 24 May 2023 00:59:42 +0800
Subject: riscv: mm: try VMA lock-based page fault handling first

Attempt VMA lock-based page fault handling first, and fall back to the
existing mmap_lock-based handling if that fails.

A simple run of the ebizzy benchmark on a Lichee Pi 4A shows that
PER_VMA_LOCK can improve ebizzy throughput by about 32.68%. In theory,
the more CPUs, the bigger the improvement, but I don't have any HW
platform with more than 4 CPUs.

This is the riscv variant of "x86/mm: try VMA lock-based page fault
handling first".

Signed-off-by: Jisheng Zhang
Reviewed-by: Guo Ren
Reviewed-by: Suren Baghdasaryan
Link: https://lore.kernel.org/r/20230523165942.2630-1-jszhang@kernel.org
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/Kconfig    |  1 +
 arch/riscv/mm/fault.c | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'arch/riscv/mm/fault.c')

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a3d54cd14fca..a9e8b697fefb 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -44,6 +44,7 @@ config RISCV
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
 	select ARCH_SUPPORTS_HUGETLBFS if MMU
 	select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
+	select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
 	select ARCH_USE_MEMTEST
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index b023fb311e28..e52ed89a0cdb 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -274,6 +274,36 @@ void handle_page_fault(struct pt_regs *regs)
 		flags |= FAULT_FLAG_WRITE;
 	else if (cause == EXC_INST_PAGE_FAULT)
 		flags |= FAULT_FLAG_INSTRUCTION;
+#ifdef CONFIG_PER_VMA_LOCK
+	if (!(flags & FAULT_FLAG_USER))
+		goto lock_mmap;
+
+	vma = lock_vma_under_rcu(mm, addr);
+	if (!vma)
+		goto lock_mmap;
+
+	if (unlikely(access_error(cause, vma))) {
+		vma_end_read(vma);
+		goto lock_mmap;
+	}
+
+	fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
+	vma_end_read(vma);
+
+	if (!(fault & VM_FAULT_RETRY)) {
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		goto done;
+	}
+	count_vm_vma_lock_event(VMA_LOCK_RETRY);
+
+	if (fault_signal_pending(fault, regs)) {
+		if (!user_mode(regs))
+			no_context(regs, addr);
+		return;
+	}
+lock_mmap:
+#endif /* CONFIG_PER_VMA_LOCK */
+
 retry:
 	mmap_read_lock(mm);
 	vma = find_vma(mm, addr);
@@ -343,6 +373,9 @@ good_area:
 
 	mmap_read_unlock(mm);
 
+#ifdef CONFIG_PER_VMA_LOCK
+done:
+#endif
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		tsk->thread.bad_cause = cause;
 		mm_fault_error(regs, addr, fault);
--
cgit v1.2.3
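For context: ARCH_SUPPORTS_PER_VMA_LOCK is only the architecture's
opt-in. The feature proper is the generic PER_VMA_LOCK option in
mm/Kconfig, which (quoted from memory and abridged here, so treat as
approximate) defaults to on wherever the opt-in, an MMU, and SMP meet:

config PER_VMA_LOCK
	def_bool y
	depends on ARCH_SUPPORTS_PER_VMA_LOCK && MMU && SMP
	help
	  Allow per-vma locking during page fault handling.

With that set, the fast path above takes only a per-VMA read lock found
under RCU (lock_vma_under_rcu()); a missing VMA, an access error, or a
VM_FAULT_RETRY result all drop through to the lock_mmap: label and the
unchanged mmap_read_lock() slow path, so the old behavior remains as the
fallback.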