From 23baf831a32c04f9a968812511540b1b3e648bf5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Mar 2023 14:31:33 +0300 Subject: mm, treewide: redefine MAX_ORDER sanely MAX_ORDER currently defined as number of orders page allocator supports: user can ask buddy allocator for page order between 0 and MAX_ORDER-1. This definition is counter-intuitive and lead to number of bugs all over the kernel. Change the definition of MAX_ORDER to be inclusive: the range of orders user can ask from buddy allocator is 0..MAX_ORDER now. [kirill@shutemov.name: fix min() warning] Link: https://lkml.kernel.org/r/20230315153800.32wib3n5rickolvh@box [akpm@linux-foundation.org: fix another min_t warning] [kirill@shutemov.name: fixups per Zi Yan] Link: https://lkml.kernel.org/r/20230316232144.b7ic4cif4kjiabws@box.shutemov.name [akpm@linux-foundation.org: fix underlining in docs] Link: https://lore.kernel.org/oe-kbuild-all/202303191025.VRCTk6mP-lkp@intel.com/ Link: https://lkml.kernel.org/r/20230315113133.11326-11-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reviewed-by: Michael Ellerman [powerpc] Cc: "Kirill A. Shutemov" Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/arm64/Kconfig | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'arch/arm64/Kconfig') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1023e896d46b..cb5c6aa3254e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1476,22 +1476,22 @@ config XEN # include/linux/mmzone.h requires the following to be true: # -# MAX_ORDER - 1 + PAGE_SHIFT <= SECTION_SIZE_BITS +# MAX_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS # -# so the maximum value of MAX_ORDER is SECTION_SIZE_BITS + 1 - PAGE_SHIFT: +# so the maximum value of MAX_ORDER is SECTION_SIZE_BITS - PAGE_SHIFT: # # | SECTION_SIZE_BITS | PAGE_SHIFT | max MAX_ORDER | default MAX_ORDER | # ----+-------------------+--------------+-----------------+--------------------+ -# 4K | 27 | 12 | 16 | 11 | -# 16K | 27 | 14 | 14 | 12 | -# 64K | 29 | 16 | 14 | 14 | +# 4K | 27 | 12 | 15 | 10 | +# 16K | 27 | 14 | 13 | 11 | +# 64K | 29 | 16 | 13 | 13 | config ARCH_FORCE_MAX_ORDER int "Maximum zone order" if ARM64_4K_PAGES || ARM64_16K_PAGES - default "14" if ARM64_64K_PAGES - range 12 14 if ARM64_16K_PAGES - default "12" if ARM64_16K_PAGES - range 11 16 if ARM64_4K_PAGES - default "11" + default "13" if ARM64_64K_PAGES + range 11 13 if ARM64_16K_PAGES + default "11" if ARM64_16K_PAGES + range 10 15 if ARM64_4K_PAGES + default "10" help The kernel memory allocator divides physically contiguous memory blocks into "zones", where each zone is a power of two number of @@ -1500,14 +1500,11 @@ config ARCH_FORCE_MAX_ORDER blocks of physically contiguous memory, then you may need to increase this value. - This config option is actually maximum order plus one. For example, - a value of 11 means that the largest free memory block is 2^10 pages. - We make sure that we can allocate up to a HugePage size for each configuration. Hence we have : - MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2 + MAX_ORDER = PMD_SHIFT - PAGE_SHIFT => PAGE_SHIFT - 3 - However for 4K, we choose a higher default value, 11 as opposed to 10, giving us + However for 4K, we choose a higher default value, 10 as opposed to 9, giving us 4M allocations matching the default size used by generic code. config UNMAP_KERNEL_AT_EL0 -- cgit v1.2.3 From cd7f176aea5f5929a09a91c661a26912cc995d1b Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 27 Feb 2023 09:36:29 -0800 Subject: arm64/mm: try VMA lock-based page fault handling first Attempt VMA lock-based page fault handling first, and fall back to the existing mmap_lock-based handling if that fails. Link: https://lkml.kernel.org/r/20230227173632.3292573-31-surenb@google.com Signed-off-by: Suren Baghdasaryan Signed-off-by: Andrew Morton --- arch/arm64/Kconfig | 1 + arch/arm64/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'arch/arm64/Kconfig') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cb5c6aa3254e..e60baf7859d1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -95,6 +95,7 @@ config ARM64 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f4cb0f85ccf4..9e0db5c387e3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -535,6 +535,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, unsigned long vm_flags; unsigned int mm_flags = FAULT_FLAG_DEFAULT; unsigned long addr = untagged_addr(far); +#ifdef CONFIG_PER_VMA_LOCK + struct vm_area_struct *vma; +#endif if (kprobe_page_fault(regs, esr)) return 0; @@ -585,6 +588,36 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); +#ifdef CONFIG_PER_VMA_LOCK + if (!(mm_flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma = lock_vma_under_rcu(mm, addr); + if (!vma) + goto lock_mmap; + + if (!(vma->vm_flags & vm_flags)) { + vma_end_read(vma); + goto lock_mmap; + } + fault = handle_mm_fault(vma, addr & PAGE_MASK, + mm_flags | FAULT_FLAG_VMA_LOCK, regs); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + goto no_context; + return 0; + } +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -628,6 +661,9 @@ retry: } mmap_read_unlock(mm); +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif /* * Handle the "normal" (no error) case first. */ -- cgit v1.2.3 From 34affcd7577a232803f729d1870ba475f294e4ea Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Fri, 24 Mar 2023 08:22:21 +0300 Subject: arm64: drop ranges in definition of ARCH_FORCE_MAX_ORDER It is not a good idea to change fundamental parameters of core memory management. Having predefined ranges suggests that the values within those ranges are sensible, but one has to *really* understand implications of changing MAX_ORDER before actually amending it and ranges don't help here. Drop ranges in definition of ARCH_FORCE_MAX_ORDER and make its prompt visible only if EXPERT=y Link: https://lkml.kernel.org/r/20230324052233.2654090-3-rppt@kernel.org Signed-off-by: Mike Rapoport (IBM) Reviewed-by: Zi Yan Reviewed-by: Kefeng Wang Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: John Paul Adrian Glaubitz Cc: Kirill A. Shutemov Cc: Max Filippov Cc: Michael Ellerman Cc: Rich Felker Cc: "Russell King (Oracle)" Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/arm64/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/arm64/Kconfig') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e60baf7859d1..7324032af859 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1487,11 +1487,9 @@ config XEN # 16K | 27 | 14 | 13 | 11 | # 64K | 29 | 16 | 13 | 13 | config ARCH_FORCE_MAX_ORDER - int "Maximum zone order" if ARM64_4K_PAGES || ARM64_16K_PAGES + int "Maximum zone order" if EXPERT && (ARM64_4K_PAGES || ARM64_16K_PAGES) default "13" if ARM64_64K_PAGES - range 11 13 if ARM64_16K_PAGES default "11" if ARM64_16K_PAGES - range 10 15 if ARM64_4K_PAGES default "10" help The kernel memory allocator divides physically contiguous memory -- cgit v1.2.3 From 4632cb22ac26e9e6e4ad651fb53f25f0b78c2889 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Fri, 24 Mar 2023 08:22:22 +0300 Subject: arm64: reword ARCH_FORCE_MAX_ORDER prompt and help text The prompt and help text of ARCH_FORCE_MAX_ORDER are not even close to describe this configuration option. Update both to actually describe what this option does. [rppt@kernel.org: change ARCH_FORCE_MAX_ORDER dependencies] Link: https://lkml.kernel.org/r/20230325060828.2662773-4-rppt@kernel.org Link: https://lkml.kernel.org/r/20230324052233.2654090-4-rppt@kernel.org Signed-off-by: Mike Rapoport (IBM) Acked-by: Kirill A. Shutemov Reviewed-by: Zi Yan Reviewed-by: Kefeng Wang Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: John Paul Adrian Glaubitz Cc: Max Filippov Cc: Michael Ellerman Cc: Rich Felker Cc: "Russell King (Oracle)" Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/arm64/Kconfig | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/arm64/Kconfig') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7324032af859..cc11cdcf5a00 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1487,24 +1487,24 @@ config XEN # 16K | 27 | 14 | 13 | 11 | # 64K | 29 | 16 | 13 | 13 | config ARCH_FORCE_MAX_ORDER - int "Maximum zone order" if EXPERT && (ARM64_4K_PAGES || ARM64_16K_PAGES) + int "Order of maximal physically contiguous allocations" if EXPERT && (ARM64_4K_PAGES || ARM64_16K_PAGES) default "13" if ARM64_64K_PAGES default "11" if ARM64_16K_PAGES default "10" help - The kernel memory allocator divides physically contiguous memory - blocks into "zones", where each zone is a power of two number of - pages. This option selects the largest power of two that the kernel - keeps in the memory allocator. If you need to allocate very large - blocks of physically contiguous memory, then you may need to - increase this value. + The kernel page allocator limits the size of maximal physically + contiguous allocations. The limit is called MAX_ORDER and it + defines the maximal power of two of number of pages that can be + allocated as a single contiguous block. This option allows + overriding the default setting when ability to allocate very + large blocks of physically contiguous memory is required. - We make sure that we can allocate up to a HugePage size for each configuration. - Hence we have : - MAX_ORDER = PMD_SHIFT - PAGE_SHIFT => PAGE_SHIFT - 3 + The maximal size of allocation cannot exceed the size of the + section, so the value of MAX_ORDER should satisfy - However for 4K, we choose a higher default value, 10 as opposed to 9, giving us - 4M allocations matching the default size used by generic code. + MAX_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS + + Don't change if unsure. config UNMAP_KERNEL_AT_EL0 bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT -- cgit v1.2.3