commit     7fa8a8ee9400fe8ec188426e40e481717bc5e924
tree       cc8fd6b4f936ec01e73238643757451e20478c07 /arch/arm64
parent     91ec4b0d11fe115581ce2835300558802ce55e6c
parent     4d4b6d66db63ceed399f1fb1a4b24081d2590eb1
author     Linus Torvalds <torvalds@linux-foundation.org> 2023-04-28 05:42:02 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org> 2023-04-28 05:42:02 +0300
download   linux-7fa8a8ee9400fe8ec188426e40e481717bc5e924.tar.xz
Merge tag 'mm-stable-2023-04-27-15-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:

 - Nick Piggin's "shoot lazy tlbs" series, to improve the performance of
   switching from a user process to a kernel thread

 - More folio conversions from Kefeng Wang, Zhang Peng and Pankaj Raghav

 - zsmalloc performance improvements from Sergey Senozhatsky

 - Yue Zhao has found and fixed some data race issues around the
   alteration of memcg userspace tunables

 - VFS rationalizations from Christoph Hellwig:
     - removal of most of the callers of write_one_page()
     - make __filemap_get_folio()'s return value more useful

 - Luis Chamberlain has changed tmpfs so it no longer requires swap
   backing. Use `mount -o noswap'.

 - Qi Zheng has made the slab shrinkers operate locklessly, providing
   some scalability benefits

 - Keith Busch has improved dmapool's performance, making part of its
   operations O(1) rather than O(n)

 - Peter Xu adds the UFFD_FEATURE_WP_UNPOPULATED feature to userfaultfd,
   permitting userspace to wr-protect unpopulated anonymous ptes (a
   hedged usage sketch follows the commit list below)

 - Kirill Shutemov has changed MAX_ORDER's meaning to be inclusive
   rather than exclusive, and has fixed a bunch of errors which were
   caused by its unintuitive meaning (a short sketch of the new
   convention follows directly after this list)

 - Axel Rasmussen gives userfaultfd the UFFDIO_CONTINUE_MODE_WP feature,
   which causes minor faults to install a write-protected pte

 - Vlastimil Babka has done some maintenance work on vma_merge():
   cleanups to the kernel code and improvements to our userspace test
   harness

 - Cleanups to do_fault_around() by Lorenzo Stoakes

 - Mike Rapoport has moved a lot of initialization code out of various
   mm/ files and into mm/mm_init.c

 - Lorenzo Stoakes removed vmf_insert_mixed_prot(), which was added for
   DRM, but DRM doesn't use it any more

 - Lorenzo has also converted read_kcore() and vread() to use iterators
   and has thereby removed the use of bounce buffers in some cases

 - Lorenzo has also contributed further cleanups of vma_merge()

 - Chaitanya Prakash provides some fixes to the mmap selftesting code

 - Matthew Wilcox changes xfs and afs so they no longer take sleeping
   locks in ->map_page(), a step towards RCUification of pagefaults

 - Suren Baghdasaryan has improved mmap_lock scalability by switching to
   per-VMA locking

 - Frederic Weisbecker has reworked the percpu cache draining so that it
   no longer causes latency glitches on cpu-isolated workloads

 - Mike Rapoport cleans up and corrects the ARCH_FORCE_MAX_ORDER Kconfig
   logic

 - Liu Shixin has changed zswap's initialization so we no longer waste a
   chunk of memory if zswap is not being used

 - Yosry Ahmed has improved the performance of memcg statistics flushing

 - David Stevens has fixed several issues involving khugepaged,
   userfaultfd and shmem

 - Christoph Hellwig has provided some cleanup work to zram's IO-related
   code paths

 - David Hildenbrand has fixed up some issues in the selftest code's
   testing of our pte state changing

 - Pankaj Raghav has made page_endio() unneeded and has removed it

 - Peter Xu contributed some rationalizations of the userfaultfd
   selftests

 - Yosry Ahmed has fixed an issue around memcg's page reclaim accounting

 - Chaitanya Prakash has fixed some arm-related issues in the
   selftests/mm code

 - Longlong Xia has improved the way in which KSM handles hwpoisoned
   pages

 - Peter Xu fixes a few issues with uffd-wp at fork() time

 - Stefan Roesch has changed KSM so that it may now be used on a
   per-process and per-cgroup basis
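To make the MAX_ORDER change above concrete: under the new inclusive
convention, MAX_ORDER is the largest valid allocation order, so per-order
arrays need MAX_ORDER + 1 slots and loops over orders use <= rather than <.
Below is a minimal C sketch modeled loosely on the hyp_pool hunks in the
arm64 diff further down; the demo_* names are hypothetical and exist only
for illustration.

    /*
     * Hypothetical free-list pool illustrating the inclusive MAX_ORDER
     * convention introduced by this series.
     */
    #include <stddef.h>

    #define DEMO_MAX_ORDER 10 /* assumed arm64 4K-page default from this series */

    struct demo_list { struct demo_list *next, *prev; };

    struct demo_pool {
        /* One free list per order, including order DEMO_MAX_ORDER itself. */
        struct demo_list free_area[DEMO_MAX_ORDER + 1];
    };

    static void demo_pool_init(struct demo_pool *pool)
    {
        /* Inclusive upper bound: <=, not <. */
        for (size_t order = 0; order <= DEMO_MAX_ORDER; order++) {
            pool->free_area[order].next = &pool->free_area[order];
            pool->free_area[order].prev = &pool->free_area[order];
        }
    }

Under the old exclusive meaning the same pool would have declared
free_area[DEMO_MAX_ORDER] and iterated with order < DEMO_MAX_ORDER; that
off-by-one is exactly what the hyp_pool hunks below adjust.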
* tag 'mm-stable-2023-04-27-15-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (369 commits)
  mm,unmap: avoid flushing TLB in batch if PTE is inaccessible
  shmem: restrict noswap option to initial user namespace
  mm/khugepaged: fix conflicting mods to collapse_file()
  sparse: remove unnecessary 0 values from rc
  mm: move 'mmap_min_addr' logic from callers into vm_unmapped_area()
  hugetlb: pte_alloc_huge() to replace huge pte_alloc_map()
  maple_tree: fix allocation in mas_sparse_area()
  mm: do not increment pgfault stats when page fault handler retries
  zsmalloc: allow only one active pool compaction context
  selftests/mm: add new selftests for KSM
  mm: add new KSM process and sysfs knobs
  mm: add new api to enable ksm per process
  mm: shrinkers: fix debugfs file permissions
  mm: don't check VMA write permissions if the PTE/PMD indicates write permissions
  migrate_pages_batch: fix statistics for longterm pin retry
  userfaultfd: use helper function range_in_vma()
  lib/show_mem.c: use for_each_populated_zone() simplify code
  mm: correct arg in reclaim_pages()/reclaim_clean_pages_from_list()
  fs/buffer: convert create_page_buffers to folio_create_buffers
  fs/buffer: add folio_create_empty_buffers helper
  ...
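As promised above, a hedged sketch of how userspace might opt in to
UFFD_FEATURE_WP_UNPOPULATED. This is just the standard userfaultfd
UFFDIO_API handshake with the new feature bit added; it assumes a kernel
carrying this series, and error handling is pared down to the essentials.

    /*
     * Sketch: request write-protect tracking of unpopulated anonymous
     * ptes. Returns the userfaultfd on success, -1 on failure (including
     * kernels that predate the feature).
     */
    #include <fcntl.h>
    #include <linux/userfaultfd.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int uffd_wp_unpopulated_open(void)
    {
        int uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        if (uffd < 0)
            return -1;

        struct uffdio_api api;
        memset(&api, 0, sizeof(api));
        api.api = UFFD_API;
        /* Request uffd-wp plus the new unpopulated-pte behaviour. */
        api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
                       UFFD_FEATURE_WP_UNPOPULATED;
        if (ioctl(uffd, UFFDIO_API, &api) < 0) {
            close(uffd);
            return -1;
        }
        return uffd;
    }

If the UFFDIO_API ioctl fails, the running kernel predates the feature; the
usual fallback is to pre-populate the range before write-protecting it,
which is precisely the overhead this feature was added to remove.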
Diffstat (limited to 'arch/arm64')
-rw-r--r--  arch/arm64/Kconfig                       | 52
-rw-r--r--  arch/arm64/include/asm/memory.h          |  8
-rw-r--r--  arch/arm64/include/asm/mte-kasan.h       | 81
-rw-r--r--  arch/arm64/include/asm/mte.h             | 12
-rw-r--r--  arch/arm64/include/asm/pgtable.h         |  2
-rw-r--r--  arch/arm64/include/asm/sparsemem.h       |  2
-rw-r--r--  arch/arm64/include/asm/uaccess.h         | 66
-rw-r--r--  arch/arm64/include/asm/word-at-a-time.h  |  4
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/gfp.h    |  2
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/page_alloc.c     | 10
-rw-r--r--  arch/arm64/mm/fault.c                    | 36
11 files changed, 166 insertions, 109 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3f5bf55050e8..b1201d25a8a4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -95,6 +95,7 @@ config ARM64
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -1505,39 +1506,34 @@ config XEN
# include/linux/mmzone.h requires the following to be true:
#
-# MAX_ORDER - 1 + PAGE_SHIFT <= SECTION_SIZE_BITS
+# MAX_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS
#
-# so the maximum value of MAX_ORDER is SECTION_SIZE_BITS + 1 - PAGE_SHIFT:
+# so the maximum value of MAX_ORDER is SECTION_SIZE_BITS - PAGE_SHIFT:
#
# | SECTION_SIZE_BITS | PAGE_SHIFT | max MAX_ORDER | default MAX_ORDER |
# ----+-------------------+--------------+-----------------+--------------------+
-# 4K | 27 | 12 | 16 | 11 |
-# 16K | 27 | 14 | 14 | 12 |
-# 64K | 29 | 16 | 14 | 14 |
+# 4K | 27 | 12 | 15 | 10 |
+# 16K | 27 | 14 | 13 | 11 |
+# 64K | 29 | 16 | 13 | 13 |
config ARCH_FORCE_MAX_ORDER
- int "Maximum zone order" if ARM64_4K_PAGES || ARM64_16K_PAGES
- default "14" if ARM64_64K_PAGES
- range 12 14 if ARM64_16K_PAGES
- default "12" if ARM64_16K_PAGES
- range 11 16 if ARM64_4K_PAGES
- default "11"
- help
- The kernel memory allocator divides physically contiguous memory
- blocks into "zones", where each zone is a power of two number of
- pages. This option selects the largest power of two that the kernel
- keeps in the memory allocator. If you need to allocate very large
- blocks of physically contiguous memory, then you may need to
- increase this value.
-
- This config option is actually maximum order plus one. For example,
- a value of 11 means that the largest free memory block is 2^10 pages.
-
- We make sure that we can allocate up to a HugePage size for each configuration.
- Hence we have :
- MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
-
- However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
- 4M allocations matching the default size used by generic code.
+ int "Order of maximal physically contiguous allocations" if EXPERT && (ARM64_4K_PAGES || ARM64_16K_PAGES)
+ default "13" if ARM64_64K_PAGES
+ default "11" if ARM64_16K_PAGES
+ default "10"
+ help
+ The kernel page allocator limits the size of maximal physically
+ contiguous allocations. The limit is called MAX_ORDER and it
+ defines the maximal power of two of number of pages that can be
+ allocated as a single contiguous block. This option allows
+ overriding the default setting when ability to allocate very
+ large blocks of physically contiguous memory is required.
+
+ The maximal size of allocation cannot exceed the size of the
+ section, so the value of MAX_ORDER should satisfy
+
+ MAX_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS
+
+ Don't change if unsure.
config UNMAP_KERNEL_AT_EL0
bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index efcd68154a3a..c735afdf639b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -261,9 +261,11 @@ static inline const void *__tag_set(const void *addr, u8 tag)
}
#ifdef CONFIG_KASAN_HW_TAGS
-#define arch_enable_tagging_sync() mte_enable_kernel_sync()
-#define arch_enable_tagging_async() mte_enable_kernel_async()
-#define arch_enable_tagging_asymm() mte_enable_kernel_asymm()
+#define arch_enable_tag_checks_sync() mte_enable_kernel_sync()
+#define arch_enable_tag_checks_async() mte_enable_kernel_async()
+#define arch_enable_tag_checks_asymm() mte_enable_kernel_asymm()
+#define arch_suppress_tag_checks_start() mte_enable_tco()
+#define arch_suppress_tag_checks_stop() mte_disable_tco()
#define arch_force_async_tag_fault() mte_check_tfsr_exit()
#define arch_get_random_tag() mte_get_random_tag()
#define arch_get_mem_tag(addr) mte_get_mem_tag(addr)
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index 9f79425fc65a..2e98028c1965 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -13,9 +13,74 @@
#include <linux/types.h>
+#ifdef CONFIG_KASAN_HW_TAGS
+
+/* Whether the MTE asynchronous mode is enabled. */
+DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return static_branch_unlikely(&mte_async_or_asymm_mode);
+}
+
+#else /* CONFIG_KASAN_HW_TAGS */
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return false;
+}
+
+#endif /* CONFIG_KASAN_HW_TAGS */
+
#ifdef CONFIG_ARM64_MTE
/*
+ * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0
+ * affects EL0 and TCF affects EL1 irrespective of which TTBR is
+ * used.
+ * The kernel accesses TTBR0 usually with LDTR/STTR instructions
+ * when UAO is available, so these would act as EL0 accesses using
+ * TCF0.
+ * However futex.h code uses exclusives which would be executed as
+ * EL1, this can potentially cause a tag check fault even if the
+ * user disables TCF0.
+ *
+ * To address the problem we set the PSTATE.TCO bit in uaccess_enable()
+ * and reset it in uaccess_disable().
+ *
+ * The Tag check override (TCO) bit disables temporarily the tag checking
+ * preventing the issue.
+ */
+static inline void mte_disable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+static inline void mte_enable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+/*
+ * These functions disable tag checking only if in MTE async mode
+ * since the sync mode generates exceptions synchronously and the
+ * nofault or load_unaligned_zeropad can handle them.
+ */
+static inline void __mte_disable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ mte_disable_tco();
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ mte_enable_tco();
+}
+
+/*
* These functions are meant to be only used from KASAN runtime through
* the arch_*() interface defined in asm/memory.h.
* These functions don't include system_supports_mte() checks,
@@ -138,6 +203,22 @@ void mte_enable_kernel_asymm(void);
#else /* CONFIG_ARM64_MTE */
+static inline void mte_disable_tco(void)
+{
+}
+
+static inline void mte_enable_tco(void)
+{
+}
+
+static inline void __mte_disable_tco_async(void)
+{
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+}
+
static inline u8 mte_get_ptr_tag(void *ptr)
{
return 0xFF;
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 20dd06d70af5..c028afb1cd0b 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -178,14 +178,6 @@ static inline void mte_disable_tco_entry(struct task_struct *task)
}
#ifdef CONFIG_KASAN_HW_TAGS
-/* Whether the MTE asynchronous mode is enabled. */
-DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
-
-static inline bool system_uses_mte_async_or_asymm_mode(void)
-{
- return static_branch_unlikely(&mte_async_or_asymm_mode);
-}
-
void mte_check_tfsr_el1(void);
static inline void mte_check_tfsr_entry(void)
@@ -212,10 +204,6 @@ static inline void mte_check_tfsr_exit(void)
mte_check_tfsr_el1();
}
#else
-static inline bool system_uses_mte_async_or_asymm_mode(void)
-{
- return false;
-}
static inline void mte_check_tfsr_el1(void)
{
}
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b6ba466e2e8a..0bd18de9fd97 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -57,7 +57,7 @@ static inline bool arch_thp_swp_supported(void)
* fault on one CPU which has been handled concurrently by another CPU
* does not need to perform additional invalidation.
*/
-#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
/*
* ZERO_PAGE is a global shared page that is always zero: used
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 4b73463423c3..5f5437621029 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -10,7 +10,7 @@
/*
* Section size must be at least 512MB for 64K base
* page size config. Otherwise it will be less than
- * (MAX_ORDER - 1) and the build process will fail.
+ * MAX_ORDER and the build process will fail.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define SECTION_SIZE_BITS 29
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 8209e6a86989..05f4fc265428 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -136,55 +136,9 @@ static inline void __uaccess_enable_hw_pan(void)
CONFIG_ARM64_PAN));
}
-/*
- * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0
- * affects EL0 and TCF affects EL1 irrespective of which TTBR is
- * used.
- * The kernel accesses TTBR0 usually with LDTR/STTR instructions
- * when UAO is available, so these would act as EL0 accesses using
- * TCF0.
- * However futex.h code uses exclusives which would be executed as
- * EL1, this can potentially cause a tag check fault even if the
- * user disables TCF0.
- *
- * To address the problem we set the PSTATE.TCO bit in uaccess_enable()
- * and reset it in uaccess_disable().
- *
- * The Tag check override (TCO) bit disables temporarily the tag checking
- * preventing the issue.
- */
-static inline void __uaccess_disable_tco(void)
-{
- asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
- ARM64_MTE, CONFIG_KASAN_HW_TAGS));
-}
-
-static inline void __uaccess_enable_tco(void)
-{
- asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
- ARM64_MTE, CONFIG_KASAN_HW_TAGS));
-}
-
-/*
- * These functions disable tag checking only if in MTE async mode
- * since the sync mode generates exceptions synchronously and the
- * nofault or load_unaligned_zeropad can handle them.
- */
-static inline void __uaccess_disable_tco_async(void)
-{
- if (system_uses_mte_async_or_asymm_mode())
- __uaccess_disable_tco();
-}
-
-static inline void __uaccess_enable_tco_async(void)
-{
- if (system_uses_mte_async_or_asymm_mode())
- __uaccess_enable_tco();
-}
-
static inline void uaccess_disable_privileged(void)
{
- __uaccess_disable_tco();
+ mte_disable_tco();
if (uaccess_ttbr0_disable())
return;
@@ -194,7 +148,7 @@ static inline void uaccess_disable_privileged(void)
static inline void uaccess_enable_privileged(void)
{
- __uaccess_enable_tco();
+ mte_enable_tco();
if (uaccess_ttbr0_enable())
return;
@@ -302,8 +256,8 @@ do { \
#define get_user __get_user
/*
- * We must not call into the scheduler between __uaccess_enable_tco_async() and
- * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
* functions, we must evaluate these outside of the critical section.
*/
#define __get_kernel_nofault(dst, src, type, err_label) \
@@ -312,10 +266,10 @@ do { \
__typeof__(src) __gkn_src = (src); \
int __gkn_err = 0; \
\
- __uaccess_enable_tco_async(); \
+ __mte_enable_tco_async(); \
__raw_get_mem("ldr", *((type *)(__gkn_dst)), \
(__force type *)(__gkn_src), __gkn_err, K); \
- __uaccess_disable_tco_async(); \
+ __mte_disable_tco_async(); \
\
if (unlikely(__gkn_err)) \
goto err_label; \
@@ -388,8 +342,8 @@ do { \
#define put_user __put_user
/*
- * We must not call into the scheduler between __uaccess_enable_tco_async() and
- * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
* functions, we must evaluate these outside of the critical section.
*/
#define __put_kernel_nofault(dst, src, type, err_label) \
@@ -398,10 +352,10 @@ do { \
__typeof__(src) __pkn_src = (src); \
int __pkn_err = 0; \
\
- __uaccess_enable_tco_async(); \
+ __mte_enable_tco_async(); \
__raw_put_mem("str", *((type *)(__pkn_src)), \
(__force type *)(__pkn_dst), __pkn_err, K); \
- __uaccess_disable_tco_async(); \
+ __mte_disable_tco_async(); \
\
if (unlikely(__pkn_err)) \
goto err_label; \
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index 1c8e4f2490bf..f3b151ed0d7a 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -55,7 +55,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long ret;
- __uaccess_enable_tco_async();
+ __mte_enable_tco_async();
/* Load word from unaligned pointer addr */
asm(
@@ -65,7 +65,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
: "=&r" (ret)
: "r" (addr), "Q" (*(unsigned long *)addr));
- __uaccess_disable_tco_async();
+ __mte_disable_tco_async();
return ret;
}
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 0a048dc06a7d..fe5472a184a3 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -16,7 +16,7 @@ struct hyp_pool {
* API at EL2.
*/
hyp_spinlock_t lock;
- struct list_head free_area[MAX_ORDER];
+ struct list_head free_area[MAX_ORDER + 1];
phys_addr_t range_start;
phys_addr_t range_end;
unsigned short max_order;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 803ba3222e75..b1e392186a0f 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -110,7 +110,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
* after coalescing, so make sure to mark it HYP_NO_ORDER proactively.
*/
p->order = HYP_NO_ORDER;
- for (; (order + 1) < pool->max_order; order++) {
+ for (; (order + 1) <= pool->max_order; order++) {
buddy = __find_buddy_avail(pool, p, order);
if (!buddy)
break;
@@ -203,9 +203,9 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
hyp_spin_lock(&pool->lock);
/* Look for a high-enough-order page */
- while (i < pool->max_order && list_empty(&pool->free_area[i]))
+ while (i <= pool->max_order && list_empty(&pool->free_area[i]))
i++;
- if (i >= pool->max_order) {
+ if (i > pool->max_order) {
hyp_spin_unlock(&pool->lock);
return NULL;
}
@@ -228,8 +228,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
int i;
hyp_spin_lock_init(&pool->lock);
- pool->max_order = min(MAX_ORDER, get_order((nr_pages + 1) << PAGE_SHIFT));
- for (i = 0; i < pool->max_order; i++)
+ pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
+ for (i = 0; i <= pool->max_order; i++)
INIT_LIST_HEAD(&pool->free_area[i]);
pool->range_start = phys;
pool->range_end = phys + (nr_pages << PAGE_SHIFT);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f4cb0f85ccf4..9e0db5c387e3 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -535,6 +535,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
unsigned long vm_flags;
unsigned int mm_flags = FAULT_FLAG_DEFAULT;
unsigned long addr = untagged_addr(far);
+#ifdef CONFIG_PER_VMA_LOCK
+ struct vm_area_struct *vma;
+#endif
if (kprobe_page_fault(regs, esr))
return 0;
@@ -585,6 +588,36 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+#ifdef CONFIG_PER_VMA_LOCK
+ if (!(mm_flags & FAULT_FLAG_USER))
+ goto lock_mmap;
+
+ vma = lock_vma_under_rcu(mm, addr);
+ if (!vma)
+ goto lock_mmap;
+
+ if (!(vma->vm_flags & vm_flags)) {
+ vma_end_read(vma);
+ goto lock_mmap;
+ }
+ fault = handle_mm_fault(vma, addr & PAGE_MASK,
+ mm_flags | FAULT_FLAG_VMA_LOCK, regs);
+ vma_end_read(vma);
+
+ if (!(fault & VM_FAULT_RETRY)) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto done;
+ }
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+
+ /* Quick path to respond to signals */
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ goto no_context;
+ return 0;
+ }
+lock_mmap:
+#endif /* CONFIG_PER_VMA_LOCK */
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -628,6 +661,9 @@ retry:
}
mmap_read_unlock(mm);
+#ifdef CONFIG_PER_VMA_LOCK
+done:
+#endif
/*
* Handle the "normal" (no error) case first.
*/