diff options
author | Liam R. Howlett <Liam.Howlett@Oracle.com> | 2022-09-06 22:48:45 +0300 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2022-09-27 05:46:14 +0300 |
commit | d4af56c5c7c6781ca6ca8075e2cf5bc119ed33d1 (patch) | |
tree | e3c5b2600a2a9231ab6bbb85ede7b2be92f637f9 /include/linux | |
parent | e15e06a8392321a19d8ebdbdd7643b7fa8874c17 (diff) | |
download | linux-d4af56c5c7c6781ca6ca8075e2cf5bc119ed33d1.tar.xz |
mm: start tracking VMAs with maple tree
Start tracking the VMAs with the new maple tree structure in parallel with
the rb_tree. Add debug and trace events for maple tree operations and
duplicate the rb_tree that is created on forks into the maple tree.
The maple tree is added to the mm_struct including the mm_init struct,
added support in required mm/mmap functions, added tracking in kernel/fork
for process forking, and used to find the unmapped_area and checked
against what the rbtree finds.
This also moves the mmap_lock() in exit_mmap() since the oom reaper call
does walk the VMAs. Otherwise lockdep will be unhappy if oom happens.
When splitting a vma fails due to allocations of the maple tree nodes,
the error path in __split_vma() calls new->vm_ops->close(new). The page
accounting for hugetlb is actually in the close() operation, so it
accounts for the removal of 1/2 of the VMA which was not adjusted. This
results in a negative exit value. To avoid the negative charge, set
vm_start = vm_end and vm_pgoff = 0.
There is also a potential accounting issue in special mappings from
insert_vm_struct() failing to allocate, so reverse the charge there in
the failure scenario.
Link: https://lkml.kernel.org/r/20220906194824.2110408-9-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/mm.h | 5 | ||||
-rw-r--r-- | include/linux/mm_types.h | 3 |
2 files changed, 8 insertions, 0 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 7cc9ffc19e7f..896d04248e66 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2567,6 +2567,8 @@ extern bool arch_has_descending_max_zone_pfns(void); /* nommu.c */ extern atomic_long_t mmap_pages_allocated; extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); +/* mmap.c */ +void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas); /* interval_tree.c */ void vma_interval_tree_insert(struct vm_area_struct *node, @@ -2630,6 +2632,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); +void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas); +void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas); + static inline int check_data_rlimit(unsigned long rlim, unsigned long new, unsigned long start, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e1797813cc2c..425bc5f7d477 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -9,6 +9,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rbtree.h> +#include <linux/maple_tree.h> #include <linux/rwsem.h> #include <linux/completion.h> #include <linux/cpumask.h> @@ -486,6 +487,7 @@ struct kioctx_table; struct mm_struct { struct { struct vm_area_struct *mmap; /* list of VMAs */ + struct maple_tree mm_mt; struct rb_root mm_rb; u64 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU @@ -697,6 +699,7 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ |