summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLiam R. Howlett <Liam.Howlett@Oracle.com>2022-09-06 22:48:45 +0300
committerAndrew Morton <akpm@linux-foundation.org>2022-09-27 05:46:14 +0300
commitd4af56c5c7c6781ca6ca8075e2cf5bc119ed33d1 (patch)
treee3c5b2600a2a9231ab6bbb85ede7b2be92f637f9 /include/linux
parente15e06a8392321a19d8ebdbdd7643b7fa8874c17 (diff)
downloadlinux-d4af56c5c7c6781ca6ca8075e2cf5bc119ed33d1.tar.xz
mm: start tracking VMAs with maple tree
Start tracking the VMAs with the new maple tree structure in parallel with the rb_tree. Add debug and trace events for maple tree operations and duplicate the rb_tree that is created on forks into the maple tree. The maple tree is added to the mm_struct including the mm_init struct, added support in required mm/mmap functions, added tracking in kernel/fork for process forking, and used to find the unmapped_area and checked against what the rbtree finds. This also moves the mmap_lock() in exit_mmap() since the oom reaper call does walk the VMAs. Otherwise lockdep will be unhappy if oom happens. When splitting a vma fails due to allocations of the maple tree nodes, the error path in __split_vma() calls new->vm_ops->close(new). The page accounting for hugetlb is actually in the close() operation, so it accounts for the removal of 1/2 of the VMA which was not adjusted. This results in a negative exit value. To avoid the negative charge, set vm_start = vm_end and vm_pgoff = 0. There is also a potential accounting issue in special mappings from insert_vm_struct() failing to allocate, so reverse the charge there in the failure scenario. Link: https://lkml.kernel.org/r/20220906194824.2110408-9-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> Tested-by: Yu Zhao <yuzhao@google.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: SeongJae Park <sj@kernel.org> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/mm.h5
-rw-r--r--include/linux/mm_types.h3
2 files changed, 8 insertions, 0 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7cc9ffc19e7f..896d04248e66 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2567,6 +2567,8 @@ extern bool arch_has_descending_max_zone_pfns(void);
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
+/* mmap.c */
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
/* interval_tree.c */
void vma_interval_tree_insert(struct vm_area_struct *node,
@@ -2630,6 +2632,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
+void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas);
+
static inline int check_data_rlimit(unsigned long rlim,
unsigned long new,
unsigned long start,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e1797813cc2c..425bc5f7d477 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
+#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -486,6 +487,7 @@ struct kioctx_table;
struct mm_struct {
struct {
struct vm_area_struct *mmap; /* list of VMAs */
+ struct maple_tree mm_mt;
struct rb_root mm_rb;
u64 vmacache_seqnum; /* per-thread vmacache */
#ifdef CONFIG_MMU
@@ -697,6 +699,7 @@ struct mm_struct {
unsigned long cpu_bitmap[];
};
+#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
extern struct mm_struct init_mm;
/* Pointer magic because the dynamic array size confuses some compilers. */