Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c     |  10
-rw-r--r--  mm/bootmem_info.c    |   2
-rw-r--r--  mm/damon/dbgfs.c     |   3
-rw-r--r--  mm/gup.c             |  68
-rw-r--r--  mm/huge_memory.c     |  64
-rw-r--r--  mm/hugetlb.c         |  28
-rw-r--r--  mm/mmap.c            |   7
-rw-r--r--  mm/mprotect.c        |   3
-rw-r--r--  mm/page-writeback.c  |   6
-rw-r--r--  mm/pagewalk.c        |  21
-rw-r--r--  mm/ptdump.c          |   4
-rw-r--r--  mm/rmap.c            |  29
-rw-r--r--  mm/shmem.c           |  60
-rw-r--r--  mm/userfaultfd.c     |  29
-rw-r--r--  mm/vmstat.c          |   9
-rw-r--r--  mm/zsmalloc.c        |   2
16 files changed, 224 insertions, 121 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 95550b8fa7fe..de65cb1e5f76 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -260,10 +260,10 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
unsigned long timeout;
timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state))
queue_delayed_work(bdi_wq, &wb->dwork, timeout);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
static void wb_update_bandwidth_workfn(struct work_struct *work)
@@ -334,12 +334,12 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);
static void wb_shutdown(struct bdi_writeback *wb)
{
/* Make sure nobody queues further work */
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (!test_and_clear_bit(WB_registered, &wb->state)) {
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
return;
}
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
cgwb_remove_from_bdi_list(wb);
/*
diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c
index f18a631e7479..b1efebfcf94b 100644
--- a/mm/bootmem_info.c
+++ b/mm/bootmem_info.c
@@ -12,6 +12,7 @@
#include <linux/memblock.h>
#include <linux/bootmem_info.h>
#include <linux/memory_hotplug.h>
+#include <linux/kmemleak.h>
void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
{
@@ -33,6 +34,7 @@ void put_page_bootmem(struct page *page)
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
+ kmemleak_free_part(page_to_virt(page), PAGE_SIZE);
free_reserved_page(page);
}
}
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index cb8a7e9926a4..cfdf63132d5a 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -818,6 +818,9 @@ static int dbgfs_mk_context(char *name)
return -ENOENT;
new_dir = debugfs_create_dir(name, root);
+ /* Below check is required for a potential duplicated name case */
+ if (IS_ERR(new_dir))
+ return PTR_ERR(new_dir);
dbgfs_dirs[dbgfs_nr_ctxs] = new_dir;
new_ctx = dbgfs_new_ctx();
diff --git a/mm/gup.c b/mm/gup.c
index 732825157430..5abdaf487460 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -478,14 +478,42 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
return -EEXIST;
}
-/*
- * FOLL_FORCE can write to even unwritable pte's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PTEs in COW mappings. */
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pte_write(pte) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+ /* If the pte is writable, we can write to the page. */
+ if (pte_write(pte))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
+ return false;
+ return !userfaultfd_pte_wp(vma, pte);
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -528,12 +556,19 @@ retry:
}
if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
- pte_unmap_unlock(ptep, ptl);
- return NULL;
- }
page = vm_normal_page(vma, address, pte);
+
+ /*
+ * We only care about anon pages in can_follow_write_pte() and don't
+ * have to worry about pte_devmap() because they are never anon.
+ */
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pte(pte, page, vma, flags)) {
+ page = NULL;
+ goto out;
+ }
+
if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
/*
* Only return device mapping pages in the FOLL_GET or FOLL_PIN
@@ -986,17 +1021,6 @@ static int faultin_page(struct vm_area_struct *vma,
return -EBUSY;
}
- /*
- * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
- * necessary, even if maybe_mkwrite decided not to set pte_write. We
- * can thus safely do subsequent page lookups as if they were reads.
- * But only do so when looping for pte_write is futile: in some cases
- * userspace may also be wanting to write to the gotten user page,
- * which a read fault here might prevent (a readonly page might get
- * reCOWed by userspace write).
- */
- if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
- *flags |= FOLL_COW;
return 0;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8a7c1b344abe..e9414ee57c5b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1040,12 +1040,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
assert_spin_locked(pmd_lockptr(mm, pmd));
- /*
- * When we COW a devmap PMD entry, we split it into PTEs, so we should
- * not be in this function with `flags & FOLL_COW` set.
- */
- WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
-
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
@@ -1395,14 +1389,42 @@ fallback:
return VM_FAULT_FALLBACK;
}
-/*
- * FOLL_FORCE can write to even unwritable pmd's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
+static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pmd_write(pmd) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+ /* If the pmd is writable, we can write to the page. */
+ if (pmd_write(pmd))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
+ return false;
+ return !userfaultfd_huge_pmd_wp(vma, pmd);
}
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
@@ -1411,12 +1433,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned int flags)
{
struct mm_struct *mm = vma->vm_mm;
- struct page *page = NULL;
+ struct page *page;
assert_spin_locked(pmd_lockptr(mm, pmd));
- if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
- goto out;
+ page = pmd_page(*pmd);
+ VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pmd(*pmd, page, vma, flags))
+ return NULL;
/* Avoid dumping huge zero page */
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
@@ -1424,10 +1450,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
/* Full NUMA hinting faults to serialise migration in fault paths */
if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
- goto out;
-
- page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+ return NULL;
if (!pmd_write(*pmd) && gup_must_unshare(flags, page))
return ERR_PTR(-EMLINK);
@@ -1444,7 +1467,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
-out:
return page;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0aee2f3ae15c..e070b8593b37 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5241,6 +5241,21 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
VM_BUG_ON(unshare && (flags & FOLL_WRITE));
VM_BUG_ON(!unshare && !(flags & FOLL_WRITE));
+ /*
+ * hugetlb does not support FOLL_FORCE-style write faults that keep the
+ * PTE mapped R/O such as maybe_mkwrite() would do.
+ */
+ if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE)))
+ return VM_FAULT_SIGSEGV;
+
+ /* Let's take out MAP_SHARED mappings first. */
+ if (vma->vm_flags & VM_MAYSHARE) {
+ if (unlikely(unshare))
+ return 0;
+ set_huge_ptep_writable(vma, haddr, ptep);
+ return 0;
+ }
+
pte = huge_ptep_get(ptep);
old_page = pte_page(pte);
@@ -5781,12 +5796,11 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* If we are going to COW/unshare the mapping later, we examine the
* pending reservations for this page now. This will ensure that any
* allocations necessary to record that reservation occur outside the
- * spinlock. For private mappings, we also lookup the pagecache
- * page now as it is used to determine if a reservation has been
- * consumed.
+ * spinlock. Also lookup the pagecache page now as it is used to
+ * determine if a reservation has been consumed.
*/
if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
- !huge_pte_write(entry)) {
+ !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) {
if (vma_needs_reservation(h, vma, haddr) < 0) {
ret = VM_FAULT_OOM;
goto out_mutex;
@@ -5794,9 +5808,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/* Just decrements count, does not deallocate */
vma_end_reservation(h, vma, haddr);
- if (!(vma->vm_flags & VM_MAYSHARE))
- pagecache_page = hugetlbfs_pagecache_page(h,
- vma, haddr);
+ pagecache_page = hugetlbfs_pagecache_page(h, vma, haddr);
}
ptl = huge_pte_lock(h, mm, ptep);
@@ -6029,7 +6041,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
goto out_release_unlock;
- if (vm_shared) {
+ if (page_in_pagecache) {
page_dup_file_rmap(page, true);
} else {
ClearHPageRestoreReserve(page);
diff --git a/mm/mmap.c b/mm/mmap.c
index c035020d0c89..9d780f415be3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1646,8 +1646,11 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
return 0;
- /* Do we need to track softdirty? */
- if (vma_soft_dirty_enabled(vma))
+ /*
+ * Do we need to track softdirty? hugetlb does not support softdirty
+ * tracking yet.
+ */
+ if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
return 1;
/* Specialty mapping? */
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 3a23dde73723..bc6bddd156ca 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -196,10 +196,11 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
pages++;
} else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
- struct page *page = pfn_swap_entry_to_page(entry);
pte_t newpte;
if (is_writable_migration_entry(entry)) {
+ struct page *page = pfn_swap_entry_to_page(entry);
+
/*
* A protection check is difficult so
* just be safe and disable write
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d0d466a5c804..032a7bf8d259 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2892,6 +2892,7 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb)
static void wb_inode_writeback_end(struct bdi_writeback *wb)
{
+ unsigned long flags;
atomic_dec(&wb->writeback_inodes);
/*
* Make sure estimate of writeback throughput gets updated after
@@ -2900,7 +2901,10 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb)
* that if multiple inodes end writeback at a similar time, they get
* batched into one bandwidth update.
*/
- queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+ spin_lock_irqsave(&wb->work_lock, flags);
+ if (test_bit(WB_registered, &wb->state))
+ queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+ spin_unlock_irqrestore(&wb->work_lock, flags);
}
bool __folio_end_writeback(struct folio *folio)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 9b3db11a4d1d..fa7a3d21a751 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -110,7 +110,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
do {
again:
next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
+ if (pmd_none(*pmd)) {
if (ops->pte_hole)
err = ops->pte_hole(addr, next, depth, walk);
if (err)
@@ -171,7 +171,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
do {
again:
next = pud_addr_end(addr, end);
- if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
+ if (pud_none(*pud)) {
if (ops->pte_hole)
err = ops->pte_hole(addr, next, depth, walk);
if (err)
@@ -366,19 +366,19 @@ static int __walk_page_range(unsigned long start, unsigned long end,
struct vm_area_struct *vma = walk->vma;
const struct mm_walk_ops *ops = walk->ops;
- if (vma && ops->pre_vma) {
+ if (ops->pre_vma) {
err = ops->pre_vma(start, end, walk);
if (err)
return err;
}
- if (vma && is_vm_hugetlb_page(vma)) {
+ if (is_vm_hugetlb_page(vma)) {
if (ops->hugetlb_entry)
err = walk_hugetlb_range(start, end, walk);
} else
err = walk_pgd_range(start, end, walk);
- if (vma && ops->post_vma)
+ if (ops->post_vma)
ops->post_vma(walk);
return err;
@@ -450,9 +450,13 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
if (!vma) { /* after the last vma */
walk.vma = NULL;
next = end;
+ if (ops->pte_hole)
+ err = ops->pte_hole(start, next, -1, &walk);
} else if (start < vma->vm_start) { /* outside vma */
walk.vma = NULL;
next = min(end, vma->vm_start);
+ if (ops->pte_hole)
+ err = ops->pte_hole(start, next, -1, &walk);
} else { /* inside vma */
walk.vma = vma;
next = min(end, vma->vm_end);
@@ -470,9 +474,8 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
}
if (err < 0)
break;
- }
- if (walk.vma || walk.ops->pte_hole)
err = __walk_page_range(start, next, &walk);
+ }
if (err)
break;
} while (start = next, start < end);
@@ -501,9 +504,9 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
if (start >= end || !walk.mm)
return -EINVAL;
- mmap_assert_locked(walk.mm);
+ mmap_assert_write_locked(walk.mm);
- return __walk_page_range(start, end, &walk);
+ return walk_pgd_range(start, end, &walk);
}
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
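
With the hunks above, walk_page_range() now invokes ->pte_hole with depth -1 directly from the VMA loop for ranges not covered by any VMA, and walk_page_range_novma() expects the mmap write lock. A hypothetical in-kernel sketch (not part of this patch; names invented) of a walker that relies on the depth -1 convention:

	#include <linux/mm.h>
	#include <linux/pagewalk.h>

	/* Called for holes; depth == -1 means the range is outside every VMA. */
	static int count_hole(unsigned long addr, unsigned long next, int depth,
			      struct mm_walk *walk)
	{
		unsigned long *holes = walk->private;

		if (depth == -1)
			*holes += next - addr;
		return 0;
	}

	static const struct mm_walk_ops hole_ops = {
		.pte_hole	= count_hole,
	};

	/* Sum the bytes in [start, end) that no VMA covers. */
	static unsigned long bytes_unmapped(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
	{
		unsigned long holes = 0;

		mmap_read_lock(mm);
		walk_page_range(mm, start, end, &hole_ops, &holes);
		mmap_read_unlock(mm);
		return holes;
	}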
diff --git a/mm/ptdump.c b/mm/ptdump.c
index eea3d28d173c..8adab455a68b 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -152,13 +152,13 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
{
const struct ptdump_range *range = st->range;
- mmap_read_lock(mm);
+ mmap_write_lock(mm);
while (range->start != range->end) {
walk_page_range_novma(mm, range->start, range->end,
&ptdump_ops, pgd, st);
range++;
}
- mmap_read_unlock(mm);
+ mmap_write_unlock(mm);
/* Flush out the last page */
st->note_page(st, 0, -1, 0);
diff --git a/mm/rmap.c b/mm/rmap.c
index edc06c52bc82..93d5a6f793d2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -93,7 +93,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
if (anon_vma) {
atomic_set(&anon_vma->refcount, 1);
- anon_vma->degree = 1; /* Reference for first vma */
+ anon_vma->num_children = 0;
+ anon_vma->num_active_vmas = 0;
anon_vma->parent = anon_vma;
/*
* Initialise the anon_vma root to point to itself. If called
@@ -201,6 +202,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
anon_vma = anon_vma_alloc();
if (unlikely(!anon_vma))
goto out_enomem_free_avc;
+ anon_vma->num_children++; /* self-parent link for new root */
allocated = anon_vma;
}
@@ -210,8 +212,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
if (likely(!vma->anon_vma)) {
vma->anon_vma = anon_vma;
anon_vma_chain_link(vma, avc, anon_vma);
- /* vma reference or self-parent link for new root */
- anon_vma->degree++;
+ anon_vma->num_active_vmas++;
allocated = NULL;
avc = NULL;
}
@@ -296,19 +297,19 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
anon_vma_chain_link(dst, avc, anon_vma);
/*
- * Reuse existing anon_vma if its degree lower than two,
- * that means it has no vma and only one anon_vma child.
+ * Reuse existing anon_vma if it has no vma and only one
+ * anon_vma child.
*
- * Do not choose parent anon_vma, otherwise first child
- * will always reuse it. Root anon_vma is never reused:
+ * Root anon_vma is never reused:
* it has self-parent reference and at least one child.
*/
if (!dst->anon_vma && src->anon_vma &&
- anon_vma != src->anon_vma && anon_vma->degree < 2)
+ anon_vma->num_children < 2 &&
+ anon_vma->num_active_vmas == 0)
dst->anon_vma = anon_vma;
}
if (dst->anon_vma)
- dst->anon_vma->degree++;
+ dst->anon_vma->num_active_vmas++;
unlock_anon_vma_root(root);
return 0;
@@ -358,6 +359,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
anon_vma = anon_vma_alloc();
if (!anon_vma)
goto out_error;
+ anon_vma->num_active_vmas++;
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
goto out_error_free_anon_vma;
@@ -378,7 +380,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
vma->anon_vma = anon_vma;
anon_vma_lock_write(anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
- anon_vma->parent->degree++;
+ anon_vma->parent->num_children++;
anon_vma_unlock_write(anon_vma);
return 0;
@@ -410,7 +412,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
* to free them outside the lock.
*/
if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
- anon_vma->parent->degree--;
+ anon_vma->parent->num_children--;
continue;
}
@@ -418,7 +420,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
anon_vma_chain_free(avc);
}
if (vma->anon_vma) {
- vma->anon_vma->degree--;
+ vma->anon_vma->num_active_vmas--;
/*
* vma would still be needed after unlink, and anon_vma will be prepared
@@ -436,7 +438,8 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma = avc->anon_vma;
- VM_WARN_ON(anon_vma->degree);
+ VM_WARN_ON(anon_vma->num_children);
+ VM_WARN_ON(anon_vma->num_active_vmas);
put_anon_vma(anon_vma);
list_del(&avc->same_vma);
diff --git a/mm/shmem.c b/mm/shmem.c
index 5783f11351bb..42e5888bf84d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1659,7 +1659,9 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
new = page_folio(newpage);
mem_cgroup_migrate(old, new);
__inc_lruvec_page_state(newpage, NR_FILE_PAGES);
+ __inc_lruvec_page_state(newpage, NR_SHMEM);
__dec_lruvec_page_state(oldpage, NR_FILE_PAGES);
+ __dec_lruvec_page_state(oldpage, NR_SHMEM);
}
xa_unlock_irq(&swap_mapping->i_pages);
@@ -1780,6 +1782,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
if (shmem_should_replace_folio(folio, gfp)) {
error = shmem_replace_page(&page, gfp, info, index);
+ folio = page_folio(page);
if (error)
goto failed;
}
@@ -2281,16 +2284,34 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-/* Mask out flags that are inappropriate for the given type of inode. */
-static unsigned shmem_mask_flags(umode_t mode, __u32 flags)
+#ifdef CONFIG_TMPFS_XATTR
+static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
+
+/*
+ * chattr's fsflags are unrelated to extended attributes,
+ * but tmpfs has chosen to enable them under the same config option.
+ */
+static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
+{
+ unsigned int i_flags = 0;
+
+ if (fsflags & FS_NOATIME_FL)
+ i_flags |= S_NOATIME;
+ if (fsflags & FS_APPEND_FL)
+ i_flags |= S_APPEND;
+ if (fsflags & FS_IMMUTABLE_FL)
+ i_flags |= S_IMMUTABLE;
+ /*
+ * But FS_NODUMP_FL does not require any action in i_flags.
+ */
+ inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE);
+}
+#else
+static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
{
- if (S_ISDIR(mode))
- return flags;
- else if (S_ISREG(mode))
- return flags & SHMEM_REG_FLMASK;
- else
- return flags & SHMEM_OTHER_FLMASK;
}
+#define shmem_initxattrs NULL
+#endif
static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir,
umode_t mode, dev_t dev, unsigned long flags)
@@ -2319,7 +2340,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir,
info->i_crtime = inode->i_mtime;
info->fsflags = (dir == NULL) ? 0 :
SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
- info->fsflags = shmem_mask_flags(mode, info->fsflags);
+ if (info->fsflags)
+ shmem_set_inode_flags(inode, info->fsflags);
INIT_LIST_HEAD(&info->shrinklist);
INIT_LIST_HEAD(&info->swaplist);
simple_xattrs_init(&info->xattrs);
@@ -2468,12 +2490,6 @@ out_unacct_blocks:
static const struct inode_operations shmem_symlink_inode_operations;
static const struct inode_operations shmem_short_symlink_operations;
-#ifdef CONFIG_TMPFS_XATTR
-static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
-#else
-#define shmem_initxattrs NULL
-#endif
-
static int
shmem_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
@@ -2826,12 +2842,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
i_size_write(inode, offset + len);
- inode->i_ctime = current_time(inode);
undone:
spin_lock(&inode->i_lock);
inode->i_private = NULL;
spin_unlock(&inode->i_lock);
out:
+ if (!error)
+ file_modified(file);
inode_unlock(inode);
return error;
}
@@ -3179,18 +3196,13 @@ static int shmem_fileattr_set(struct user_namespace *mnt_userns,
if (fileattr_has_fsx(fa))
return -EOPNOTSUPP;
+ if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE)
+ return -EOPNOTSUPP;
info->fsflags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) |
(fa->flags & SHMEM_FL_USER_MODIFIABLE);
- inode->i_flags &= ~(S_APPEND | S_IMMUTABLE | S_NOATIME);
- if (info->fsflags & FS_APPEND_FL)
- inode->i_flags |= S_APPEND;
- if (info->fsflags & FS_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
- if (info->fsflags & FS_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
-
+ shmem_set_inode_flags(inode, info->fsflags);
inode->i_ctime = current_time(inode);
return 0;
}
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 07d3befc80e4..7327b2573f7c 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -703,14 +703,29 @@ ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
mmap_changing, 0);
}
+void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
+ unsigned long start, unsigned long len, bool enable_wp)
+{
+ struct mmu_gather tlb;
+ pgprot_t newprot;
+
+ if (enable_wp)
+ newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
+ else
+ newprot = vm_get_page_prot(dst_vma->vm_flags);
+
+ tlb_gather_mmu(&tlb, dst_mm);
+ change_protection(&tlb, dst_vma, start, start + len, newprot,
+ enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
+ tlb_finish_mmu(&tlb);
+}
+
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool enable_wp,
atomic_t *mmap_changing)
{
struct vm_area_struct *dst_vma;
unsigned long page_mask;
- struct mmu_gather tlb;
- pgprot_t newprot;
int err;
/*
@@ -750,15 +765,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
goto out_unlock;
}
- if (enable_wp)
- newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
- else
- newprot = vm_get_page_prot(dst_vma->vm_flags);
-
- tlb_gather_mmu(&tlb, dst_mm);
- change_protection(&tlb, dst_vma, start, start + len, newprot,
- enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
- tlb_finish_mmu(&tlb);
+ uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp);
err = 0;
out_unlock:
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 373d2730fcf2..90af9a8572f5 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1168,8 +1168,15 @@ int fragmentation_index(struct zone *zone, unsigned int order)
#define TEXT_FOR_HIGHMEM(xx)
#endif
+#ifdef CONFIG_ZONE_DEVICE
+#define TEXT_FOR_DEVICE(xx) xx "_device",
+#else
+#define TEXT_FOR_DEVICE(xx)
+#endif
+
#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
- TEXT_FOR_HIGHMEM(xx) xx "_movable",
+ TEXT_FOR_HIGHMEM(xx) xx "_movable", \
+ TEXT_FOR_DEVICE(xx)
const char * const vmstat_text[] = {
/* enum zone_stat_item counters */
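
The new TEXT_FOR_DEVICE() arm means every TEXTS_FOR_ZONES() user now also emits an "_device" name when CONFIG_ZONE_DEVICE is enabled. A standalone illustration of the macro mechanics (zone list shortened; this is not the kernel's full definition, and the kernel additionally emits the _dma32 and _highmem variants when configured):

	#include <stdio.h>

	#define TEXT_FOR_DEVICE(xx)	xx "_device",
	#define TEXTS_FOR_ZONES(xx)	xx "_dma", xx "_normal", xx "_movable", \
					TEXT_FOR_DEVICE(xx)

	/* Expands to "pgalloc_dma", "pgalloc_normal", "pgalloc_movable", "pgalloc_device" */
	static const char *const zone_names[] = { TEXTS_FOR_ZONES("pgalloc") };

	int main(void)
	{
		for (unsigned int i = 0; i < sizeof(zone_names) / sizeof(zone_names[0]); i++)
			printf("%s\n", zone_names[i]);
		return 0;
	}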
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 34f784a1604b..907c9b1e1e61 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1487,7 +1487,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
struct size_class *class;
enum fullness_group fullness;
- if (unlikely(!handle))
+ if (IS_ERR_OR_NULL((void *)handle))
return;
/*