diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 06:29:45 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 06:29:45 +0300 |
commit | e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770 (patch) | |
tree | f7ed7753a2e66486a4ffe0fbbf98404ec4ba2212 /mm/khugepaged.c | |
parent | 7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (diff) | |
parent | c45bc55a99957b20e4e0333bcd42e12d1833a7f5 (diff) | |
download | linux-e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770.tar.xz |
Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
- More userfaultfs work from Peter Xu
- Several convert-to-folios series from Sidhartha Kumar and Huang Ying
- Some filemap cleanups from Vishal Moola
- David Hildenbrand added the ability to selftest anon memory COW
handling
- Some cpuset simplifications from Liu Shixin
- Addition of vmalloc tracing support by Uladzislau Rezki
- Some pagecache folioifications and simplifications from Matthew
Wilcox
- A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use
it
- Miguel Ojeda contributed some cleanups for our use of the
__no_sanitize_thread__ gcc keyword.
This series should have been in the non-MM tree, my bad
- Naoya Horiguchi improved the interaction between memory poisoning and
memory section removal for huge pages
- DAMON cleanups and tuneups from SeongJae Park
- Tony Luck fixed the handling of COW faults against poisoned pages
- Peter Xu utilized the PTE marker code for handling swapin errors
- Hugh Dickins reworked compound page mapcount handling, simplifying it
and making it more efficient
- Removal of the autonuma savedwrite infrastructure from Nadav Amit and
David Hildenbrand
- zram support for multiple compression streams from Sergey Senozhatsky
- David Hildenbrand reworked the GUP code's R/O long-term pinning so
that drivers no longer need to use the FOLL_FORCE workaround which
didn't work very well anyway
- Mel Gorman altered the page allocator so that local IRQs can remnain
enabled during per-cpu page allocations
- Vishal Moola removed the try_to_release_page() wrapper
- Stefan Roesch added some per-BDI sysfs tunables which are used to
prevent network block devices from dirtying excessive amounts of
pagecache
- David Hildenbrand did some cleanup and repair work on KSM COW
breaking
- Nhat Pham and Johannes Weiner have implemented writeback in zswap's
zsmalloc backend
- Brian Foster has fixed a longstanding corner-case oddity in
file[map]_write_and_wait_range()
- sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang
Chen
- Shiyang Ruan has done some work on fsdax, to make its reflink mode
work better under xfstests. Better, but still not perfect
- Christoph Hellwig has removed the .writepage() method from several
filesystems. They only need .writepages()
- Yosry Ahmed wrote a series which fixes the memcg reclaim target
beancounting
- David Hildenbrand has fixed some of our MM selftests for 32-bit
machines
- Many singleton patches, as usual
* tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits)
mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio
mm: mmu_gather: allow more than one batch of delayed rmaps
mm: fix typo in struct pglist_data code comment
kmsan: fix memcpy tests
mm: add cond_resched() in swapin_walk_pmd_entry()
mm: do not show fs mm pc for VM_LOCKONFAULT pages
selftests/vm: ksm_functional_tests: fixes for 32bit
selftests/vm: cow: fix compile warning on 32bit
selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions
mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem
mm,thp,rmap: fix races between updates of subpages_mapcount
mm: memcg: fix swapcached stat accounting
mm: add nodes= arg to memory.reclaim
mm: disable top-tier fallback to reclaim on proactive reclaim
selftests: cgroup: make sure reclaim target memcg is unprotected
selftests: cgroup: refactor proactive reclaim code to reclaim_until()
mm: memcg: fix stale protection of reclaim target memcg
mm/mmap: properly unaccount memory on mas_preallocate() failure
omfs: remove ->writepage
jfs: remove ->writepage
...
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r-- | mm/khugepaged.c | 57 |
1 files changed, 30 insertions, 27 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 3703a56571c1..5a7d2d5093f9 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1238,15 +1238,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, /* * Check if the page has any GUP (or other external) pins. * - * Here the check is racy it may see total_mapcount > refcount - * in some cases. - * For example, one process with one forked child process. - * The parent has the PMD split due to MADV_DONTNEED, then - * the child is trying unmap the whole PMD, but khugepaged - * may be scanning the parent between the child has - * PageDoubleMap flag cleared and dec the mapcount. So - * khugepaged may see total_mapcount > refcount. - * + * Here the check may be racy: + * it may see total_mapcount > refcount in some cases? * But such case is ephemeral we could always retry collapse * later. However it may report false positive if the page * has excessive GUP pins (i.e. 512). Anyway the same check @@ -1751,12 +1744,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, { struct address_space *mapping = file->f_mapping; struct page *hpage; - pgoff_t index, end = start + HPAGE_PMD_NR; + pgoff_t index = 0, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); int nr_none = 0, result = SCAN_SUCCEED; bool is_shmem = shmem_file(file); - int nr; + int nr = 0; VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); @@ -1796,6 +1789,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, xas_set(&xas, start); for (index = start; index < end; index++) { struct page *page = xas_next(&xas); + struct folio *folio; VM_BUG_ON(index != xas.xa_index); if (is_shmem) { @@ -1822,8 +1816,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } if (xa_is_value(page) || !PageUptodate(page)) { - struct folio *folio; - xas_unlock_irq(&xas); /* swap in or instantiate fallocated page */ if (shmem_get_folio(mapping->host, index, @@ -1911,13 +1903,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, goto out_unlock; } - if (page_mapping(page) != mapping) { + folio = page_folio(page); + + if (folio_mapping(folio) != mapping) { result = SCAN_TRUNCATED; goto out_unlock; } - if (!is_shmem && (PageDirty(page) || - PageWriteback(page))) { + if (!is_shmem && (folio_test_dirty(folio) || + folio_test_writeback(folio))) { /* * khugepaged only works on read-only fd, so this * page is dirty because it hasn't been flushed @@ -1927,20 +1921,20 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, goto out_unlock; } - if (isolate_lru_page(page)) { + if (folio_isolate_lru(folio)) { result = SCAN_DEL_PAGE_LRU; goto out_unlock; } - if (page_has_private(page) && - !try_to_release_page(page, GFP_KERNEL)) { + if (folio_has_private(folio) && + !filemap_release_folio(folio, GFP_KERNEL)) { result = SCAN_PAGE_HAS_PRIVATE; - putback_lru_page(page); + folio_putback_lru(folio); goto out_unlock; } - if (page_mapped(page)) - try_to_unmap(page_folio(page), + if (folio_mapped(folio)) + try_to_unmap(folio, TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH); xas_lock_irq(&xas); @@ -2019,6 +2013,7 @@ xa_unlocked: if (result == SCAN_SUCCEED) { struct page *page, *tmp; + struct folio *folio; /* * Replacing old pages with new one has succeeded, now we @@ -2046,11 +2041,13 @@ xa_unlocked: index++; } - SetPageUptodate(hpage); - page_ref_add(hpage, HPAGE_PMD_NR - 1); + folio = page_folio(hpage); + folio_mark_uptodate(folio); + folio_ref_add(folio, HPAGE_PMD_NR - 1); + if (is_shmem) - set_page_dirty(hpage); - lru_cache_add(hpage); + folio_mark_dirty(folio); + folio_add_lru(folio); /* * Remove pte page tables, so we can re-fault the page as huge. @@ -2108,7 +2105,8 @@ out: mem_cgroup_uncharge(page_folio(hpage)); put_page(hpage); } - /* TODO: tracepoints */ + + trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result); return result; } @@ -2577,6 +2575,11 @@ void khugepaged_min_free_kbytes_update(void) mutex_unlock(&khugepaged_mutex); } +bool current_is_khugepaged(void) +{ + return kthread_func(current) == khugepaged; +} + static int madvise_collapse_errno(enum scan_result r) { /* |