summaryrefslogtreecommitdiff
path: root/mm/khugepaged.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 06:29:45 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 06:29:45 +0300
commite2ca6ba6ba0152361aa4fcbf6067db71b2c7a770 (patch)
treef7ed7753a2e66486a4ffe0fbbf98404ec4ba2212 /mm/khugepaged.c
parent7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (diff)
parentc45bc55a99957b20e4e0333bcd42e12d1833a7f5 (diff)
downloadlinux-e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770.tar.xz
Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - More userfaultfs work from Peter Xu - Several convert-to-folios series from Sidhartha Kumar and Huang Ying - Some filemap cleanups from Vishal Moola - David Hildenbrand added the ability to selftest anon memory COW handling - Some cpuset simplifications from Liu Shixin - Addition of vmalloc tracing support by Uladzislau Rezki - Some pagecache folioifications and simplifications from Matthew Wilcox - A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use it - Miguel Ojeda contributed some cleanups for our use of the __no_sanitize_thread__ gcc keyword. This series should have been in the non-MM tree, my bad - Naoya Horiguchi improved the interaction between memory poisoning and memory section removal for huge pages - DAMON cleanups and tuneups from SeongJae Park - Tony Luck fixed the handling of COW faults against poisoned pages - Peter Xu utilized the PTE marker code for handling swapin errors - Hugh Dickins reworked compound page mapcount handling, simplifying it and making it more efficient - Removal of the autonuma savedwrite infrastructure from Nadav Amit and David Hildenbrand - zram support for multiple compression streams from Sergey Senozhatsky - David Hildenbrand reworked the GUP code's R/O long-term pinning so that drivers no longer need to use the FOLL_FORCE workaround which didn't work very well anyway - Mel Gorman altered the page allocator so that local IRQs can remnain enabled during per-cpu page allocations - Vishal Moola removed the try_to_release_page() wrapper - Stefan Roesch added some per-BDI sysfs tunables which are used to prevent network block devices from dirtying excessive amounts of pagecache - David Hildenbrand did some cleanup and repair work on KSM COW breaking - Nhat Pham and Johannes Weiner have implemented writeback in zswap's zsmalloc backend - Brian Foster has fixed a longstanding corner-case oddity in file[map]_write_and_wait_range() - sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang Chen - Shiyang Ruan has done some work on fsdax, to make its reflink mode work better under xfstests. Better, but still not perfect - Christoph Hellwig has removed the .writepage() method from several filesystems. They only need .writepages() - Yosry Ahmed wrote a series which fixes the memcg reclaim target beancounting - David Hildenbrand has fixed some of our MM selftests for 32-bit machines - Many singleton patches, as usual * tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits) mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio mm: mmu_gather: allow more than one batch of delayed rmaps mm: fix typo in struct pglist_data code comment kmsan: fix memcpy tests mm: add cond_resched() in swapin_walk_pmd_entry() mm: do not show fs mm pc for VM_LOCKONFAULT pages selftests/vm: ksm_functional_tests: fixes for 32bit selftests/vm: cow: fix compile warning on 32bit selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem mm,thp,rmap: fix races between updates of subpages_mapcount mm: memcg: fix swapcached stat accounting mm: add nodes= arg to memory.reclaim mm: disable top-tier fallback to reclaim on proactive reclaim selftests: cgroup: make sure reclaim target memcg is unprotected selftests: cgroup: refactor proactive reclaim code to reclaim_until() mm: memcg: fix stale protection of reclaim target memcg mm/mmap: properly unaccount memory on mas_preallocate() failure omfs: remove ->writepage jfs: remove ->writepage ...
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r--mm/khugepaged.c57
1 files changed, 30 insertions, 27 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 3703a56571c1..5a7d2d5093f9 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1238,15 +1238,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
/*
* Check if the page has any GUP (or other external) pins.
*
- * Here the check is racy it may see total_mapcount > refcount
- * in some cases.
- * For example, one process with one forked child process.
- * The parent has the PMD split due to MADV_DONTNEED, then
- * the child is trying unmap the whole PMD, but khugepaged
- * may be scanning the parent between the child has
- * PageDoubleMap flag cleared and dec the mapcount. So
- * khugepaged may see total_mapcount > refcount.
- *
+ * Here the check may be racy:
+ * it may see total_mapcount > refcount in some cases?
* But such case is ephemeral we could always retry collapse
* later. However it may report false positive if the page
* has excessive GUP pins (i.e. 512). Anyway the same check
@@ -1751,12 +1744,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
{
struct address_space *mapping = file->f_mapping;
struct page *hpage;
- pgoff_t index, end = start + HPAGE_PMD_NR;
+ pgoff_t index = 0, end = start + HPAGE_PMD_NR;
LIST_HEAD(pagelist);
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
int nr_none = 0, result = SCAN_SUCCEED;
bool is_shmem = shmem_file(file);
- int nr;
+ int nr = 0;
VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
@@ -1796,6 +1789,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
xas_set(&xas, start);
for (index = start; index < end; index++) {
struct page *page = xas_next(&xas);
+ struct folio *folio;
VM_BUG_ON(index != xas.xa_index);
if (is_shmem) {
@@ -1822,8 +1816,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
}
if (xa_is_value(page) || !PageUptodate(page)) {
- struct folio *folio;
-
xas_unlock_irq(&xas);
/* swap in or instantiate fallocated page */
if (shmem_get_folio(mapping->host, index,
@@ -1911,13 +1903,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
goto out_unlock;
}
- if (page_mapping(page) != mapping) {
+ folio = page_folio(page);
+
+ if (folio_mapping(folio) != mapping) {
result = SCAN_TRUNCATED;
goto out_unlock;
}
- if (!is_shmem && (PageDirty(page) ||
- PageWriteback(page))) {
+ if (!is_shmem && (folio_test_dirty(folio) ||
+ folio_test_writeback(folio))) {
/*
* khugepaged only works on read-only fd, so this
* page is dirty because it hasn't been flushed
@@ -1927,20 +1921,20 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
goto out_unlock;
}
- if (isolate_lru_page(page)) {
+ if (folio_isolate_lru(folio)) {
result = SCAN_DEL_PAGE_LRU;
goto out_unlock;
}
- if (page_has_private(page) &&
- !try_to_release_page(page, GFP_KERNEL)) {
+ if (folio_has_private(folio) &&
+ !filemap_release_folio(folio, GFP_KERNEL)) {
result = SCAN_PAGE_HAS_PRIVATE;
- putback_lru_page(page);
+ folio_putback_lru(folio);
goto out_unlock;
}
- if (page_mapped(page))
- try_to_unmap(page_folio(page),
+ if (folio_mapped(folio))
+ try_to_unmap(folio,
TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH);
xas_lock_irq(&xas);
@@ -2019,6 +2013,7 @@ xa_unlocked:
if (result == SCAN_SUCCEED) {
struct page *page, *tmp;
+ struct folio *folio;
/*
* Replacing old pages with new one has succeeded, now we
@@ -2046,11 +2041,13 @@ xa_unlocked:
index++;
}
- SetPageUptodate(hpage);
- page_ref_add(hpage, HPAGE_PMD_NR - 1);
+ folio = page_folio(hpage);
+ folio_mark_uptodate(folio);
+ folio_ref_add(folio, HPAGE_PMD_NR - 1);
+
if (is_shmem)
- set_page_dirty(hpage);
- lru_cache_add(hpage);
+ folio_mark_dirty(folio);
+ folio_add_lru(folio);
/*
* Remove pte page tables, so we can re-fault the page as huge.
@@ -2108,7 +2105,8 @@ out:
mem_cgroup_uncharge(page_folio(hpage));
put_page(hpage);
}
- /* TODO: tracepoints */
+
+ trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result);
return result;
}
@@ -2577,6 +2575,11 @@ void khugepaged_min_free_kbytes_update(void)
mutex_unlock(&khugepaged_mutex);
}
+bool current_is_khugepaged(void)
+{
+ return kthread_func(current) == khugepaged;
+}
+
static int madvise_collapse_errno(enum scan_result r)
{
/*