diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 06:29:45 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 06:29:45 +0300 |
commit | e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770 (patch) | |
tree | f7ed7753a2e66486a4ffe0fbbf98404ec4ba2212 /mm/memory-failure.c | |
parent | 7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (diff) | |
parent | c45bc55a99957b20e4e0333bcd42e12d1833a7f5 (diff) | |
download | linux-e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770.tar.xz |
Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
- More userfaultfs work from Peter Xu
- Several convert-to-folios series from Sidhartha Kumar and Huang Ying
- Some filemap cleanups from Vishal Moola
- David Hildenbrand added the ability to selftest anon memory COW
handling
- Some cpuset simplifications from Liu Shixin
- Addition of vmalloc tracing support by Uladzislau Rezki
- Some pagecache folioifications and simplifications from Matthew
Wilcox
- A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use
it
- Miguel Ojeda contributed some cleanups for our use of the
__no_sanitize_thread__ gcc keyword.
This series should have been in the non-MM tree, my bad
- Naoya Horiguchi improved the interaction between memory poisoning and
memory section removal for huge pages
- DAMON cleanups and tuneups from SeongJae Park
- Tony Luck fixed the handling of COW faults against poisoned pages
- Peter Xu utilized the PTE marker code for handling swapin errors
- Hugh Dickins reworked compound page mapcount handling, simplifying it
and making it more efficient
- Removal of the autonuma savedwrite infrastructure from Nadav Amit and
David Hildenbrand
- zram support for multiple compression streams from Sergey Senozhatsky
- David Hildenbrand reworked the GUP code's R/O long-term pinning so
that drivers no longer need to use the FOLL_FORCE workaround which
didn't work very well anyway
- Mel Gorman altered the page allocator so that local IRQs can remnain
enabled during per-cpu page allocations
- Vishal Moola removed the try_to_release_page() wrapper
- Stefan Roesch added some per-BDI sysfs tunables which are used to
prevent network block devices from dirtying excessive amounts of
pagecache
- David Hildenbrand did some cleanup and repair work on KSM COW
breaking
- Nhat Pham and Johannes Weiner have implemented writeback in zswap's
zsmalloc backend
- Brian Foster has fixed a longstanding corner-case oddity in
file[map]_write_and_wait_range()
- sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang
Chen
- Shiyang Ruan has done some work on fsdax, to make its reflink mode
work better under xfstests. Better, but still not perfect
- Christoph Hellwig has removed the .writepage() method from several
filesystems. They only need .writepages()
- Yosry Ahmed wrote a series which fixes the memcg reclaim target
beancounting
- David Hildenbrand has fixed some of our MM selftests for 32-bit
machines
- Many singleton patches, as usual
* tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits)
mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio
mm: mmu_gather: allow more than one batch of delayed rmaps
mm: fix typo in struct pglist_data code comment
kmsan: fix memcpy tests
mm: add cond_resched() in swapin_walk_pmd_entry()
mm: do not show fs mm pc for VM_LOCKONFAULT pages
selftests/vm: ksm_functional_tests: fixes for 32bit
selftests/vm: cow: fix compile warning on 32bit
selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions
mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem
mm,thp,rmap: fix races between updates of subpages_mapcount
mm: memcg: fix swapcached stat accounting
mm: add nodes= arg to memory.reclaim
mm: disable top-tier fallback to reclaim on proactive reclaim
selftests: cgroup: make sure reclaim target memcg is unprotected
selftests: cgroup: refactor proactive reclaim code to reclaim_until()
mm: memcg: fix stale protection of reclaim target memcg
mm/mmap: properly unaccount memory on mas_preallocate() failure
omfs: remove ->writepage
jfs: remove ->writepage
...
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 172 |
1 files changed, 83 insertions, 89 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index bead6bccc7f2..c77a9e37e27e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -74,6 +74,19 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); static bool hw_memory_failure __read_mostly = false; +inline void num_poisoned_pages_inc(unsigned long pfn) +{ + atomic_long_inc(&num_poisoned_pages); + memblk_nr_poison_inc(pfn); +} + +inline void num_poisoned_pages_sub(unsigned long pfn, long i) +{ + atomic_long_sub(i, &num_poisoned_pages); + if (pfn != -1UL) + memblk_nr_poison_sub(pfn, i); +} + /* * Return values: * 1: the page is dissolved (if needed) and taken off from buddy, @@ -115,7 +128,7 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo if (release) put_page(page); page_ref_inc(page); - num_poisoned_pages_inc(); + num_poisoned_pages_inc(page_to_pfn(page)); return true; } @@ -827,12 +840,13 @@ static int truncate_error_page(struct page *p, unsigned long pfn, int ret = MF_FAILED; if (mapping->a_ops->error_remove_page) { + struct folio *folio = page_folio(p); int err = mapping->a_ops->error_remove_page(mapping, p); if (err != 0) { pr_info("%#lx: Failed to punch page: %d\n", pfn, err); - } else if (page_has_private(p) && - !try_to_release_page(p, GFP_NOIO)) { + } else if (folio_has_private(folio) && + !filemap_release_folio(folio, GFP_NOIO)) { pr_info("%#lx: failed to release buffers\n", pfn); } else { ret = MF_RECOVERED; @@ -1182,14 +1196,16 @@ static struct page_state error_states[] = { * "Dirty/Clean" indication is not 100% accurate due to the possibility of * setting PG_dirty outside page lock. See also comment above set_page_dirty(). */ -static void action_result(unsigned long pfn, enum mf_action_page_type type, - enum mf_result result) +static int action_result(unsigned long pfn, enum mf_action_page_type type, + enum mf_result result) { trace_memory_failure_event(pfn, type, result); - num_poisoned_pages_inc(); + num_poisoned_pages_inc(pfn); pr_err("%#lx: recovery action for %s: %s\n", pfn, action_page_types[type], action_name[result]); + + return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; } static int page_action(struct page_state *ps, struct page *p, @@ -1200,14 +1216,12 @@ static int page_action(struct page_state *ps, struct page *p, /* page p should be unlocked after returning from ps->action(). */ result = ps->action(ps, p); - action_result(pfn, ps->type, result); - /* Could do more checks here if page looks ok */ /* * Could adjust zone counters here to correct for the missing page. */ - return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; + return action_result(pfn, ps->type, result); } static inline bool PageHWPoisonTakenOff(struct page *page) @@ -1247,7 +1261,7 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags) int ret = 0; bool hugetlb = false; - ret = get_hwpoison_huge_page(head, &hugetlb); + ret = get_hwpoison_huge_page(head, &hugetlb, false); if (hugetlb) return ret; @@ -1337,7 +1351,7 @@ static int __get_unpoison_page(struct page *page) int ret = 0; bool hugetlb = false; - ret = get_hwpoison_huge_page(head, &hugetlb); + ret = get_hwpoison_huge_page(head, &hugetlb, true); if (hugetlb) return ret; @@ -1674,8 +1688,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs); #ifdef CONFIG_HUGETLB_PAGE /* * Struct raw_hwp_page represents information about "raw error page", - * constructing singly linked list originated from ->private field of - * SUBPAGE_INDEX_HWPOISON-th tail page. + * constructing singly linked list from ->_hugetlb_hwpoison field of folio. */ struct raw_hwp_page { struct llist_node node; @@ -1684,7 +1697,7 @@ struct raw_hwp_page { static inline struct llist_head *raw_hwp_list_head(struct page *hpage) { - return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON); + return (struct llist_head *)&page_folio(hpage)->_hugetlb_hwpoison; } static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag) @@ -1699,6 +1712,8 @@ static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag) if (move_flag) SetPageHWPoison(p->page); + else + num_poisoned_pages_sub(page_to_pfn(p->page), 1); kfree(p); count++; } @@ -1734,7 +1749,7 @@ static int hugetlb_set_page_hwpoison(struct page *hpage, struct page *page) llist_add(&raw_hwp->node, head); /* the first error event will be counted in action_result(). */ if (ret) - num_poisoned_pages_inc(); + num_poisoned_pages_inc(page_to_pfn(page)); } else { /* * Failed to save raw error info. We no longer trace all @@ -1788,7 +1803,8 @@ void hugetlb_clear_page_hwpoison(struct page *hpage) * -EBUSY - the hugepage is busy (try to retry) * -EHWPOISON - the hugepage is already hwpoisoned */ -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) +int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared) { struct page *page = pfn_to_page(pfn); struct page *head = compound_head(page); @@ -1818,6 +1834,15 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) goto out; } + /* + * Clearing HPageMigratable for hwpoisoned hugepages to prevent them + * from being migrated by memory hotremove. + */ + if (count_increased && HPageMigratable(head)) { + ClearHPageMigratable(head); + *migratable_cleared = true; + } + return ret; out: if (count_increased) @@ -1837,10 +1862,11 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb struct page *p = pfn_to_page(pfn); struct page *head; unsigned long page_flags; + bool migratable_cleared = false; *hugetlb = 1; retry: - res = get_huge_page_for_hwpoison(pfn, flags); + res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared); if (res == 2) { /* fallback to normal page handling */ *hugetlb = 0; return 0; @@ -1856,8 +1882,7 @@ retry: flags |= MF_NO_RETRY; goto retry; } - action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); - return res; + return action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); } head = compound_head(p); @@ -1865,6 +1890,8 @@ retry: if (hwpoison_filter(p)) { hugetlb_clear_page_hwpoison(head); + if (migratable_cleared) + SetHPageMigratable(head); unlock_page(head); if (res == 1) put_page(head); @@ -1883,22 +1910,17 @@ retry: } else { res = MF_FAILED; } - action_result(pfn, MF_MSG_FREE_HUGE, res); - return res == MF_RECOVERED ? 0 : -EBUSY; + return action_result(pfn, MF_MSG_FREE_HUGE, res); } page_flags = head->flags; if (!hwpoison_user_mappings(p, pfn, flags, head)) { - action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); - res = -EBUSY; - goto out; + unlock_page(head); + return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); } return identify_page_state(pfn, p, page_flags); -out: - unlock_page(head); - return res; } #else @@ -1913,17 +1935,25 @@ static inline unsigned long free_raw_hwp_pages(struct page *hpage, bool flag) } #endif /* CONFIG_HUGETLB_PAGE */ +/* Drop the extra refcount in case we come from madvise() */ +static void put_ref_page(unsigned long pfn, int flags) +{ + struct page *page; + + if (!(flags & MF_COUNT_INCREASED)) + return; + + page = pfn_to_page(pfn); + if (page) + put_page(page); +} + static int memory_failure_dev_pagemap(unsigned long pfn, int flags, struct dev_pagemap *pgmap) { - struct page *page = pfn_to_page(pfn); int rc = -ENXIO; - if (flags & MF_COUNT_INCREASED) - /* - * Drop the extra refcount in case we come from madvise(). - */ - put_page(page); + put_ref_page(pfn, flags); /* device metadata space is not recoverable */ if (!pgmap_pfn_valid(pgmap, pfn)) @@ -2055,16 +2085,13 @@ try_again: } res = MF_FAILED; } - action_result(pfn, MF_MSG_BUDDY, res); - res = res == MF_RECOVERED ? 0 : -EBUSY; + res = action_result(pfn, MF_MSG_BUDDY, res); } else { - action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); } goto unlock_mutex; } else if (res < 0) { - action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); goto unlock_mutex; } } @@ -2085,8 +2112,7 @@ try_again: */ SetPageHasHWPoisoned(hpage); if (try_to_split_thp_page(p) < 0) { - action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); goto unlock_mutex; } VM_BUG_ON_PAGE(!page_count(p), p); @@ -2119,8 +2145,7 @@ try_again: retry = false; goto try_again; } - action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED); goto unlock_page; } @@ -2160,8 +2185,7 @@ try_again: * Abort on fail: __filemap_remove_folio() assumes unmapped page. */ if (!hwpoison_user_mappings(p, pfn, flags, p)) { - action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); goto unlock_page; } @@ -2169,8 +2193,7 @@ try_again: * Torn down by someone else? */ if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { - action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED); goto unlock_page; } @@ -2315,8 +2338,8 @@ int unpoison_memory(unsigned long pfn) struct page *page; struct page *p; int ret = -EBUSY; - int freeit = 0; unsigned long count = 1; + bool huge = false; static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -2365,6 +2388,7 @@ int unpoison_memory(unsigned long pfn) ret = get_hwpoison_page(p, MF_UNPOISON); if (!ret) { if (PageHuge(p)) { + huge = true; count = free_raw_hwp_pages(page, false); if (count == 0) { ret = -EBUSY; @@ -2380,6 +2404,7 @@ int unpoison_memory(unsigned long pfn) pfn, &unpoison_rs); } else { if (PageHuge(p)) { + huge = true; count = free_raw_hwp_pages(page, false); if (count == 0) { ret = -EBUSY; @@ -2387,10 +2412,9 @@ int unpoison_memory(unsigned long pfn) goto unlock_mutex; } } - freeit = !!TestClearPageHWPoison(p); put_page(page); - if (freeit) { + if (TestClearPageHWPoison(p)) { put_page(page); ret = 0; } @@ -2398,8 +2422,9 @@ int unpoison_memory(unsigned long pfn) unlock_mutex: mutex_unlock(&mf_mutex); - if (!ret || freeit) { - num_poisoned_pages_sub(count); + if (!ret) { + if (!huge) + num_poisoned_pages_sub(pfn, 1); unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", page_to_pfn(p), &unpoison_rs); } @@ -2516,12 +2541,6 @@ static int soft_offline_in_use_page(struct page *page) return ret; } -static void put_ref_page(struct page *page) -{ - if (page) - put_page(page); -} - /** * soft_offline_page - Soft offline a page. * @pfn: pfn to soft-offline @@ -2550,19 +2569,17 @@ int soft_offline_page(unsigned long pfn, int flags) { int ret; bool try_again = true; - struct page *page, *ref_page = NULL; - - WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED)); + struct page *page; - if (!pfn_valid(pfn)) + if (!pfn_valid(pfn)) { + WARN_ON_ONCE(flags & MF_COUNT_INCREASED); return -ENXIO; - if (flags & MF_COUNT_INCREASED) - ref_page = pfn_to_page(pfn); + } /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ page = pfn_to_online_page(pfn); if (!page) { - put_ref_page(ref_page); + put_ref_page(pfn, flags); return -EIO; } @@ -2570,7 +2587,7 @@ int soft_offline_page(unsigned long pfn, int flags) if (PageHWPoison(page)) { pr_info("%s: %#lx page already poisoned\n", __func__, pfn); - put_ref_page(ref_page); + put_ref_page(pfn, flags); mutex_unlock(&mf_mutex); return 0; } @@ -2602,26 +2619,3 @@ retry: return ret; } - -void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) -{ - int i, total = 0; - - /* - * A further optimization is to have per section refcounted - * num_poisoned_pages. But that would need more space per memmap, so - * for now just do a quick global check to speed up this routine in the - * absence of bad pages. - */ - if (atomic_long_read(&num_poisoned_pages) == 0) - return; - - for (i = 0; i < nr_pages; i++) { - if (PageHWPoison(&memmap[i])) { - total++; - ClearPageHWPoison(&memmap[i]); - } - } - if (total) - num_poisoned_pages_sub(total); -} |