path: root/mm/memory-failure.c
author    Linus Torvalds <torvalds@linux-foundation.org>  2022-12-14 06:29:45 +0300
committer Linus Torvalds <torvalds@linux-foundation.org>  2022-12-14 06:29:45 +0300
commit    e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770 (patch)
tree      f7ed7753a2e66486a4ffe0fbbf98404ec4ba2212  /mm/memory-failure.c
parent    7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (diff)
parent    c45bc55a99957b20e4e0333bcd42e12d1833a7f5 (diff)
download  linux-e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770.tar.xz
Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:

 - More userfaultfd work from Peter Xu
 - Several convert-to-folios series from Sidhartha Kumar and Huang Ying
 - Some filemap cleanups from Vishal Moola
 - David Hildenbrand added the ability to selftest anon memory COW handling
 - Some cpuset simplifications from Liu Shixin
 - Addition of vmalloc tracing support by Uladzislau Rezki
 - Some pagecache folioifications and simplifications from Matthew Wilcox
 - A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use it
 - Miguel Ojeda contributed some cleanups for our use of the __no_sanitize_thread__ gcc keyword. This series should have been in the non-MM tree, my bad
 - Naoya Horiguchi improved the interaction between memory poisoning and memory section removal for huge pages
 - DAMON cleanups and tuneups from SeongJae Park
 - Tony Luck fixed the handling of COW faults against poisoned pages
 - Peter Xu utilized the PTE marker code for handling swapin errors
 - Hugh Dickins reworked compound page mapcount handling, simplifying it and making it more efficient
 - Removal of the autonuma savedwrite infrastructure from Nadav Amit and David Hildenbrand
 - zram support for multiple compression streams from Sergey Senozhatsky
 - David Hildenbrand reworked the GUP code's R/O long-term pinning so that drivers no longer need to use the FOLL_FORCE workaround which didn't work very well anyway
 - Mel Gorman altered the page allocator so that local IRQs can remain enabled during per-cpu page allocations
 - Vishal Moola removed the try_to_release_page() wrapper
 - Stefan Roesch added some per-BDI sysfs tunables which are used to prevent network block devices from dirtying excessive amounts of pagecache
 - David Hildenbrand did some cleanup and repair work on KSM COW breaking
 - Nhat Pham and Johannes Weiner have implemented writeback in zswap's zsmalloc backend
 - Brian Foster has fixed a longstanding corner-case oddity in file[map]_write_and_wait_range()
 - sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang Chen
 - Shiyang Ruan has done some work on fsdax, to make its reflink mode work better under xfstests. Better, but still not perfect
 - Christoph Hellwig has removed the ->writepage() method from several filesystems. They only need ->writepages()
 - Yosry Ahmed wrote a series which fixes the memcg reclaim target beancounting
 - David Hildenbrand has fixed some of our MM selftests for 32-bit machines
 - Many singleton patches, as usual

* tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits)
  mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio
  mm: mmu_gather: allow more than one batch of delayed rmaps
  mm: fix typo in struct pglist_data code comment
  kmsan: fix memcpy tests
  mm: add cond_resched() in swapin_walk_pmd_entry()
  mm: do not show fs mm pc for VM_LOCKONFAULT pages
  selftests/vm: ksm_functional_tests: fixes for 32bit
  selftests/vm: cow: fix compile warning on 32bit
  selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions
  mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem
  mm,thp,rmap: fix races between updates of subpages_mapcount
  mm: memcg: fix swapcached stat accounting
  mm: add nodes= arg to memory.reclaim
  mm: disable top-tier fallback to reclaim on proactive reclaim
  selftests: cgroup: make sure reclaim target memcg is unprotected
  selftests: cgroup: refactor proactive reclaim code to reclaim_until()
  mm: memcg: fix stale protection of reclaim target memcg
  mm/mmap: properly unaccount memory on mas_preallocate() failure
  omfs: remove ->writepage
  jfs: remove ->writepage
  ...
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--  mm/memory-failure.c | 172
1 file changed, 83 insertions(+), 89 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index bead6bccc7f2..c77a9e37e27e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -74,6 +74,19 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
static bool hw_memory_failure __read_mostly = false;
+inline void num_poisoned_pages_inc(unsigned long pfn)
+{
+ atomic_long_inc(&num_poisoned_pages);
+ memblk_nr_poison_inc(pfn);
+}
+
+inline void num_poisoned_pages_sub(unsigned long pfn, long i)
+{
+ atomic_long_sub(i, &num_poisoned_pages);
+ if (pfn != -1UL)
+ memblk_nr_poison_sub(pfn, i);
+}
+
/*
* Return values:
* 1: the page is dissolved (if needed) and taken off from buddy,
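The new helpers pair the global num_poisoned_pages counter with per-memory-block bookkeeping keyed by pfn; the memblk_nr_poison_* calls feed the per-block counter that the memory hotremove code consults, and -1UL acts as a "pfn unknown" sentinel that skips the per-block update. Below is a minimal userspace model of that counting pattern; the names, block geometry, and modulo indexing are illustrative only, not the kernel API:

#include <stdatomic.h>
#include <stdio.h>

#define BLOCK_SHIFT 15			/* illustrative pages-per-block */
#define NBLOCKS     64

static atomic_long num_poisoned = 0;	/* models num_poisoned_pages */
static long nr_poison[NBLOCKS];		/* models per-memory-block counters */

static void model_poison_inc(unsigned long pfn)
{
	atomic_fetch_add(&num_poisoned, 1);
	nr_poison[(pfn >> BLOCK_SHIFT) % NBLOCKS]++;
}

static void model_poison_sub(unsigned long pfn, long i)
{
	atomic_fetch_sub(&num_poisoned, i);
	if (pfn != -1UL)	/* -1UL: caller does not know the pfn */
		nr_poison[(pfn >> BLOCK_SHIFT) % NBLOCKS] -= i;
}

int main(void)
{
	model_poison_inc(0x12345);
	model_poison_sub(0x12345, 1);
	printf("global count: %ld\n", atomic_load(&num_poisoned));
	return 0;
}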
@@ -115,7 +128,7 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo
if (release)
put_page(page);
page_ref_inc(page);
- num_poisoned_pages_inc();
+ num_poisoned_pages_inc(page_to_pfn(page));
return true;
}
@@ -827,12 +840,13 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
int ret = MF_FAILED;
if (mapping->a_ops->error_remove_page) {
+ struct folio *folio = page_folio(p);
int err = mapping->a_ops->error_remove_page(mapping, p);
if (err != 0) {
pr_info("%#lx: Failed to punch page: %d\n", pfn, err);
- } else if (page_has_private(p) &&
- !try_to_release_page(p, GFP_NOIO)) {
+ } else if (folio_has_private(folio) &&
+ !filemap_release_folio(folio, GFP_NOIO)) {
pr_info("%#lx: failed to release buffers\n", pfn);
} else {
ret = MF_RECOVERED;
@@ -1182,14 +1196,16 @@ static struct page_state error_states[] = {
* "Dirty/Clean" indication is not 100% accurate due to the possibility of
* setting PG_dirty outside page lock. See also comment above set_page_dirty().
*/
-static void action_result(unsigned long pfn, enum mf_action_page_type type,
- enum mf_result result)
+static int action_result(unsigned long pfn, enum mf_action_page_type type,
+ enum mf_result result)
{
trace_memory_failure_event(pfn, type, result);
- num_poisoned_pages_inc();
+ num_poisoned_pages_inc(pfn);
pr_err("%#lx: recovery action for %s: %s\n",
pfn, action_page_types[type], action_name[result]);
+
+ return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}
static int page_action(struct page_state *ps, struct page *p,
@@ -1200,14 +1216,12 @@ static int page_action(struct page_state *ps, struct page *p,
/* page p should be unlocked after returning from ps->action(). */
result = ps->action(ps, p);
- action_result(pfn, ps->type, result);
-
/* Could do more checks here if page looks ok */
/*
* Could adjust zone counters here to correct for the missing page.
*/
- return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
+ return action_result(pfn, ps->type, result);
}
static inline bool PageHWPoisonTakenOff(struct page *page)
@@ -1247,7 +1261,7 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
int ret = 0;
bool hugetlb = false;
- ret = get_hwpoison_huge_page(head, &hugetlb);
+ ret = get_hwpoison_huge_page(head, &hugetlb, false);
if (hugetlb)
return ret;
@@ -1337,7 +1351,7 @@ static int __get_unpoison_page(struct page *page)
int ret = 0;
bool hugetlb = false;
- ret = get_hwpoison_huge_page(head, &hugetlb);
+ ret = get_hwpoison_huge_page(head, &hugetlb, true);
if (hugetlb)
return ret;
@@ -1674,8 +1688,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
#ifdef CONFIG_HUGETLB_PAGE
/*
* Struct raw_hwp_page represents information about "raw error page",
- * constructing singly linked list originated from ->private field of
- * SUBPAGE_INDEX_HWPOISON-th tail page.
+ * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
*/
struct raw_hwp_page {
struct llist_node node;
@@ -1684,7 +1697,7 @@ struct raw_hwp_page {
static inline struct llist_head *raw_hwp_list_head(struct page *hpage)
{
- return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON);
+ return (struct llist_head *)&page_folio(hpage)->_hugetlb_hwpoison;
}
static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)
@@ -1699,6 +1712,8 @@ static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)
if (move_flag)
SetPageHWPoison(p->page);
+ else
+ num_poisoned_pages_sub(page_to_pfn(p->page), 1);
kfree(p);
count++;
}
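Each raw_hwp_page node records one poisoned subpage of a hugepage, pushed onto the list now anchored in the folio's _hugetlb_hwpoison field and drained when the hugepage is dissolved or unpoisoned. A sketch of the same push-front/drain bookkeeping with a plain singly linked list; these are userspace stand-ins, not the kernel's lock-free llist API:

#include <stdio.h>
#include <stdlib.h>

struct raw_err {
	struct raw_err *next;
	unsigned long pfn;		/* models raw_hwp_page->page */
};

static struct raw_err *hwp_list;	/* models folio->_hugetlb_hwpoison */

/* Push-front, as llist_add() does on the real list. */
static void record_raw_err(unsigned long pfn)
{
	struct raw_err *n = malloc(sizeof(*n));

	if (!n)
		return;
	n->pfn = pfn;
	n->next = hwp_list;
	hwp_list = n;
}

/* Drain and count, as __free_raw_hwp_pages() does. */
static unsigned long free_raw_errs(void)
{
	unsigned long count = 0;

	while (hwp_list) {
		struct raw_err *n = hwp_list;

		hwp_list = n->next;
		free(n);
		count++;
	}
	return count;
}

int main(void)
{
	record_raw_err(0x1000);
	record_raw_err(0x1001);
	printf("freed %lu raw error entries\n", free_raw_errs());
	return 0;
}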
@@ -1734,7 +1749,7 @@ static int hugetlb_set_page_hwpoison(struct page *hpage, struct page *page)
llist_add(&raw_hwp->node, head);
/* the first error event will be counted in action_result(). */
if (ret)
- num_poisoned_pages_inc();
+ num_poisoned_pages_inc(page_to_pfn(page));
} else {
/*
* Failed to save raw error info. We no longer trace all
@@ -1788,7 +1803,8 @@ void hugetlb_clear_page_hwpoison(struct page *hpage)
* -EBUSY - the hugepage is busy (try to retry)
* -EHWPOISON - the hugepage is already hwpoisoned
*/
-int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+ bool *migratable_cleared)
{
struct page *page = pfn_to_page(pfn);
struct page *head = compound_head(page);
@@ -1818,6 +1834,15 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
goto out;
}
+ /*
+ * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
+ * from being migrated by memory hotremove.
+ */
+ if (count_increased && HPageMigratable(head)) {
+ ClearHPageMigratable(head);
+ *migratable_cleared = true;
+ }
+
return ret;
out:
if (count_increased)
@@ -1837,10 +1862,11 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
struct page *p = pfn_to_page(pfn);
struct page *head;
unsigned long page_flags;
+ bool migratable_cleared = false;
*hugetlb = 1;
retry:
- res = get_huge_page_for_hwpoison(pfn, flags);
+ res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
if (res == 2) { /* fallback to normal page handling */
*hugetlb = 0;
return 0;
@@ -1856,8 +1882,7 @@ retry:
flags |= MF_NO_RETRY;
goto retry;
}
- action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
- return res;
+ return action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
}
head = compound_head(p);
@@ -1865,6 +1890,8 @@ retry:
if (hwpoison_filter(p)) {
hugetlb_clear_page_hwpoison(head);
+ if (migratable_cleared)
+ SetHPageMigratable(head);
unlock_page(head);
if (res == 1)
put_page(head);
@@ -1883,22 +1910,17 @@ retry:
} else {
res = MF_FAILED;
}
- action_result(pfn, MF_MSG_FREE_HUGE, res);
- return res == MF_RECOVERED ? 0 : -EBUSY;
+ return action_result(pfn, MF_MSG_FREE_HUGE, res);
}
page_flags = head->flags;
if (!hwpoison_user_mappings(p, pfn, flags, head)) {
- action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
- res = -EBUSY;
- goto out;
+ unlock_page(head);
+ return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
}
return identify_page_state(pfn, p, page_flags);
-out:
- unlock_page(head);
- return res;
}
#else
@@ -1913,17 +1935,25 @@ static inline unsigned long free_raw_hwp_pages(struct page *hpage, bool flag)
}
#endif /* CONFIG_HUGETLB_PAGE */
+/* Drop the extra refcount in case we come from madvise() */
+static void put_ref_page(unsigned long pfn, int flags)
+{
+ struct page *page;
+
+ if (!(flags & MF_COUNT_INCREASED))
+ return;
+
+ page = pfn_to_page(pfn);
+ if (page)
+ put_page(page);
+}
+
static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
struct dev_pagemap *pgmap)
{
- struct page *page = pfn_to_page(pfn);
int rc = -ENXIO;
- if (flags & MF_COUNT_INCREASED)
- /*
- * Drop the extra refcount in case we come from madvise().
- */
- put_page(page);
+ put_ref_page(pfn, flags);
/* device metadata space is not recoverable */
if (!pgmap_pfn_valid(pgmap, pfn))
@@ -2055,16 +2085,13 @@ try_again:
}
res = MF_FAILED;
}
- action_result(pfn, MF_MSG_BUDDY, res);
- res = res == MF_RECOVERED ? 0 : -EBUSY;
+ res = action_result(pfn, MF_MSG_BUDDY, res);
} else {
- action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
- res = -EBUSY;
+ res = action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
}
goto unlock_mutex;
} else if (res < 0) {
- action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
- res = -EBUSY;
+ res = action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
goto unlock_mutex;
}
}
@@ -2085,8 +2112,7 @@ try_again:
*/
SetPageHasHWPoisoned(hpage);
if (try_to_split_thp_page(p) < 0) {
- action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
- res = -EBUSY;
+ res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
goto unlock_mutex;
}
VM_BUG_ON_PAGE(!page_count(p), p);
@@ -2119,8 +2145,7 @@ try_again:
retry = false;
goto try_again;
}
- action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
- res = -EBUSY;
+ res = action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
goto unlock_page;
}
@@ -2160,8 +2185,7 @@ try_again:
* Abort on fail: __filemap_remove_folio() assumes unmapped page.
*/
if (!hwpoison_user_mappings(p, pfn, flags, p)) {
- action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
- res = -EBUSY;
+ res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
goto unlock_page;
}
@@ -2169,8 +2193,7 @@ try_again:
* Torn down by someone else?
*/
if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
- action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
- res = -EBUSY;
+ res = action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
goto unlock_page;
}
@@ -2315,8 +2338,8 @@ int unpoison_memory(unsigned long pfn)
struct page *page;
struct page *p;
int ret = -EBUSY;
- int freeit = 0;
unsigned long count = 1;
+ bool huge = false;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
@@ -2365,6 +2388,7 @@ int unpoison_memory(unsigned long pfn)
ret = get_hwpoison_page(p, MF_UNPOISON);
if (!ret) {
if (PageHuge(p)) {
+ huge = true;
count = free_raw_hwp_pages(page, false);
if (count == 0) {
ret = -EBUSY;
@@ -2380,6 +2404,7 @@ int unpoison_memory(unsigned long pfn)
pfn, &unpoison_rs);
} else {
if (PageHuge(p)) {
+ huge = true;
count = free_raw_hwp_pages(page, false);
if (count == 0) {
ret = -EBUSY;
@@ -2387,10 +2412,9 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
}
- freeit = !!TestClearPageHWPoison(p);
put_page(page);
- if (freeit) {
+ if (TestClearPageHWPoison(p)) {
put_page(page);
ret = 0;
}
@@ -2398,8 +2422,9 @@ int unpoison_memory(unsigned long pfn)
unlock_mutex:
mutex_unlock(&mf_mutex);
- if (!ret || freeit) {
- num_poisoned_pages_sub(count);
+ if (!ret) {
+ if (!huge)
+ num_poisoned_pages_sub(pfn, 1);
unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
page_to_pfn(p), &unpoison_rs);
}
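unpoison_memory() is reachable from userspace through the hwpoison-inject debugfs files when the kernel is built with CONFIG_HWPOISON_INJECT. A minimal sketch, assuming debugfs is mounted at its usual path and the caller is root; the pfn below is a placeholder, e.g. one taken from a prior injection:

#include <stdio.h>

static int unpoison_pfn(unsigned long pfn)
{
	/* Writing a pfn here ends up in unpoison_memory(). */
	FILE *f = fopen("/sys/kernel/debug/hwpoison/unpoison-pfn", "w");

	if (!f) {
		perror("unpoison-pfn");
		return -1;
	}
	fprintf(f, "%lu\n", pfn);
	return fclose(f);
}

int main(void)
{
	return unpoison_pfn(0x1234UL) ? 1 : 0;
}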
@@ -2516,12 +2541,6 @@ static int soft_offline_in_use_page(struct page *page)
return ret;
}
-static void put_ref_page(struct page *page)
-{
- if (page)
- put_page(page);
-}
-
/**
* soft_offline_page - Soft offline a page.
* @pfn: pfn to soft-offline
@@ -2550,19 +2569,17 @@ int soft_offline_page(unsigned long pfn, int flags)
{
int ret;
bool try_again = true;
- struct page *page, *ref_page = NULL;
-
- WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED));
+ struct page *page;
- if (!pfn_valid(pfn))
+ if (!pfn_valid(pfn)) {
+ WARN_ON_ONCE(flags & MF_COUNT_INCREASED);
return -ENXIO;
- if (flags & MF_COUNT_INCREASED)
- ref_page = pfn_to_page(pfn);
+ }
/* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
page = pfn_to_online_page(pfn);
if (!page) {
- put_ref_page(ref_page);
+ put_ref_page(pfn, flags);
return -EIO;
}
@@ -2570,7 +2587,7 @@ int soft_offline_page(unsigned long pfn, int flags)
if (PageHWPoison(page)) {
pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
- put_ref_page(ref_page);
+ put_ref_page(pfn, flags);
mutex_unlock(&mf_mutex);
return 0;
}
@@ -2602,26 +2619,3 @@ retry:
return ret;
}
-
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
-{
- int i, total = 0;
-
- /*
- * A further optimization is to have per section refcounted
- * num_poisoned_pages. But that would need more space per memmap, so
- * for now just do a quick global check to speed up this routine in the
- * absence of bad pages.
- */
- if (atomic_long_read(&num_poisoned_pages) == 0)
- return;
-
- for (i = 0; i < nr_pages; i++) {
- if (PageHWPoison(&memmap[i])) {
- total++;
- ClearPageHWPoison(&memmap[i]);
- }
- }
- if (total)
- num_poisoned_pages_sub(total);
-}
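For reference, soft_offline_page() is entered with MF_COUNT_INCREASED from the madvise(MADV_SOFT_OFFLINE) injection path, which is what the hwpoison selftests exercise. A minimal sketch; it needs CAP_SYS_ADMIN and CONFIG_MEMORY_FAILURE, and the constant falls back to its asm-generic value if the libc headers lack it:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MADV_SOFT_OFFLINE
#define MADV_SOFT_OFFLINE 101	/* from asm-generic/mman-common.h */
#endif

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, psize, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(p, 0, psize);	/* fault the page in so it has a pfn */

	/* Enters soft_offline_page() with MF_COUNT_INCREASED set. */
	if (madvise(p, psize, MADV_SOFT_OFFLINE))
		perror("madvise(MADV_SOFT_OFFLINE)");
	else
		printf("page soft-offlined, contents migrated\n");

	munmap(p, psize);
	return 0;
}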