Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--  mm/memory-failure.c  176
1 file changed, 108 insertions, 68 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e245191e6b04..881c35ef1daa 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -39,7 +39,6 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
-#include <linux/kernel-page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/dax.h>
@@ -50,7 +49,6 @@
#include <linux/swap.h>
#include <linux/backing-dev.h>
#include <linux/migrate.h>
-#include <linux/suspend.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/hugetlb.h>
@@ -59,7 +57,6 @@
#include <linux/memremap.h>
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
-#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
#include <linux/shmem_fs.h>
#include <linux/sysctl.h>
@@ -75,13 +72,15 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
static bool hw_memory_failure __read_mostly = false;
-inline void num_poisoned_pages_inc(unsigned long pfn)
+static DEFINE_MUTEX(mf_mutex);
+
+void num_poisoned_pages_inc(unsigned long pfn)
{
atomic_long_inc(&num_poisoned_pages);
memblk_nr_poison_inc(pfn);
}
-inline void num_poisoned_pages_sub(unsigned long pfn, long i)
+void num_poisoned_pages_sub(unsigned long pfn, long i)
{
atomic_long_sub(i, &num_poisoned_pages);
if (pfn != -1UL)
@@ -363,17 +362,14 @@ void shake_page(struct page *p)
{
if (PageHuge(p))
return;
-
- if (!PageSlab(p)) {
- lru_add_drain_all();
- if (PageLRU(p) || is_free_buddy_page(p))
- return;
- }
-
/*
* TODO: Could shrink slab caches here if a lightweight range-based
* shrinker will be available.
*/
+ if (PageSlab(p))
+ return;
+
+ lru_add_drain_all();
}
EXPORT_SYMBOL_GPL(shake_page);
@@ -614,7 +610,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
pgoff = page_to_pgoff(page);
read_lock(&tasklist_lock);
- for_each_process (tsk) {
+ for_each_process(tsk) {
struct anon_vma_chain *vmac;
struct task_struct *t = task_early_kill(tsk, force_early);
@@ -658,7 +654,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
/*
* Send early kill signal to tasks where a vma covers
* the page but the corrupted page is not necessarily
- * mapped it in its pte.
+ * mapped in its pte.
* Assume applications who requested early kill want
* to be informed of all such data corruptions.
*/
@@ -831,6 +827,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
static const struct mm_walk_ops hwp_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
@@ -939,14 +936,12 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
struct folio *folio = page_folio(p);
int err = mapping->a_ops->error_remove_page(mapping, p);
- if (err != 0) {
+ if (err != 0)
pr_info("%#lx: Failed to punch page: %d\n", pfn, err);
- } else if (folio_has_private(folio) &&
- !filemap_release_folio(folio, GFP_NOIO)) {
+ else if (!filemap_release_folio(folio, GFP_NOIO))
pr_info("%#lx: failed to release buffers\n", pfn);
- } else {
+ else
ret = MF_RECOVERED;
- }
} else {
/*
* If the file system doesn't support it just invalidate
@@ -1192,9 +1187,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
struct address_space *mapping;
bool extra_pins = false;
- if (!PageHuge(hpage))
- return MF_DELAYED;
-
mapping = page_mapping(hpage);
if (mapping) {
res = truncate_error_page(hpage, page_to_pfn(p), mapping);
@@ -1394,8 +1386,15 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
bool hugetlb = false;
ret = get_hwpoison_hugetlb_folio(folio, &hugetlb, false);
- if (hugetlb)
- return ret;
+ if (hugetlb) {
+ /* Make sure hugetlb demotion did not happen from under us. */
+ if (folio == page_folio(page))
+ return ret;
+ if (ret > 0) {
+ folio_put(folio);
+ folio = page_folio(page);
+ }
+ }
/*
* This check prevents from calling folio_try_get() for any
@@ -1484,8 +1483,13 @@ static int __get_unpoison_page(struct page *page)
bool hugetlb = false;
ret = get_hwpoison_hugetlb_folio(folio, &hugetlb, true);
- if (hugetlb)
- return ret;
+ if (hugetlb) {
+ /* Make sure hugetlb demotion did not happen from under us. */
+ if (folio == page_folio(page))
+ return ret;
+ if (ret > 0)
+ folio_put(folio);
+ }
/*
* PageHWPoisonTakenOff pages are not only marked as PG_hwpoison,
@@ -1813,6 +1817,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
#endif /* CONFIG_FS_DAX */
#ifdef CONFIG_HUGETLB_PAGE
+
/*
* Struct raw_hwp_page represents information about "raw error page",
* constructing singly linked list from ->_hugetlb_hwpoison field of folio.
@@ -1827,16 +1832,49 @@ static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
return (struct llist_head *)&folio->_hugetlb_hwpoison;
}
+bool is_raw_hwpoison_page_in_hugepage(struct page *page)
+{
+ struct llist_head *raw_hwp_head;
+ struct raw_hwp_page *p;
+ struct folio *folio = page_folio(page);
+ bool ret = false;
+
+ if (!folio_test_hwpoison(folio))
+ return false;
+
+ if (!folio_test_hugetlb(folio))
+ return PageHWPoison(page);
+
+ /*
+ * When RawHwpUnreliable is set, kernel lost track of which subpages
+ * are HWPOISON. So return as if ALL subpages are HWPOISONed.
+ */
+ if (folio_test_hugetlb_raw_hwp_unreliable(folio))
+ return true;
+
+ mutex_lock(&mf_mutex);
+
+ raw_hwp_head = raw_hwp_list_head(folio);
+ llist_for_each_entry(p, raw_hwp_head->first, node) {
+ if (page == p->page) {
+ ret = true;
+ break;
+ }
+ }
+
+ mutex_unlock(&mf_mutex);
+
+ return ret;
+}
+
static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
{
- struct llist_head *head;
- struct llist_node *t, *tnode;
+ struct llist_node *head;
+ struct raw_hwp_page *p, *next;
unsigned long count = 0;
- head = raw_hwp_list_head(folio);
- llist_for_each_safe(tnode, t, head->first) {
- struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
-
+ head = llist_del_all(raw_hwp_list_head(folio));
+ llist_for_each_entry_safe(p, next, head, node) {
if (move_flag)
SetPageHWPoison(p->page);
else
@@ -1844,7 +1882,6 @@ static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
kfree(p);
count++;
}
- llist_del_all(head);
return count;
}
@@ -1852,7 +1889,7 @@ static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
{
struct llist_head *head;
struct raw_hwp_page *raw_hwp;
- struct llist_node *t, *tnode;
+ struct raw_hwp_page *p, *next;
int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0;
/*
@@ -1863,9 +1900,7 @@ static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
if (folio_test_hugetlb_raw_hwp_unreliable(folio))
return -EHWPOISON;
head = raw_hwp_list_head(folio);
- llist_for_each_safe(tnode, t, head->first) {
- struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
-
+ llist_for_each_entry_safe(p, next, head->first, node) {
if (p->page == page)
return -EHWPOISON;
}
@@ -1916,6 +1951,8 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
{
if (folio_test_hugetlb_raw_hwp_unreliable(folio))
return;
+ if (folio_test_hugetlb_vmemmap_optimized(folio))
+ return;
folio_clear_hwpoison(folio);
folio_free_raw_hwp(folio, true);
}
@@ -2080,8 +2117,6 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
{
int rc = -ENXIO;
- put_ref_page(pfn, flags);
-
/* device metadata space is not recoverable */
if (!pgmap_pfn_valid(pgmap, pfn))
goto out;
@@ -2104,12 +2139,11 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
out:
/* drop pgmap ref acquired in caller */
put_dev_pagemap(pgmap);
- action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
+ if (rc != -EOPNOTSUPP)
+ action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
return rc;
}
-static DEFINE_MUTEX(mf_mutex);
-
/**
* memory_failure - Handle memory failure of a page.
* @pfn: Page Number of the corrupted page
@@ -2125,7 +2159,7 @@ static DEFINE_MUTEX(mf_mutex);
* detected by a background scrubber)
*
* Must run in process context (e.g. a work queue) with interrupts
- * enabled and no spinlocks hold.
+ * enabled and no spinlocks held.
*
* Return: 0 for successfully handled the memory error,
* -EOPNOTSUPP for hwpoison_filter() filtered the error event,
@@ -2157,6 +2191,7 @@ int memory_failure(unsigned long pfn, int flags)
if (pfn_valid(pfn)) {
pgmap = get_dev_pagemap(pfn, NULL);
+ put_ref_page(pfn, flags);
if (pgmap) {
res = memory_failure_dev_pagemap(pfn, flags,
pgmap);
@@ -2183,8 +2218,6 @@ try_again:
goto unlock_mutex;
}
- hpage = compound_head(p);
-
/*
* We need/can do nothing about count=0 pages.
* 1) it's a free page, and therefore in safe hand:
@@ -2223,13 +2256,14 @@ try_again:
}
}
+ hpage = compound_head(p);
if (PageTransHuge(hpage)) {
/*
* The flag must be set after the refcount is bumped
* otherwise it may race with THP split.
* And the flag can't be set in get_hwpoison_page() since
* it is called by soft offline too and it is just called
- * for !MF_COUNT_INCREASE. So here seems to be the best
+ * for !MF_COUNT_INCREASED. So here seems to be the best
* place.
*
* Don't need care about the above error handling paths for
@@ -2466,7 +2500,7 @@ int unpoison_memory(unsigned long pfn)
{
struct folio *folio;
struct page *p;
- int ret = -EBUSY;
+ int ret = -EBUSY, ghp;
unsigned long count = 1;
bool huge = false;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -2487,7 +2521,7 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
- if (!folio_test_hwpoison(folio)) {
+ if (!PageHWPoison(p)) {
unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
pfn, &unpoison_rs);
goto unlock_mutex;
@@ -2499,6 +2533,13 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
+ if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+ goto unlock_mutex;
+
+ /*
+ * Note that folio->_mapcount is overloaded in SLAB, so the simple test
+ * in folio_mapped() has to be done after folio_test_slab() is checked.
+ */
if (folio_mapped(folio)) {
unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
pfn, &unpoison_rs);
@@ -2511,32 +2552,28 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
- if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
- goto unlock_mutex;
-
- ret = get_hwpoison_page(p, MF_UNPOISON);
- if (!ret) {
+ ghp = get_hwpoison_page(p, MF_UNPOISON);
+ if (!ghp) {
if (PageHuge(p)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
- if (count == 0) {
- ret = -EBUSY;
+ if (count == 0)
goto unlock_mutex;
- }
}
ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY;
- } else if (ret < 0) {
- if (ret == -EHWPOISON) {
+ } else if (ghp < 0) {
+ if (ghp == -EHWPOISON) {
ret = put_page_back_buddy(p) ? 0 : -EBUSY;
- } else
+ } else {
+ ret = ghp;
unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
pfn, &unpoison_rs);
+ }
} else {
if (PageHuge(p)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
if (count == 0) {
- ret = -EBUSY;
folio_put(folio);
goto unlock_mutex;
}
@@ -2586,10 +2623,10 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
/*
* If we succeed to isolate the page, we grabbed another refcount on
- * the page, so we can safely drop the one we got from get_any_pages().
+ * the page, so we can safely drop the one we got from get_any_page().
* If we failed to isolate the page, it means that we cannot go further
* and we will return an error, so drop the reference we got from
- * get_any_pages() as well.
+ * get_any_page() as well.
*/
put_page(page);
return isolated;
@@ -2622,7 +2659,7 @@ static int soft_offline_in_use_page(struct page *page)
}
lock_page(page);
- if (!PageHuge(page))
+ if (!huge)
wait_on_page_writeback(page);
if (PageHWPoison(page)) {
unlock_page(page);
@@ -2631,7 +2668,7 @@ static int soft_offline_in_use_page(struct page *page)
return 0;
}
- if (!PageHuge(page) && PageLRU(page) && !PageSwapCache(page))
+ if (!huge && PageLRU(page) && !PageSwapCache(page))
/*
* Try to invalidate first. This should work for
* non dirty unmapped page cache pages.
@@ -2737,10 +2774,13 @@ retry:
if (ret > 0) {
ret = soft_offline_in_use_page(page);
} else if (ret == 0) {
- if (!page_handle_poison(page, true, false) && try_again) {
- try_again = false;
- flags &= ~MF_COUNT_INCREASED;
- goto retry;
+ if (!page_handle_poison(page, true, false)) {
+ if (try_again) {
+ try_again = false;
+ flags &= ~MF_COUNT_INCREASED;
+ goto retry;
+ }
+ ret = -EBUSY;
}
}
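
Several hunks above convert open-coded llist walks (llist_for_each_safe() plus container_of()) into llist_for_each_entry_safe(), and __folio_free_raw_hwp() now detaches the whole list with llist_del_all() before walking it instead of deleting afterwards. The following is a minimal sketch of that pattern using the kernel's <linux/llist.h> API; struct raw_entry, its pfn field, and free_all_raw_entries() are hypothetical names used only for illustration and are not part of the patch (the sketch compiles only in kernel context).

/*
 * Sketch of the llist teardown pattern adopted by the patch:
 * detach everything atomically, then walk-and-free with the _safe iterator.
 */
#include <linux/llist.h>
#include <linux/slab.h>
#include <linux/types.h>

struct raw_entry {
	struct llist_node node;
	unsigned long pfn;	/* hypothetical payload */
};

static unsigned long free_all_raw_entries(struct llist_head *list)
{
	struct llist_node *head;
	struct raw_entry *e, *next;
	unsigned long count = 0;

	/* Detach every node in one atomic step; "list" is now empty. */
	head = llist_del_all(list);

	/* The _safe variant reads "next" before the body frees "e". */
	llist_for_each_entry_safe(e, next, head, node) {
		kfree(e);
		count++;
	}
	return count;
}

Two properties make this shape attractive here: the _safe iterator caches the next pointer before the loop body frees the current entry, and detaching the list first means the traversal works on a private snapshot rather than a list that concurrent llist_add() callers may still be growing.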