diff options
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 173 |
1 files changed, 92 insertions, 81 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 660c21859118..4f9b61f4a668 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -595,10 +595,9 @@ struct task_struct *task_early_kill(struct task_struct *tsk, int force_early) /* * Collect processes when the error hit an anonymous page. */ -static void collect_procs_anon(struct page *page, struct list_head *to_kill, - int force_early) +static void collect_procs_anon(struct folio *folio, struct page *page, + struct list_head *to_kill, int force_early) { - struct folio *folio = page_folio(page); struct vm_area_struct *vma; struct task_struct *tsk; struct anon_vma *av; @@ -633,12 +632,12 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, /* * Collect processes when the error hit a file mapped page. */ -static void collect_procs_file(struct page *page, struct list_head *to_kill, - int force_early) +static void collect_procs_file(struct folio *folio, struct page *page, + struct list_head *to_kill, int force_early) { struct vm_area_struct *vma; struct task_struct *tsk; - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; pgoff_t pgoff; i_mmap_lock_read(mapping); @@ -679,7 +678,7 @@ static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p, */ static void collect_procs_fsdax(struct page *page, struct address_space *mapping, pgoff_t pgoff, - struct list_head *to_kill) + struct list_head *to_kill, bool pre_remove) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -687,8 +686,15 @@ static void collect_procs_fsdax(struct page *page, i_mmap_lock_read(mapping); rcu_read_lock(); for_each_process(tsk) { - struct task_struct *t = task_early_kill(tsk, true); + struct task_struct *t = tsk; + /* + * Search for all tasks while MF_MEM_PRE_REMOVE is set, because + * the current may not be the one accessing the fsdax page. + * Otherwise, search for the current task. + */ + if (!pre_remove) + t = task_early_kill(tsk, true); if (!t) continue; vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { @@ -704,17 +710,17 @@ static void collect_procs_fsdax(struct page *page, /* * Collect the processes who have the corrupted page mapped to kill. */ -static void collect_procs(struct page *page, struct list_head *tokill, - int force_early) +static void collect_procs(struct folio *folio, struct page *page, + struct list_head *tokill, int force_early) { - if (!page->mapping) + if (!folio->mapping) return; if (unlikely(PageKsm(page))) collect_procs_ksm(page, tokill, force_early); else if (PageAnon(page)) - collect_procs_anon(page, tokill, force_early); + collect_procs_anon(folio, page, tokill, force_early); else - collect_procs_file(page, tokill, force_early); + collect_procs_file(folio, page, tokill, force_early); } struct hwpoison_walk { @@ -902,39 +908,38 @@ static const char * const action_page_types[] = { * The page count will stop it from being freed by unpoison. * Stress tests should be aware of this memory leak problem. */ -static int delete_from_lru_cache(struct page *p) +static int delete_from_lru_cache(struct folio *folio) { - if (isolate_lru_page(p)) { + if (folio_isolate_lru(folio)) { /* * Clear sensible page flags, so that the buddy system won't - * complain when the page is unpoison-and-freed. + * complain when the folio is unpoison-and-freed. */ - ClearPageActive(p); - ClearPageUnevictable(p); + folio_clear_active(folio); + folio_clear_unevictable(folio); /* * Poisoned page might never drop its ref count to 0 so we have * to uncharge it manually from its memcg. */ - mem_cgroup_uncharge(page_folio(p)); + mem_cgroup_uncharge(folio); /* - * drop the page count elevated by isolate_lru_page() + * drop the refcount elevated by folio_isolate_lru() */ - put_page(p); + folio_put(folio); return 0; } return -EIO; } -static int truncate_error_page(struct page *p, unsigned long pfn, +static int truncate_error_folio(struct folio *folio, unsigned long pfn, struct address_space *mapping) { int ret = MF_FAILED; - if (mapping->a_ops->error_remove_page) { - struct folio *folio = page_folio(p); - int err = mapping->a_ops->error_remove_page(mapping, p); + if (mapping->a_ops->error_remove_folio) { + int err = mapping->a_ops->error_remove_folio(mapping, folio); if (err != 0) pr_info("%#lx: Failed to punch page: %d\n", pfn, err); @@ -947,7 +952,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn, * If the file system doesn't support it just invalidate * This fails on dirty or anything with private pages */ - if (invalidate_inode_page(p)) + if (mapping_evict_folio(mapping, folio)) ret = MF_RECOVERED; else pr_info("%#lx: Failed to invalidate\n", pfn); @@ -1014,17 +1019,18 @@ static int me_unknown(struct page_state *ps, struct page *p) */ static int me_pagecache_clean(struct page_state *ps, struct page *p) { + struct folio *folio = page_folio(p); int ret; struct address_space *mapping; bool extra_pins; - delete_from_lru_cache(p); + delete_from_lru_cache(folio); /* - * For anonymous pages we're done the only reference left + * For anonymous folios the only reference left * should be the one m_f() holds. */ - if (PageAnon(p)) { + if (folio_test_anon(folio)) { ret = MF_RECOVERED; goto out; } @@ -1036,11 +1042,9 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p) * has a reference, because it could be file system metadata * and that's not safe to truncate. */ - mapping = page_mapping(p); + mapping = folio_mapping(folio); if (!mapping) { - /* - * Page has been teared down in the meanwhile - */ + /* Folio has been torn down in the meantime */ ret = MF_FAILED; goto out; } @@ -1056,12 +1060,12 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p) * * Open: to take i_rwsem or not for this? Right now we don't. */ - ret = truncate_error_page(p, page_to_pfn(p), mapping); + ret = truncate_error_folio(folio, page_to_pfn(p), mapping); if (has_extra_refcount(ps, p, extra_pins)) ret = MF_FAILED; out: - unlock_page(p); + folio_unlock(folio); return ret; } @@ -1139,15 +1143,16 @@ static int me_pagecache_dirty(struct page_state *ps, struct page *p) */ static int me_swapcache_dirty(struct page_state *ps, struct page *p) { + struct folio *folio = page_folio(p); int ret; bool extra_pins = false; - ClearPageDirty(p); + folio_clear_dirty(folio); /* Trigger EIO in shmem: */ - ClearPageUptodate(p); + folio_clear_uptodate(folio); - ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED; - unlock_page(p); + ret = delete_from_lru_cache(folio) ? MF_FAILED : MF_DELAYED; + folio_unlock(folio); if (ret == MF_DELAYED) extra_pins = true; @@ -1165,7 +1170,7 @@ static int me_swapcache_clean(struct page_state *ps, struct page *p) delete_from_swap_cache(folio); - ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED; + ret = delete_from_lru_cache(folio) ? MF_FAILED : MF_RECOVERED; folio_unlock(folio); if (has_extra_refcount(ps, p, false)) @@ -1182,25 +1187,25 @@ static int me_swapcache_clean(struct page_state *ps, struct page *p) */ static int me_huge_page(struct page_state *ps, struct page *p) { + struct folio *folio = page_folio(p); int res; - struct page *hpage = compound_head(p); struct address_space *mapping; bool extra_pins = false; - mapping = page_mapping(hpage); + mapping = folio_mapping(folio); if (mapping) { - res = truncate_error_page(hpage, page_to_pfn(p), mapping); + res = truncate_error_folio(folio, page_to_pfn(p), mapping); /* The page is kept in page cache. */ extra_pins = true; - unlock_page(hpage); + folio_unlock(folio); } else { - unlock_page(hpage); + folio_unlock(folio); /* * migration entry prevents later access on error hugepage, * so we can free and dissolve it into buddy to save healthy * subpages. */ - put_page(hpage); + folio_put(folio); if (__page_handle_poison(p) >= 0) { page_ref_inc(p); res = MF_RECOVERED; @@ -1571,7 +1576,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, * This check implies we don't kill processes if their pages * are in the swap cache early. Those are always late kills. */ - if (!page_mapped(hpage)) + if (!page_mapped(p)) return true; if (PageSwapCache(p)) { @@ -1602,7 +1607,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, * mapped in dirty form. This has to be done before try_to_unmap, * because ttu takes the rmap data structures down. */ - collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED); + collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED); if (PageHuge(hpage) && !PageAnon(hpage)) { /* @@ -1622,10 +1627,10 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, try_to_unmap(folio, ttu); } - unmap_success = !page_mapped(hpage); + unmap_success = !page_mapped(p); if (!unmap_success) pr_err("%#lx: failed to unmap page (mapcount=%d)\n", - pfn, page_mapcount(hpage)); + pfn, page_mapcount(p)); /* * try_to_unmap() might put mlocked page in lru cache, so call @@ -1705,7 +1710,7 @@ static void unmap_and_kill(struct list_head *to_kill, unsigned long pfn, * mapping being torn down is communicated in siginfo, see * kill_proc() */ - loff_t start = (index << PAGE_SHIFT) & ~(size - 1); + loff_t start = ((loff_t)index << PAGE_SHIFT) & ~(size - 1); unmap_mapping_range(mapping, start, size, 0); } @@ -1772,7 +1777,7 @@ static int mf_generic_kill_procs(unsigned long long pfn, int flags, * SIGBUS (i.e. MF_MUST_KILL) */ flags |= MF_ACTION_REQUIRED | MF_MUST_KILL; - collect_procs(&folio->page, &to_kill, true); + collect_procs(folio, &folio->page, &to_kill, true); unmap_and_kill(&to_kill, pfn, folio->mapping, folio->index, flags); unlock: @@ -1795,6 +1800,7 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, dax_entry_t cookie; struct page *page; size_t end = index + count; + bool pre_remove = mf_flags & MF_MEM_PRE_REMOVE; mf_flags |= MF_ACTION_REQUIRED | MF_MUST_KILL; @@ -1806,9 +1812,14 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, if (!page) goto unlock; - SetPageHWPoison(page); + if (!pre_remove) + SetPageHWPoison(page); - collect_procs_fsdax(page, mapping, index, &to_kill); + /* + * The pre_remove case is revoking access, the memory is still + * good and could theoretically be put back into service. + */ + collect_procs_fsdax(page, mapping, index, &to_kill, pre_remove); unmap_and_kill(&to_kill, page_to_pfn(page), mapping, index, mf_flags); unlock: @@ -2317,8 +2328,8 @@ try_again: * We use page flags to determine what action should be taken, but * the flags can be modified by the error containment action. One * example is an mlocked page, where PG_mlocked is cleared by - * page_remove_rmap() in try_to_unmap_one(). So to determine page status - * correctly, we save a copy of the page flags at this time. + * folio_remove_rmap_*() in try_to_unmap_one(). So to determine page + * status correctly, we save a copy of the page flags at this time. */ page_flags = p->flags; @@ -2602,37 +2613,37 @@ unlock_mutex: } EXPORT_SYMBOL(unpoison_memory); -static bool isolate_page(struct page *page, struct list_head *pagelist) +static bool mf_isolate_folio(struct folio *folio, struct list_head *pagelist) { bool isolated = false; - if (PageHuge(page)) { - isolated = isolate_hugetlb(page_folio(page), pagelist); + if (folio_test_hugetlb(folio)) { + isolated = isolate_hugetlb(folio, pagelist); } else { - bool lru = !__PageMovable(page); + bool lru = !__folio_test_movable(folio); if (lru) - isolated = isolate_lru_page(page); + isolated = folio_isolate_lru(folio); else - isolated = isolate_movable_page(page, + isolated = isolate_movable_page(&folio->page, ISOLATE_UNEVICTABLE); if (isolated) { - list_add(&page->lru, pagelist); + list_add(&folio->lru, pagelist); if (lru) - inc_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_lru(page)); + node_stat_add_folio(folio, NR_ISOLATED_ANON + + folio_is_file_lru(folio)); } } /* - * If we succeed to isolate the page, we grabbed another refcount on - * the page, so we can safely drop the one we got from get_any_page(). - * If we failed to isolate the page, it means that we cannot go further + * If we succeed to isolate the folio, we grabbed another refcount on + * the folio, so we can safely drop the one we got from get_any_page(). + * If we failed to isolate the folio, it means that we cannot go further * and we will return an error, so drop the reference we got from * get_any_page() as well. */ - put_page(page); + folio_put(folio); return isolated; } @@ -2645,40 +2656,40 @@ static int soft_offline_in_use_page(struct page *page) { long ret = 0; unsigned long pfn = page_to_pfn(page); - struct page *hpage = compound_head(page); + struct folio *folio = page_folio(page); char const *msg_page[] = {"page", "hugepage"}; - bool huge = PageHuge(page); + bool huge = folio_test_hugetlb(folio); LIST_HEAD(pagelist); struct migration_target_control mtc = { .nid = NUMA_NO_NODE, .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, }; - if (!huge && PageTransHuge(hpage)) { + if (!huge && folio_test_large(folio)) { if (try_to_split_thp_page(page)) { pr_info("soft offline: %#lx: thp split failed\n", pfn); return -EBUSY; } - hpage = page; + folio = page_folio(page); } - lock_page(page); + folio_lock(folio); if (!huge) - wait_on_page_writeback(page); + folio_wait_writeback(folio); if (PageHWPoison(page)) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); pr_info("soft offline: %#lx page already poisoned\n", pfn); return 0; } - if (!huge && PageLRU(page) && !PageSwapCache(page)) + if (!huge && folio_test_lru(folio) && !folio_test_swapcache(folio)) /* * Try to invalidate first. This should work for * non dirty unmapped page cache pages. */ - ret = invalidate_inode_page(page); - unlock_page(page); + ret = mapping_evict_folio(folio_mapping(folio), folio); + folio_unlock(folio); if (ret) { pr_info("soft_offline: %#lx: invalidated\n", pfn); @@ -2686,7 +2697,7 @@ static int soft_offline_in_use_page(struct page *page) return 0; } - if (isolate_page(hpage, &pagelist)) { + if (mf_isolate_folio(folio, &pagelist)) { ret = migrate_pages(&pagelist, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE, NULL); if (!ret) { |