From 611b9fd80fb53e79079744fb29d8c1363f4b5c02 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Mon, 13 Mar 2023 13:39:29 +0800
Subject: mm: memory-failure: directly use IS_ENABLED(CONFIG_HWPOISON_INJECT)

It is clearer and simpler to use IS_ENABLED(CONFIG_HWPOISON_INJECT) to
check whether the HWPoison injector is enabled than to test both
CONFIG_HWPOISON_INJECT and CONFIG_HWPOISON_INJECT_MODULE.

Link: https://lkml.kernel.org/r/20230313053929.84607-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Reviewed-by: David Hildenbrand
Acked-by: Naoya Horiguchi
Cc: Miaohe Lin
Signed-off-by: Andrew Morton
---
 mm/memory-failure.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/memory-failure.c')

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fae9baf3be16..f761704d27d7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -168,7 +168,7 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo
 	return true;
 }
 
-#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
+#if IS_ENABLED(CONFIG_HWPOISON_INJECT)
 
 u32 hwpoison_filter_enable = 0;
 u32 hwpoison_filter_dev_major = ~0U;
-- cgit v1.2.3

From 4f775086a6eee07c6ae4be4734d736e13b537351 Mon Sep 17 00:00:00 2001
From: Longlong Xia
Date: Fri, 14 Apr 2023 10:17:40 +0800
Subject: mm: memory-failure: refactor add_to_kill()

Patch series "mm: ksm: support hwpoison for ksm page", v2.

Currently, ksm does not support hwpoison. As ksm is used more and more
widely for deduplication at the system, container, and process level,
hwpoison support for ksm has become increasingly important. However,
ksm pages have been left out of hwpoison handling ever since 2009 [1].

The main steps of the implementation:

1. Refactor add_to_kill() and add new add_to_kill_*() helpers to better
   accommodate the handling of different types of pages.
2. Add collect_procs_ksm() to collect processes when the error hits a
   ksm page.
3. Add task_in_to_kill_list() to avoid adding the same tsk to the
   to_kill list twice.
4. Unmap the ksm page via try_to_unmap() (already supported).
5. Handle the affected processes, e.g. by sending SIGBUS.

Tested by poisoning a ksm page from 1) different processes and 2) one
process, with and without memory_failure_early_kill set; the processes
are killed as expected with this patchset.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=01e00f880ca700376e1845cf7a2524ebe68e47d6

This patch (of 2):

add_to_kill() uses page_address_in_vma() to find the user virtual
address of a page, but that does not work for ksm pages because a ksm
page's page->index is not usable for the lookup. Add a ksm_addr
parameter so the caller can pass the address in, rename the function to
__add_to_kill(), and add add_to_kill_anon_file() for handling anonymous
and file pages and add_to_kill_fsdax() for handling fsdax pages.
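In condensed form, the helper split introduced here looks as follows (a
simplified sketch with the fsdax/zone-device address handling and the
size_shift computation elided; all names are taken from the diff below,
which has the real thing):

/*
 * Core helper. A non-zero ksm_addr overrides page_address_in_vma(),
 * which cannot resolve the user address of a ksm page.
 */
static void __add_to_kill(struct task_struct *tsk, struct page *p,
			  struct vm_area_struct *vma,
			  struct list_head *to_kill,
			  unsigned long ksm_addr, pgoff_t fsdax_pgoff)
{
	struct to_kill *tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);

	if (!tk)
		return;
	tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
	/* ... fsdax/zone-device handling and tk->size_shift ... */
	get_task_struct(tsk);
	tk->tsk = tsk;
	list_add_tail(&tk->nd, to_kill);
}

/* Anonymous and file pages: no address override, no fsdax offset. */
static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
				  struct vm_area_struct *vma,
				  struct list_head *to_kill)
{
	__add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
}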
Link: https://lkml.kernel.org/r/20230414021741.2597273-1-xialonglong1@huawei.com
Link: https://lkml.kernel.org/r/20230414021741.2597273-2-xialonglong1@huawei.com
Signed-off-by: Longlong Xia
Tested-by: Naoya Horiguchi
Reviewed-by: Kefeng Wang
Cc: Kefeng Wang
Cc: Miaohe Lin
Cc: Nanyong Sun
Signed-off-by: Andrew Morton
---
 mm/memory-failure.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'mm/memory-failure.c')

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f761704d27d7..bb09999108ef 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -405,9 +405,9 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
  * page->mapping are sufficient for mapping the page back to its
  * corresponding user virtual address.
  */
-static void add_to_kill(struct task_struct *tsk, struct page *p,
-			pgoff_t fsdax_pgoff, struct vm_area_struct *vma,
-			struct list_head *to_kill)
+static void __add_to_kill(struct task_struct *tsk, struct page *p,
+			  struct vm_area_struct *vma, struct list_head *to_kill,
+			  unsigned long ksm_addr, pgoff_t fsdax_pgoff)
 {
 	struct to_kill *tk;
 
@@ -417,7 +417,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 		return;
 	}
 
-	tk->addr = page_address_in_vma(p, vma);
+	tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
 	if (is_zone_device_page(p)) {
 		if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
 			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
@@ -448,6 +448,13 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 	list_add_tail(&tk->nd, to_kill);
 }
 
+static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
+				  struct vm_area_struct *vma,
+				  struct list_head *to_kill)
+{
+	__add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
+}
+
 /*
  * Kill the processes that have been collected earlier.
  *
@@ -573,7 +580,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 				continue;
 			if (!page_mapped_in_vma(page, vma))
 				continue;
-			add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma, to_kill);
+			add_to_kill_anon_file(t, page, vma, to_kill);
 		}
 	}
 	read_unlock(&tasklist_lock);
@@ -609,8 +616,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 			 * to be informed of all such data corruptions.
 			 */
 			if (vma->vm_mm == t->mm)
-				add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma,
-						to_kill);
+				add_to_kill_anon_file(t, page, vma, to_kill);
 		}
 	}
 	read_unlock(&tasklist_lock);
@@ -618,6 +624,13 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 }
 
 #ifdef CONFIG_FS_DAX
+static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p,
+			      struct vm_area_struct *vma,
+			      struct list_head *to_kill, pgoff_t pgoff)
+{
+	__add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
+}
+
 /*
  * Collect processes when the error hit a fsdax page.
  */
@@ -637,7 +650,7 @@ static void collect_procs_fsdax(struct page *page,
 			continue;
 		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 			if (vma->vm_mm == t->mm)
-				add_to_kill(t, page, pgoff, vma, to_kill);
+				add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
 		}
 	}
 	read_unlock(&tasklist_lock);
-- cgit v1.2.3

From 4248d0083ec5817eebfb916c54950d100b3468ee Mon Sep 17 00:00:00 2001
From: Longlong Xia
Date: Fri, 14 Apr 2023 10:17:41 +0800
Subject: mm: ksm: support hwpoison for ksm page

Update hwpoison_user_mappings() to support ksm pages, and add
collect_procs_ksm() to collect processes when the error hits a ksm
page. The difference from collect_procs_anon() is that it also needs to
traverse the rmap-item list on the stable node of the ksm page.

add_to_kill_ksm() is added to handle ksm pages, and
task_in_to_kill_list() is added to avoid adding a tsk to the to_kill
list twice: while the rmap-item list is scanned, the same process may be
encountered repeatedly if several of the pages merged into the ksm page
came from that process.
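For reference, a trimmed sketch of the ksm structures this walk relies
on (field set reduced to what collect_procs_ksm() below actually
touches; see mm/ksm.c for the full definitions):

/* One stable-tree node, representing the shared, write-protected ksm page. */
struct ksm_stable_node {
	struct hlist_head hlist;	/* rmap_items of all pages merged here */
	/* ... tree linkage, kpfn, ... */
};

/* One merged mapping, i.e. one place a duplicate page used to live. */
struct ksm_rmap_item {
	struct hlist_node hlist;	/* link on the stable node's list */
	struct anon_vma *anon_vma;	/* rmap root of the original mapping */
	unsigned long address;		/* user address (low bits hold flags) */
	/* ... mm, scan-list linkage, ... */
};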
Link: https://lkml.kernel.org/r/20230414021741.2597273-3-xialonglong1@huawei.com
Signed-off-by: Longlong Xia
Tested-by: Naoya Horiguchi
Reviewed-by: Kefeng Wang
Cc: Miaohe Lin
Cc: Nanyong Sun
Signed-off-by: Andrew Morton
---
 include/linux/ksm.h | 11 +++++++++++
 include/linux/mm.h  |  7 +++++++
 mm/ksm.c            | 45 +++++++++++++++++++++++++++++++++++++++++++++
 mm/memory-failure.c | 34 +++++++++++++++++++++++++---------
 4 files changed, 88 insertions(+), 9 deletions(-)

(limited to 'mm/memory-failure.c')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 7e232ba59b86..d9e701326a88 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -51,6 +51,10 @@ struct page *ksm_might_need_to_copy(struct page *page,
 void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
 void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
+#ifdef CONFIG_MEMORY_FAILURE
+void collect_procs_ksm(struct page *page, struct list_head *to_kill,
+		       int force_early);
+#endif
 
 #else  /* !CONFIG_KSM */
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
@@ -62,6 +66,13 @@ static inline void ksm_exit(struct mm_struct *mm)
 {
 }
 
+#ifdef CONFIG_MEMORY_FAILURE
+static inline void collect_procs_ksm(struct page *page,
+				     struct list_head *to_kill, int force_early)
+{
+}
+#endif
+
 #ifdef CONFIG_MMU
 static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2772a1800cf3..37554b08bb28 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3604,6 +3604,7 @@ extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared);
 void num_poisoned_pages_inc(unsigned long pfn);
 void num_poisoned_pages_sub(unsigned long pfn, long i);
+struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
 #else
 static inline void memory_failure_queue(unsigned long pfn, int flags)
 {
@@ -3624,6 +3625,12 @@ static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
 }
 #endif
 
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_KSM)
+void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
+		     struct vm_area_struct *vma, struct list_head *to_kill,
+		     unsigned long ksm_addr);
+#endif
+
 #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
 extern void memblk_nr_poison_inc(unsigned long pfn);
 extern void memblk_nr_poison_sub(unsigned long pfn, long i);
diff --git a/mm/ksm.c b/mm/ksm.c
index 290a3eb6d8de..37c63310bc4e 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2738,6 +2738,51 @@ again:
 	goto again;
 }
 
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Collect processes when the error hit an ksm page.
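+ * The walk differs from collect_procs_anon(): every rmap_item on the
+ * page's stable node is traversed, since each rmap_item records the
+ * anon_vma and user address of one mapping merged into this page.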
+ */
+void collect_procs_ksm(struct page *page, struct list_head *to_kill,
+		       int force_early)
+{
+	struct ksm_stable_node *stable_node;
+	struct ksm_rmap_item *rmap_item;
+	struct folio *folio = page_folio(page);
+	struct vm_area_struct *vma;
+	struct task_struct *tsk;
+
+	stable_node = folio_stable_node(folio);
+	if (!stable_node)
+		return;
+	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
+		struct anon_vma *av = rmap_item->anon_vma;
+
+		anon_vma_lock_read(av);
+		read_lock(&tasklist_lock);
+		for_each_process(tsk) {
+			struct anon_vma_chain *vmac;
+			unsigned long addr;
+			struct task_struct *t =
+				task_early_kill(tsk, force_early);
+			if (!t)
+				continue;
+			anon_vma_interval_tree_foreach(vmac, &av->rb_root, 0,
+						       ULONG_MAX)
+			{
+				vma = vmac->vma;
+				if (vma->vm_mm == t->mm) {
+					addr = rmap_item->address & PAGE_MASK;
+					add_to_kill_ksm(t, page, vma, to_kill,
+							addr);
+				}
+			}
+		}
+		read_unlock(&tasklist_lock);
+		anon_vma_unlock_read(av);
+	}
+}
+#endif
+
 #ifdef CONFIG_MIGRATION
 void folio_migrate_ksm(struct folio *newfolio, struct folio *folio)
 {
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index bb09999108ef..f69c410fb9f9 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -455,6 +455,27 @@ static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
 	__add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
 }
 
+#ifdef CONFIG_KSM
+static bool task_in_to_kill_list(struct list_head *to_kill,
+				 struct task_struct *tsk)
+{
+	struct to_kill *tk, *next;
+
+	list_for_each_entry_safe(tk, next, to_kill, nd) {
+		if (tk->tsk == tsk)
+			return true;
+	}
+
+	return false;
+}
+void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
+		     struct vm_area_struct *vma, struct list_head *to_kill,
+		     unsigned long ksm_addr)
+{
+	if (!task_in_to_kill_list(to_kill, tsk))
+		__add_to_kill(tsk, p, vma, to_kill, ksm_addr, FSDAX_INVALID_PGOFF);
+}
+#endif
 /*
  * Kill the processes that have been collected earlier.
  *
@@ -534,8 +555,7 @@ static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
  * processes sharing the same error page,if the process is "early kill", the
  * task_struct of the dedicated thread will also be returned.
  */
-static struct task_struct *task_early_kill(struct task_struct *tsk,
-					   int force_early)
+struct task_struct *task_early_kill(struct task_struct *tsk, int force_early)
 {
 	if (!tsk->mm)
 		return NULL;
@@ -666,8 +686,9 @@ static void collect_procs(struct page *page, struct list_head *tokill,
 {
 	if (!page->mapping)
 		return;
-
-	if (PageAnon(page))
+	if (unlikely(PageKsm(page)))
+		collect_procs_ksm(page, tokill, force_early);
+	else if (PageAnon(page))
 		collect_procs_anon(page, tokill, force_early);
 	else
 		collect_procs_file(page, tokill, force_early);
@@ -1522,11 +1543,6 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	if (!page_mapped(hpage))
 		return true;
 
-	if (PageKsm(p)) {
-		pr_err("%#lx: can't handle KSM pages.\n", pfn);
-		return false;
-	}
-
 	if (PageSwapCache(p)) {
 		pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
 		ttu &= ~TTU_HWPOISON;
-- cgit v1.2.3
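As a closing usage note: the test scenario the series describes
(poisoning a ksm page, with and without memory_failure_early_kill) can
be approximated from userspace along the following lines. This is a
hypothetical sketch, not part of the series; it assumes a kernel with
CONFIG_KSM and CONFIG_MEMORY_FAILURE plus this patchset, ksmd enabled
(echo 1 > /sys/kernel/mm/ksm/run), and CAP_SYS_ADMIN for MADV_HWPOISON.

/* ksm-hwpoison.c - hypothetical reproducer; gcc -O2 -o ksm-hwpoison ksm-hwpoison.c */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	size_t len = 2 * psz;
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Two pages with identical contents: merge candidates for ksmd. */
	memset(buf, 0x5a, len);
	if (madvise(buf, len, MADV_MERGEABLE)) {
		perror("MADV_MERGEABLE");
		return 1;
	}

	/*
	 * Crude: give ksmd time to scan and merge. A real test would poll
	 * /sys/kernel/mm/ksm/pages_shared instead.
	 */
	sleep(10);

	/*
	 * Software-inject poison into the first (now merged) page; needs
	 * CAP_SYS_ADMIN. Without this series, recovery fails with
	 * "can't handle KSM pages"; with it, the mapping processes are
	 * collected via collect_procs_ksm() and killed like anon users.
	 */
	if (madvise(buf, psz, MADV_HWPOISON))
		perror("MADV_HWPOISON");

	/* Touching the poisoned page should now raise SIGBUS. */
	buf[0] = 0;
	printf("survived access to poisoned page?\n");
	return 0;
}

With /proc/sys/vm/memory_failure_early_kill set to 1 (or the process
opting in via prctl(PR_MCE_KILL)), the SIGBUS should arrive at injection
time for every process mapping the merged page; otherwise each process
gets it on its next access to the poisoned page.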