From c310e06cc4e44b4ec4bc02f0494a7f55cc36c1be Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Thu, 28 Apr 2022 23:16:04 -0700 Subject: fs/proc/task_mmu.c: remove redundant page validation of pte_page pte_page() always returns a valid page, so remove the redundant page validation, as we did in many other places. Link: https://lkml.kernel.org/r/20220316025947.328276-1-xianting.tian@linux.alibaba.com Signed-off-by: Xianting Tian Reviewed-by: Chaitanya Kulkarni Cc: Alexey Dobriyan Cc: Yang Shi Cc: Sasha Levin Cc: Miaohe Lin Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f46060eb91b5..a843d13e2e1a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1873,8 +1873,6 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, return 0; page = pte_page(huge_pte); - if (!page) - return 0; md = walk->private; gather_stats(page, md, pte_dirty(huge_pte), 1); -- cgit v1.2.3 From 7609385337a4feb6236e42dcd0df2185683ce839 Mon Sep 17 00:00:00 2001 From: xu xin Date: Thu, 28 Apr 2022 23:16:16 -0700 Subject: ksm: count ksm merging pages for each process Some applications or containers want to use KSM by calling madvise() to advise areas of address space to be MERGEABLE. But they may not know which applications are more likely to cause real merges in the deployment. If this patch is applied, it helps them know their corresponding number of merged pages, and then optimize their app code. As current KSM only counts the number of KSM merging pages(e.g. ksm_pages_sharing and ksm_pages_shared) of the whole system, we cannot see the more fine-grained KSM merging, for the upper application optimization, the merging area cannot be set easily according to the KSM page merging probability of each process. Therefore, it is necessary to add extra statistical means so that the upper level users can know the detailed KSM merging information of each process. We add a new proc file named as ksm_merging_pages under /proc// to indicate the involved ksm merging pages of this process. [akpm@linux-foundation.org: fix comment typo, remove BUG_ON()s] Link: https://lkml.kernel.org/r/20220325082318.2352853-1-xu.xin16@zte.com.cn Signed-off-by: xu xin Reported-by: kernel test robot Reviewed-by: Yang Yang Reviewed-by: Ran Xiaokai Reported-by: Zeal Robot Cc: Kees Cook Cc: Alexey Dobriyan Cc: Stephen Rothwell Cc: Ohhoon Kwon Cc: Matthew Wilcox (Oracle) Cc: Stephen Brennan Cc: Vlastimil Babka Cc: Feng Tang Cc: Yang Yang Cc: Ran Xiaokai Cc: Zeal Robot Signed-off-by: Andrew Morton --- fs/proc/base.c | 22 ++++++++++++++++++++++ include/linux/mm_types.h | 7 +++++++ mm/ksm.c | 8 ++++++++ 3 files changed, 37 insertions(+) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index c1031843cc6a..8dfa36a99c74 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3154,6 +3154,22 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns, } #endif /* CONFIG_LIVEPATCH */ +#ifdef CONFIG_KSM +static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + struct mm_struct *mm; + + mm = get_task_mm(task); + if (mm) { + seq_printf(m, "%lu\n", mm->ksm_merging_pages); + mmput(mm); + } + + return 0; +} +#endif /* CONFIG_KSM */ + #ifdef CONFIG_STACKLEAK_METRICS static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) @@ -3285,6 +3301,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif +#ifdef CONFIG_KSM + ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3618,6 +3637,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif +#ifdef CONFIG_KSM + ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8834e38c06a4..87eddd509de2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -654,6 +654,13 @@ struct mm_struct { #ifdef CONFIG_IOMMU_SVA u32 pasid; +#endif +#ifdef CONFIG_KSM + /* + * Represent how many pages of this process are involved in KSM + * merging. + */ + unsigned long ksm_merging_pages; #endif } __randomize_layout; diff --git a/mm/ksm.c b/mm/ksm.c index 063a48eeb5ee..94bb0f049806 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -638,6 +638,9 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node) ksm_pages_sharing--; else ksm_pages_shared--; + + rmap_item->mm->ksm_merging_pages--; + VM_BUG_ON(stable_node->rmap_hlist_len <= 0); stable_node->rmap_hlist_len--; put_anon_vma(rmap_item->anon_vma); @@ -785,6 +788,9 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) ksm_pages_sharing--; else ksm_pages_shared--; + + rmap_item->mm->ksm_merging_pages--; + VM_BUG_ON(stable_node->rmap_hlist_len <= 0); stable_node->rmap_hlist_len--; @@ -2007,6 +2013,8 @@ static void stable_tree_append(struct rmap_item *rmap_item, ksm_pages_sharing++; else ksm_pages_shared++; + + rmap_item->mm->ksm_merging_pages++; } /* -- cgit v1.2.3 From 8e165e733bfa06edbcdbe491ef13b2bf1a3fa883 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 12 May 2022 20:22:55 -0700 Subject: mm/pagemap: recognize uffd-wp bit for shmem/hugetlbfs This requires the pagemap code to be able to recognize the newly introduced swap special pte for uffd-wp, meanwhile the general case for hugetlb that we recently start to support. It should make pagemap uffd-wp support complete. Link: https://lkml.kernel.org/r/20220405014923.15047-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alistair Popple Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jerome Glisse Cc: "Kirill A . Shutemov" Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/proc') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a843d13e2e1a..2d04e3470d4c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1421,6 +1421,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, migration = is_migration_entry(entry); if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); + if (pte_marker_entry_uffd_wp(entry)) + flags |= PM_UFFD_WP; } if (page && !PageAnon(page)) @@ -1556,10 +1558,15 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, if (page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; + if (huge_pte_uffd_wp(pte)) + flags |= PM_UFFD_WP; + flags |= PM_PRESENT; if (pm->show_pfn) frame = pte_pfn(pte) + ((addr & ~hmask) >> PAGE_SHIFT); + } else if (pte_swp_uffd_wp_any(pte)) { + flags |= PM_UFFD_WP; } for (; addr != end; addr += PAGE_SIZE) { -- cgit v1.2.3 From f6498b776d280b30a4614d8261840961e993c2c8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 19 May 2022 14:08:53 -0700 Subject: mm: zswap: add basic meminfo and vmstat coverage Currently it requires poking at debugfs to figure out the size and population of the zswap cache on a host. There are no counters for reads and writes against the cache. As a result, it's difficult to understand zswap behavior on production systems. Print zswap memory consumption and how many pages are zswapped out in /proc/meminfo. Count zswapouts and zswapins in /proc/vmstat. Link: https://lkml.kernel.org/r/20220510152847.230957-6-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: David Hildenbrand Cc: Dan Streetman Cc: Michal Hocko Cc: Minchan Kim Cc: Roman Gushchin Cc: Seth Jennings Cc: Shakeel Butt Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 6 ++++++ fs/proc/meminfo.c | 7 +++++++ include/linux/swap.h | 5 +++++ include/linux/vm_event_item.h | 4 ++++ mm/vmstat.c | 4 ++++ mm/zswap.c | 13 ++++++------- 6 files changed, 32 insertions(+), 7 deletions(-) (limited to 'fs/proc') diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 5e9791457876..cfec3cab0f9a 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -964,6 +964,8 @@ Example output. You may not have all of these fields. Mlocked: 0 kB SwapTotal: 0 kB SwapFree: 0 kB + Zswap: 1904 kB + Zswapped: 7792 kB Dirty: 12 kB Writeback: 0 kB AnonPages: 4654780 kB @@ -1055,6 +1057,10 @@ SwapTotal SwapFree Memory which has been evicted from RAM, and is temporarily on the disk +Zswap + Memory consumed by the zswap backend (compressed size) +Zswapped + Amount of anonymous memory stored in zswap (original size) Dirty Memory which is waiting to get written back to the disk Writeback diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 6fa761c9cc78..6e89f0e2fd20 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -86,6 +86,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "SwapTotal: ", i.totalswap); show_val_kb(m, "SwapFree: ", i.freeswap); +#ifdef CONFIG_ZSWAP + seq_printf(m, "Zswap: %8lu kB\n", + (unsigned long)(zswap_pool_total_size >> 10)); + seq_printf(m, "Zswapped: %8lu kB\n", + (unsigned long)atomic_read(&zswap_stored_pages) << + (PAGE_SHIFT - 10)); +#endif show_val_kb(m, "Dirty: ", global_node_page_state(NR_FILE_DIRTY)); show_val_kb(m, "Writeback: ", diff --git a/include/linux/swap.h b/include/linux/swap.h index 04c0713c1d6e..f3ae17b43f20 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -620,6 +620,11 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) } #endif +#ifdef CONFIG_ZSWAP +extern u64 zswap_pool_total_size; +extern atomic_t zswap_stored_pages; +#endif + #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) extern void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index e83967e4c20e..404024486fa5 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -136,6 +136,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_KSM COW_KSM, #endif +#ifdef CONFIG_ZSWAP + ZSWPIN, + ZSWPOUT, +#endif #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, diff --git a/mm/vmstat.c b/mm/vmstat.c index b94a2e4723ff..da525bfb6f4a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1396,6 +1396,10 @@ const char * const vmstat_text[] = { #ifdef CONFIG_KSM "cow_ksm", #endif +#ifdef CONFIG_ZSWAP + "zswpin", + "zswpout", +#endif #ifdef CONFIG_X86 "direct_map_level2_splits", "direct_map_level3_splits", diff --git a/mm/zswap.c b/mm/zswap.c index 2c5db4cbedea..e3c16a70f533 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -42,9 +42,9 @@ * statistics **********************************/ /* Total bytes used by the compressed storage */ -static u64 zswap_pool_total_size; +u64 zswap_pool_total_size; /* The number of compressed pages currently stored in zswap */ -static atomic_t zswap_stored_pages = ATOMIC_INIT(0); +atomic_t zswap_stored_pages = ATOMIC_INIT(0); /* The number of same-value filled pages currently stored in zswap */ static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0); @@ -1243,6 +1243,7 @@ insert_entry: /* update stats */ atomic_inc(&zswap_stored_pages); zswap_update_total_size(); + count_vm_event(ZSWPOUT); return 0; @@ -1285,11 +1286,10 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, zswap_fill_page(dst, entry->value); kunmap_atomic(dst); ret = 0; - goto freeentry; + goto stats; } if (!zpool_can_sleep_mapped(entry->pool->zpool)) { - tmp = kmalloc(entry->length, GFP_ATOMIC); if (!tmp) { ret = -ENOMEM; @@ -1304,10 +1304,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, src += sizeof(struct zswap_header); if (!zpool_can_sleep_mapped(entry->pool->zpool)) { - memcpy(tmp, src, entry->length); src = tmp; - zpool_unmap_handle(entry->pool->zpool, entry->handle); } @@ -1326,7 +1324,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, kfree(tmp); BUG_ON(ret); - +stats: + count_vm_event(ZSWPIN); freeentry: spin_lock(&tree->lock); zswap_entry_put(tree, entry); -- cgit v1.2.3