From 21c690a349baab895dc68ab70d291e1598d7109d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:28 -0700 Subject: mm: introduce slabobj_ext to support slab object extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently slab pages can store only vectors of obj_cgroup pointers in page->memcg_data. Introduce slabobj_ext structure to allow more data to be stored for each slab object. Wrap obj_cgroup into slabobj_ext to support current functionality while allowing to extend slabobj_ext in the future. Link: https://lkml.kernel.org/r/20240321163705.3067592-7-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Pasha Tatashin Reviewed-by: Vlastimil Babka Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Kent Overstreet Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Tejun Heo Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- mm/slab.h | 52 +++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 25 deletions(-) (limited to 'mm/slab.h') diff --git a/mm/slab.h b/mm/slab.h index d2bc9b191222..1c16dc8344fa 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -87,8 +87,8 @@ struct slab { unsigned int __unused; atomic_t __page_refcount; -#ifdef CONFIG_MEMCG - unsigned long memcg_data; +#ifdef CONFIG_SLAB_OBJ_EXT + unsigned long obj_exts; #endif }; @@ -97,8 +97,8 @@ struct slab { SLAB_MATCH(flags, __page_flags); SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */ SLAB_MATCH(_refcount, __page_refcount); -#ifdef CONFIG_MEMCG -SLAB_MATCH(memcg_data, memcg_data); +#ifdef CONFIG_SLAB_OBJ_EXT +SLAB_MATCH(memcg_data, obj_exts); #endif #undef SLAB_MATCH static_assert(sizeof(struct slab) <= sizeof(struct page)); @@ -536,42 +536,44 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla return false; } -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_SLAB_OBJ_EXT + /* - * slab_objcgs - get the object cgroups vector associated with a slab + * slab_obj_exts - get the pointer to the slab object extension vector + * associated with a slab. * @slab: a pointer to the slab struct * - * Returns a pointer to the object cgroups vector associated with the slab, + * Returns a pointer to the object extension vector associated with the slab, * or NULL if no such vector has been associated yet. */ -static inline struct obj_cgroup **slab_objcgs(struct slab *slab) +static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) { - unsigned long memcg_data = READ_ONCE(slab->memcg_data); + unsigned long obj_exts = READ_ONCE(slab->obj_exts); - VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), +#ifdef CONFIG_MEMCG + VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS), slab_page(slab)); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab)); + VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab)); - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct slabobj_ext *)(obj_exts & ~MEMCG_DATA_FLAGS_MASK); +#else + return (struct slabobj_ext *)obj_exts; +#endif } -int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, - gfp_t gfp, bool new_slab); -void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, - enum node_stat_item idx, int nr); -#else /* CONFIG_MEMCG_KMEM */ -static inline struct obj_cgroup **slab_objcgs(struct slab *slab) +#else /* CONFIG_SLAB_OBJ_EXT */ + +static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) { return NULL; } -static inline int memcg_alloc_slab_cgroups(struct slab *slab, - struct kmem_cache *s, gfp_t gfp, - bool new_slab) -{ - return 0; -} -#endif /* CONFIG_MEMCG_KMEM */ +#endif /* CONFIG_SLAB_OBJ_EXT */ + +#ifdef CONFIG_MEMCG_KMEM +void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, + enum node_stat_item idx, int nr); +#endif size_t __ksize(const void *objp); -- cgit v1.2.3 From 53ce720359b8f4b531c5b8bd7fa06d74bca45f0a Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:31 -0700 Subject: slab: objext: introduce objext_flags as extension to page_memcg_data_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce objext_flags to store additional objext flags unrelated to memcg. Link: https://lkml.kernel.org/r/20240321163705.3067592-10-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Kees Cook Reviewed-by: Pasha Tatashin Reviewed-by: Vlastimil Babka Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Kent Overstreet Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Tejun Heo Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 29 ++++++++++++++++++++++------- mm/slab.h | 5 +---- 2 files changed, 23 insertions(+), 11 deletions(-) (limited to 'mm/slab.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 99f423742324..12afc2647cf0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -357,7 +357,22 @@ enum page_memcg_data_flags { __NR_MEMCG_DATA_FLAGS = (1UL << 2), }; -#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) +#define __FIRST_OBJEXT_FLAG __NR_MEMCG_DATA_FLAGS + +#else /* CONFIG_MEMCG */ + +#define __FIRST_OBJEXT_FLAG (1UL << 0) + +#endif /* CONFIG_MEMCG */ + +enum objext_flags { + /* the next bit after the last actual flag */ + __NR_OBJEXTS_FLAGS = __FIRST_OBJEXT_FLAG, +}; + +#define OBJEXTS_FLAGS_MASK (__NR_OBJEXTS_FLAGS - 1) + +#ifdef CONFIG_MEMCG static inline bool folio_memcg_kmem(struct folio *folio); @@ -391,7 +406,7 @@ static inline struct mem_cgroup *__folio_memcg(struct folio *folio) VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio); VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio); - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } /* @@ -412,7 +427,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio); VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio); - return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct obj_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } /* @@ -469,11 +484,11 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; - objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK); return obj_cgroup_memcg(objcg); } - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } /* @@ -512,11 +527,11 @@ static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; - objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK); return obj_cgroup_memcg(objcg); } - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } static inline struct mem_cgroup *page_memcg_check(struct page *page) diff --git a/mm/slab.h b/mm/slab.h index 1c16dc8344fa..65db525e93af 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -554,11 +554,8 @@ static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS), slab_page(slab)); VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab)); - - return (struct slabobj_ext *)(obj_exts & ~MEMCG_DATA_FLAGS_MASK); -#else - return (struct slabobj_ext *)obj_exts; #endif + return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK); } #else /* CONFIG_SLAB_OBJ_EXT */ -- cgit v1.2.3 From 46df8e73a4a3f1445f2a8429111e72ede1f4d291 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:45 +0000 Subject: mm: free up PG_slab Reclaim the Slab page flag by using a spare bit in PageType. We are perennially short of page flags for various purposes, and now that the original SLAB allocator has been retired, SLUB does not use the mapcount/page_type field. This lets us remove a number of special cases for ignoring mapcount on Slab pages. [willy@infradead.org: update vmcoreinfo] Link: https://lkml.kernel.org/r/ZgGV-O8WYQ_83kxp@casper.infradead.org Link: https://lkml.kernel.org/r/20240321142448.1645400-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 21 +++++++++++++++++---- include/trace/events/mmflags.h | 2 +- kernel/vmcore_info.c | 3 ++- mm/memory-failure.c | 9 --------- mm/slab.h | 2 +- 5 files changed, 21 insertions(+), 16 deletions(-) (limited to 'mm/slab.h') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 94eb8a11a321..73e0b17c7728 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -109,7 +109,6 @@ enum pageflags { PG_active, PG_workingset, PG_error, - PG_slab, PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ PG_arch_1, PG_reserved, @@ -524,7 +523,6 @@ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) TESTCLEARFLAG(Active, active, PF_HEAD) PAGEFLAG(Workingset, workingset, PF_HEAD) TESTCLEARFLAG(Workingset, workingset, PF_HEAD) -__PAGEFLAG(Slab, slab, PF_NO_TAIL) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ /* Xen */ @@ -931,7 +929,7 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* - * For pages that are never mapped to userspace (and aren't PageSlab), + * For pages that are never mapped to userspace, * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and @@ -947,6 +945,7 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #define PG_table 0x00000200 #define PG_guard 0x00000400 #define PG_hugetlb 0x00000800 +#define PG_slab 0x00001000 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -1041,6 +1040,20 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) +FOLIO_TYPE_OPS(slab, slab) + +/** + * PageSlab - Determine if the page belongs to the slab allocator + * @page: The page to test. + * + * Context: Any context. + * Return: True for slab pages, false for any other kind of page. + */ +static inline bool PageSlab(const struct page *page) +{ + return folio_test_slab(page_folio(page)); +} + #ifdef CONFIG_HUGETLB_PAGE FOLIO_TYPE_OPS(hugetlb, hugetlb) #else @@ -1121,7 +1134,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) (1UL << PG_lru | 1UL << PG_locked | \ 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ - 1UL << PG_slab | 1UL << PG_active | \ + 1UL << PG_active | \ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index d55e53ac91bd..e46d6e82765e 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -107,7 +107,6 @@ DEF_PAGEFLAG_NAME(lru), \ DEF_PAGEFLAG_NAME(active), \ DEF_PAGEFLAG_NAME(workingset), \ - DEF_PAGEFLAG_NAME(slab), \ DEF_PAGEFLAG_NAME(owner_priv_1), \ DEF_PAGEFLAG_NAME(arch_1), \ DEF_PAGEFLAG_NAME(reserved), \ @@ -135,6 +134,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } #define __def_pagetype_names \ + DEF_PAGETYPE_NAME(slab), \ DEF_PAGETYPE_NAME(hugetlb), \ DEF_PAGETYPE_NAME(offline), \ DEF_PAGETYPE_NAME(guard), \ diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index 23c125c2e243..1d5eadd9dd61 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -198,7 +198,8 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_private); VMCOREINFO_NUMBER(PG_swapcache); VMCOREINFO_NUMBER(PG_swapbacked); - VMCOREINFO_NUMBER(PG_slab); +#define PAGE_SLAB_MAPCOUNT_VALUE (~PG_slab) + VMCOREINFO_NUMBER(PAGE_SLAB_MAPCOUNT_VALUE); #ifdef CONFIG_MEMORY_FAILURE VMCOREINFO_NUMBER(PG_hwpoison); #endif diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 9e62a00b46dd..0a7a8a4ba421 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1251,7 +1251,6 @@ static int me_huge_page(struct page_state *ps, struct page *p) #define mlock (1UL << PG_mlocked) #define lru (1UL << PG_lru) #define head (1UL << PG_head) -#define slab (1UL << PG_slab) #define reserved (1UL << PG_reserved) static struct page_state error_states[] = { @@ -1261,13 +1260,6 @@ static struct page_state error_states[] = { * PG_buddy pages only make a small fraction of all free pages. */ - /* - * Could in theory check if slab page is free or if we can drop - * currently unused objects without touching them. But just - * treat it as standard kernel for now. - */ - { slab, slab, MF_MSG_SLAB, me_kernel }, - { head, head, MF_MSG_HUGE, me_huge_page }, { sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty }, @@ -1294,7 +1286,6 @@ static struct page_state error_states[] = { #undef mlock #undef lru #undef head -#undef slab #undef reserved static void update_per_node_mf_stats(unsigned long pfn, diff --git a/mm/slab.h b/mm/slab.h index 65db525e93af..1343bfa12cee 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -84,8 +84,8 @@ struct slab { }; struct rcu_head rcu_head; }; - unsigned int __unused; + unsigned int __page_type; atomic_t __page_refcount; #ifdef CONFIG_SLAB_OBJ_EXT unsigned long obj_exts; -- cgit v1.2.3 From e6100a4590bfc0a91c8e882f73538c9c0d987a46 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 26 Mar 2024 11:37:39 +0100 Subject: mm, slab: move slab_memcg hooks to mm/memcontrol.c The hooks make multiple calls to functions in mm/memcontrol.c, including to th current_obj_cgroup() marked __always_inline. It might be faster to make a single call to the hook in mm/memcontrol.c instead. The hooks also don't use almost anything from mm/slub.c. obj_full_size() can move with the hooks and cache_vmstat_idx() to the internal mm/slab.h Link: https://lkml.kernel.org/r/20240326-slab-memcg-v3-2-d85d2563287a@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: Roman Gushchin Cc: Al Viro Cc: Chengming Zhou Cc: Christian Brauner Cc: Christoph Lameter Cc: Chuck Lever Cc: David Rientjes Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Jan Kara Cc: Jeff Layton Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Michal Hocko Cc: Muchun Song Cc: Pekka Enberg Cc: Shakeel Butt Signed-off-by: Andrew Morton --- mm/memcontrol.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++ mm/slab.h | 13 +++++++ mm/slub.c | 103 ++------------------------------------------------------ 3 files changed, 105 insertions(+), 101 deletions(-) (limited to 'mm/slab.h') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a2aa2ff6cf77..29fa5b17a1ef 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3556,6 +3556,96 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) refill_obj_stock(objcg, size, true); } +static inline size_t obj_full_size(struct kmem_cache *s) +{ + /* + * For each accounted object there is an extra space which is used + * to store obj_cgroup membership. Charge it too. + */ + return s->size + sizeof(struct obj_cgroup *); +} + +bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, + gfp_t flags, size_t size, void **p) +{ + struct obj_cgroup *objcg; + struct slab *slab; + unsigned long off; + size_t i; + + /* + * The obtained objcg pointer is safe to use within the current scope, + * defined by current task or set_active_memcg() pair. + * obj_cgroup_get() is used to get a permanent reference. + */ + objcg = current_obj_cgroup(); + if (!objcg) + return true; + + /* + * slab_alloc_node() avoids the NULL check, so we might be called with a + * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill + * the whole requested size. + * return success as there's nothing to free back + */ + if (unlikely(*p == NULL)) + return true; + + flags &= gfp_allowed_mask; + + if (lru) { + int ret; + struct mem_cgroup *memcg; + + memcg = get_mem_cgroup_from_objcg(objcg); + ret = memcg_list_lru_alloc(memcg, lru, flags); + css_put(&memcg->css); + + if (ret) + return false; + } + + if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s))) + return false; + + for (i = 0; i < size; i++) { + slab = virt_to_slab(p[i]); + + if (!slab_obj_exts(slab) && + alloc_slab_obj_exts(slab, s, flags, false)) { + obj_cgroup_uncharge(objcg, obj_full_size(s)); + continue; + } + + off = obj_to_index(s, slab, p[i]); + obj_cgroup_get(objcg); + slab_obj_exts(slab)[off].objcg = objcg; + mod_objcg_state(objcg, slab_pgdat(slab), + cache_vmstat_idx(s), obj_full_size(s)); + } + + return true; +} + +void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, + void **p, int objects, struct slabobj_ext *obj_exts) +{ + for (int i = 0; i < objects; i++) { + struct obj_cgroup *objcg; + unsigned int off; + + off = obj_to_index(s, slab, p[i]); + objcg = obj_exts[off].objcg; + if (!objcg) + continue; + + obj_exts[off].objcg = NULL; + obj_cgroup_uncharge(objcg, obj_full_size(s)); + mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s), + -obj_full_size(s)); + obj_cgroup_put(objcg); + } +} #endif /* CONFIG_MEMCG_KMEM */ /* diff --git a/mm/slab.h b/mm/slab.h index 1343bfa12cee..411251b9bdd1 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -558,6 +558,9 @@ static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK); } +int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, + gfp_t gfp, bool new_slab); + #else /* CONFIG_SLAB_OBJ_EXT */ static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) @@ -567,7 +570,17 @@ static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) #endif /* CONFIG_SLAB_OBJ_EXT */ +static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s) +{ + return (s->flags & SLAB_RECLAIM_ACCOUNT) ? + NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B; +} + #ifdef CONFIG_MEMCG_KMEM +bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, + gfp_t flags, size_t size, void **p); +void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, + void **p, int objects, struct slabobj_ext *obj_exts); void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, enum node_stat_item idx, int nr); #endif diff --git a/mm/slub.c b/mm/slub.c index f83efdbe15b7..3e33ff900d35 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1859,12 +1859,6 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, #endif #endif /* CONFIG_SLUB_DEBUG */ -static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s) -{ - return (s->flags & SLAB_RECLAIM_ACCOUNT) ? - NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B; -} - #ifdef CONFIG_SLAB_OBJ_EXT #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG @@ -1923,8 +1917,8 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ __GFP_ACCOUNT | __GFP_NOFAIL) -static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, - gfp_t gfp, bool new_slab) +int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, + gfp_t gfp, bool new_slab) { unsigned int objects = objs_per_slab(s, slab); unsigned long new_exts; @@ -2083,78 +2077,6 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, #endif /* CONFIG_SLAB_OBJ_EXT */ #ifdef CONFIG_MEMCG_KMEM -static inline size_t obj_full_size(struct kmem_cache *s) -{ - /* - * For each accounted object there is an extra space which is used - * to store obj_cgroup membership. Charge it too. - */ - return s->size + sizeof(struct obj_cgroup *); -} - -static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, - struct list_lru *lru, - gfp_t flags, size_t size, - void **p) -{ - struct obj_cgroup *objcg; - struct slab *slab; - unsigned long off; - size_t i; - - /* - * The obtained objcg pointer is safe to use within the current scope, - * defined by current task or set_active_memcg() pair. - * obj_cgroup_get() is used to get a permanent reference. - */ - objcg = current_obj_cgroup(); - if (!objcg) - return true; - - /* - * slab_alloc_node() avoids the NULL check, so we might be called with a - * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill - * the whole requested size. - * return success as there's nothing to free back - */ - if (unlikely(*p == NULL)) - return true; - - flags &= gfp_allowed_mask; - - if (lru) { - int ret; - struct mem_cgroup *memcg; - - memcg = get_mem_cgroup_from_objcg(objcg); - ret = memcg_list_lru_alloc(memcg, lru, flags); - css_put(&memcg->css); - - if (ret) - return false; - } - - if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s))) - return false; - - for (i = 0; i < size; i++) { - slab = virt_to_slab(p[i]); - - if (!slab_obj_exts(slab) && - alloc_slab_obj_exts(slab, s, flags, false)) { - obj_cgroup_uncharge(objcg, obj_full_size(s)); - continue; - } - - off = obj_to_index(s, slab, p[i]); - obj_cgroup_get(objcg); - slab_obj_exts(slab)[off].objcg = objcg; - mod_objcg_state(objcg, slab_pgdat(slab), - cache_vmstat_idx(s), obj_full_size(s)); - } - - return true; -} static void memcg_alloc_abort_single(struct kmem_cache *s, void *object); @@ -2181,27 +2103,6 @@ bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, return false; } -static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, - void **p, int objects, - struct slabobj_ext *obj_exts) -{ - for (int i = 0; i < objects; i++) { - struct obj_cgroup *objcg; - unsigned int off; - - off = obj_to_index(s, slab, p[i]); - objcg = obj_exts[off].objcg; - if (!objcg) - continue; - - obj_exts[off].objcg = NULL; - obj_cgroup_uncharge(objcg, obj_full_size(s)); - mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s), - -obj_full_size(s)); - obj_cgroup_put(objcg); - } -} - static __fastpath_inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, int objects) -- cgit v1.2.3 From 91882c1617c15eb2a4e996b0822e0b9cbd854dea Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Sat, 20 Apr 2024 16:25:05 -0700 Subject: memcg: simple cleanup of stats update functions mod_memcg_lruvec_state() is never called from outside of memcontrol.c and with always irq disabled. So, replace it with the irq disabled version and add an assert that irq is disabled in the caller. Similarly mod_objcg_state() is not called from outside of memcontrol.c, so simply make it static and change it's name to __mod_objcg_state(). Link: https://lkml.kernel.org/r/20240420232505.2768428-1-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Reviewed-by: T.J. Mercier Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 17 ----------------- mm/memcontrol.c | 31 +++++++++++++++---------------- mm/slab.h | 2 -- 3 files changed, 15 insertions(+), 35 deletions(-) (limited to 'mm/slab.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8f332b4ae84c..9aba0d0462ca 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1077,8 +1077,6 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, void mem_cgroup_flush_stats(struct mem_cgroup *memcg); void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg); -void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, - int val); void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, @@ -1091,16 +1089,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, local_irq_restore(flags); } -static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_memcg_lruvec_state(lruvec, idx, val); - local_irq_restore(flags); -} - void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count); @@ -1594,11 +1582,6 @@ static inline void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg) { } -static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ -} - static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 29fa5b17a1ef..9095ab05d47a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -836,8 +836,9 @@ static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx) return x; } -void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, - int val) +static void __mod_memcg_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, + int val) { struct mem_cgroup_per_node *pn; struct mem_cgroup *memcg; @@ -2978,21 +2979,19 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg) #ifdef CONFIG_MEMCG_KMEM -/* - * mod_objcg_mlstate() may be called with irq enabled, so - * mod_memcg_lruvec_state() should be used. - */ -static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, - struct pglist_data *pgdat, - enum node_stat_item idx, int nr) +static inline void __mod_objcg_mlstate(struct obj_cgroup *objcg, + struct pglist_data *pgdat, + enum node_stat_item idx, int nr) { struct mem_cgroup *memcg; struct lruvec *lruvec; + lockdep_assert_irqs_disabled(); + rcu_read_lock(); memcg = obj_cgroup_memcg(objcg); lruvec = mem_cgroup_lruvec(memcg, pgdat); - mod_memcg_lruvec_state(lruvec, idx, nr); + __mod_memcg_lruvec_state(lruvec, idx, nr); rcu_read_unlock(); } @@ -3312,7 +3311,7 @@ void __memcg_kmem_uncharge_page(struct page *page, int order) obj_cgroup_put(objcg); } -void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, +static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, enum node_stat_item idx, int nr) { struct memcg_stock_pcp *stock; @@ -3340,12 +3339,12 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, struct pglist_data *oldpg = stock->cached_pgdat; if (stock->nr_slab_reclaimable_b) { - mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B, + __mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B, stock->nr_slab_reclaimable_b); stock->nr_slab_reclaimable_b = 0; } if (stock->nr_slab_unreclaimable_b) { - mod_objcg_mlstate(objcg, oldpg, NR_SLAB_UNRECLAIMABLE_B, + __mod_objcg_mlstate(objcg, oldpg, NR_SLAB_UNRECLAIMABLE_B, stock->nr_slab_unreclaimable_b); stock->nr_slab_unreclaimable_b = 0; } @@ -3371,7 +3370,7 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, } } if (nr) - mod_objcg_mlstate(objcg, pgdat, idx, nr); + __mod_objcg_mlstate(objcg, pgdat, idx, nr); local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); @@ -3437,13 +3436,13 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock) */ if (stock->nr_slab_reclaimable_b || stock->nr_slab_unreclaimable_b) { if (stock->nr_slab_reclaimable_b) { - mod_objcg_mlstate(old, stock->cached_pgdat, + __mod_objcg_mlstate(old, stock->cached_pgdat, NR_SLAB_RECLAIMABLE_B, stock->nr_slab_reclaimable_b); stock->nr_slab_reclaimable_b = 0; } if (stock->nr_slab_unreclaimable_b) { - mod_objcg_mlstate(old, stock->cached_pgdat, + __mod_objcg_mlstate(old, stock->cached_pgdat, NR_SLAB_UNRECLAIMABLE_B, stock->nr_slab_unreclaimable_b); stock->nr_slab_unreclaimable_b = 0; diff --git a/mm/slab.h b/mm/slab.h index 411251b9bdd1..d12fb4392e35 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -581,8 +581,6 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, gfp_t flags, size_t size, void **p); void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, int objects, struct slabobj_ext *obj_exts); -void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, - enum node_stat_item idx, int nr); #endif size_t __ksize(const void *objp); -- cgit v1.2.3