From c25303281d79299e9f35d4b2e496a8bd134d5715 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 5 Jun 2021 23:20:17 -0400 Subject: mm: Convert get_page_unless_zero() to return bool atomic_add_unless() returns bool, so remove the widening casts to int in page_ref_add_unless() and get_page_unless_zero(). This causes gcc to produce slightly larger code in isolate_migratepages_block(), but it's not clear that it's worse code. Net +19 bytes of text. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Kirill A. Shutemov Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/mm.h | 2 +- include/linux/page_ref.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 73a52aba448f..1de8864a1e28 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -754,7 +754,7 @@ static inline int put_page_testzero(struct page *page) * This can be called when MMU is off so it must not access * any of the virtual mappings. */ -static inline int get_page_unless_zero(struct page *page) +static inline bool get_page_unless_zero(struct page *page) { return page_ref_add_unless(page, 1, 0); } diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 7ad46f45df39..3a799de8ad52 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -161,9 +161,9 @@ static inline int page_ref_dec_return(struct page *page) return ret; } -static inline int page_ref_add_unless(struct page *page, int nr, int u) +static inline bool page_ref_add_unless(struct page *page, int nr, int u) { - int ret = atomic_add_unless(&page->_refcount, nr, u); + bool ret = atomic_add_unless(&page->_refcount, nr, u); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); -- cgit v1.2.3 From 7b230db3b8d373219f88a3d25c8fbbf12cc7f233 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 6 Dec 2020 22:22:48 -0500 Subject: mm: Introduce struct folio A struct folio is a new abstraction to replace the venerable struct page. A function which takes a struct folio argument declares that it will operate on the entire (possibly compound) page, not just PAGE_SIZE bytes. In return, the caller guarantees that the pointer it is passing does not point to a tail page. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Mike Rapoport --- Documentation/core-api/mm-api.rst | 1 + include/linux/mm.h | 75 +++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 60 +++++++++++++++++++++++++++++++ include/linux/page-flags.h | 28 +++++++++++++++ 4 files changed, 164 insertions(+) diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index a42f9baddfbf..2a94e6164f80 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -95,6 +95,7 @@ More Memory Management Functions .. kernel-doc:: mm/mempolicy.c .. kernel-doc:: include/linux/mm_types.h :internal: +.. kernel-doc:: include/linux/page-flags.h .. kernel-doc:: include/linux/mm.h :internal: .. kernel-doc:: include/linux/mmzone.h diff --git a/include/linux/mm.h b/include/linux/mm.h index 1de8864a1e28..9057b8406acf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -950,6 +950,20 @@ static inline unsigned int compound_order(struct page *page) return page[1].compound_order; } +/** + * folio_order - The allocation order of a folio. + * @folio: The folio. + * + * A folio is composed of 2^order pages. See get_order() for the definition + * of order. + * + * Return: The order of the folio. + */ +static inline unsigned int folio_order(struct folio *folio) +{ + return compound_order(&folio->page); +} + static inline bool hpage_pincount_available(struct page *page) { /* @@ -1595,6 +1609,66 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +/** + * folio_nr_pages - The number of pages in the folio. + * @folio: The folio. + * + * Return: A positive power of two. + */ +static inline long folio_nr_pages(struct folio *folio) +{ + return compound_nr(&folio->page); +} + +/** + * folio_next - Move to the next physical folio. + * @folio: The folio we're currently operating on. + * + * If you have physically contiguous memory which may span more than + * one folio (eg a &struct bio_vec), use this function to move from one + * folio to the next. Do not use it if the memory is only virtually + * contiguous as the folios are almost certainly not adjacent to each + * other. This is the folio equivalent to writing ``page++``. + * + * Context: We assume that the folios are refcounted and/or locked at a + * higher level and do not adjust the reference counts. + * Return: The next struct folio. + */ +static inline struct folio *folio_next(struct folio *folio) +{ + return (struct folio *)folio_page(folio, folio_nr_pages(folio)); +} + +/** + * folio_shift - The size of the memory described by this folio. + * @folio: The folio. + * + * A folio represents a number of bytes which is a power-of-two in size. + * This function tells you which power-of-two the folio is. See also + * folio_size() and folio_order(). + * + * Context: The caller should have a reference on the folio to prevent + * it from being split. It is not necessary for the folio to be locked. + * Return: The base-2 logarithm of the size of this folio. + */ +static inline unsigned int folio_shift(struct folio *folio) +{ + return PAGE_SHIFT + folio_order(folio); +} + +/** + * folio_size - The number of bytes in a folio. + * @folio: The folio. + * + * Context: The caller should have a reference on the folio to prevent + * it from being split. It is not necessary for the folio to be locked. + * Return: The number of bytes in this folio. + */ +static inline size_t folio_size(struct folio *folio) +{ + return PAGE_SIZE << folio_order(folio); +} + /* * Some inline functions in vmstat.h depend on page_zone() */ @@ -1700,6 +1774,7 @@ extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) #define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) +#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) /* * Flags passed to show_mem() and show_free_areas() to suppress output in diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f8ee09c711f..6908186629b4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -239,6 +239,66 @@ struct page { #endif } _struct_page_alignment; +/** + * struct folio - Represents a contiguous set of bytes. + * @flags: Identical to the page flags. + * @lru: Least Recently Used list; tracks how recently this folio was used. + * @mapping: The file this page belongs to, or refers to the anon_vma for + * anonymous memory. + * @index: Offset within the file, in units of pages. For anonymous memory, + * this is the index from the beginning of the mmap. + * @private: Filesystem per-folio data (see folio_attach_private()). + * Used for swp_entry_t if folio_test_swapcache(). + * @_mapcount: Do not access this member directly. Use folio_mapcount() to + * find out how many times this folio is mapped by userspace. + * @_refcount: Do not access this member directly. Use folio_ref_count() + * to find how many references there are to this folio. + * @memcg_data: Memory Control Group data. + * + * A folio is a physically, virtually and logically contiguous set + * of bytes. It is a power-of-two in size, and it is aligned to that + * same power-of-two. It is at least as large as %PAGE_SIZE. If it is + * in the page cache, it is at a file offset which is a multiple of that + * power-of-two. It may be mapped into userspace at an address which is + * at an arbitrary page offset, but its kernel virtual address is aligned + * to its size. + */ +struct folio { + /* private: don't document the anon union */ + union { + struct { + /* public: */ + unsigned long flags; + struct list_head lru; + struct address_space *mapping; + pgoff_t index; + void *private; + atomic_t _mapcount; + atomic_t _refcount; +#ifdef CONFIG_MEMCG + unsigned long memcg_data; +#endif + /* private: the union with struct page is transitional */ + }; + struct page page; + }; +}; + +static_assert(sizeof(struct page) == sizeof(struct folio)); +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) +FOLIO_MATCH(flags, flags); +FOLIO_MATCH(lru, lru); +FOLIO_MATCH(compound_head, lru); +FOLIO_MATCH(index, index); +FOLIO_MATCH(private, private); +FOLIO_MATCH(_mapcount, _mapcount); +FOLIO_MATCH(_refcount, _refcount); +#ifdef CONFIG_MEMCG +FOLIO_MATCH(memcg_data, memcg_data); +#endif +#undef FOLIO_MATCH + static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a558d67ee86f..e9b0723a637d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -193,6 +193,34 @@ static inline unsigned long _compound_head(const struct page *page) #define compound_head(page) ((typeof(page))_compound_head(page)) +/** + * page_folio - Converts from page to folio. + * @p: The page. + * + * Every page is part of a folio. This function cannot be called on a + * NULL pointer. + * + * Context: No reference, nor lock is required on @page. If the caller + * does not hold a reference, this call may race with a folio split, so + * it should re-check the folio still contains this page after gaining + * a reference on the folio. + * Return: The folio which contains this page. + */ +#define page_folio(p) (_Generic((p), \ + const struct page *: (const struct folio *)_compound_head(p), \ + struct page *: (struct folio *)_compound_head(p))) + +/** + * folio_page - Return a page from a folio. + * @folio: The folio. + * @n: The page number to return. + * + * @n is relative to the start of the folio. This function does not + * check that the page number lies within @folio; the caller is presumed + * to have a reference to the page. + */ +#define folio_page(folio, n) nth_page(&(folio)->page, n) + static __always_inline int PageTail(struct page *page) { return READ_ONCE(page->compound_head) & 1; -- cgit v1.2.3 From 32b8fc486524044f2643c5d3340fa436b761ba71 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 18 Jan 2021 07:40:36 -0500 Subject: mm: Add folio_pgdat(), folio_zone() and folio_zonenum() These are just convenience wrappers for callers with folios; pgdat and zone can be reached from tail pages as well as head pages. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 9057b8406acf..3fc524f8c2a2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1145,6 +1145,11 @@ static inline enum zone_type page_zonenum(const struct page *page) return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } +static inline enum zone_type folio_zonenum(const struct folio *folio) +{ + return page_zonenum(&folio->page); +} + #ifdef CONFIG_ZONE_DEVICE static inline bool is_zone_device_page(const struct page *page) { @@ -1560,6 +1565,16 @@ static inline pg_data_t *page_pgdat(const struct page *page) return NODE_DATA(page_to_nid(page)); } +static inline struct zone *folio_zone(const struct folio *folio) +{ + return page_zone(&folio->page); +} + +static inline pg_data_t *folio_pgdat(const struct folio *folio) +{ + return page_pgdat(&folio->page); +} + #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { -- cgit v1.2.3 From a53e17e4e97b4519882d640984a386c522e9fba3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 18 Jan 2021 08:14:00 -0500 Subject: mm/vmstat: Add functions to account folio statistics Allow page counters to be more readily modified by callers which have a folio. Name these wrappers with 'stat' instead of 'state' as requested by Linus here: https://lore.kernel.org/linux-mm/CAHk-=wj847SudR-kt+46fT3+xFFgiwpgThvm7DJWGdi4cVrbnQ@mail.gmail.com/ No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/vmstat.h | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d6a6cf53b127..241bd0f53fb9 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -415,6 +415,78 @@ static inline void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) { } #endif /* CONFIG_SMP */ +static inline void __zone_stat_mod_folio(struct folio *folio, + enum zone_stat_item item, long nr) +{ + __mod_zone_page_state(folio_zone(folio), item, nr); +} + +static inline void __zone_stat_add_folio(struct folio *folio, + enum zone_stat_item item) +{ + __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); +} + +static inline void __zone_stat_sub_folio(struct folio *folio, + enum zone_stat_item item) +{ + __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); +} + +static inline void zone_stat_mod_folio(struct folio *folio, + enum zone_stat_item item, long nr) +{ + mod_zone_page_state(folio_zone(folio), item, nr); +} + +static inline void zone_stat_add_folio(struct folio *folio, + enum zone_stat_item item) +{ + mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); +} + +static inline void zone_stat_sub_folio(struct folio *folio, + enum zone_stat_item item) +{ + mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); +} + +static inline void __node_stat_mod_folio(struct folio *folio, + enum node_stat_item item, long nr) +{ + __mod_node_page_state(folio_pgdat(folio), item, nr); +} + +static inline void __node_stat_add_folio(struct folio *folio, + enum node_stat_item item) +{ + __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); +} + +static inline void __node_stat_sub_folio(struct folio *folio, + enum node_stat_item item) +{ + __mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); +} + +static inline void node_stat_mod_folio(struct folio *folio, + enum node_stat_item item, long nr) +{ + mod_node_page_state(folio_pgdat(folio), item, nr); +} + +static inline void node_stat_add_folio(struct folio *folio, + enum node_stat_item item) +{ + mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); +} + +static inline void node_stat_sub_folio(struct folio *folio, + enum node_stat_item item) +{ + mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); +} + static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, int migratetype) { @@ -543,6 +615,24 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); } +static inline void __lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) +{ + __mod_lruvec_page_state(&folio->page, idx, val); +} + +static inline void __lruvec_stat_add_folio(struct folio *folio, + enum node_stat_item idx) +{ + __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); +} + +static inline void __lruvec_stat_sub_folio(struct folio *folio, + enum node_stat_item idx) +{ + __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); +} + static inline void inc_lruvec_page_state(struct page *page, enum node_stat_item idx) { @@ -555,4 +645,21 @@ static inline void dec_lruvec_page_state(struct page *page, mod_lruvec_page_state(page, idx, -1); } +static inline void lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) +{ + mod_lruvec_page_state(&folio->page, idx, val); +} + +static inline void lruvec_stat_add_folio(struct folio *folio, + enum node_stat_item idx) +{ + lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); +} + +static inline void lruvec_stat_sub_folio(struct folio *folio, + enum node_stat_item idx) +{ + lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); +} #endif /* _LINUX_VMSTAT_H */ -- cgit v1.2.3 From 9e9edb2094db7eb4c6da21e02ac885955350ecf9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Jan 2021 10:52:37 -0500 Subject: mm/debug: Add VM_BUG_ON_FOLIO() and VM_WARN_ON_ONCE_FOLIO() These are the folio equivalents of VM_BUG_ON_PAGE and VM_WARN_ON_ONCE_PAGE. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mmdebug.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 1935d4c72d10..d7285f8148a3 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -22,6 +22,13 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) +#define VM_BUG_ON_FOLIO(cond, folio) \ + do { \ + if (unlikely(cond)) { \ + dump_page(&folio->page, "VM_BUG_ON_FOLIO(" __stringify(cond)")");\ + BUG(); \ + } \ + } while (0) #define VM_BUG_ON_VMA(cond, vma) \ do { \ if (unlikely(cond)) { \ @@ -47,6 +54,17 @@ void dump_mm(const struct mm_struct *mm); } \ unlikely(__ret_warn_once); \ }) +#define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \ + static bool __section(".data.once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_page(&folio->page, "VM_WARN_ON_ONCE_FOLIO(" __stringify(cond)")");\ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) @@ -55,11 +73,13 @@ void dump_mm(const struct mm_struct *mm); #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) +#define VM_BUG_ON_FOLIO(cond, folio) VM_BUG_ON(cond) #define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond) #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif -- cgit v1.2.3 From c24016ac3a629655ea164b1129816660187943c0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 31 Mar 2021 10:39:55 -0400 Subject: mm: Add folio reference count functions These functions mirror their page reference counterparts. Also add the kernel-doc to the mm-api and correct the return type of page_ref_add_unless() to bool. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- Documentation/core-api/mm-api.rst | 1 + include/linux/page_ref.h | 88 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index 2a94e6164f80..5c459ee2acce 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -98,4 +98,5 @@ More Memory Management Functions .. kernel-doc:: include/linux/page-flags.h .. kernel-doc:: include/linux/mm.h :internal: +.. kernel-doc:: include/linux/page_ref.h .. kernel-doc:: include/linux/mmzone.h diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 3a799de8ad52..717d53c9ddf1 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -67,9 +67,31 @@ static inline int page_ref_count(const struct page *page) return atomic_read(&page->_refcount); } +/** + * folio_ref_count - The reference count on this folio. + * @folio: The folio. + * + * The refcount is usually incremented by calls to folio_get() and + * decremented by calls to folio_put(). Some typical users of the + * folio refcount: + * + * - Each reference from a page table + * - The page cache + * - Filesystem private data + * - The LRU list + * - Pipes + * - Direct IO which references this page in the process address space + * + * Return: The number of references to this folio. + */ +static inline int folio_ref_count(const struct folio *folio) +{ + return page_ref_count(&folio->page); +} + static inline int page_count(const struct page *page) { - return atomic_read(&compound_head(page)->_refcount); + return folio_ref_count(page_folio(page)); } static inline void set_page_count(struct page *page, int v) @@ -79,6 +101,11 @@ static inline void set_page_count(struct page *page, int v) __page_ref_set(page, v); } +static inline void folio_set_count(struct folio *folio, int v) +{ + set_page_count(&folio->page, v); +} + /* * Setup the page count before being freed into the page allocator for * the first time (boot or memory hotplug) @@ -95,6 +122,11 @@ static inline void page_ref_add(struct page *page, int nr) __page_ref_mod(page, nr); } +static inline void folio_ref_add(struct folio *folio, int nr) +{ + page_ref_add(&folio->page, nr); +} + static inline void page_ref_sub(struct page *page, int nr) { atomic_sub(nr, &page->_refcount); @@ -102,6 +134,11 @@ static inline void page_ref_sub(struct page *page, int nr) __page_ref_mod(page, -nr); } +static inline void folio_ref_sub(struct folio *folio, int nr) +{ + page_ref_sub(&folio->page, nr); +} + static inline int page_ref_sub_return(struct page *page, int nr) { int ret = atomic_sub_return(nr, &page->_refcount); @@ -111,6 +148,11 @@ static inline int page_ref_sub_return(struct page *page, int nr) return ret; } +static inline int folio_ref_sub_return(struct folio *folio, int nr) +{ + return page_ref_sub_return(&folio->page, nr); +} + static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_refcount); @@ -118,6 +160,11 @@ static inline void page_ref_inc(struct page *page) __page_ref_mod(page, 1); } +static inline void folio_ref_inc(struct folio *folio) +{ + page_ref_inc(&folio->page); +} + static inline void page_ref_dec(struct page *page) { atomic_dec(&page->_refcount); @@ -125,6 +172,11 @@ static inline void page_ref_dec(struct page *page) __page_ref_mod(page, -1); } +static inline void folio_ref_dec(struct folio *folio) +{ + page_ref_dec(&folio->page); +} + static inline int page_ref_sub_and_test(struct page *page, int nr) { int ret = atomic_sub_and_test(nr, &page->_refcount); @@ -134,6 +186,11 @@ static inline int page_ref_sub_and_test(struct page *page, int nr) return ret; } +static inline int folio_ref_sub_and_test(struct folio *folio, int nr) +{ + return page_ref_sub_and_test(&folio->page, nr); +} + static inline int page_ref_inc_return(struct page *page) { int ret = atomic_inc_return(&page->_refcount); @@ -143,6 +200,11 @@ static inline int page_ref_inc_return(struct page *page) return ret; } +static inline int folio_ref_inc_return(struct folio *folio) +{ + return page_ref_inc_return(&folio->page); +} + static inline int page_ref_dec_and_test(struct page *page) { int ret = atomic_dec_and_test(&page->_refcount); @@ -152,6 +214,11 @@ static inline int page_ref_dec_and_test(struct page *page) return ret; } +static inline int folio_ref_dec_and_test(struct folio *folio) +{ + return page_ref_dec_and_test(&folio->page); +} + static inline int page_ref_dec_return(struct page *page) { int ret = atomic_dec_return(&page->_refcount); @@ -161,6 +228,11 @@ static inline int page_ref_dec_return(struct page *page) return ret; } +static inline int folio_ref_dec_return(struct folio *folio) +{ + return page_ref_dec_return(&folio->page); +} + static inline bool page_ref_add_unless(struct page *page, int nr, int u) { bool ret = atomic_add_unless(&page->_refcount, nr, u); @@ -170,6 +242,11 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u) return ret; } +static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u) +{ + return page_ref_add_unless(&folio->page, nr, u); +} + static inline int page_ref_freeze(struct page *page, int count) { int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count); @@ -179,6 +256,11 @@ static inline int page_ref_freeze(struct page *page, int count) return ret; } +static inline int folio_ref_freeze(struct folio *folio, int count) +{ + return page_ref_freeze(&folio->page, count); +} + static inline void page_ref_unfreeze(struct page *page, int count) { VM_BUG_ON_PAGE(page_count(page) != 0, page); @@ -189,4 +271,8 @@ static inline void page_ref_unfreeze(struct page *page, int count) __page_ref_unfreeze(page, count); } +static inline void folio_ref_unfreeze(struct folio *folio, int count) +{ + page_ref_unfreeze(&folio->page, count); +} #endif -- cgit v1.2.3 From b620f63358cd35c8e9084ee9fc460153f64714f6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 6 Dec 2020 23:04:57 -0500 Subject: mm: Add folio_put() If we know we have a folio, we can call folio_put() instead of put_page() and save the overhead of calling compound_head(). Also skips the devmap checks. This commit looks like it should be a no-op, but actually saves 684 bytes of text with the distro-derived config that I'm testing. Some functions grow a little while others shrink. I presume the compiler is making different inlining decisions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mm.h | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3fc524f8c2a2..28a84d82247b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -748,6 +748,11 @@ static inline int put_page_testzero(struct page *page) return page_ref_dec_and_test(page); } +static inline int folio_put_testzero(struct folio *folio) +{ + return put_page_testzero(&folio->page); +} + /* * Try to grab a ref unless the page has a refcount of zero, return false if * that is the case. @@ -1247,9 +1252,28 @@ static inline __must_check bool try_get_page(struct page *page) return true; } +/** + * folio_put - Decrement the reference count on a folio. + * @folio: The folio. + * + * If the folio's reference count reaches zero, the memory will be + * released back to the page allocator and may be used by another + * allocation immediately. Do not access the memory or the struct folio + * after calling folio_put() unless you can be sure that it wasn't the + * last reference. + * + * Context: May be called in process or interrupt context, but not in NMI + * context. May be called while holding a spinlock. + */ +static inline void folio_put(struct folio *folio) +{ + if (folio_put_testzero(folio)) + __put_page(&folio->page); +} + static inline void put_page(struct page *page) { - page = compound_head(page); + struct folio *folio = page_folio(page); /* * For devmap managed pages we need to catch refcount transition from @@ -1257,13 +1281,12 @@ static inline void put_page(struct page *page) * need to inform the device driver through callback. See * include/linux/memremap.h and HMM for details. */ - if (page_is_devmap_managed(page)) { - put_devmap_managed_page(page); + if (page_is_devmap_managed(&folio->page)) { + put_devmap_managed_page(&folio->page); return; } - if (put_page_testzero(page)) - __put_page(page); + folio_put(folio); } /* -- cgit v1.2.3 From 86d234cb0499c6466ccfc45f6501bc0cd4621c60 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 6 Dec 2020 23:04:57 -0500 Subject: mm: Add folio_get() If we know we have a folio, we can call folio_get() instead of get_page() and save the overhead of calling compound_head(). No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mm.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 28a84d82247b..63685d953ce0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1224,18 +1224,26 @@ static inline bool is_pci_p2pdma_page(const struct page *page) } /* 127: arbitrary random number, small enough to assemble well */ -#define page_ref_zero_or_close_to_overflow(page) \ - ((unsigned int) page_ref_count(page) + 127u <= 127u) +#define folio_ref_zero_or_close_to_overflow(folio) \ + ((unsigned int) folio_ref_count(folio) + 127u <= 127u) + +/** + * folio_get - Increment the reference count on a folio. + * @folio: The folio. + * + * Context: May be called in any context, as long as you know that + * you have a refcount on the folio. If you do not already have one, + * folio_try_get() may be the right interface for you to use. + */ +static inline void folio_get(struct folio *folio) +{ + VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio); + folio_ref_inc(folio); +} static inline void get_page(struct page *page) { - page = compound_head(page); - /* - * Getting a normal page or the head of a compound page - * requires to already have an elevated page->_refcount. - */ - VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); - page_ref_inc(page); + folio_get(page_folio(page)); } bool __must_check try_grab_page(struct page *page, unsigned int flags); -- cgit v1.2.3 From 020853b6f5ead2849b055e9873ff7267ce584256 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 10 May 2021 16:33:22 -0400 Subject: mm: Add folio_try_get_rcu() This is the equivalent of page_cache_get_speculative(). Also add folio_ref_try_add_rcu (the equivalent of page_cache_add_speculative) and folio_get_unless_zero() (the equivalent of get_page_unless_zero()). The new kernel-doc attempts to explain from the user's point of view when to use folio_try_get_rcu() and when to use folio_get_unless_zero(), because there seems to be some confusion currently between the users of page_cache_get_speculative() and get_page_unless_zero(). Reimplement page_cache_add_speculative() and page_cache_get_speculative() as wrappers around the folio equivalents, but leave get_page_unless_zero() alone for now. This commit reduces text size by 3 bytes due to slightly different register allocation & instruction selections. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Acked-by: Kirill A. Shutemov Acked-by: Mike Rapoport --- include/linux/page_ref.h | 66 +++++++++++++++++++++++++++++++++++++ include/linux/pagemap.h | 84 +++--------------------------------------------- mm/filemap.c | 20 ++++++++++++ 3 files changed, 90 insertions(+), 80 deletions(-) diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 717d53c9ddf1..2e677e6ad09f 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -247,6 +247,72 @@ static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u) return page_ref_add_unless(&folio->page, nr, u); } +/** + * folio_try_get - Attempt to increase the refcount on a folio. + * @folio: The folio. + * + * If you do not already have a reference to a folio, you can attempt to + * get one using this function. It may fail if, for example, the folio + * has been freed since you found a pointer to it, or it is frozen for + * the purposes of splitting or migration. + * + * Return: True if the reference count was successfully incremented. + */ +static inline bool folio_try_get(struct folio *folio) +{ + return folio_ref_add_unless(folio, 1, 0); +} + +static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) +{ +#ifdef CONFIG_TINY_RCU + /* + * The caller guarantees the folio will not be freed from interrupt + * context, so (on !SMP) we only need preemption to be disabled + * and TINY_RCU does that for us. + */ +# ifdef CONFIG_PREEMPT_COUNT + VM_BUG_ON(!in_atomic() && !irqs_disabled()); +# endif + VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); + folio_ref_add(folio, count); +#else + if (unlikely(!folio_ref_add_unless(folio, count, 0))) { + /* Either the folio has been freed, or will be freed. */ + return false; + } +#endif + return true; +} + +/** + * folio_try_get_rcu - Attempt to increase the refcount on a folio. + * @folio: The folio. + * + * This is a version of folio_try_get() optimised for non-SMP kernels. + * If you are still holding the rcu_read_lock() after looking up the + * page and know that the page cannot have its refcount decreased to + * zero in interrupt context, you can use this instead of folio_try_get(). + * + * Example users include get_user_pages_fast() (as pages are not unmapped + * from interrupt context) and the page cache lookups (as pages are not + * truncated from interrupt context). We also know that pages are not + * frozen in interrupt context for the purposes of splitting or migration. + * + * You can also use this function if you're holding a lock that prevents + * pages being frozen & removed; eg the i_pages lock for the page cache + * or the mmap_sem or page table lock for page tables. In this case, + * it will always succeed, and you could have used a plain folio_get(), + * but it's sometimes more convenient to have a common function called + * from both locked and RCU-protected contexts. + * + * Return: True if the reference count was successfully incremented. + */ +static inline bool folio_try_get_rcu(struct folio *folio) +{ + return folio_ref_try_add_rcu(folio, 1); +} + static inline int page_ref_freeze(struct page *page, int count) { int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 62db6b0176b9..f3ec26551082 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -172,91 +172,15 @@ static inline struct address_space *page_mapping_file(struct page *page) return page_mapping(page); } -/* - * speculatively take a reference to a page. - * If the page is free (_refcount == 0), then _refcount is untouched, and 0 - * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned. - * - * This function must be called inside the same rcu_read_lock() section as has - * been used to lookup the page in the pagecache radix-tree (or page table): - * this allows allocators to use a synchronize_rcu() to stabilize _refcount. - * - * Unless an RCU grace period has passed, the count of all pages coming out - * of the allocator must be considered unstable. page_count may return higher - * than expected, and put_page must be able to do the right thing when the - * page has been finished with, no matter what it is subsequently allocated - * for (because put_page is what is used here to drop an invalid speculative - * reference). - * - * This is the interesting part of the lockless pagecache (and lockless - * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page) - * has the following pattern: - * 1. find page in radix tree - * 2. conditionally increment refcount - * 3. check the page is still in pagecache (if no, goto 1) - * - * Remove-side that cares about stability of _refcount (eg. reclaim) has the - * following (with the i_pages lock held): - * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg) - * B. remove page from pagecache - * C. free the page - * - * There are 2 critical interleavings that matter: - * - 2 runs before A: in this case, A sees elevated refcount and bails out - * - A runs before 2: in this case, 2 sees zero refcount and retries; - * subsequently, B will complete and 1 will find no page, causing the - * lookup to return NULL. - * - * It is possible that between 1 and 2, the page is removed then the exact same - * page is inserted into the same position in pagecache. That's OK: the - * old find_get_page using a lock could equally have run before or after - * such a re-insertion, depending on order that locks are granted. - * - * Lookups racing against pagecache insertion isn't a big problem: either 1 - * will find the page or it will not. Likewise, the old find_get_page could run - * either before the insertion or afterwards, depending on timing. - */ -static inline int __page_cache_add_speculative(struct page *page, int count) +static inline bool page_cache_add_speculative(struct page *page, int count) { -#ifdef CONFIG_TINY_RCU -# ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic() && !irqs_disabled()); -# endif - /* - * Preempt must be disabled here - we rely on rcu_read_lock doing - * this for us. - * - * Pagecache won't be truncated from interrupt context, so if we have - * found a page in the radix tree here, we have pinned its refcount by - * disabling preempt, and hence no need for the "speculative get" that - * SMP requires. - */ - VM_BUG_ON_PAGE(page_count(page) == 0, page); - page_ref_add(page, count); - -#else - if (unlikely(!page_ref_add_unless(page, count, 0))) { - /* - * Either the page has been freed, or will be freed. - * In either case, retry here and the caller should - * do the right thing (see comments above). - */ - return 0; - } -#endif VM_BUG_ON_PAGE(PageTail(page), page); - - return 1; -} - -static inline int page_cache_get_speculative(struct page *page) -{ - return __page_cache_add_speculative(page, 1); + return folio_ref_try_add_rcu((struct folio *)page, count); } -static inline int page_cache_add_speculative(struct page *page, int count) +static inline bool page_cache_get_speculative(struct page *page) { - return __page_cache_add_speculative(page, count); + return page_cache_add_speculative(page, 1); } /** diff --git a/mm/filemap.c b/mm/filemap.c index dae481293b5d..9fcc3d94cfcd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1801,6 +1801,26 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, } EXPORT_SYMBOL(page_cache_prev_miss); +/* + * Lockless page cache protocol: + * On the lookup side: + * 1. Load the folio from i_pages + * 2. Increment the refcount if it's not zero + * 3. If the folio is not found by xas_reload(), put the refcount and retry + * + * On the removal side: + * A. Freeze the page (by zeroing the refcount if nobody else has a reference) + * B. Remove the page from i_pages + * C. Return the page to the page allocator + * + * This means that any page may have its reference count temporarily + * increased by a speculative page cache (or fast GUP) lookup as it can + * be allocated by another user before the RCU grace period expires. + * Because the refcount temporarily acquired here may end up being the + * last refcount on the page, any page allocation must be freeable by + * folio_put(). + */ + /* * mapping_get_entry - Get a page cache entry. * @mapping: the address_space to search -- cgit v1.2.3 From d389a4a8115518e2cc232649b012b72113fe8b67 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 7 Dec 2020 15:42:09 -0500 Subject: mm: Add folio flag manipulation functions These new functions are the folio analogues of the various PageFlags functions. If CONFIG_DEBUG_VM_PGFLAGS is enabled, we check the folio is not a tail page at every invocation. This will also catch the PagePoisoned case as a poisoned page has every bit set, which would include PageTail. This saves 1684 bytes of text with the distro-derived config that I'm testing due to removing a double call to compound_head() in PageSwapCache(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/page-flags.h | 219 ++++++++++++++++++++++++++++++++------------- 1 file changed, 156 insertions(+), 63 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e9b0723a637d..2491e84b8e52 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -143,6 +143,8 @@ enum pageflags { #endif __NR_PAGEFLAGS, + PG_readahead = PG_reclaim, + /* Filesystems */ PG_checked = PG_owner_priv_1, @@ -245,6 +247,15 @@ static inline void page_init_poison(struct page *page, size_t size) } #endif +static unsigned long *folio_flags(struct folio *folio, unsigned n) +{ + struct page *page = &folio->page; + + VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; +} + /* * Page flags policies wrt compound pages * @@ -289,36 +300,64 @@ static inline void page_init_poison(struct page *page, size_t size) VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ PF_POISONED_CHECK(&page[1]); }) +/* Which page is the flag stored in */ +#define FOLIO_PF_ANY 0 +#define FOLIO_PF_HEAD 0 +#define FOLIO_PF_ONLY_HEAD 0 +#define FOLIO_PF_NO_TAIL 0 +#define FOLIO_PF_NO_COMPOUND 0 +#define FOLIO_PF_SECOND 1 + /* * Macros to create function definitions for page flags */ #define TESTPAGEFLAG(uname, lname, policy) \ +static __always_inline bool folio_test_##lname(struct folio *folio) \ +{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int Page##uname(struct page *page) \ - { return test_bit(PG_##lname, &policy(page, 0)->flags); } +{ return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_set_##lname(struct folio *folio) \ +{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void SetPage##uname(struct page *page) \ - { set_bit(PG_##lname, &policy(page, 1)->flags); } +{ set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_clear_##lname(struct folio *folio) \ +{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void ClearPage##uname(struct page *page) \ - { clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_set_##lname(struct folio *folio) \ +{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __SetPage##uname(struct page *page) \ - { __set_bit(PG_##lname, &policy(page, 1)->flags); } +{ __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_clear_##lname(struct folio *folio) \ +{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __ClearPage##uname(struct page *page) \ - { __clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_set_##lname(struct folio *folio) \ +{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int TestSetPage##uname(struct page *page) \ - { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_clear_##lname(struct folio *folio) \ +{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int TestClearPage##uname(struct page *page) \ - { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ @@ -334,29 +373,37 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) -#define TESTPAGEFLAG_FALSE(uname) \ +#define TESTPAGEFLAG_FALSE(uname, lname) \ +static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \ static inline int Page##uname(const struct page *page) { return 0; } -#define SETPAGEFLAG_NOOP(uname) \ +#define SETPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_set_##lname(struct folio *folio) { } \ static inline void SetPage##uname(struct page *page) { } -#define CLEARPAGEFLAG_NOOP(uname) \ +#define CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_clear_##lname(struct folio *folio) { } \ static inline void ClearPage##uname(struct page *page) { } -#define __CLEARPAGEFLAG_NOOP(uname) \ +#define __CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void __folio_clear_##lname(struct folio *folio) { } \ static inline void __ClearPage##uname(struct page *page) { } -#define TESTSETFLAG_FALSE(uname) \ +#define TESTSETFLAG_FALSE(uname, lname) \ +static inline bool folio_test_set_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestSetPage##uname(struct page *page) { return 0; } -#define TESTCLEARFLAG_FALSE(uname) \ +#define TESTCLEARFLAG_FALSE(uname, lname) \ +static inline bool folio_test_clear_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestClearPage##uname(struct page *page) { return 0; } -#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \ - SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname) +#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ + SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname) -#define TESTSCFLAG_FALSE(uname) \ - TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) +#define TESTSCFLAG_FALSE(uname, lname) \ + TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) @@ -412,8 +459,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) -PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) - TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND) +PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) + TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) #ifdef CONFIG_HIGHMEM /* @@ -422,22 +469,25 @@ PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #else -PAGEFLAG_FALSE(HighMem) +PAGEFLAG_FALSE(HighMem, highmem) #endif #ifdef CONFIG_SWAP -static __always_inline int PageSwapCache(struct page *page) +static __always_inline bool folio_test_swapcache(struct folio *folio) { -#ifdef CONFIG_THP_SWAP - page = compound_head(page); -#endif - return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags); + return folio_test_swapbacked(folio) && + test_bit(PG_swapcache, folio_flags(folio, 0)); +} +static __always_inline bool PageSwapCache(struct page *page) +{ + return folio_test_swapcache(page_folio(page)); } + SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) #else -PAGEFLAG_FALSE(SwapCache) +PAGEFLAG_FALSE(SwapCache, swapcache) #endif PAGEFLAG(Unevictable, unevictable, PF_HEAD) @@ -449,14 +499,14 @@ PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) #else -PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked) - TESTSCFLAG_FALSE(Mlocked) +PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) + TESTSCFLAG_FALSE(Mlocked, mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) #else -PAGEFLAG_FALSE(Uncached) +PAGEFLAG_FALSE(Uncached, uncached) #endif #ifdef CONFIG_MEMORY_FAILURE @@ -465,7 +515,7 @@ TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) extern bool take_page_off_buddy(struct page *page); #else -PAGEFLAG_FALSE(HWPoison) +PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 #endif @@ -479,7 +529,7 @@ PAGEFLAG(Idle, idle, PF_ANY) #ifdef CONFIG_KASAN_HW_TAGS PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD) #else -PAGEFLAG_FALSE(SkipKASanPoison) +PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison) #endif /* @@ -517,10 +567,14 @@ static __always_inline int PageMappingFlags(struct page *page) return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline int PageAnon(struct page *page) +static __always_inline bool folio_test_anon(struct folio *folio) +{ + return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; +} + +static __always_inline bool PageAnon(struct page *page) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; + return folio_test_anon(page_folio(page)); } static __always_inline int __PageMovable(struct page *page) @@ -536,30 +590,32 @@ static __always_inline int __PageMovable(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ -static __always_inline int PageKsm(struct page *page) +static __always_inline bool folio_test_ksm(struct folio *folio) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == + return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } + +static __always_inline bool PageKsm(struct page *page) +{ + return folio_test_ksm(page_folio(page)); +} #else -TESTPAGEFLAG_FALSE(Ksm) +TESTPAGEFLAG_FALSE(Ksm, ksm) #endif u64 stable_page_flags(struct page *page); -static inline int PageUptodate(struct page *page) +static inline bool folio_test_uptodate(struct folio *folio) { - int ret; - page = compound_head(page); - ret = test_bit(PG_uptodate, &(page)->flags); + bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); /* - * Must ensure that the data we read out of the page is loaded - * _after_ we've loaded page->flags to check for PageUptodate. - * We can skip the barrier if the page is not uptodate, because + * Must ensure that the data we read out of the folio is loaded + * _after_ we've loaded folio->flags to check the uptodate bit. + * We can skip the barrier if the folio is not uptodate, because * we wouldn't be reading anything from it. * - * See SetPageUptodate() for the other side of the story. + * See folio_mark_uptodate() for the other side of the story. */ if (ret) smp_rmb(); @@ -567,23 +623,36 @@ static inline int PageUptodate(struct page *page) return ret; } -static __always_inline void __SetPageUptodate(struct page *page) +static inline int PageUptodate(struct page *page) +{ + return folio_test_uptodate(page_folio(page)); +} + +static __always_inline void __folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); smp_wmb(); - __set_bit(PG_uptodate, &page->flags); + __set_bit(PG_uptodate, folio_flags(folio, 0)); } -static __always_inline void SetPageUptodate(struct page *page) +static __always_inline void folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); /* * Memory barrier must be issued before setting the PG_uptodate bit, - * so that all previous stores issued in order to bring the page - * uptodate are actually visible before PageUptodate becomes true. + * so that all previous stores issued in order to bring the folio + * uptodate are actually visible before folio_test_uptodate becomes true. */ smp_wmb(); - set_bit(PG_uptodate, &page->flags); + set_bit(PG_uptodate, folio_flags(folio, 0)); +} + +static __always_inline void __SetPageUptodate(struct page *page) +{ + __folio_mark_uptodate((struct folio *)page); +} + +static __always_inline void SetPageUptodate(struct page *page) +{ + folio_mark_uptodate((struct folio *)page); } CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) @@ -608,6 +677,17 @@ static inline void set_page_writeback_keepwrite(struct page *page) __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) +/* Whether there are one or multiple pages in a folio */ +static inline bool folio_test_single(struct folio *folio) +{ + return !folio_test_head(folio); +} + +static inline bool folio_test_multi(struct folio *folio) +{ + return folio_test_head(folio); +} + static __always_inline void set_compound_head(struct page *page, struct page *head) { WRITE_ONCE(page->compound_head, (unsigned long)head + 1); @@ -631,12 +711,15 @@ static inline void ClearPageCompound(struct page *page) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); int PageHeadHuge(struct page *page); +static inline bool folio_test_hugetlb(struct folio *folio) +{ + return PageHeadHuge(&folio->page); +} #else -TESTPAGEFLAG_FALSE(Huge) -TESTPAGEFLAG_FALSE(HeadHuge) +TESTPAGEFLAG_FALSE(Huge, hugetlb) +TESTPAGEFLAG_FALSE(HeadHuge, headhuge) #endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -652,6 +735,11 @@ static inline int PageTransHuge(struct page *page) return PageHead(page); } +static inline bool folio_test_transhuge(struct folio *folio) +{ + return folio_test_head(folio); +} + /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known @@ -688,12 +776,12 @@ static inline int PageTransTail(struct page *page) PAGEFLAG(DoubleMap, double_map, PF_SECOND) TESTSCFLAG(DoubleMap, double_map, PF_SECOND) #else -TESTPAGEFLAG_FALSE(TransHuge) -TESTPAGEFLAG_FALSE(TransCompound) -TESTPAGEFLAG_FALSE(TransCompoundMap) -TESTPAGEFLAG_FALSE(TransTail) -PAGEFLAG_FALSE(DoubleMap) - TESTSCFLAG_FALSE(DoubleMap) +TESTPAGEFLAG_FALSE(TransHuge, transhuge) +TESTPAGEFLAG_FALSE(TransCompound, transcompound) +TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) +TESTPAGEFLAG_FALSE(TransTail, transtail) +PAGEFLAG_FALSE(DoubleMap, double_map) + TESTSCFLAG_FALSE(DoubleMap, double_map) #endif /* @@ -877,6 +965,11 @@ static inline int page_has_private(struct page *page) return !!(page->flags & PAGE_FLAGS_PRIVATE); } +static inline bool folio_has_private(struct folio *folio) +{ + return page_has_private(&folio->page); +} + #undef PF_ANY #undef PF_HEAD #undef PF_ONLY_HEAD -- cgit v1.2.3 From 889a3747b3b7661b089ba4eae081a3b6bb351a23 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 25 Feb 2021 09:47:41 -0500 Subject: mm/lru: Add folio LRU functions Handle arbitrary-order folios being added to the LRU. By definition, all pages being added to the LRU were already head or base pages, but call page_folio() on them anyway to get the type right and avoid the buried calls to compound_head(). Saves 783 bytes of kernel text; no functions grow. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Yu Zhao Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Kirill A. Shutemov Acked-by: Mike Rapoport Acked-by: Vlastimil Babka --- Documentation/core-api/mm-api.rst | 1 + include/linux/mm_inline.h | 103 +++++++++++++++++++++++++------------- include/trace/events/pagemap.h | 2 +- 3 files changed, 69 insertions(+), 37 deletions(-) diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index 5c459ee2acce..971149f5d241 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -95,6 +95,7 @@ More Memory Management Functions .. kernel-doc:: mm/mempolicy.c .. kernel-doc:: include/linux/mm_types.h :internal: +.. kernel-doc:: include/linux/mm_inline.h .. kernel-doc:: include/linux/page-flags.h .. kernel-doc:: include/linux/mm.h :internal: diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 355ea1ee32bd..e2ec68b0515c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -6,27 +6,33 @@ #include /** - * page_is_file_lru - should the page be on a file LRU or anon LRU? - * @page: the page to test - * - * Returns 1 if @page is a regular filesystem backed page cache page or a lazily - * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal - * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by - * functions that manipulate the LRU lists, to sort a page onto the right LRU - * list. + * folio_is_file_lru - Should the folio be on a file LRU or anon LRU? + * @folio: The folio to test. * * We would like to get this info without a page flag, but the state - * needs to survive until the page is last deleted from the LRU, which + * needs to survive until the folio is last deleted from the LRU, which * could be as far down as __page_cache_release. + * + * Return: An integer (not a boolean!) used to sort a folio onto the + * right LRU list and to account folios correctly. + * 1 if @folio is a regular filesystem backed page cache folio + * or a lazily freed anonymous folio (e.g. via MADV_FREE). + * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise + * ram or swap backed folio. */ +static inline int folio_is_file_lru(struct folio *folio) +{ + return !folio_test_swapbacked(folio); +} + static inline int page_is_file_lru(struct page *page) { - return !PageSwapBacked(page); + return folio_is_file_lru(page_folio(page)); } static __always_inline void update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, - int nr_pages) + long nr_pages) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -39,69 +45,94 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, } /** - * __clear_page_lru_flags - clear page lru flags before releasing a page - * @page: the page that was on lru and now has a zero reference + * __folio_clear_lru_flags - Clear page lru flags before releasing a page. + * @folio: The folio that was on lru and now has a zero reference. */ -static __always_inline void __clear_page_lru_flags(struct page *page) +static __always_inline void __folio_clear_lru_flags(struct folio *folio) { - VM_BUG_ON_PAGE(!PageLRU(page), page); + VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio); - __ClearPageLRU(page); + __folio_clear_lru(folio); /* this shouldn't happen, so leave the flags to bad_page() */ - if (PageActive(page) && PageUnevictable(page)) + if (folio_test_active(folio) && folio_test_unevictable(folio)) return; - __ClearPageActive(page); - __ClearPageUnevictable(page); + __folio_clear_active(folio); + __folio_clear_unevictable(folio); +} + +static __always_inline void __clear_page_lru_flags(struct page *page) +{ + __folio_clear_lru_flags(page_folio(page)); } /** - * page_lru - which LRU list should a page be on? - * @page: the page to test + * folio_lru_list - Which LRU list should a folio be on? + * @folio: The folio to test. * - * Returns the LRU list a page should be on, as an index + * Return: The LRU list a folio should be on, as an index * into the array of LRU lists. */ -static __always_inline enum lru_list page_lru(struct page *page) +static __always_inline enum lru_list folio_lru_list(struct folio *folio) { enum lru_list lru; - VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); + VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); - if (PageUnevictable(page)) + if (folio_test_unevictable(folio)) return LRU_UNEVICTABLE; - lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; - if (PageActive(page)) + lru = folio_is_file_lru(folio) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; + if (folio_test_active(folio)) lru += LRU_ACTIVE; return lru; } +static __always_inline +void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) +{ + enum lru_list lru = folio_lru_list(folio); + + update_lru_size(lruvec, lru, folio_zonenum(folio), + folio_nr_pages(folio)); + list_add(&folio->lru, &lruvec->lists[lru]); +} + static __always_inline void add_page_to_lru_list(struct page *page, struct lruvec *lruvec) { - enum lru_list lru = page_lru(page); + lruvec_add_folio(lruvec, page_folio(page)); +} - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add(&page->lru, &lruvec->lists[lru]); +static __always_inline +void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) +{ + enum lru_list lru = folio_lru_list(folio); + + update_lru_size(lruvec, lru, folio_zonenum(folio), + folio_nr_pages(folio)); + list_add_tail(&folio->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list_tail(struct page *page, struct lruvec *lruvec) { - enum lru_list lru = page_lru(page); + lruvec_add_folio_tail(lruvec, page_folio(page)); +} - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add_tail(&page->lru, &lruvec->lists[lru]); +static __always_inline +void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) +{ + list_del(&folio->lru); + update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio), + -folio_nr_pages(folio)); } static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec) { - list_del(&page->lru); - update_lru_size(lruvec, page_lru(page), page_zonenum(page), - -thp_nr_pages(page)); + lruvec_del_folio(lruvec, page_folio(page)); } #endif diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 1d28431e85bd..92ad176210ff 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -41,7 +41,7 @@ TRACE_EVENT(mm_lru_insertion, TP_fast_assign( __entry->page = page; __entry->pfn = page_to_pfn(page); - __entry->lru = page_lru(page); + __entry->lru = folio_lru_list(page_folio(page)); __entry->flags = trace_pagemap_flags(page); ), -- cgit v1.2.3 From 85d0a2ed3747da7f9aedacb72478bbedf06f9f2e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 11 Jan 2021 10:04:40 -0500 Subject: mm: Handle per-folio private data Add folio_get_private() which mirrors page_private() -- ie folio private data is the same as page private data. The only difference is that these return a void * instead of an unsigned long, which matches the majority of users. Turn attach_page_private() into folio_attach_private() and reimplement attach_page_private() as a wrapper. No filesystem which uses page private data currently supports compound pages, so we're free to define the rules. attach_page_private() may only be called on a head page; if you want to add private data to a tail page, you can call set_page_private() directly (and shouldn't increment the page refcount! That should be done when adding private data to the head page / folio). This saves 813 bytes of text with the distro-derived config that I'm testing due to removing the calls to compound_head() in get_page() & put_page(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mm_types.h | 11 +++++++++++ include/linux/pagemap.h | 48 +++++++++++++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6908186629b4..5ebcb86ac934 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -317,6 +317,12 @@ static inline atomic_t *compound_pincount_ptr(struct page *page) #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) +/* + * page_private can be used on tail pages. However, PagePrivate is only + * checked by the VM on the head page. So page_private on the tail pages + * should be used for data that's ancillary to the head page (eg attaching + * buffer heads to tail pages after attaching buffer heads to the head page) + */ #define page_private(page) ((page)->private) static inline void set_page_private(struct page *page, unsigned long private) @@ -324,6 +330,11 @@ static inline void set_page_private(struct page *page, unsigned long private) page->private = private; } +static inline void *folio_get_private(struct folio *folio) +{ + return folio->private; +} + struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f3ec26551082..b0f5bf1cb540 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -184,42 +184,52 @@ static inline bool page_cache_get_speculative(struct page *page) } /** - * attach_page_private - Attach private data to a page. - * @page: Page to attach data to. - * @data: Data to attach to page. + * folio_attach_private - Attach private data to a folio. + * @folio: Folio to attach data to. + * @data: Data to attach to folio. * - * Attaching private data to a page increments the page's reference count. - * The data must be detached before the page will be freed. + * Attaching private data to a folio increments the page's reference count. + * The data must be detached before the folio will be freed. */ -static inline void attach_page_private(struct page *page, void *data) +static inline void folio_attach_private(struct folio *folio, void *data) { - get_page(page); - set_page_private(page, (unsigned long)data); - SetPagePrivate(page); + folio_get(folio); + folio->private = data; + folio_set_private(folio); } /** - * detach_page_private - Detach private data from a page. - * @page: Page to detach data from. + * folio_detach_private - Detach private data from a folio. + * @folio: Folio to detach data from. * - * Removes the data that was previously attached to the page and decrements + * Removes the data that was previously attached to the folio and decrements * the refcount on the page. * - * Return: Data that was attached to the page. + * Return: Data that was attached to the folio. */ -static inline void *detach_page_private(struct page *page) +static inline void *folio_detach_private(struct folio *folio) { - void *data = (void *)page_private(page); + void *data = folio_get_private(folio); - if (!PagePrivate(page)) + if (!folio_test_private(folio)) return NULL; - ClearPagePrivate(page); - set_page_private(page, 0); - put_page(page); + folio_clear_private(folio); + folio->private = NULL; + folio_put(folio); return data; } +static inline void attach_page_private(struct page *page, void *data) +{ + folio_attach_private(page_folio(page), data); +} + +static inline void *detach_page_private(struct page *page) +{ + return folio_detach_private(page_folio(page)); +} + #ifdef CONFIG_NUMA extern struct page *__page_cache_alloc(gfp_t gfp); #else -- cgit v1.2.3 From 9257e15677384d1ccdfe0619c4455e34cb0342c7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Jan 2021 23:39:21 -0500 Subject: mm/filemap: Add folio_index(), folio_file_page() and folio_contains() folio_index() is the equivalent of page_index() for folios. folio_file_page() is the equivalent of find_subpage(). folio_contains() is the equivalent of thp_contains(). No changes to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b0f5bf1cb540..e7b46223239a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -386,6 +386,62 @@ static inline bool thp_contains(struct page *head, pgoff_t index) return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); } +#define swapcache_index(folio) __page_file_index(&(folio)->page) + +/** + * folio_index - File index of a folio. + * @folio: The folio. + * + * For a folio which is either in the page cache or the swap cache, + * return its index within the address_space it belongs to. If you know + * the page is definitely in the page cache, you can look at the folio's + * index directly. + * + * Return: The index (offset in units of pages) of a folio in its file. + */ +static inline pgoff_t folio_index(struct folio *folio) +{ + if (unlikely(folio_test_swapcache(folio))) + return swapcache_index(folio); + return folio->index; +} + +/** + * folio_file_page - The page for a particular index. + * @folio: The folio which contains this index. + * @index: The index we want to look up. + * + * Sometimes after looking up a folio in the page cache, we need to + * obtain the specific page for an index (eg a page fault). + * + * Return: The page containing the file data for this index. + */ +static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) +{ + /* HugeTLBfs indexes the page cache in units of hpage_size */ + if (folio_test_hugetlb(folio)) + return &folio->page; + return folio_page(folio, index & (folio_nr_pages(folio) - 1)); +} + +/** + * folio_contains - Does this folio contain this index? + * @folio: The folio. + * @index: The page index within the file. + * + * Context: The caller should have the page locked in order to prevent + * (eg) shmem from moving the page between the page cache and swap cache + * and changing its index in the middle of the operation. + * Return: true or false. + */ +static inline bool folio_contains(struct folio *folio, pgoff_t index) +{ + /* HugeTLBfs indexes the page cache in units of hpage_size */ + if (folio_test_hugetlb(folio)) + return folio->index == index; + return index - folio_index(folio) < folio_nr_pages(folio); +} + /* * Given the page we found in the page cache, return the page corresponding * to this index in the file -- cgit v1.2.3 From f94b18f6653ab6d3aa503213c2e1e79b18f05a30 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 21 Mar 2021 16:24:31 -0400 Subject: mm/filemap: Add folio_next_index() This helper returns the page index of the next folio in the file (ie the end of this folio, plus one). No changes to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e7b46223239a..95e83a9e73c7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -406,6 +406,17 @@ static inline pgoff_t folio_index(struct folio *folio) return folio->index; } +/** + * folio_next_index - Get the index of the next folio. + * @folio: The current folio. + * + * Return: The index of the folio which follows this folio in the file. + */ +static inline pgoff_t folio_next_index(struct folio *folio) +{ + return folio->index + folio_nr_pages(folio); +} + /** * folio_file_page - The page for a particular index. * @folio: The folio which contains this index. -- cgit v1.2.3 From 352b47a6984470954e1e262126b1da5312c40b09 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 24 Dec 2020 07:25:19 -0500 Subject: mm/filemap: Add folio_pos() and folio_file_pos() These are just wrappers around page_offset() and page_file_offset() respectively. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells --- include/linux/pagemap.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 95e83a9e73c7..3354117a1dae 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -561,6 +561,27 @@ static inline loff_t page_file_offset(struct page *page) return ((loff_t)page_index(page)) << PAGE_SHIFT; } +/** + * folio_pos - Returns the byte position of this folio in its file. + * @folio: The folio. + */ +static inline loff_t folio_pos(struct folio *folio) +{ + return page_offset(&folio->page); +} + +/** + * folio_file_pos - Returns the byte position of this folio in its file. + * @folio: The folio. + * + * This differs from folio_pos() for folios which belong to a swap file. + * NFS is the only filesystem today which needs to use folio_file_pos(). + */ +static inline loff_t folio_file_pos(struct folio *folio) +{ + return page_file_offset(&folio->page); +} + extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, unsigned long address); -- cgit v1.2.3 From 2f52578f9c64d7d9a96ab81c243cc20804fabf2b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 10 Dec 2020 10:55:05 -0500 Subject: mm/util: Add folio_mapping() and folio_file_mapping() These are the folio equivalent of page_mapping() and page_file_mapping(). Add an out-of-line page_mapping() wrapper around folio_mapping() in order to prevent the page_folio() call from bloating every caller of page_mapping(). Adjust page_file_mapping() and page_mapping_file() to use folios internally. Rename __page_file_mapping() to swapcache_mapping() and change it to take a folio. This ends up saving 122 bytes of text overall. folio_mapping() is 45 bytes shorter than page_mapping() was, but the new page_mapping() wrapper is 30 bytes. The major reduction is a few bytes less in dozens of nfs functions (which call page_file_mapping()). Most of these appear to be a slight change in gcc's register allocation decisions, which allow: 48 8b 56 08 mov 0x8(%rsi),%rdx 48 8d 42 ff lea -0x1(%rdx),%rax 83 e2 01 and $0x1,%edx 48 0f 44 c6 cmove %rsi,%rax to become: 48 8b 46 08 mov 0x8(%rsi),%rax 48 8d 78 ff lea -0x1(%rax),%rdi a8 01 test $0x1,%al 48 0f 44 fe cmove %rsi,%rdi for a reduction of a single byte. Once the NFS client is converted to use folios, this entire sequence will disappear. Also add folio_mapping() documentation. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells --- Documentation/core-api/mm-api.rst | 2 ++ include/linux/mm.h | 14 -------------- include/linux/pagemap.h | 35 +++++++++++++++++++++++++++++++++-- include/linux/swap.h | 6 ++++++ mm/Makefile | 2 +- mm/folio-compat.c | 13 +++++++++++++ mm/swapfile.c | 8 ++++---- mm/util.c | 30 ++++++++++++++++++------------ 8 files changed, 77 insertions(+), 33 deletions(-) create mode 100644 mm/folio-compat.c diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index 971149f5d241..395835f9289f 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -101,3 +101,5 @@ More Memory Management Functions :internal: .. kernel-doc:: include/linux/page_ref.h .. kernel-doc:: include/linux/mmzone.h +.. kernel-doc:: mm/util.c + :functions: folio_mapping diff --git a/include/linux/mm.h b/include/linux/mm.h index 63685d953ce0..bc5c38e1f780 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1755,19 +1755,6 @@ void page_address_init(void); extern void *page_rmapping(struct page *page); extern struct anon_vma *page_anon_vma(struct page *page); -extern struct address_space *page_mapping(struct page *page); - -extern struct address_space *__page_file_mapping(struct page *); - -static inline -struct address_space *page_file_mapping(struct page *page) -{ - if (unlikely(PageSwapCache(page))) - return __page_file_mapping(page); - - return page->mapping; -} - extern pgoff_t __page_file_index(struct page *page); /* @@ -1782,7 +1769,6 @@ static inline pgoff_t page_index(struct page *page) } bool page_mapped(struct page *page); -struct address_space *page_mapping(struct page *page); /* * Return true only if the page has been allocated with diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 3354117a1dae..0ca96a40fabe 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -162,14 +162,45 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) void release_pages(struct page **pages, int nr); +struct address_space *page_mapping(struct page *); +struct address_space *folio_mapping(struct folio *); +struct address_space *swapcache_mapping(struct folio *); + +/** + * folio_file_mapping - Find the mapping this folio belongs to. + * @folio: The folio. + * + * For folios which are in the page cache, return the mapping that this + * page belongs to. Folios in the swap cache return the mapping of the + * swap file or swap device where the data is stored. This is different + * from the mapping returned by folio_mapping(). The only reason to + * use it is if, like NFS, you return 0 from ->activate_swapfile. + * + * Do not call this for folios which aren't in the page cache or swap cache. + */ +static inline struct address_space *folio_file_mapping(struct folio *folio) +{ + if (unlikely(folio_test_swapcache(folio))) + return swapcache_mapping(folio); + + return folio->mapping; +} + +static inline struct address_space *page_file_mapping(struct page *page) +{ + return folio_file_mapping(page_folio(page)); +} + /* * For file cache pages, return the address_space, otherwise return NULL */ static inline struct address_space *page_mapping_file(struct page *page) { - if (unlikely(PageSwapCache(page))) + struct folio *folio = page_folio(page); + + if (unlikely(folio_test_swapcache(folio))) return NULL; - return page_mapping(page); + return folio_mapping(folio); } static inline bool page_cache_add_speculative(struct page *page, int count) diff --git a/include/linux/swap.h b/include/linux/swap.h index ba52f3a3478e..85607c6c0cba 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -320,6 +320,12 @@ struct vma_swap_readahead { #endif }; +static inline swp_entry_t folio_swap_entry(struct folio *folio) +{ + swp_entry_t entry = { .val = page_private(&folio->page) }; + return entry; +} + /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); diff --git a/mm/Makefile b/mm/Makefile index fc60a40ce954..d6c0042e3aa0 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -46,7 +46,7 @@ mmu-$(CONFIG_MMU) += process_vm_access.o endif obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ - maccess.o page-writeback.o \ + maccess.o page-writeback.o folio-compat.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o percpu.o slab_common.o \ diff --git a/mm/folio-compat.c b/mm/folio-compat.c new file mode 100644 index 000000000000..5e107aa30a62 --- /dev/null +++ b/mm/folio-compat.c @@ -0,0 +1,13 @@ +/* + * Compatibility functions which bloat the callers too much to make inline. + * All of the callers of these functions should be converted to use folios + * eventually. + */ + +#include + +struct address_space *page_mapping(struct page *page) +{ + return folio_mapping(page_folio(page)); +} +EXPORT_SYMBOL(page_mapping); diff --git a/mm/swapfile.c b/mm/swapfile.c index 22d10f713848..e3dcaeecc50f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3534,13 +3534,13 @@ struct swap_info_struct *page_swap_info(struct page *page) } /* - * out-of-line __page_file_ methods to avoid include hell. + * out-of-line methods to avoid include hell. */ -struct address_space *__page_file_mapping(struct page *page) +struct address_space *swapcache_mapping(struct folio *folio) { - return page_swap_info(page)->swap_file->f_mapping; + return page_swap_info(&folio->page)->swap_file->f_mapping; } -EXPORT_SYMBOL_GPL(__page_file_mapping); +EXPORT_SYMBOL_GPL(swapcache_mapping); pgoff_t __page_file_index(struct page *page) { diff --git a/mm/util.c b/mm/util.c index bacabe446906..6c1fe9bee30a 100644 --- a/mm/util.c +++ b/mm/util.c @@ -705,30 +705,36 @@ struct anon_vma *page_anon_vma(struct page *page) return __page_rmapping(page); } -struct address_space *page_mapping(struct page *page) +/** + * folio_mapping - Find the mapping where this folio is stored. + * @folio: The folio. + * + * For folios which are in the page cache, return the mapping that this + * page belongs to. Folios in the swap cache return the swap mapping + * this page is stored in (which is different from the mapping for the + * swap file or swap device where the data is stored). + * + * You can call this for folios which aren't in the swap cache or page + * cache and it will return NULL. + */ +struct address_space *folio_mapping(struct folio *folio) { struct address_space *mapping; - page = compound_head(page); - /* This happens if someone calls flush_dcache_page on slab page */ - if (unlikely(PageSlab(page))) + if (unlikely(folio_test_slab(folio))) return NULL; - if (unlikely(PageSwapCache(page))) { - swp_entry_t entry; - - entry.val = page_private(page); - return swap_address_space(entry); - } + if (unlikely(folio_test_swapcache(folio))) + return swap_address_space(folio_swap_entry(folio)); - mapping = page->mapping; + mapping = folio->mapping; if ((unsigned long)mapping & PAGE_MAPPING_ANON) return NULL; return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS); } -EXPORT_SYMBOL(page_mapping); +EXPORT_SYMBOL(folio_mapping); /* Slow path of page_mapcount() for compound pages */ int __page_mapcount(struct page *page) -- cgit v1.2.3 From 4e1364286d0a2dd384bceb6db6185b99c0e2c0bc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 7 Dec 2020 15:44:35 -0500 Subject: mm/filemap: Add folio_unlock() Convert unlock_page() to call folio_unlock(). By using a folio we avoid a call to compound_head(). This shortens the function from 39 bytes to 25 and removes 4 instructions on x86-64. Because we still have unlock_page(), it's a net increase of 16 bytes of text for the kernel as a whole, but any path that uses folio_unlock() will execute 4 fewer instructions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport Acked-by: Vlastimil Babka --- include/linux/pagemap.h | 3 ++- mm/filemap.c | 29 ++++++++++++----------------- mm/folio-compat.c | 6 ++++++ 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0ca96a40fabe..8087921641a3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -657,7 +657,8 @@ extern int __lock_page_killable(struct page *page); extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags); -extern void unlock_page(struct page *page); +void unlock_page(struct page *page); +void folio_unlock(struct folio *folio); /* * Return true if the page was successfully locked diff --git a/mm/filemap.c b/mm/filemap.c index 9fcc3d94cfcd..191d7d39e838 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1490,29 +1490,24 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem #endif /** - * unlock_page - unlock a locked page - * @page: the page + * folio_unlock - Unlock a locked folio. + * @folio: The folio. * - * Unlocks the page and wakes up sleepers in wait_on_page_locked(). - * Also wakes sleepers in wait_on_page_writeback() because the wakeup - * mechanism between PageLocked pages and PageWriteback pages is shared. - * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. + * Unlocks the folio and wakes up any thread sleeping on the page lock. * - * Note that this depends on PG_waiters being the sign bit in the byte - * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to - * clear the PG_locked bit and test PG_waiters at the same time fairly - * portably (architectures that do LL/SC can test any bit, while x86 can - * test the sign bit). + * Context: May be called from interrupt or process context. May not be + * called from NMI context. */ -void unlock_page(struct page *page) +void folio_unlock(struct folio *folio) { + /* Bit 7 allows x86 to check the byte's sign bit */ BUILD_BUG_ON(PG_waiters != 7); - page = compound_head(page); - VM_BUG_ON_PAGE(!PageLocked(page), page); - if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) - wake_up_page_bit(page, PG_locked); + BUILD_BUG_ON(PG_locked > 7); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0))) + wake_up_page_bit(&folio->page, PG_locked); } -EXPORT_SYMBOL(unlock_page); +EXPORT_SYMBOL(folio_unlock); /** * end_page_private_2 - Clear PG_private_2 and release any waiters diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 5e107aa30a62..91b3d00a92f7 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -11,3 +11,9 @@ struct address_space *page_mapping(struct page *page) return folio_mapping(page_folio(page)); } EXPORT_SYMBOL(page_mapping); + +void unlock_page(struct page *page) +{ + return folio_unlock(page_folio(page)); +} +EXPORT_SYMBOL(unlock_page); -- cgit v1.2.3 From 7c23c782d5d57df97509ed2fc17f9b9490f18f1b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 1 Mar 2021 19:38:25 -0500 Subject: mm/filemap: Add folio_lock() This is like lock_page() but for use by callers who know they have a folio. Convert __lock_page() to be __folio_lock(). This saves one call to compound_head() per contended call to lock_page(). Saves 455 bytes of text; mostly from improved register allocation and inlining decisions. __folio_lock is 59 bytes while __lock_page was 79. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 24 +++++++++++++++++++----- mm/filemap.c | 29 +++++++++++++++-------------- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8087921641a3..6481a431ea40 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -652,7 +652,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, return true; } -extern void __lock_page(struct page *page); +void __folio_lock(struct folio *folio); extern int __lock_page_killable(struct page *page); extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, @@ -660,13 +660,24 @@ extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, void unlock_page(struct page *page); void folio_unlock(struct folio *folio); +static inline bool folio_trylock(struct folio *folio) +{ + return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0))); +} + /* * Return true if the page was successfully locked */ static inline int trylock_page(struct page *page) { - page = compound_head(page); - return (likely(!test_and_set_bit_lock(PG_locked, &page->flags))); + return folio_trylock(page_folio(page)); +} + +static inline void folio_lock(struct folio *folio) +{ + might_sleep(); + if (!folio_trylock(folio)) + __folio_lock(folio); } /* @@ -674,9 +685,12 @@ static inline int trylock_page(struct page *page) */ static inline void lock_page(struct page *page) { + struct folio *folio; might_sleep(); - if (!trylock_page(page)) - __lock_page(page); + + folio = page_folio(page); + if (!folio_trylock(folio)) + __folio_lock(folio); } /* diff --git a/mm/filemap.c b/mm/filemap.c index 191d7d39e838..a1d3f67e1b49 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1242,7 +1242,7 @@ static void wake_up_page(struct page *page, int bit) */ enum behavior { EXCLUSIVE, /* Hold ref to page and take the bit when woken, like - * __lock_page() waiting on then setting PG_locked. + * __folio_lock() waiting on then setting PG_locked. */ SHARED, /* Hold ref to page and check the bit when woken, like * wait_on_page_writeback() waiting on PG_writeback. @@ -1633,17 +1633,16 @@ void page_endio(struct page *page, bool is_write, int err) EXPORT_SYMBOL_GPL(page_endio); /** - * __lock_page - get a lock on the page, assuming we need to sleep to get it - * @__page: the page to lock + * __folio_lock - Get a lock on the folio, assuming we need to sleep to get it. + * @folio: The folio to lock */ -void __lock_page(struct page *__page) +void __folio_lock(struct folio *folio) { - struct page *page = compound_head(__page); - wait_queue_head_t *q = page_waitqueue(page); - wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, + wait_queue_head_t *q = page_waitqueue(&folio->page); + wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_UNINTERRUPTIBLE, EXCLUSIVE); } -EXPORT_SYMBOL(__lock_page); +EXPORT_SYMBOL(__folio_lock); int __lock_page_killable(struct page *__page) { @@ -1718,10 +1717,10 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, return 0; } } else { - __lock_page(page); + __folio_lock(page_folio(page)); } - return 1; + return 1; } /** @@ -2915,7 +2914,9 @@ unlock: static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, struct file **fpin) { - if (trylock_page(page)) + struct folio *folio = page_folio(page); + + if (folio_trylock(folio)) return 1; /* @@ -2928,7 +2929,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, *fpin = maybe_unlock_mmap_for_io(vmf, *fpin); if (vmf->flags & FAULT_FLAG_KILLABLE) { - if (__lock_page_killable(page)) { + if (__lock_page_killable(&folio->page)) { /* * We didn't have the right flags to drop the mmap_lock, * but all fault_handlers only check for fatal signals @@ -2940,11 +2941,11 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, return 0; } } else - __lock_page(page); + __folio_lock(folio); + return 1; } - /* * Synchronous readahead happens when we don't even find a page in the page * cache at all. We don't want to perform IO under the mmap sem, so if we have -- cgit v1.2.3 From af7f29d9e1a7bda1429923327421367b69aa2e70 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Dec 2020 00:07:31 -0500 Subject: mm/filemap: Add folio_lock_killable() This is like lock_page_killable() but for use by callers who know they have a folio. Convert __lock_page_killable() to be __folio_lock_killable(). This saves one call to compound_head() per contended call to lock_page_killable(). __folio_lock_killable() is 19 bytes smaller than __lock_page_killable() was. filemap_fault() shrinks by 74 bytes and __lock_page_or_retry() shrinks by 71 bytes. That's a total of 164 bytes of text saved. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Acked-by: Mike Rapoport Reviewed-by: David Howells --- include/linux/pagemap.h | 15 ++++++++++----- mm/filemap.c | 17 +++++++++-------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 6481a431ea40..cf0ebd8c9e86 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -653,7 +653,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, } void __folio_lock(struct folio *folio); -extern int __lock_page_killable(struct page *page); +int __folio_lock_killable(struct folio *folio); extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags); @@ -693,6 +693,14 @@ static inline void lock_page(struct page *page) __folio_lock(folio); } +static inline int folio_lock_killable(struct folio *folio) +{ + might_sleep(); + if (!folio_trylock(folio)) + return __folio_lock_killable(folio); + return 0; +} + /* * lock_page_killable is like lock_page but can be interrupted by fatal * signals. It returns 0 if it locked the page and -EINTR if it was @@ -700,10 +708,7 @@ static inline void lock_page(struct page *page) */ static inline int lock_page_killable(struct page *page) { - might_sleep(); - if (!trylock_page(page)) - return __lock_page_killable(page); - return 0; + return folio_lock_killable(page_folio(page)); } /* diff --git a/mm/filemap.c b/mm/filemap.c index a1d3f67e1b49..17803f785a34 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1644,14 +1644,13 @@ void __folio_lock(struct folio *folio) } EXPORT_SYMBOL(__folio_lock); -int __lock_page_killable(struct page *__page) +int __folio_lock_killable(struct folio *folio) { - struct page *page = compound_head(__page); - wait_queue_head_t *q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, + wait_queue_head_t *q = page_waitqueue(&folio->page); + return wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_KILLABLE, EXCLUSIVE); } -EXPORT_SYMBOL_GPL(__lock_page_killable); +EXPORT_SYMBOL_GPL(__folio_lock_killable); int __lock_page_async(struct page *page, struct wait_page_queue *wait) { @@ -1693,6 +1692,8 @@ int __lock_page_async(struct page *page, struct wait_page_queue *wait) int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { + struct folio *folio = page_folio(page); + if (fault_flag_allow_retry_first(flags)) { /* * CAUTION! In this case, mmap_lock is not released @@ -1711,13 +1712,13 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, if (flags & FAULT_FLAG_KILLABLE) { int ret; - ret = __lock_page_killable(page); + ret = __folio_lock_killable(folio); if (ret) { mmap_read_unlock(mm); return 0; } } else { - __folio_lock(page_folio(page)); + __folio_lock(folio); } return 1; @@ -2929,7 +2930,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, *fpin = maybe_unlock_mmap_for_io(vmf, *fpin); if (vmf->flags & FAULT_FLAG_KILLABLE) { - if (__lock_page_killable(&folio->page)) { + if (__folio_lock_killable(folio)) { /* * We didn't have the right flags to drop the mmap_lock, * but all fault_handlers only check for fatal signals -- cgit v1.2.3 From ffdc8dabf20b1b894eda63e7ec9ca15ab0b7292c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 30 Dec 2020 17:58:40 -0500 Subject: mm/filemap: Add __folio_lock_async() There aren't any actual callers of lock_page_async(), so remove it. Convert filemap_update_page() to call __folio_lock_async(). __folio_lock_async() is 21 bytes smaller than __lock_page_async(), but the real savings come from using a folio in filemap_update_page(), shrinking it from 515 bytes to 404 bytes, saving 110 bytes. The text shrinks by 132 bytes in total. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- fs/io_uring.c | 2 +- include/linux/pagemap.h | 17 ----------------- mm/filemap.c | 27 ++++++++++++++------------- 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 82f867983bb3..2c913698e428 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3364,7 +3364,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) } /* - * This is our waitqueue callback handler, registered through lock_page_async() + * This is our waitqueue callback handler, registered through __folio_lock_async() * when we initially tried to do the IO with the iocb armed our waitqueue. * This gets called when the page is unlocked, and we generally expect that to * happen when the page IO is completed and the page is now uptodate. This will diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cf0ebd8c9e86..5b5e8bd0b3fb 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -654,7 +654,6 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, void __folio_lock(struct folio *folio); int __folio_lock_killable(struct folio *folio); -extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags); void unlock_page(struct page *page); @@ -711,22 +710,6 @@ static inline int lock_page_killable(struct page *page) return folio_lock_killable(page_folio(page)); } -/* - * lock_page_async - Lock the page, unless this would block. If the page - * is already locked, then queue a callback when the page becomes unlocked. - * This callback can then retry the operation. - * - * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page - * was already locked and the callback defined in 'wait' was queued. - */ -static inline int lock_page_async(struct page *page, - struct wait_page_queue *wait) -{ - if (!trylock_page(page)) - return __lock_page_async(page, wait); - return 0; -} - /* * lock_page_or_retry - Lock the page, unless this would block and the * caller indicated that it can handle a retry. diff --git a/mm/filemap.c b/mm/filemap.c index 17803f785a34..655145d27fff 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1652,18 +1652,18 @@ int __folio_lock_killable(struct folio *folio) } EXPORT_SYMBOL_GPL(__folio_lock_killable); -int __lock_page_async(struct page *page, struct wait_page_queue *wait) +static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) { - struct wait_queue_head *q = page_waitqueue(page); + struct wait_queue_head *q = page_waitqueue(&folio->page); int ret = 0; - wait->page = page; + wait->page = &folio->page; wait->bit_nr = PG_locked; spin_lock_irq(&q->lock); __add_wait_queue_entry_tail(q, &wait->wait); - SetPageWaiters(page); - ret = !trylock_page(page); + folio_set_waiters(folio); + ret = !folio_trylock(folio); /* * If we were successful now, we know we're still on the * waitqueue as we're still under the lock. This means it's @@ -2436,6 +2436,7 @@ static int filemap_update_page(struct kiocb *iocb, struct address_space *mapping, struct iov_iter *iter, struct page *page) { + struct folio *folio = page_folio(page); int error; if (iocb->ki_flags & IOCB_NOWAIT) { @@ -2445,40 +2446,40 @@ static int filemap_update_page(struct kiocb *iocb, filemap_invalidate_lock_shared(mapping); } - if (!trylock_page(page)) { + if (!folio_trylock(folio)) { error = -EAGAIN; if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) goto unlock_mapping; if (!(iocb->ki_flags & IOCB_WAITQ)) { filemap_invalidate_unlock_shared(mapping); - put_and_wait_on_page_locked(page, TASK_KILLABLE); + put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE); return AOP_TRUNCATED_PAGE; } - error = __lock_page_async(page, iocb->ki_waitq); + error = __folio_lock_async(folio, iocb->ki_waitq); if (error) goto unlock_mapping; } error = AOP_TRUNCATED_PAGE; - if (!page->mapping) + if (!folio->mapping) goto unlock; error = 0; - if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page)) + if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page)) goto unlock; error = -EAGAIN; if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) goto unlock; - error = filemap_read_page(iocb->ki_filp, mapping, page); + error = filemap_read_page(iocb->ki_filp, mapping, &folio->page); goto unlock_mapping; unlock: - unlock_page(page); + folio_unlock(folio); unlock_mapping: filemap_invalidate_unlock_shared(mapping); if (error == AOP_TRUNCATED_PAGE) - put_page(page); + folio_put(folio); return error; } -- cgit v1.2.3 From 6baa8d602e84d97a7541ed94ccaeb6a3f9763111 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 4 Mar 2021 10:21:02 -0500 Subject: mm/filemap: Add folio_wait_locked() Also add folio_wait_locked_killable(). Turn wait_on_page_locked() and wait_on_page_locked_killable() into wrappers. This eliminates a call to compound_head() from each call-site, reducing text size by 193 bytes for me. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 26 ++++++++++++++++++-------- mm/filemap.c | 4 ++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 5b5e8bd0b3fb..77dbb7f625af 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -732,23 +732,33 @@ extern void wait_on_page_bit(struct page *page, int bit_nr); extern int wait_on_page_bit_killable(struct page *page, int bit_nr); /* - * Wait for a page to be unlocked. + * Wait for a folio to be unlocked. * - * This must be called with the caller "holding" the page, - * ie with increased "page->count" so that the page won't + * This must be called with the caller "holding" the folio, + * ie with increased "page->count" so that the folio won't * go away during the wait.. */ +static inline void folio_wait_locked(struct folio *folio) +{ + if (folio_test_locked(folio)) + wait_on_page_bit(&folio->page, PG_locked); +} + +static inline int folio_wait_locked_killable(struct folio *folio) +{ + if (!folio_test_locked(folio)) + return 0; + return wait_on_page_bit_killable(&folio->page, PG_locked); +} + static inline void wait_on_page_locked(struct page *page) { - if (PageLocked(page)) - wait_on_page_bit(compound_head(page), PG_locked); + folio_wait_locked(page_folio(page)); } static inline int wait_on_page_locked_killable(struct page *page) { - if (!PageLocked(page)) - return 0; - return wait_on_page_bit_killable(compound_head(page), PG_locked); + return folio_wait_locked_killable(page_folio(page)); } int put_and_wait_on_page_locked(struct page *page, int state); diff --git a/mm/filemap.c b/mm/filemap.c index 655145d27fff..b17b58c29ed7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1704,9 +1704,9 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, mmap_read_unlock(mm); if (flags & FAULT_FLAG_KILLABLE) - wait_on_page_locked_killable(page); + folio_wait_locked_killable(folio); else - wait_on_page_locked(page); + folio_wait_locked(folio); return 0; } if (flags & FAULT_FLAG_KILLABLE) { -- cgit v1.2.3 From 9138e47ed425246102317dbe452f7f0d4a54c4a2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 18 Mar 2021 21:39:45 -0400 Subject: mm/filemap: Add __folio_lock_or_retry() Convert __lock_page_or_retry() to __folio_lock_or_retry(). This actually saves 4 bytes in the only caller of lock_page_or_retry() (due to better register allocation) and saves the 14 byte cost of calling page_folio() in __folio_lock_or_retry() for a total saving of 18 bytes. Also use a bool for the return type. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Reviewed-by: William Kucharski Acked-by: Mike Rapoport Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/pagemap.h | 11 +++++++---- mm/filemap.c | 22 ++++++++++------------ mm/memory.c | 8 ++++---- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 77dbb7f625af..7cf140f98910 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -654,7 +654,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, void __folio_lock(struct folio *folio); int __folio_lock_killable(struct folio *folio); -extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, +bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, unsigned int flags); void unlock_page(struct page *page); void folio_unlock(struct folio *folio); @@ -715,13 +715,16 @@ static inline int lock_page_killable(struct page *page) * caller indicated that it can handle a retry. * * Return value and mmap_lock implications depend on flags; see - * __lock_page_or_retry(). + * __folio_lock_or_retry(). */ -static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, +static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { + struct folio *folio; might_sleep(); - return trylock_page(page) || __lock_page_or_retry(page, mm, flags); + + folio = page_folio(page); + return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags); } /* diff --git a/mm/filemap.c b/mm/filemap.c index b17b58c29ed7..706d121ca9cc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1680,48 +1680,46 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) /* * Return values: - * 1 - page is locked; mmap_lock is still held. - * 0 - page is not locked. + * true - folio is locked; mmap_lock is still held. + * false - folio is not locked. * mmap_lock has been released (mmap_read_unlock(), unless flags had both * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in * which case mmap_lock is still held. * - * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1 - * with the page locked and the mmap_lock unperturbed. + * If neither ALLOW_RETRY nor KILLABLE are set, will always return true + * with the folio locked and the mmap_lock unperturbed. */ -int __lock_page_or_retry(struct page *page, struct mm_struct *mm, +bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, unsigned int flags) { - struct folio *folio = page_folio(page); - if (fault_flag_allow_retry_first(flags)) { /* * CAUTION! In this case, mmap_lock is not released * even though return 0. */ if (flags & FAULT_FLAG_RETRY_NOWAIT) - return 0; + return false; mmap_read_unlock(mm); if (flags & FAULT_FLAG_KILLABLE) folio_wait_locked_killable(folio); else folio_wait_locked(folio); - return 0; + return false; } if (flags & FAULT_FLAG_KILLABLE) { - int ret; + bool ret; ret = __folio_lock_killable(folio); if (ret) { mmap_read_unlock(mm); - return 0; + return false; } } else { __folio_lock(folio); } - return 1; + return true; } /** diff --git a/mm/memory.c b/mm/memory.c index adf9b9ef8277..269992ba3fa3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4258,7 +4258,7 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults). * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). + * return value. See filemap_fault() and __folio_lock_or_retry(). * If mmap_lock is released, vma may become invalid (for example * by other thread calling munmap()). */ @@ -4499,7 +4499,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) * concurrent faults). * * The mmap_lock may have been released depending on flags and our return value. - * See filemap_fault() and __lock_page_or_retry(). + * See filemap_fault() and __folio_lock_or_retry(). */ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { @@ -4603,7 +4603,7 @@ unlock: * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). + * return value. See filemap_fault() and __folio_lock_or_retry(). */ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -4759,7 +4759,7 @@ static inline void mm_account_fault(struct pt_regs *regs, * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). + * return value. See filemap_fault() and __folio_lock_or_retry(). */ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs) -- cgit v1.2.3 From 575ced1c8b0d3b578b933a68ce67ddaff3df9506 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Dec 2020 01:25:39 -0500 Subject: mm/swap: Add folio_rotate_reclaimable() Convert rotate_reclaimable_page() to folio_rotate_reclaimable(). This eliminates all five of the calls to compound_head() in this function, saving 75 bytes at the cost of adding 15 bytes to its one caller, end_page_writeback(). We also save 36 bytes from pagevec_move_tail_fn() due to using folios there. Net 96 bytes savings. Also move its declaration to mm/internal.h as it's only used by filemap.c. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Acked-by: Kirill A. Shutemov Acked-by: Mike Rapoport Reviewed-by: David Howells --- include/linux/swap.h | 1 - mm/filemap.c | 3 ++- mm/internal.h | 1 + mm/page_io.c | 4 ++-- mm/swap.c | 30 ++++++++++++++++-------------- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 85607c6c0cba..c7ecd3ad8e2e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -371,7 +371,6 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); -extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); diff --git a/mm/filemap.c b/mm/filemap.c index 706d121ca9cc..6a5bdb4f7c73 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1584,8 +1584,9 @@ void end_page_writeback(struct page *page) * ever page writeback. */ if (PageReclaim(page)) { + struct folio *folio = page_folio(page); ClearPageReclaim(page); - rotate_reclaimable_page(page); + folio_rotate_reclaimable(folio); } /* diff --git a/mm/internal.h b/mm/internal.h index cf3cb933eba3..1a84484f8650 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -35,6 +35,7 @@ void page_writeback_init(void); vm_fault_t do_swap_page(struct vm_fault *vmf); +void folio_rotate_reclaimable(struct folio *folio); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); diff --git a/mm/page_io.c b/mm/page_io.c index c493ce9ebcf5..d597bc6e6e45 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -38,7 +38,7 @@ void end_swap_bio_write(struct bio *bio) * Also print a dire warning that things will go BAD (tm) * very quickly. * - * Also clear PG_reclaim to avoid rotate_reclaimable_page() + * Also clear PG_reclaim to avoid folio_rotate_reclaimable() */ set_page_dirty(page); pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n", @@ -317,7 +317,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, * temporary failure if the system has limited * memory for allocating transmit buffers. * Mark the page dirty and avoid - * rotate_reclaimable_page but rate-limit the + * folio_rotate_reclaimable but rate-limit the * messages but do not flag PageError like * the normal direct-to-bio case as it could * be temporary. diff --git a/mm/swap.c b/mm/swap.c index af3cad4e5378..0edbcb9c8876 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -206,11 +206,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec) { - if (!PageUnevictable(page)) { - del_page_from_lru_list(page, lruvec); - ClearPageActive(page); - add_page_to_lru_list_tail(page, lruvec); - __count_vm_events(PGROTATED, thp_nr_pages(page)); + struct folio *folio = page_folio(page); + + if (!folio_test_unevictable(folio)) { + lruvec_del_folio(lruvec, folio); + folio_clear_active(folio); + lruvec_add_folio_tail(lruvec, folio); + __count_vm_events(PGROTATED, folio_nr_pages(folio)); } } @@ -227,23 +229,23 @@ static bool pagevec_add_and_need_flush(struct pagevec *pvec, struct page *page) } /* - * Writeback is about to end against a page which has been marked for immediate - * reclaim. If it still appears to be reclaimable, move it to the tail of the - * inactive list. + * Writeback is about to end against a folio which has been marked for + * immediate reclaim. If it still appears to be reclaimable, move it + * to the tail of the inactive list. * - * rotate_reclaimable_page() must disable IRQs, to prevent nasty races. + * folio_rotate_reclaimable() must disable IRQs, to prevent nasty races. */ -void rotate_reclaimable_page(struct page *page) +void folio_rotate_reclaimable(struct folio *folio) { - if (!PageLocked(page) && !PageDirty(page) && - !PageUnevictable(page) && PageLRU(page)) { + if (!folio_test_locked(folio) && !folio_test_dirty(folio) && + !folio_test_unevictable(folio) && folio_test_lru(folio)) { struct pagevec *pvec; unsigned long flags; - get_page(page); + folio_get(folio); local_lock_irqsave(&lru_rotate.lock, flags); pvec = this_cpu_ptr(&lru_rotate.pvec); - if (pagevec_add_and_need_flush(pvec, page)) + if (pagevec_add_and_need_flush(pvec, &folio->page)) pagevec_lru_move_fn(pvec, pagevec_move_tail_fn); local_unlock_irqrestore(&lru_rotate.lock, flags); } -- cgit v1.2.3 From 4268b48077e55a93959f368aa9d3103ede5d3f0f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Mar 2021 15:21:55 -0500 Subject: mm/filemap: Add folio_end_writeback() Add an end_page_writeback() wrapper function for users that are not yet converted to folios. folio_end_writeback() is less than half the size of end_page_writeback() at just 105 bytes compared to 228 bytes, due to removing all the compound_head() calls. The 30 byte wrapper function makes this a net saving of 93 bytes. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 3 ++- mm/filemap.c | 43 +++++++++++++++++++++---------------------- mm/folio-compat.c | 6 ++++++ 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7cf140f98910..d9dcd335cc18 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -767,7 +767,8 @@ static inline int wait_on_page_locked_killable(struct page *page) int put_and_wait_on_page_locked(struct page *page, int state); void wait_on_page_writeback(struct page *page); int wait_on_page_writeback_killable(struct page *page); -extern void end_page_writeback(struct page *page); +void end_page_writeback(struct page *page); +void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); void __set_page_dirty(struct page *, struct address_space *, int warn); diff --git a/mm/filemap.c b/mm/filemap.c index 6a5bdb4f7c73..4e41bfdb6555 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1230,11 +1230,11 @@ static void wake_up_page_bit(struct page *page, int bit_nr) spin_unlock_irqrestore(&q->lock, flags); } -static void wake_up_page(struct page *page, int bit) +static void folio_wake(struct folio *folio, int bit) { - if (!PageWaiters(page)) + if (!folio_test_waiters(folio)) return; - wake_up_page_bit(page, bit); + wake_up_page_bit(&folio->page, bit); } /* @@ -1571,39 +1571,38 @@ int wait_on_page_private_2_killable(struct page *page) EXPORT_SYMBOL(wait_on_page_private_2_killable); /** - * end_page_writeback - end writeback against a page - * @page: the page + * folio_end_writeback - End writeback against a folio. + * @folio: The folio. */ -void end_page_writeback(struct page *page) +void folio_end_writeback(struct folio *folio) { /* - * TestClearPageReclaim could be used here but it is an atomic - * operation and overkill in this particular case. Failing to - * shuffle a page marked for immediate reclaim is too mild to - * justify taking an atomic operation penalty at the end of - * ever page writeback. + * folio_test_clear_reclaim() could be used here but it is an + * atomic operation and overkill in this particular case. Failing + * to shuffle a folio marked for immediate reclaim is too mild + * a gain to justify taking an atomic operation penalty at the + * end of every folio writeback. */ - if (PageReclaim(page)) { - struct folio *folio = page_folio(page); - ClearPageReclaim(page); + if (folio_test_reclaim(folio)) { + folio_clear_reclaim(folio); folio_rotate_reclaimable(folio); } /* - * Writeback does not hold a page reference of its own, relying + * Writeback does not hold a folio reference of its own, relying * on truncation to wait for the clearing of PG_writeback. - * But here we must make sure that the page is not freed and - * reused before the wake_up_page(). + * But here we must make sure that the folio is not freed and + * reused before the folio_wake(). */ - get_page(page); - if (!test_clear_page_writeback(page)) + folio_get(folio); + if (!test_clear_page_writeback(&folio->page)) BUG(); smp_mb__after_atomic(); - wake_up_page(page, PG_writeback); - put_page(page); + folio_wake(folio, PG_writeback); + folio_put(folio); } -EXPORT_SYMBOL(end_page_writeback); +EXPORT_SYMBOL(folio_end_writeback); /* * After completing I/O on a page, call this routine to update the page diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 91b3d00a92f7..526843d03d58 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -17,3 +17,9 @@ void unlock_page(struct page *page) return folio_unlock(page_folio(page)); } EXPORT_SYMBOL(unlock_page); + +void end_page_writeback(struct page *page) +{ + return folio_end_writeback(page_folio(page)); +} +EXPORT_SYMBOL(end_page_writeback); -- cgit v1.2.3 From 490e016f229a79dc7551e7f0e989d2304416c189 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 4 Mar 2021 11:09:17 -0500 Subject: mm/writeback: Add folio_wait_writeback() wait_on_page_writeback_killable() only has one caller, so convert it to call folio_wait_writeback_killable(). For the wait_on_page_writeback() callers, add a compatibility wrapper around folio_wait_writeback(). Turning PageWriteback() into folio_test_writeback() eliminates a call to compound_head() which saves 8 bytes and 15 bytes in the two functions. Unfortunately, that is more than offset by adding the wait_on_page_writeback compatibility wrapper for a net increase in text of 7 bytes. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Acked-by: Mike Rapoport Reviewed-by: David Howells --- fs/afs/write.c | 9 +++++---- include/linux/pagemap.h | 3 ++- mm/folio-compat.c | 6 ++++++ mm/page-writeback.c | 48 +++++++++++++++++++++++++++++++++--------------- 4 files changed, 46 insertions(+), 20 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 2dfe3b3a53d6..5103328e528d 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -861,7 +861,8 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) { - struct page *page = thp_head(vmf->page); + struct folio *folio = page_folio(vmf->page); + struct page *page = &folio->page; struct file *file = vmf->vma->vm_file; struct inode *inode = file_inode(file); struct afs_vnode *vnode = AFS_FS_I(inode); @@ -884,7 +885,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) goto out; #endif - if (wait_on_page_writeback_killable(page)) + if (folio_wait_writeback_killable(folio)) goto out; if (lock_page_killable(page) < 0) @@ -894,8 +895,8 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) * details the portion of the page we need to write back and we might * need to redirty the page if there's a problem. */ - if (wait_on_page_writeback_killable(page) < 0) { - unlock_page(page); + if (folio_wait_writeback_killable(folio) < 0) { + folio_unlock(folio); goto out; } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9dcd335cc18..e6013c063986 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -766,7 +766,8 @@ static inline int wait_on_page_locked_killable(struct page *page) int put_and_wait_on_page_locked(struct page *page, int state); void wait_on_page_writeback(struct page *page); -int wait_on_page_writeback_killable(struct page *page); +void folio_wait_writeback(struct folio *folio); +int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 526843d03d58..41275dac7a92 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -23,3 +23,9 @@ void end_page_writeback(struct page *page) return folio_end_writeback(page_folio(page)); } EXPORT_SYMBOL(end_page_writeback); + +void wait_on_page_writeback(struct page *page) +{ + return folio_wait_writeback(page_folio(page)); +} +EXPORT_SYMBOL_GPL(wait_on_page_writeback); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4812a17b288c..c7f40c954217 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2873,33 +2873,51 @@ int __test_set_page_writeback(struct page *page, bool keep_write) } EXPORT_SYMBOL(__test_set_page_writeback); -/* - * Wait for a page to complete writeback +/** + * folio_wait_writeback - Wait for a folio to finish writeback. + * @folio: The folio to wait for. + * + * If the folio is currently being written back to storage, wait for the + * I/O to complete. + * + * Context: Sleeps. Must be called in process context and with + * no spinlocks held. Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished. */ -void wait_on_page_writeback(struct page *page) +void folio_wait_writeback(struct folio *folio) { - while (PageWriteback(page)) { - trace_wait_on_page_writeback(page, page_mapping(page)); - wait_on_page_bit(page, PG_writeback); + while (folio_test_writeback(folio)) { + trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); + wait_on_page_bit(&folio->page, PG_writeback); } } -EXPORT_SYMBOL_GPL(wait_on_page_writeback); +EXPORT_SYMBOL_GPL(folio_wait_writeback); -/* - * Wait for a page to complete writeback. Returns -EINTR if we get a - * fatal signal while waiting. +/** + * folio_wait_writeback_killable - Wait for a folio to finish writeback. + * @folio: The folio to wait for. + * + * If the folio is currently being written back to storage, wait for the + * I/O to complete or a fatal signal to arrive. + * + * Context: Sleeps. Must be called in process context and with + * no spinlocks held. Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished. + * Return: 0 on success, -EINTR if we get a fatal signal while waiting. */ -int wait_on_page_writeback_killable(struct page *page) +int folio_wait_writeback_killable(struct folio *folio) { - while (PageWriteback(page)) { - trace_wait_on_page_writeback(page, page_mapping(page)); - if (wait_on_page_bit_killable(page, PG_writeback)) + while (folio_test_writeback(folio)) { + trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); + if (wait_on_page_bit_killable(&folio->page, PG_writeback)) return -EINTR; } return 0; } -EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable); +EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); /** * wait_for_stable_page() - wait for writeback to finish, if necessary. -- cgit v1.2.3 From a49d0c507759214a7cfd26555382c314db486792 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 4 Mar 2021 11:25:25 -0500 Subject: mm/writeback: Add folio_wait_stable() Move wait_for_stable_page() into the folio compatibility file. folio_wait_stable() avoids a call to compound_head() and is 14 bytes smaller than wait_for_stable_page() was. The net text size grows by 16 bytes as a result of this patch. We can also remove thp_head() as this was the last user. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells --- include/linux/huge_mm.h | 15 --------------- include/linux/pagemap.h | 1 + mm/folio-compat.c | 6 ++++++ mm/page-writeback.c | 24 ++++++++++++++---------- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f123e15d966e..f280f33ff223 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -250,15 +250,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, return NULL; } -/** - * thp_head - Head page of a transparent huge page. - * @page: Any page (tail, head or regular) found in the page cache. - */ -static inline struct page *thp_head(struct page *page) -{ - return compound_head(page); -} - /** * thp_order - Order of a transparent huge page. * @page: Head page of a transparent huge page. @@ -336,12 +327,6 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) -static inline struct page *thp_head(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return page; -} - static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e6013c063986..ee39ad7b42f1 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -771,6 +771,7 @@ int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); +void folio_wait_stable(struct folio *folio); void __set_page_dirty(struct page *, struct address_space *, int warn); int __set_page_dirty_nobuffers(struct page *page); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 41275dac7a92..3c83f03b80d7 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -29,3 +29,9 @@ void wait_on_page_writeback(struct page *page) return folio_wait_writeback(page_folio(page)); } EXPORT_SYMBOL_GPL(wait_on_page_writeback); + +void wait_for_stable_page(struct page *page) +{ + return folio_wait_stable(page_folio(page)); +} +EXPORT_SYMBOL_GPL(wait_for_stable_page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c7f40c954217..0b0b7cd81a93 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2920,17 +2920,21 @@ int folio_wait_writeback_killable(struct folio *folio) EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); /** - * wait_for_stable_page() - wait for writeback to finish, if necessary. - * @page: The page to wait on. + * folio_wait_stable() - wait for writeback to finish, if necessary. + * @folio: The folio to wait on. * - * This function determines if the given page is related to a backing device - * that requires page contents to be held stable during writeback. If so, then - * it will wait for any pending writeback to complete. + * This function determines if the given folio is related to a backing + * device that requires folio contents to be held stable during writeback. + * If so, then it will wait for any pending writeback to complete. + * + * Context: Sleeps. Must be called in process context and with + * no spinlocks held. Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished. */ -void wait_for_stable_page(struct page *page) +void folio_wait_stable(struct folio *folio) { - page = thp_head(page); - if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) - wait_on_page_writeback(page); + if (folio->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) + folio_wait_writeback(folio); } -EXPORT_SYMBOL_GPL(wait_for_stable_page); +EXPORT_SYMBOL_GPL(folio_wait_stable); -- cgit v1.2.3 From 101c0bf67f50ca0e8b9da97b26f8dc7cb232b4d3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 4 Mar 2021 12:02:54 -0500 Subject: mm/filemap: Add folio_wait_bit() Rename wait_on_page_bit() to folio_wait_bit(). We must always wait on the folio, otherwise we won't be woken up due to the tail page hashing to a different bucket from the head page. This commit shrinks the kernel by 770 bytes, mostly due to moving the page waitqueue lookup into folio_wait_bit_common(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 10 +++---- mm/filemap.c | 77 +++++++++++++++++++++++-------------------------- mm/page-writeback.c | 4 +-- 3 files changed, 43 insertions(+), 48 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ee39ad7b42f1..2f481327dee8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -728,11 +728,11 @@ static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm, } /* - * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc., + * This is exported only for folio_wait_locked/folio_wait_writeback, etc., * and should not be used directly. */ -extern void wait_on_page_bit(struct page *page, int bit_nr); -extern int wait_on_page_bit_killable(struct page *page, int bit_nr); +void folio_wait_bit(struct folio *folio, int bit_nr); +int folio_wait_bit_killable(struct folio *folio, int bit_nr); /* * Wait for a folio to be unlocked. @@ -744,14 +744,14 @@ extern int wait_on_page_bit_killable(struct page *page, int bit_nr); static inline void folio_wait_locked(struct folio *folio) { if (folio_test_locked(folio)) - wait_on_page_bit(&folio->page, PG_locked); + folio_wait_bit(folio, PG_locked); } static inline int folio_wait_locked_killable(struct folio *folio) { if (!folio_test_locked(folio)) return 0; - return wait_on_page_bit_killable(&folio->page, PG_locked); + return folio_wait_bit_killable(folio, PG_locked); } static inline void wait_on_page_locked(struct page *page) diff --git a/mm/filemap.c b/mm/filemap.c index 4e41bfdb6555..13663f0cfd51 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1157,7 +1157,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, * * So update the flags atomically, and wake up the waiter * afterwards to avoid any races. This store-release pairs - * with the load-acquire in wait_on_page_bit_common(). + * with the load-acquire in folio_wait_bit_common(). */ smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN); wake_up_state(wait->private, mode); @@ -1238,7 +1238,7 @@ static void folio_wake(struct folio *folio, int bit) } /* - * A choice of three behaviors for wait_on_page_bit_common(): + * A choice of three behaviors for folio_wait_bit_common(): */ enum behavior { EXCLUSIVE, /* Hold ref to page and take the bit when woken, like @@ -1253,16 +1253,16 @@ enum behavior { }; /* - * Attempt to check (or get) the page bit, and mark us done + * Attempt to check (or get) the folio flag, and mark us done * if successful. */ -static inline bool trylock_page_bit_common(struct page *page, int bit_nr, +static inline bool folio_trylock_flag(struct folio *folio, int bit_nr, struct wait_queue_entry *wait) { if (wait->flags & WQ_FLAG_EXCLUSIVE) { - if (test_and_set_bit(bit_nr, &page->flags)) + if (test_and_set_bit(bit_nr, &folio->flags)) return false; - } else if (test_bit(bit_nr, &page->flags)) + } else if (test_bit(bit_nr, &folio->flags)) return false; wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE; @@ -1272,9 +1272,10 @@ static inline bool trylock_page_bit_common(struct page *page, int bit_nr, /* How many times do we accept lock stealing from under a waiter? */ int sysctl_page_lock_unfairness = 5; -static inline int wait_on_page_bit_common(wait_queue_head_t *q, - struct page *page, int bit_nr, int state, enum behavior behavior) +static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, + int state, enum behavior behavior) { + wait_queue_head_t *q = page_waitqueue(&folio->page); int unfairness = sysctl_page_lock_unfairness; struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; @@ -1283,8 +1284,8 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, unsigned long pflags; if (bit_nr == PG_locked && - !PageUptodate(page) && PageWorkingset(page)) { - if (!PageSwapBacked(page)) { + !folio_test_uptodate(folio) && folio_test_workingset(folio)) { + if (!folio_test_swapbacked(folio)) { delayacct_thrashing_start(); delayacct = true; } @@ -1294,7 +1295,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, init_wait(wait); wait->func = wake_page_function; - wait_page.page = page; + wait_page.page = &folio->page; wait_page.bit_nr = bit_nr; repeat: @@ -1309,7 +1310,7 @@ repeat: * Do one last check whether we can get the * page bit synchronously. * - * Do the SetPageWaiters() marking before that + * Do the folio_set_waiters() marking before that * to let any waker we _just_ missed know they * need to wake us up (otherwise they'll never * even go to the slow case that looks at the @@ -1320,8 +1321,8 @@ repeat: * lock to avoid races. */ spin_lock_irq(&q->lock); - SetPageWaiters(page); - if (!trylock_page_bit_common(page, bit_nr, wait)) + folio_set_waiters(folio); + if (!folio_trylock_flag(folio, bit_nr, wait)) __add_wait_queue_entry_tail(q, wait); spin_unlock_irq(&q->lock); @@ -1331,10 +1332,10 @@ repeat: * see whether the page bit testing has already * been done by the wake function. * - * We can drop our reference to the page. + * We can drop our reference to the folio. */ if (behavior == DROP) - put_page(page); + folio_put(folio); /* * Note that until the "finish_wait()", or until @@ -1371,7 +1372,7 @@ repeat: * * And if that fails, we'll have to retry this all. */ - if (unlikely(test_and_set_bit(bit_nr, &page->flags))) + if (unlikely(test_and_set_bit(bit_nr, folio_flags(folio, 0)))) goto repeat; wait->flags |= WQ_FLAG_DONE; @@ -1380,7 +1381,7 @@ repeat: /* * If a signal happened, this 'finish_wait()' may remove the last - * waiter from the wait-queues, but the PageWaiters bit will remain + * waiter from the wait-queues, but the folio waiters bit will remain * set. That's ok. The next wakeup will take care of it, and trying * to do it here would be difficult and prone to races. */ @@ -1411,19 +1412,17 @@ repeat: return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR; } -void wait_on_page_bit(struct page *page, int bit_nr) +void folio_wait_bit(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(page); - wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); + folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); } -EXPORT_SYMBOL(wait_on_page_bit); +EXPORT_SYMBOL(folio_wait_bit); -int wait_on_page_bit_killable(struct page *page, int bit_nr) +int folio_wait_bit_killable(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED); + return folio_wait_bit_common(folio, bit_nr, TASK_KILLABLE, SHARED); } -EXPORT_SYMBOL(wait_on_page_bit_killable); +EXPORT_SYMBOL(folio_wait_bit_killable); /** * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked @@ -1440,11 +1439,8 @@ EXPORT_SYMBOL(wait_on_page_bit_killable); */ int put_and_wait_on_page_locked(struct page *page, int state) { - wait_queue_head_t *q; - - page = compound_head(page); - q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, PG_locked, state, DROP); + return folio_wait_bit_common(page_folio(page), PG_locked, state, + DROP); } /** @@ -1538,9 +1534,10 @@ EXPORT_SYMBOL(end_page_private_2); */ void wait_on_page_private_2(struct page *page) { - page = compound_head(page); - while (PagePrivate2(page)) - wait_on_page_bit(page, PG_private_2); + struct folio *folio = page_folio(page); + + while (folio_test_private_2(folio)) + folio_wait_bit(folio, PG_private_2); } EXPORT_SYMBOL(wait_on_page_private_2); @@ -1557,11 +1554,11 @@ EXPORT_SYMBOL(wait_on_page_private_2); */ int wait_on_page_private_2_killable(struct page *page) { + struct folio *folio = page_folio(page); int ret = 0; - page = compound_head(page); - while (PagePrivate2(page)) { - ret = wait_on_page_bit_killable(page, PG_private_2); + while (folio_test_private_2(folio)) { + ret = folio_wait_bit_killable(folio, PG_private_2); if (ret < 0) break; } @@ -1638,16 +1635,14 @@ EXPORT_SYMBOL_GPL(page_endio); */ void __folio_lock(struct folio *folio) { - wait_queue_head_t *q = page_waitqueue(&folio->page); - wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_UNINTERRUPTIBLE, + folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE, EXCLUSIVE); } EXPORT_SYMBOL(__folio_lock); int __folio_lock_killable(struct folio *folio) { - wait_queue_head_t *q = page_waitqueue(&folio->page); - return wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_KILLABLE, + return folio_wait_bit_common(folio, PG_locked, TASK_KILLABLE, EXCLUSIVE); } EXPORT_SYMBOL_GPL(__folio_lock_killable); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0b0b7cd81a93..1d8f2ee2e065 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2889,7 +2889,7 @@ void folio_wait_writeback(struct folio *folio) { while (folio_test_writeback(folio)) { trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); - wait_on_page_bit(&folio->page, PG_writeback); + folio_wait_bit(folio, PG_writeback); } } EXPORT_SYMBOL_GPL(folio_wait_writeback); @@ -2911,7 +2911,7 @@ int folio_wait_writeback_killable(struct folio *folio) { while (folio_test_writeback(folio)) { trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); - if (wait_on_page_bit_killable(&folio->page, PG_writeback)) + if (folio_wait_bit_killable(folio, PG_writeback)) return -EINTR; } -- cgit v1.2.3 From 6974d7c977d77a9d51c8bc712425bfa1e552493e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Jan 2021 17:14:48 -0500 Subject: mm/filemap: Add folio_wake_bit() Convert wake_up_page_bit() to folio_wake_bit(). All callers have a folio, so use it directly. Saves 66 bytes of text in end_page_private_2(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- mm/filemap.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 13663f0cfd51..a896348ff4de 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1176,14 +1176,14 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, return (flags & WQ_FLAG_EXCLUSIVE) != 0; } -static void wake_up_page_bit(struct page *page, int bit_nr) +static void folio_wake_bit(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(page); + wait_queue_head_t *q = page_waitqueue(&folio->page); struct wait_page_key key; unsigned long flags; wait_queue_entry_t bookmark; - key.page = page; + key.page = &folio->page; key.bit_nr = bit_nr; key.page_match = 0; @@ -1218,7 +1218,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr) * page waiters. */ if (!waitqueue_active(q) || !key.page_match) { - ClearPageWaiters(page); + folio_clear_waiters(folio); /* * It's possible to miss clearing Waiters here, when we woke * our page waiters, but the hashed waitqueue has waiters for @@ -1234,7 +1234,7 @@ static void folio_wake(struct folio *folio, int bit) { if (!folio_test_waiters(folio)) return; - wake_up_page_bit(&folio->page, bit); + folio_wake_bit(folio, bit); } /* @@ -1501,7 +1501,7 @@ void folio_unlock(struct folio *folio) BUILD_BUG_ON(PG_locked > 7); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0))) - wake_up_page_bit(&folio->page, PG_locked); + folio_wake_bit(folio, PG_locked); } EXPORT_SYMBOL(folio_unlock); @@ -1518,11 +1518,12 @@ EXPORT_SYMBOL(folio_unlock); */ void end_page_private_2(struct page *page) { - page = compound_head(page); - VM_BUG_ON_PAGE(!PagePrivate2(page), page); - clear_bit_unlock(PG_private_2, &page->flags); - wake_up_page_bit(page, PG_private_2); - put_page(page); + struct folio *folio = page_folio(page); + + VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio); + clear_bit_unlock(PG_private_2, folio_flags(folio, 0)); + folio_wake_bit(folio, PG_private_2); + folio_put(folio); } EXPORT_SYMBOL(end_page_private_2); -- cgit v1.2.3 From df4d4f12739495332e0d1f916ef4270f7d25d207 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 16 Jan 2021 11:22:14 -0500 Subject: mm/filemap: Convert page wait queues to be folios Reinforce that page flags are actually in the head page by changing the type from page to folio. Increases the size of cachefiles by two bytes, but the kernel core is unchanged in size. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells --- fs/cachefiles/rdwr.c | 16 ++++++++-------- include/linux/pagemap.h | 8 ++++---- mm/filemap.c | 38 +++++++++++++++++++------------------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 8ffc40e84a59..fcf4f3b72923 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -25,20 +25,20 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, struct cachefiles_object *object; struct fscache_retrieval *op = monitor->op; struct wait_page_key *key = _key; - struct page *page = wait->private; + struct folio *folio = wait->private; ASSERT(key); _enter("{%lu},%u,%d,{%p,%u}", monitor->netfs_page->index, mode, sync, - key->page, key->bit_nr); + key->folio, key->bit_nr); - if (key->page != page || key->bit_nr != PG_locked) + if (key->folio != folio || key->bit_nr != PG_locked) return 0; - _debug("--- monitor %p %lx ---", page, page->flags); + _debug("--- monitor %p %lx ---", folio, folio->flags); - if (!PageUptodate(page) && !PageError(page)) { + if (!folio_test_uptodate(folio) && !folio_test_error(folio)) { /* unlocked, not uptodate and not erronous? */ _debug("page probably truncated"); } @@ -107,7 +107,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, put_page(backpage2); INIT_LIST_HEAD(&monitor->op_link); - add_page_wait_queue(backpage, &monitor->monitor); + folio_add_wait_queue(page_folio(backpage), &monitor->monitor); if (trylock_page(backpage)) { ret = -EIO; @@ -294,7 +294,7 @@ monitor_backing_page: get_page(backpage); monitor->back_page = backpage; monitor->monitor.private = backpage; - add_page_wait_queue(backpage, &monitor->monitor); + folio_add_wait_queue(page_folio(backpage), &monitor->monitor); monitor = NULL; /* but the page may have been read before the monitor was installed, so @@ -548,7 +548,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, get_page(backpage); monitor->back_page = backpage; monitor->monitor.private = backpage; - add_page_wait_queue(backpage, &monitor->monitor); + folio_add_wait_queue(page_folio(backpage), &monitor->monitor); monitor = NULL; /* but the page may have been read before the monitor was diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2f481327dee8..ebc62e9e453b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -628,13 +628,13 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, } struct wait_page_key { - struct page *page; + struct folio *folio; int bit_nr; int page_match; }; struct wait_page_queue { - struct page *page; + struct folio *folio; int bit_nr; wait_queue_entry_t wait; }; @@ -642,7 +642,7 @@ struct wait_page_queue { static inline bool wake_page_match(struct wait_page_queue *wait_page, struct wait_page_key *key) { - if (wait_page->page != key->page) + if (wait_page->folio != key->folio) return false; key->page_match = 1; @@ -802,7 +802,7 @@ int wait_on_page_private_2_killable(struct page *page); /* * Add an arbitrary waiter to a page's wait queue */ -extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); +void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); /* * Fault everything in given userspace address range in. diff --git a/mm/filemap.c b/mm/filemap.c index a896348ff4de..1f8c00c2a4b7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1074,11 +1074,11 @@ EXPORT_SYMBOL(filemap_invalidate_unlock_two); */ #define PAGE_WAIT_TABLE_BITS 8 #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) -static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; +static wait_queue_head_t folio_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; -static wait_queue_head_t *page_waitqueue(struct page *page) +static wait_queue_head_t *folio_waitqueue(struct folio *folio) { - return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)]; + return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)]; } void __init pagecache_init(void) @@ -1086,7 +1086,7 @@ void __init pagecache_init(void) int i; for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) - init_waitqueue_head(&page_wait_table[i]); + init_waitqueue_head(&folio_wait_table[i]); page_writeback_init(); } @@ -1141,10 +1141,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, */ flags = wait->flags; if (flags & WQ_FLAG_EXCLUSIVE) { - if (test_bit(key->bit_nr, &key->page->flags)) + if (test_bit(key->bit_nr, &key->folio->flags)) return -1; if (flags & WQ_FLAG_CUSTOM) { - if (test_and_set_bit(key->bit_nr, &key->page->flags)) + if (test_and_set_bit(key->bit_nr, &key->folio->flags)) return -1; flags |= WQ_FLAG_DONE; } @@ -1178,12 +1178,12 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, static void folio_wake_bit(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(&folio->page); + wait_queue_head_t *q = folio_waitqueue(folio); struct wait_page_key key; unsigned long flags; wait_queue_entry_t bookmark; - key.page = &folio->page; + key.folio = folio; key.bit_nr = bit_nr; key.page_match = 0; @@ -1275,7 +1275,7 @@ int sysctl_page_lock_unfairness = 5; static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, int state, enum behavior behavior) { - wait_queue_head_t *q = page_waitqueue(&folio->page); + wait_queue_head_t *q = folio_waitqueue(folio); int unfairness = sysctl_page_lock_unfairness; struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; @@ -1295,7 +1295,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, init_wait(wait); wait->func = wake_page_function; - wait_page.page = &folio->page; + wait_page.folio = folio; wait_page.bit_nr = bit_nr; repeat: @@ -1444,23 +1444,23 @@ int put_and_wait_on_page_locked(struct page *page, int state) } /** - * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue - * @page: Page defining the wait queue of interest + * folio_add_wait_queue - Add an arbitrary waiter to a folio's wait queue + * @folio: Folio defining the wait queue of interest * @waiter: Waiter to add to the queue * - * Add an arbitrary @waiter to the wait queue for the nominated @page. + * Add an arbitrary @waiter to the wait queue for the nominated @folio. */ -void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) +void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter) { - wait_queue_head_t *q = page_waitqueue(page); + wait_queue_head_t *q = folio_waitqueue(folio); unsigned long flags; spin_lock_irqsave(&q->lock, flags); __add_wait_queue_entry_tail(q, waiter); - SetPageWaiters(page); + folio_set_waiters(folio); spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL_GPL(add_page_wait_queue); +EXPORT_SYMBOL_GPL(folio_add_wait_queue); #ifndef clear_bit_unlock_is_negative_byte @@ -1650,10 +1650,10 @@ EXPORT_SYMBOL_GPL(__folio_lock_killable); static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) { - struct wait_queue_head *q = page_waitqueue(&folio->page); + struct wait_queue_head *q = folio_waitqueue(folio); int ret = 0; - wait->page = &folio->page; + wait->folio = folio; wait->bit_nr = PG_locked; spin_lock_irq(&q->lock); -- cgit v1.2.3 From b47393f8448ade8bafe09ed302ce2a15093e9718 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 22 Apr 2021 22:58:32 -0400 Subject: mm/filemap: Add folio private_2 functions end_page_private_2() becomes folio_end_private_2(), wait_on_page_private_2() becomes folio_wait_private_2() and wait_on_page_private_2_killable() becomes folio_wait_private_2_killable(). Adjust the fscache equivalents to call page_folio() before calling these functions to avoid adding wrappers. Ends up costing 1 byte of text in ceph & netfs, but the core shrinks by three calls to page_folio(). Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Kirill A. Shutemov --- include/linux/netfs.h | 6 +++--- include/linux/pagemap.h | 6 +++--- mm/filemap.c | 41 ++++++++++++++++++----------------------- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5d6a4158a9a6..3d4cbf2f7dc4 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -55,7 +55,7 @@ static inline void set_page_fscache(struct page *page) */ static inline void end_page_fscache(struct page *page) { - end_page_private_2(page); + folio_end_private_2(page_folio(page)); } /** @@ -66,7 +66,7 @@ static inline void end_page_fscache(struct page *page) */ static inline void wait_on_page_fscache(struct page *page) { - wait_on_page_private_2(page); + folio_wait_private_2(page_folio(page)); } /** @@ -82,7 +82,7 @@ static inline void wait_on_page_fscache(struct page *page) */ static inline int wait_on_page_fscache_killable(struct page *page) { - return wait_on_page_private_2_killable(page); + return folio_wait_private_2_killable(page_folio(page)); } enum netfs_read_source { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ebc62e9e453b..05f91bfc048d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -795,9 +795,9 @@ static inline void set_page_private_2(struct page *page) SetPagePrivate2(page); } -void end_page_private_2(struct page *page); -void wait_on_page_private_2(struct page *page); -int wait_on_page_private_2_killable(struct page *page); +void folio_end_private_2(struct folio *folio); +void folio_wait_private_2(struct folio *folio); +int folio_wait_private_2_killable(struct folio *folio); /* * Add an arbitrary waiter to a page's wait queue diff --git a/mm/filemap.c b/mm/filemap.c index 1f8c00c2a4b7..d74be9fb3aa2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1506,56 +1506,51 @@ void folio_unlock(struct folio *folio) EXPORT_SYMBOL(folio_unlock); /** - * end_page_private_2 - Clear PG_private_2 and release any waiters - * @page: The page + * folio_end_private_2 - Clear PG_private_2 and wake any waiters. + * @folio: The folio. * - * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for - * this. The page ref held for PG_private_2 being set is released. + * Clear the PG_private_2 bit on a folio and wake up any sleepers waiting for + * it. The folio reference held for PG_private_2 being set is released. * - * This is, for example, used when a netfs page is being written to a local - * disk cache, thereby allowing writes to the cache for the same page to be + * This is, for example, used when a netfs folio is being written to a local + * disk cache, thereby allowing writes to the cache for the same folio to be * serialised. */ -void end_page_private_2(struct page *page) +void folio_end_private_2(struct folio *folio) { - struct folio *folio = page_folio(page); - VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio); clear_bit_unlock(PG_private_2, folio_flags(folio, 0)); folio_wake_bit(folio, PG_private_2); folio_put(folio); } -EXPORT_SYMBOL(end_page_private_2); +EXPORT_SYMBOL(folio_end_private_2); /** - * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page - * @page: The page to wait on + * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio. + * @folio: The folio to wait on. * - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page. + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio. */ -void wait_on_page_private_2(struct page *page) +void folio_wait_private_2(struct folio *folio) { - struct folio *folio = page_folio(page); - while (folio_test_private_2(folio)) folio_wait_bit(folio, PG_private_2); } -EXPORT_SYMBOL(wait_on_page_private_2); +EXPORT_SYMBOL(folio_wait_private_2); /** - * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page - * @page: The page to wait on + * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio. + * @folio: The folio to wait on. * - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a * fatal signal is received by the calling task. * * Return: * - 0 if successful. * - -EINTR if a fatal signal was encountered. */ -int wait_on_page_private_2_killable(struct page *page) +int folio_wait_private_2_killable(struct folio *folio) { - struct folio *folio = page_folio(page); int ret = 0; while (folio_test_private_2(folio)) { @@ -1566,7 +1561,7 @@ int wait_on_page_private_2_killable(struct page *page) return ret; } -EXPORT_SYMBOL(wait_on_page_private_2_killable); +EXPORT_SYMBOL(folio_wait_private_2_killable); /** * folio_end_writeback - End writeback against a folio. -- cgit v1.2.3 From 6abbaa5b01730a1f0883d199cf5a90ae5c5dccea Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 14:24:30 -0400 Subject: fs/netfs: Add folio fscache functions Match the page writeback functions by adding folio_start_fscache(), folio_end_fscache(), folio_wait_fscache() and folio_wait_fscache_killable(). Remove set_page_private_2(). Also rewrite the kernel-doc to describe when to use the function rather than what the function does, and include the kernel-doc in the appropriate rst file. Saves 31 bytes of text in netfs_rreq_unlock() due to set_page_fscache() calling page_folio() once instead of three times. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Acked-by: Kirill A. Shutemov Acked-by: Mike Rapoport Reviewed-by: David Howells --- Documentation/filesystems/netfs_library.rst | 2 + include/linux/netfs.h | 75 ++++++++++++++++++----------- include/linux/pagemap.h | 16 ------ 3 files changed, 50 insertions(+), 43 deletions(-) diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst index 57a641847818..bb68d39f03b7 100644 --- a/Documentation/filesystems/netfs_library.rst +++ b/Documentation/filesystems/netfs_library.rst @@ -524,3 +524,5 @@ Note that these methods are passed a pointer to the cache resource structure, not the read request structure as they could be used in other situations where there isn't a read request structure as well, such as writing dirty data to the cache. + +.. kernel-doc:: include/linux/netfs.h diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 3d4cbf2f7dc4..12c4177f7703 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -22,6 +22,7 @@ * Overload PG_private_2 to give us PG_fscache - this is used to indicate that * a page is currently backed by a local disk cache */ +#define folio_test_fscache(folio) folio_test_private_2(folio) #define PageFsCache(page) PagePrivate2((page)) #define SetPageFsCache(page) SetPagePrivate2((page)) #define ClearPageFsCache(page) ClearPagePrivate2((page)) @@ -29,57 +30,77 @@ #define TestClearPageFsCache(page) TestClearPagePrivate2((page)) /** - * set_page_fscache - Set PG_fscache on a page and take a ref - * @page: The page. + * folio_start_fscache - Start an fscache write on a folio. + * @folio: The folio. * - * Set the PG_fscache (PG_private_2) flag on a page and take the reference - * needed for the VM to handle its lifetime correctly. This sets the flag and - * takes the reference unconditionally, so care must be taken not to set the - * flag again if it's already set. + * Call this function before writing a folio to a local cache. Starting a + * second write before the first one finishes is not allowed. */ -static inline void set_page_fscache(struct page *page) +static inline void folio_start_fscache(struct folio *folio) { - set_page_private_2(page); + VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio); + folio_get(folio); + folio_set_private_2(folio); } /** - * end_page_fscache - Clear PG_fscache and release any waiters - * @page: The page - * - * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers - * waiting for this. The page ref held for PG_private_2 being set is released. + * folio_end_fscache - End an fscache write on a folio. + * @folio: The folio. * - * This is, for example, used when a netfs page is being written to a local - * disk cache, thereby allowing writes to the cache for the same page to be - * serialised. + * Call this function after the folio has been written to the local cache. + * This will wake any sleepers waiting on this folio. */ -static inline void end_page_fscache(struct page *page) +static inline void folio_end_fscache(struct folio *folio) { - folio_end_private_2(page_folio(page)); + folio_end_private_2(folio); } /** - * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page - * @page: The page to wait on + * folio_wait_fscache - Wait for an fscache write on this folio to end. + * @folio: The folio. * - * Wait for PG_fscache (aka PG_private_2) to be cleared on a page. + * If this folio is currently being written to a local cache, wait for + * the write to finish. Another write may start after this one finishes, + * unless the caller holds the folio lock. */ -static inline void wait_on_page_fscache(struct page *page) +static inline void folio_wait_fscache(struct folio *folio) { - folio_wait_private_2(page_folio(page)); + folio_wait_private_2(folio); } /** - * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page - * @page: The page to wait on + * folio_wait_fscache_killable - Wait for an fscache write on this folio to end. + * @folio: The folio. * - * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a - * fatal signal is received by the calling task. + * If this folio is currently being written to a local cache, wait + * for the write to finish or for a fatal signal to be received. + * Another write may start after this one finishes, unless the caller + * holds the folio lock. * * Return: * - 0 if successful. * - -EINTR if a fatal signal was encountered. */ +static inline int folio_wait_fscache_killable(struct folio *folio) +{ + return folio_wait_private_2_killable(folio); +} + +static inline void set_page_fscache(struct page *page) +{ + folio_start_fscache(page_folio(page)); +} + +static inline void end_page_fscache(struct page *page) +{ + folio_end_private_2(page_folio(page)); +} + +static inline void wait_on_page_fscache(struct page *page) +{ + folio_wait_private_2(page_folio(page)); +} + static inline int wait_on_page_fscache_killable(struct page *page) { return folio_wait_private_2_killable(page_folio(page)); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 05f91bfc048d..bdbd7be67812 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -779,22 +779,6 @@ int __set_page_dirty_no_writeback(struct page *page); void page_endio(struct page *page, bool is_write, int err); -/** - * set_page_private_2 - Set PG_private_2 on a page and take a ref - * @page: The page. - * - * Set the PG_private_2 flag on a page and take the reference needed for the VM - * to handle its lifetime correctly. This sets the flag and takes the - * reference unconditionally, so care must be taken not to set the flag again - * if it's already set. - */ -static inline void set_page_private_2(struct page *page) -{ - page = compound_head(page); - get_page(page); - SetPagePrivate2(page); -} - void folio_end_private_2(struct folio *folio); void folio_wait_private_2(struct folio *folio); int folio_wait_private_2_killable(struct folio *folio); -- cgit v1.2.3 From dd10ab049beb479dc83bb14a7b5cd68c363983ce Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 12 Apr 2021 16:45:17 -0400 Subject: mm: Add folio_mapped() This function is the equivalent of page_mapped(). It is slightly shorter as we do not need to handle the PageTail() case. Reimplement page_mapped() as a wrapper around folio_mapped(). folio_mapped() is 13 bytes smaller than page_mapped(), but the page_mapped() wrapper is 30 bytes, for a net increase of 17 bytes of text. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Kirill A. Shutemov Acked-by: Mike Rapoport --- include/linux/mm.h | 1 + include/linux/mm_types.h | 6 ++++++ mm/folio-compat.c | 6 ++++++ mm/util.c | 29 ++++++++++++++++------------- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index bc5c38e1f780..95e36d0475ac 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1769,6 +1769,7 @@ static inline pgoff_t page_index(struct page *page) } bool page_mapped(struct page *page); +bool folio_mapped(struct folio *folio); /* * Return true only if the page has been allocated with diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5ebcb86ac934..82dab23205c3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -299,6 +299,12 @@ FOLIO_MATCH(memcg_data, memcg_data); #endif #undef FOLIO_MATCH +static inline atomic_t *folio_mapcount_ptr(struct folio *folio) +{ + struct page *tail = &folio->page + 1; + return &tail->compound_mapcount; +} + static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 3c83f03b80d7..7044fcc8a8aa 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -35,3 +35,9 @@ void wait_for_stable_page(struct page *page) return folio_wait_stable(page_folio(page)); } EXPORT_SYMBOL_GPL(wait_for_stable_page); + +bool page_mapped(struct page *page) +{ + return folio_mapped(page_folio(page)); +} +EXPORT_SYMBOL(page_mapped); diff --git a/mm/util.c b/mm/util.c index 6c1fe9bee30a..e322a42090e5 100644 --- a/mm/util.c +++ b/mm/util.c @@ -671,28 +671,31 @@ void *page_rmapping(struct page *page) return __page_rmapping(page); } -/* - * Return true if this page is mapped into pagetables. - * For compound page it returns true if any subpage of compound page is mapped. +/** + * folio_mapped - Is this folio mapped into userspace? + * @folio: The folio. + * + * Return: True if any page in this folio is referenced by user page tables. */ -bool page_mapped(struct page *page) +bool folio_mapped(struct folio *folio) { - int i; + long i, nr; - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) >= 0; - page = compound_head(page); - if (atomic_read(compound_mapcount_ptr(page)) >= 0) + if (folio_test_single(folio)) + return atomic_read(&folio->_mapcount) >= 0; + if (atomic_read(folio_mapcount_ptr(folio)) >= 0) return true; - if (PageHuge(page)) + if (folio_test_hugetlb(folio)) return false; - for (i = 0; i < compound_nr(page); i++) { - if (atomic_read(&page[i]._mapcount) >= 0) + + nr = folio_nr_pages(folio); + for (i = 0; i < nr; i++) { + if (atomic_read(&folio_page(folio, i)->_mapcount) >= 0) return true; } return false; } -EXPORT_SYMBOL(page_mapped); +EXPORT_SYMBOL(folio_mapped); struct anon_vma *page_anon_vma(struct page *page) { -- cgit v1.2.3 From 874fd90cafdca9d678bceb88f91322af8c9a9d2d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 25 Jun 2021 09:27:29 -0400 Subject: mm: Add folio_nid() This is the folio equivalent of page_to_nid(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 95e36d0475ac..04e41d4d85ea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1429,6 +1429,11 @@ static inline int page_to_nid(const struct page *page) } #endif +static inline int folio_nid(const struct folio *folio) +{ + return page_to_nid(&folio->page); +} + #ifdef CONFIG_NUMA_BALANCING static inline int cpu_pid_to_cpupid(int cpu, int pid) { -- cgit v1.2.3 From 6e0110c247c8794a16573d1e238f92489cc27c8a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 13:40:11 -0400 Subject: mm/memcg: Remove 'page' parameter to mem_cgroup_charge_statistics() The last use of 'page' was removed by commit 468c398233da ("mm: memcontrol: switch to native NR_ANON_THPS counter"), so we can now remove the parameter from the function. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Michal Hocko Acked-by: Johannes Weiner Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/memcontrol.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6da5020a8656..7d560bbcac70 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -799,7 +799,6 @@ static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) } static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, - struct page *page, int nr_pages) { /* pagein of a big page is an event. So, ignore page size */ @@ -5669,9 +5668,9 @@ static int mem_cgroup_move_account(struct page *page, ret = 0; local_irq_disable(); - mem_cgroup_charge_statistics(to, page, nr_pages); + mem_cgroup_charge_statistics(to, nr_pages); memcg_check_events(to, page); - mem_cgroup_charge_statistics(from, page, -nr_pages); + mem_cgroup_charge_statistics(from, -nr_pages); memcg_check_events(from, page); local_irq_enable(); out_unlock: @@ -6693,7 +6692,7 @@ static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) commit_charge(page, memcg); local_irq_disable(); - mem_cgroup_charge_statistics(memcg, page, nr_pages); + mem_cgroup_charge_statistics(memcg, nr_pages); memcg_check_events(memcg, page); local_irq_enable(); out: @@ -6976,7 +6975,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) commit_charge(newpage, memcg); local_irq_save(flags); - mem_cgroup_charge_statistics(memcg, newpage, nr_pages); + mem_cgroup_charge_statistics(memcg, nr_pages); memcg_check_events(memcg, newpage); local_irq_restore(flags); } @@ -7204,7 +7203,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * only synchronisation we have for updating the per-CPU variables. */ VM_BUG_ON(!irqs_disabled()); - mem_cgroup_charge_statistics(memcg, page, -nr_entries); + mem_cgroup_charge_statistics(memcg, -nr_entries); memcg_check_events(memcg, page); css_put(&memcg->css); -- cgit v1.2.3 From 658b69c9d85211e2c56af4f73bbe55a4e0da04b2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 14:51:26 -0400 Subject: mm/memcg: Use the node id in mem_cgroup_update_tree() By using the node id in mem_cgroup_update_tree(), we can delete soft_limit_tree_from_page() and mem_cgroup_page_nodeinfo(). Saves 42 bytes of kernel text on my config. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Michal Hocko Acked-by: Johannes Weiner Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/memcontrol.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7d560bbcac70..ac6ed42ca141 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -456,28 +456,12 @@ ino_t page_cgroup_ino(struct page *page) return ino; } -static struct mem_cgroup_per_node * -mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page) -{ - int nid = page_to_nid(page); - - return memcg->nodeinfo[nid]; -} - static struct mem_cgroup_tree_per_node * soft_limit_tree_node(int nid) { return soft_limit_tree.rb_tree_per_node[nid]; } -static struct mem_cgroup_tree_per_node * -soft_limit_tree_from_page(struct page *page) -{ - int nid = page_to_nid(page); - - return soft_limit_tree.rb_tree_per_node[nid]; -} - static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, struct mem_cgroup_tree_per_node *mctz, unsigned long new_usage_in_excess) @@ -548,13 +532,13 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg) return excess; } -static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) +static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) { unsigned long excess; struct mem_cgroup_per_node *mz; struct mem_cgroup_tree_per_node *mctz; - mctz = soft_limit_tree_from_page(page); + mctz = soft_limit_tree_node(nid); if (!mctz) return; /* @@ -562,7 +546,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) * because their event counter is not touched. */ for (; memcg; memcg = parent_mem_cgroup(memcg)) { - mz = mem_cgroup_page_nodeinfo(memcg, page); + mz = memcg->nodeinfo[nid]; excess = soft_limit_excess(memcg); /* * We have to update the tree if mz is on RB-tree or @@ -852,7 +836,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) MEM_CGROUP_TARGET_SOFTLIMIT); mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) - mem_cgroup_update_tree(memcg, page); + mem_cgroup_update_tree(memcg, page_to_nid(page)); } } -- cgit v1.2.3 From 2ab082ba76f99ba83a496e7bbe3082d6d5096d1e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 25 Jun 2021 08:57:02 -0400 Subject: mm/memcg: Remove soft_limit_tree_node() Opencode this one-line function in its three callers. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Michal Hocko Acked-by: Johannes Weiner Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/memcontrol.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ac6ed42ca141..316d005566b8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -456,12 +456,6 @@ ino_t page_cgroup_ino(struct page *page) return ino; } -static struct mem_cgroup_tree_per_node * -soft_limit_tree_node(int nid) -{ - return soft_limit_tree.rb_tree_per_node[nid]; -} - static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, struct mem_cgroup_tree_per_node *mctz, unsigned long new_usage_in_excess) @@ -538,7 +532,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) struct mem_cgroup_per_node *mz; struct mem_cgroup_tree_per_node *mctz; - mctz = soft_limit_tree_node(nid); + mctz = soft_limit_tree.rb_tree_per_node[nid]; if (!mctz) return; /* @@ -577,7 +571,7 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) for_each_node(nid) { mz = memcg->nodeinfo[nid]; - mctz = soft_limit_tree_node(nid); + mctz = soft_limit_tree.rb_tree_per_node[nid]; if (mctz) mem_cgroup_remove_exceeded(mz, mctz); } @@ -3364,7 +3358,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, if (order > 0) return 0; - mctz = soft_limit_tree_node(pgdat->node_id); + mctz = soft_limit_tree.rb_tree_per_node[pgdat->node_id]; /* * Do not even bother to check the largest node if the root -- cgit v1.2.3 From 8e88bd2dfde222285a56a54b0400942b5b33da20 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 25 Jun 2021 09:05:47 -0400 Subject: mm/memcg: Convert memcg_check_events to take a node ID memcg_check_events only uses the page's nid, so call page_to_nid in the callers to make the interface easier to understand. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Michal Hocko Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/memcontrol.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 316d005566b8..a064a85d51da 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -819,7 +819,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, * Check events in order. * */ -static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) +static void memcg_check_events(struct mem_cgroup *memcg, int nid) { /* threshold event is triggered in finer grain than soft limit */ if (unlikely(mem_cgroup_event_ratelimit(memcg, @@ -830,7 +830,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) MEM_CGROUP_TARGET_SOFTLIMIT); mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) - mem_cgroup_update_tree(memcg, page_to_nid(page)); + mem_cgroup_update_tree(memcg, nid); } } @@ -5555,7 +5555,7 @@ static int mem_cgroup_move_account(struct page *page, struct lruvec *from_vec, *to_vec; struct pglist_data *pgdat; unsigned int nr_pages = compound ? thp_nr_pages(page) : 1; - int ret; + int nid, ret; VM_BUG_ON(from == to); VM_BUG_ON_PAGE(PageLRU(page), page); @@ -5644,12 +5644,13 @@ static int mem_cgroup_move_account(struct page *page, __unlock_page_memcg(from); ret = 0; + nid = page_to_nid(page); local_irq_disable(); mem_cgroup_charge_statistics(to, nr_pages); - memcg_check_events(to, page); + memcg_check_events(to, nid); mem_cgroup_charge_statistics(from, -nr_pages); - memcg_check_events(from, page); + memcg_check_events(from, nid); local_irq_enable(); out_unlock: unlock_page(page); @@ -6671,7 +6672,7 @@ static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) local_irq_disable(); mem_cgroup_charge_statistics(memcg, nr_pages); - memcg_check_events(memcg, page); + memcg_check_events(memcg, page_to_nid(page)); local_irq_enable(); out: return ret; @@ -6777,7 +6778,7 @@ struct uncharge_gather { unsigned long nr_memory; unsigned long pgpgout; unsigned long nr_kmem; - struct page *dummy_page; + int nid; }; static inline void uncharge_gather_clear(struct uncharge_gather *ug) @@ -6801,7 +6802,7 @@ static void uncharge_batch(const struct uncharge_gather *ug) local_irq_save(flags); __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_memory); - memcg_check_events(ug->memcg, ug->dummy_page); + memcg_check_events(ug->memcg, ug->nid); local_irq_restore(flags); /* drop reference from uncharge_page */ @@ -6842,7 +6843,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) uncharge_gather_clear(ug); } ug->memcg = memcg; - ug->dummy_page = page; + ug->nid = page_to_nid(page); /* pairs with css_put in uncharge_batch */ css_get(&memcg->css); @@ -6954,7 +6955,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) local_irq_save(flags); mem_cgroup_charge_statistics(memcg, nr_pages); - memcg_check_events(memcg, newpage); + memcg_check_events(memcg, page_to_nid(newpage)); local_irq_restore(flags); } @@ -7182,7 +7183,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) */ VM_BUG_ON(!irqs_disabled()); mem_cgroup_charge_statistics(memcg, -nr_entries); - memcg_check_events(memcg, page); + memcg_check_events(memcg, page_to_nid(page)); css_put(&memcg->css); } -- cgit v1.2.3 From 1b7e4464d43a488e383843bf96ec62d12393bff1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 28 Jun 2021 14:59:26 -0400 Subject: mm/memcg: Add folio_memcg() and related functions memcg information is only stored in the head page, so the memcg subsystem needs to assure that all accesses are to the head page. The first step is converting page_memcg() to folio_memcg(). The callers of page_memcg() and PageMemcgKmem() are not yet ready to be converted to use folios, so retain them as wrappers around folio_memcg() and folio_memcg_kmem(). They will be converted in a later patch set. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 110 ++++++++++++++++++++++++++------------------- mm/memcontrol.c | 21 +++++---- 2 files changed, 77 insertions(+), 54 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3096c9a0ee01..06659670db32 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -369,7 +369,7 @@ enum page_memcg_data_flags { #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) -static inline bool PageMemcgKmem(struct page *page); +static inline bool folio_memcg_kmem(struct folio *folio); /* * After the initialization objcg->memcg is always pointing at @@ -384,73 +384,77 @@ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) } /* - * __page_memcg - get the memory cgroup associated with a non-kmem page - * @page: a pointer to the page struct + * __folio_memcg - Get the memory cgroup associated with a non-kmem folio + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages or - * kmem pages. + * against some type of folios, e.g. slab folios or ex-slab folios or + * kmem folios. */ -static inline struct mem_cgroup *__page_memcg(struct page *page) +static inline struct mem_cgroup *__folio_memcg(struct folio *folio) { - unsigned long memcg_data = page->memcg_data; + unsigned long memcg_data = folio->memcg_data; - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio); return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* - * __page_objcg - get the object cgroup associated with a kmem page - * @page: a pointer to the page struct + * __folio_objcg - get the object cgroup associated with a kmem folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the object cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the object cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper object cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages or - * LRU pages. + * against some type of folios, e.g. slab folios or ex-slab folios or + * LRU folios. */ -static inline struct obj_cgroup *__page_objcg(struct page *page) +static inline struct obj_cgroup *__folio_objcg(struct folio *folio) { - unsigned long memcg_data = page->memcg_data; + unsigned long memcg_data = folio->memcg_data; - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio); return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* - * page_memcg - get the memory cgroup associated with a page - * @page: a pointer to the page struct + * folio_memcg - Get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages. + * against some type of folios, e.g. slab folios or ex-slab folios. * - * For a non-kmem page any of the following ensures page and memcg binding + * For a non-kmem folio any of the following ensures folio and memcg binding * stability: * - * - the page lock + * - the folio lock * - LRU isolation * - lock_page_memcg() * - exclusive reference * - * For a kmem page a caller should hold an rcu read lock to protect memcg - * associated with a kmem page from being released. + * For a kmem folio a caller should hold an rcu read lock to protect memcg + * associated with a kmem folio from being released. */ +static inline struct mem_cgroup *folio_memcg(struct folio *folio) +{ + if (folio_memcg_kmem(folio)) + return obj_cgroup_memcg(__folio_objcg(folio)); + return __folio_memcg(folio); +} + static inline struct mem_cgroup *page_memcg(struct page *page) { - if (PageMemcgKmem(page)) - return obj_cgroup_memcg(__page_objcg(page)); - else - return __page_memcg(page); + return folio_memcg(page_folio(page)); } /* @@ -523,17 +527,18 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) #ifdef CONFIG_MEMCG_KMEM /* - * PageMemcgKmem - check if the page has MemcgKmem flag set - * @page: a pointer to the page struct + * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set. + * @folio: Pointer to the folio. * - * Checks if the page has MemcgKmem flag set. The caller must ensure that - * the page has an associated memory cgroup. It's not safe to call this function - * against some types of pages, e.g. slab pages. + * Checks if the folio has MemcgKmem flag set. The caller must ensure + * that the folio has an associated memory cgroup. It's not safe to call + * this function against some types of folios, e.g. slab folios. */ -static inline bool PageMemcgKmem(struct page *page) +static inline bool folio_memcg_kmem(struct folio *folio) { - VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page); - return page->memcg_data & MEMCG_DATA_KMEM; + VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page); + VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio); + return folio->memcg_data & MEMCG_DATA_KMEM; } /* @@ -577,7 +582,7 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) } #else -static inline bool PageMemcgKmem(struct page *page) +static inline bool folio_memcg_kmem(struct folio *folio) { return false; } @@ -593,6 +598,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) } #endif +static inline bool PageMemcgKmem(struct page *page) +{ + return folio_memcg_kmem(page_folio(page)); +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return (memcg == root_mem_cgroup); @@ -1115,6 +1125,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, #define MEM_CGROUP_ID_SHIFT 0 #define MEM_CGROUP_ID_MAX 0 +static inline struct mem_cgroup *folio_memcg(struct folio *folio) +{ + return NULL; +} + static inline struct mem_cgroup *page_memcg(struct page *page) { return NULL; @@ -1131,6 +1146,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return NULL; } +static inline bool folio_memcg_kmem(struct folio *folio) +{ + return false; +} + static inline bool PageMemcgKmem(struct page *page) { return false; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a064a85d51da..1385ac6f688e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2992,15 +2992,16 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) */ void __memcg_kmem_uncharge_page(struct page *page, int order) { + struct folio *folio = page_folio(page); struct obj_cgroup *objcg; unsigned int nr_pages = 1 << order; - if (!PageMemcgKmem(page)) + if (!folio_memcg_kmem(folio)) return; - objcg = __page_objcg(page); + objcg = __folio_objcg(folio); obj_cgroup_uncharge_pages(objcg, nr_pages); - page->memcg_data = 0; + folio->memcg_data = 0; obj_cgroup_put(objcg); } @@ -3234,17 +3235,18 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) */ void split_page_memcg(struct page *head, unsigned int nr) { - struct mem_cgroup *memcg = page_memcg(head); + struct folio *folio = page_folio(head); + struct mem_cgroup *memcg = folio_memcg(folio); int i; if (mem_cgroup_disabled() || !memcg) return; for (i = 1; i < nr; i++) - head[i].memcg_data = head->memcg_data; + folio_page(folio, i)->memcg_data = folio->memcg_data; - if (PageMemcgKmem(head)) - obj_cgroup_get_many(__page_objcg(head), nr - 1); + if (folio_memcg_kmem(folio)) + obj_cgroup_get_many(__folio_objcg(folio), nr - 1); else css_get_many(&memcg->css, nr - 1); } @@ -6811,6 +6813,7 @@ static void uncharge_batch(const struct uncharge_gather *ug) static void uncharge_page(struct page *page, struct uncharge_gather *ug) { + struct folio *folio = page_folio(page); unsigned long nr_pages; struct mem_cgroup *memcg; struct obj_cgroup *objcg; @@ -6824,14 +6827,14 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) * exclusive access to the page. */ if (use_objcg) { - objcg = __page_objcg(page); + objcg = __folio_objcg(folio); /* * This get matches the put at the end of the function and * kmem pages do not hold memcg references anymore. */ memcg = get_mem_cgroup_from_objcg(objcg); } else { - memcg = __page_memcg(page); + memcg = __folio_memcg(folio); } if (!memcg) -- cgit v1.2.3 From 118f2875490b027218594db9e2effb52cebc7693 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 14:07:12 -0400 Subject: mm/memcg: Convert commit_charge() to take a folio The memcg_data is only set on the head page, so enforce that by typing it as a folio. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Michal Hocko Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/memcontrol.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1385ac6f688e..e352225970d2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2711,9 +2711,9 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) } #endif -static void commit_charge(struct page *page, struct mem_cgroup *memcg) +static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) { - VM_BUG_ON_PAGE(page_memcg(page), page); + VM_BUG_ON_FOLIO(folio_memcg(folio), folio); /* * Any of the following ensures page's memcg stability: * @@ -2722,7 +2722,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg) * - lock_page_memcg() * - exclusive reference */ - page->memcg_data = (unsigned long)memcg; + folio->memcg_data = (unsigned long)memcg; } static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) @@ -6662,7 +6662,8 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) { - unsigned int nr_pages = thp_nr_pages(page); + struct folio *folio = page_folio(page); + long nr_pages = folio_nr_pages(folio); int ret; ret = try_charge(memcg, gfp, nr_pages); @@ -6670,7 +6671,7 @@ static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) goto out; css_get(&memcg->css); - commit_charge(page, memcg); + commit_charge(folio, memcg); local_irq_disable(); mem_cgroup_charge_statistics(memcg, nr_pages); @@ -6922,21 +6923,21 @@ void __mem_cgroup_uncharge_list(struct list_head *page_list) */ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) { + struct folio *newfolio = page_folio(newpage); struct mem_cgroup *memcg; - unsigned int nr_pages; + long nr_pages = folio_nr_pages(newfolio); unsigned long flags; VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); - VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); - VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage); - VM_BUG_ON_PAGE(PageTransHuge(oldpage) != PageTransHuge(newpage), - newpage); + VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio); + VM_BUG_ON_FOLIO(PageAnon(oldpage) != folio_test_anon(newfolio), newfolio); + VM_BUG_ON_FOLIO(compound_nr(oldpage) != nr_pages, newfolio); if (mem_cgroup_disabled()) return; /* Page cache replacement: new page already charged? */ - if (page_memcg(newpage)) + if (folio_memcg(newfolio)) return; memcg = page_memcg(oldpage); @@ -6945,8 +6946,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) return; /* Force-charge the new page. The old one will be freed soon */ - nr_pages = thp_nr_pages(newpage); - if (!mem_cgroup_is_root(memcg)) { page_counter_charge(&memcg->memory, nr_pages); if (do_memsw_account()) @@ -6954,7 +6953,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) } css_get(&memcg->css); - commit_charge(newpage, memcg); + commit_charge(newfolio, memcg); local_irq_save(flags); mem_cgroup_charge_statistics(memcg, nr_pages); -- cgit v1.2.3 From 8f425e4ed0eb3ef0b2d85a9efccf947ca6aa9b1c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 25 Jun 2021 09:27:04 -0400 Subject: mm/memcg: Convert mem_cgroup_charge() to take a folio Convert all callers of mem_cgroup_charge() to call page_folio() on the page they're currently passing in. Many of them will be converted to use folios themselves soon. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 28 +++++++++++++++++++++------- kernel/events/uprobes.c | 3 ++- mm/filemap.c | 2 +- mm/huge_memory.c | 2 +- mm/khugepaged.c | 4 ++-- mm/ksm.c | 3 ++- mm/memcontrol.c | 28 +++++++--------------------- mm/memory.c | 9 +++++---- mm/migrate.c | 2 +- mm/shmem.c | 2 +- mm/userfaultfd.c | 2 +- 11 files changed, 44 insertions(+), 41 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 06659670db32..19a51729e00c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -694,14 +694,28 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) page_counter_read(&memcg->memory); } -int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); + +/** + * mem_cgroup_charge - Charge a newly allocated folio to a cgroup. + * @folio: Folio to charge. + * @mm: mm context of the allocating task. + * @gfp: Reclaim mode. + * + * Try to charge @folio to the memcg that @mm belongs to, reclaiming + * pages according to @gfp if necessary. If @mm is NULL, try to + * charge to the active memcg. + * + * Do not use this for folios allocated for swapin. + * + * Return: 0 on success. Otherwise, an error code is returned. + */ +static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, + gfp_t gfp) { if (mem_cgroup_disabled()) return 0; - return __mem_cgroup_charge(page, mm, gfp_mask); + return __mem_cgroup_charge(folio, mm, gfp); } int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, @@ -1199,8 +1213,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) return false; } -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +static inline int mem_cgroup_charge(struct folio *folio, + struct mm_struct *mm, gfp_t gfp) { return 0; } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index af24dc3febbe..6357c3580d07 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -167,7 +167,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, addr + PAGE_SIZE); if (new_page) { - err = mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL); + err = mem_cgroup_charge(page_folio(new_page), vma->vm_mm, + GFP_KERNEL); if (err) return err; } diff --git a/mm/filemap.c b/mm/filemap.c index d74be9fb3aa2..816af226f49d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -889,7 +889,7 @@ noinline int __add_to_page_cache_locked(struct page *page, page->index = offset; if (!huge) { - error = mem_cgroup_charge(page, NULL, gfp); + error = mem_cgroup_charge(page_folio(page), NULL, gfp); if (error) goto error; charged = true; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5e9ef0fc261e..d49986a10d83 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -603,7 +603,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, VM_BUG_ON_PAGE(!PageCompound(page), page); - if (mem_cgroup_charge(page, vma->vm_mm, gfp)) { + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, gfp)) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK_CHARGE); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 045cc579f724..8480a3b05bcc 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1087,7 +1087,7 @@ static void collapse_huge_page(struct mm_struct *mm, goto out_nolock; } - if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) { + if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out_nolock; } @@ -1658,7 +1658,7 @@ static void collapse_file(struct mm_struct *mm, goto out; } - if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) { + if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out; } diff --git a/mm/ksm.c b/mm/ksm.c index a5716fdec1aa..c246a0b0ac75 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2578,7 +2578,8 @@ struct page *ksm_might_need_to_copy(struct page *page, return page; /* let do_swap_page report the error */ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); - if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) { + if (new_page && + mem_cgroup_charge(page_folio(new_page), vma->vm_mm, GFP_KERNEL)) { put_page(new_page); new_page = NULL; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e352225970d2..dbca7bf92737 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6660,9 +6660,9 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, atomic_long_read(&parent->memory.children_low_usage))); } -static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) +static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg, + gfp_t gfp) { - struct folio *folio = page_folio(page); long nr_pages = folio_nr_pages(folio); int ret; @@ -6675,34 +6675,19 @@ static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) local_irq_disable(); mem_cgroup_charge_statistics(memcg, nr_pages); - memcg_check_events(memcg, page_to_nid(page)); + memcg_check_events(memcg, folio_nid(folio)); local_irq_enable(); out: return ret; } -/** - * __mem_cgroup_charge - charge a newly allocated page to a cgroup - * @page: page to charge - * @mm: mm context of the victim - * @gfp_mask: reclaim mode - * - * Try to charge @page to the memcg that @mm belongs to, reclaiming - * pages according to @gfp_mask if necessary. if @mm is NULL, try to - * charge to the active memcg. - * - * Do not use this for pages allocated for swapin. - * - * Returns 0 on success. Otherwise, an error code is returned. - */ -int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { struct mem_cgroup *memcg; int ret; memcg = get_mem_cgroup_from_mm(mm); - ret = charge_memcg(page, memcg, gfp_mask); + ret = charge_memcg(folio, memcg, gfp); css_put(&memcg->css); return ret; @@ -6723,6 +6708,7 @@ int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { + struct folio *folio = page_folio(page); struct mem_cgroup *memcg; unsigned short id; int ret; @@ -6737,7 +6723,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, memcg = get_mem_cgroup_from_mm(mm); rcu_read_unlock(); - ret = charge_memcg(page, memcg, gfp); + ret = charge_memcg(folio, memcg, gfp); css_put(&memcg->css); return ret; diff --git a/mm/memory.c b/mm/memory.c index 269992ba3fa3..b67d80526bee 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -990,7 +990,7 @@ page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma, if (!new_page) return NULL; - if (mem_cgroup_charge(new_page, src_mm, GFP_KERNEL)) { + if (mem_cgroup_charge(page_folio(new_page), src_mm, GFP_KERNEL)) { put_page(new_page); return NULL; } @@ -3019,7 +3019,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) } } - if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(new_page), mm, GFP_KERNEL)) goto oom_free_new; cgroup_throttle_swaprate(new_page, GFP_KERNEL); @@ -3769,7 +3769,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (!page) goto oom; - if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) goto oom_free_page; cgroup_throttle_swaprate(page, GFP_KERNEL); @@ -4193,7 +4193,8 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf) if (!vmf->cow_page) return VM_FAULT_OOM; - if (mem_cgroup_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL)) { + if (mem_cgroup_charge(page_folio(vmf->cow_page), vma->vm_mm, + GFP_KERNEL)) { put_page(vmf->cow_page); return VM_FAULT_OOM; } diff --git a/mm/migrate.c b/mm/migrate.c index a6a7743ee98f..da55d2a8638d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2846,7 +2846,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, if (unlikely(anon_vma_prepare(vma))) goto abort; - if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) goto abort; /* diff --git a/mm/shmem.c b/mm/shmem.c index b5860f4a2738..a2e653aeb536 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -710,7 +710,7 @@ static int shmem_add_to_page_cache(struct page *page, page->index = index; if (!PageSwapCache(page)) { - error = mem_cgroup_charge(page, charge_mm, gfp); + error = mem_cgroup_charge(page_folio(page), charge_mm, gfp); if (error) { if (PageTransHuge(page)) { count_vm_event(THP_FILE_FALLBACK); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 7a9008415534..36e5f6ab976f 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -164,7 +164,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, __SetPageUptodate(page); ret = -ENOMEM; - if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL)) goto out_release; ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, -- cgit v1.2.3 From c4ed6ebfcb0929d204ab7548496c0d28bd408b36 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 29 Jun 2021 21:47:12 -0400 Subject: mm/memcg: Convert uncharge_page() to uncharge_folio() Use a folio rather than a page to ensure that we're only operating on base or head pages, and not tail pages. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/memcontrol.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dbca7bf92737..64eac157db79 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6794,24 +6794,23 @@ static void uncharge_batch(const struct uncharge_gather *ug) memcg_check_events(ug->memcg, ug->nid); local_irq_restore(flags); - /* drop reference from uncharge_page */ + /* drop reference from uncharge_folio */ css_put(&ug->memcg->css); } -static void uncharge_page(struct page *page, struct uncharge_gather *ug) +static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) { - struct folio *folio = page_folio(page); - unsigned long nr_pages; + long nr_pages; struct mem_cgroup *memcg; struct obj_cgroup *objcg; - bool use_objcg = PageMemcgKmem(page); + bool use_objcg = folio_memcg_kmem(folio); - VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* * Nobody should be changing or seriously looking at - * page memcg or objcg at this point, we have fully - * exclusive access to the page. + * folio memcg or objcg at this point, we have fully + * exclusive access to the folio. */ if (use_objcg) { objcg = __folio_objcg(folio); @@ -6833,19 +6832,19 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) uncharge_gather_clear(ug); } ug->memcg = memcg; - ug->nid = page_to_nid(page); + ug->nid = folio_nid(folio); /* pairs with css_put in uncharge_batch */ css_get(&memcg->css); } - nr_pages = compound_nr(page); + nr_pages = folio_nr_pages(folio); if (use_objcg) { ug->nr_memory += nr_pages; ug->nr_kmem += nr_pages; - page->memcg_data = 0; + folio->memcg_data = 0; obj_cgroup_put(objcg); } else { /* LRU pages aren't accounted at the root level */ @@ -6853,7 +6852,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) ug->nr_memory += nr_pages; ug->pgpgout++; - page->memcg_data = 0; + folio->memcg_data = 0; } css_put(&memcg->css); @@ -6874,7 +6873,7 @@ void __mem_cgroup_uncharge(struct page *page) return; uncharge_gather_clear(&ug); - uncharge_page(page, &ug); + uncharge_folio(page_folio(page), &ug); uncharge_batch(&ug); } @@ -6888,11 +6887,11 @@ void __mem_cgroup_uncharge(struct page *page) void __mem_cgroup_uncharge_list(struct list_head *page_list) { struct uncharge_gather ug; - struct page *page; + struct folio *folio; uncharge_gather_clear(&ug); - list_for_each_entry(page, page_list, lru) - uncharge_page(page, &ug); + list_for_each_entry(folio, page_list, lru) + uncharge_folio(folio, &ug); if (ug.memcg) uncharge_batch(&ug); } -- cgit v1.2.3 From bbc6b703b21963e909f633cf7718903ed5094319 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 1 May 2021 20:42:23 -0400 Subject: mm/memcg: Convert mem_cgroup_uncharge() to take a folio Convert all the callers to call page_folio(). Most of them were already using a head page, but a few of them I can't prove were, so this may actually fix a bug. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 15 +++++++++++---- mm/filemap.c | 2 +- mm/khugepaged.c | 4 ++-- mm/memcontrol.c | 14 ++++---------- mm/memory-failure.c | 2 +- mm/memremap.c | 2 +- mm/page_alloc.c | 2 +- mm/swap.c | 2 +- 8 files changed, 22 insertions(+), 21 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 19a51729e00c..b4bc052db32b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -722,12 +722,19 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); -void __mem_cgroup_uncharge(struct page *page); -static inline void mem_cgroup_uncharge(struct page *page) +void __mem_cgroup_uncharge(struct folio *folio); + +/** + * mem_cgroup_uncharge - Uncharge a folio. + * @folio: Folio to uncharge. + * + * Uncharge a folio previously charged with mem_cgroup_charge(). + */ +static inline void mem_cgroup_uncharge(struct folio *folio) { if (mem_cgroup_disabled()) return; - __mem_cgroup_uncharge(page); + __mem_cgroup_uncharge(folio); } void __mem_cgroup_uncharge_list(struct list_head *page_list); @@ -1229,7 +1236,7 @@ static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) { } -static inline void mem_cgroup_uncharge(struct page *page) +static inline void mem_cgroup_uncharge(struct folio *folio) { } diff --git a/mm/filemap.c b/mm/filemap.c index 816af226f49d..44fcd9d1dd65 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -940,7 +940,7 @@ unlock: if (xas_error(&xas)) { error = xas_error(&xas); if (charged) - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); goto error; } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 8480a3b05bcc..6d56e7abd2b8 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1211,7 +1211,7 @@ out_up_write: mmap_write_unlock(mm); out_nolock: if (!IS_ERR_OR_NULL(*hpage)) - mem_cgroup_uncharge(*hpage); + mem_cgroup_uncharge(page_folio(*hpage)); trace_mm_collapse_huge_page(mm, isolated, result); return; } @@ -1975,7 +1975,7 @@ xa_unlocked: out: VM_BUG_ON(!list_empty(&pagelist)); if (!IS_ERR_OR_NULL(*hpage)) - mem_cgroup_uncharge(*hpage); + mem_cgroup_uncharge(page_folio(*hpage)); /* TODO: tracepoints */ } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 64eac157db79..6321ed6d6e5a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6858,22 +6858,16 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) css_put(&memcg->css); } -/** - * __mem_cgroup_uncharge - uncharge a page - * @page: page to uncharge - * - * Uncharge a page previously charged with __mem_cgroup_charge(). - */ -void __mem_cgroup_uncharge(struct page *page) +void __mem_cgroup_uncharge(struct folio *folio) { struct uncharge_gather ug; - /* Don't touch page->lru of any random page, pre-check: */ - if (!page_memcg(page)) + /* Don't touch folio->lru of any random page, pre-check: */ + if (!folio_memcg(folio)) return; uncharge_gather_clear(&ug); - uncharge_folio(page_folio(page), &ug); + uncharge_folio(folio, &ug); uncharge_batch(&ug); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3e6449f2102a..fffe4afaff43 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -762,7 +762,7 @@ static int delete_from_lru_cache(struct page *p) * Poisoned page might never drop its ref count to 0 so we have * to uncharge it manually from its memcg. */ - mem_cgroup_uncharge(p); + mem_cgroup_uncharge(page_folio(p)); /* * drop the page count elevated by isolate_lru_page() diff --git a/mm/memremap.c b/mm/memremap.c index ed593bf87109..5a66a71ab591 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -505,7 +505,7 @@ void free_devmap_managed_page(struct page *page) __ClearPageWaiters(page); - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); /* * When a device_private page is freed, the page->mapping field diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b37435c274cf..869d0b06e1ef 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -724,7 +724,7 @@ static inline void free_the_page(struct page *page, unsigned int order) void free_compound_page(struct page *page) { - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); free_the_page(page, compound_order(page)); } diff --git a/mm/swap.c b/mm/swap.c index 0edbcb9c8876..5679ce5bc362 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -94,7 +94,7 @@ static void __page_cache_release(struct page *page) static void __put_single_page(struct page *page) { __page_cache_release(page); - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); free_unref_page(page, 0); } -- cgit v1.2.3 From d21bba2b7d0ae19dd1279e10aee61c37a17aba74 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 6 May 2021 18:14:59 -0400 Subject: mm/memcg: Convert mem_cgroup_migrate() to take folios Convert all callers of mem_cgroup_migrate() to call page_folio() first. They all look like they're using head pages already, but this proves it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 4 ++-- mm/filemap.c | 4 +++- mm/memcontrol.c | 35 +++++++++++++++++------------------ mm/migrate.c | 4 +++- mm/shmem.c | 5 ++++- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b4bc052db32b..07eda24ec581 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -745,7 +745,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) __mem_cgroup_uncharge_list(page_list); } -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); +void mem_cgroup_migrate(struct folio *old, struct folio *new); /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node @@ -1244,7 +1244,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } -static inline void mem_cgroup_migrate(struct page *old, struct page *new) +static inline void mem_cgroup_migrate(struct folio *old, struct folio *new) { } diff --git a/mm/filemap.c b/mm/filemap.c index 44fcd9d1dd65..5368a4dcc35e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -835,6 +835,8 @@ EXPORT_SYMBOL(file_write_and_wait_range); */ void replace_page_cache_page(struct page *old, struct page *new) { + struct folio *fold = page_folio(old); + struct folio *fnew = page_folio(new); struct address_space *mapping = old->mapping; void (*freepage)(struct page *) = mapping->a_ops->freepage; pgoff_t offset = old->index; @@ -848,7 +850,7 @@ void replace_page_cache_page(struct page *old, struct page *new) new->mapping = mapping; new->index = offset; - mem_cgroup_migrate(old, new); + mem_cgroup_migrate(fold, fnew); xas_lock_irq(&xas); xas_store(&xas, new); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6321ed6d6e5a..c83d2f862f8a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6891,36 +6891,35 @@ void __mem_cgroup_uncharge_list(struct list_head *page_list) } /** - * mem_cgroup_migrate - charge a page's replacement - * @oldpage: currently circulating page - * @newpage: replacement page + * mem_cgroup_migrate - Charge a folio's replacement. + * @old: Currently circulating folio. + * @new: Replacement folio. * - * Charge @newpage as a replacement page for @oldpage. @oldpage will + * Charge @new as a replacement folio for @old. @old will * be uncharged upon free. * - * Both pages must be locked, @newpage->mapping must be set up. + * Both folios must be locked, @new->mapping must be set up. */ -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) +void mem_cgroup_migrate(struct folio *old, struct folio *new) { - struct folio *newfolio = page_folio(newpage); struct mem_cgroup *memcg; - long nr_pages = folio_nr_pages(newfolio); + long nr_pages = folio_nr_pages(new); unsigned long flags; - VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); - VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio); - VM_BUG_ON_FOLIO(PageAnon(oldpage) != folio_test_anon(newfolio), newfolio); - VM_BUG_ON_FOLIO(compound_nr(oldpage) != nr_pages, newfolio); + VM_BUG_ON_FOLIO(!folio_test_locked(old), old); + VM_BUG_ON_FOLIO(!folio_test_locked(new), new); + VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new); + VM_BUG_ON_FOLIO(folio_nr_pages(old) != nr_pages, new); if (mem_cgroup_disabled()) return; - /* Page cache replacement: new page already charged? */ - if (folio_memcg(newfolio)) + /* Page cache replacement: new folio already charged? */ + if (folio_memcg(new)) return; - memcg = page_memcg(oldpage); - VM_WARN_ON_ONCE_PAGE(!memcg, oldpage); + memcg = folio_memcg(old); + VM_WARN_ON_ONCE_FOLIO(!memcg, old); if (!memcg) return; @@ -6932,11 +6931,11 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) } css_get(&memcg->css); - commit_charge(newfolio, memcg); + commit_charge(new, memcg); local_irq_save(flags); mem_cgroup_charge_statistics(memcg, nr_pages); - memcg_check_events(memcg, page_to_nid(newpage)); + memcg_check_events(memcg, folio_nid(new)); local_irq_restore(flags); } diff --git a/mm/migrate.c b/mm/migrate.c index da55d2a8638d..bfb8ba490479 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -542,6 +542,8 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, */ void migrate_page_states(struct page *newpage, struct page *page) { + struct folio *folio = page_folio(page); + struct folio *newfolio = page_folio(newpage); int cpupid; if (PageError(page)) @@ -609,7 +611,7 @@ void migrate_page_states(struct page *newpage, struct page *page) copy_page_owner(page, newpage); if (!PageHuge(page)) - mem_cgroup_migrate(page, newpage); + mem_cgroup_migrate(folio, newfolio); } EXPORT_SYMBOL(migrate_page_states); diff --git a/mm/shmem.c b/mm/shmem.c index a2e653aeb536..1588f33d009a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1637,6 +1637,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct page *oldpage, *newpage; + struct folio *old, *new; struct address_space *swap_mapping; swp_entry_t entry; pgoff_t swap_index; @@ -1673,7 +1674,9 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, xa_lock_irq(&swap_mapping->i_pages); error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage); if (!error) { - mem_cgroup_migrate(oldpage, newpage); + old = page_folio(oldpage); + new = page_folio(newpage); + mem_cgroup_migrate(old, new); __inc_lruvec_page_state(newpage, NR_FILE_PAGES); __dec_lruvec_page_state(oldpage, NR_FILE_PAGES); } -- cgit v1.2.3 From 9d8053fc7a21ee2b3a540165d09418955258d9e8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 11:43:01 -0400 Subject: mm/memcg: Convert mem_cgroup_track_foreign_dirty_slowpath() to folio The page was only being used for the memcg and to gather trace information, so this is a simple conversion. The only caller of mem_cgroup_track_foreign_dirty() will be converted to folios in a later patch, so doing this now makes that patch simpler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 7 ++++--- include/trace/events/writeback.h | 8 ++++---- mm/memcontrol.c | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 07eda24ec581..1c2776c3a223 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1599,17 +1599,18 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, unsigned long *pheadroom, unsigned long *pdirty, unsigned long *pwriteback); -void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, +void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb); static inline void mem_cgroup_track_foreign_dirty(struct page *page, struct bdi_writeback *wb) { + struct folio *folio = page_folio(page); if (mem_cgroup_disabled()) return; - if (unlikely(&page_memcg(page)->css != wb->memcg_css)) - mem_cgroup_track_foreign_dirty_slowpath(page, wb); + if (unlikely(&folio_memcg(folio)->css != wb->memcg_css)) + mem_cgroup_track_foreign_dirty_slowpath(folio, wb); } void mem_cgroup_flush_foreign(struct bdi_writeback *wb); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 840d1ba84cf5..297871ca0004 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -236,9 +236,9 @@ TRACE_EVENT(inode_switch_wbs, TRACE_EVENT(track_foreign_dirty, - TP_PROTO(struct page *page, struct bdi_writeback *wb), + TP_PROTO(struct folio *folio, struct bdi_writeback *wb), - TP_ARGS(page, wb), + TP_ARGS(folio, wb), TP_STRUCT__entry( __array(char, name, 32) @@ -250,7 +250,7 @@ TRACE_EVENT(track_foreign_dirty, ), TP_fast_assign( - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_mapping(folio); struct inode *inode = mapping ? mapping->host : NULL; strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); @@ -258,7 +258,7 @@ TRACE_EVENT(track_foreign_dirty, __entry->ino = inode ? inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); - __entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup); + __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup); ), TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu", diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c83d2f862f8a..4a04bddefdbc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4516,17 +4516,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, * As being wrong occasionally doesn't matter, updates and accesses to the * records are lockless and racy. */ -void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, +void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb) { - struct mem_cgroup *memcg = page_memcg(page); + struct mem_cgroup *memcg = folio_memcg(folio); struct memcg_cgwb_frn *frn; u64 now = get_jiffies_64(); u64 oldest_at = now; int oldest = -1; int i; - trace_track_foreign_dirty(page, wb); + trace_track_foreign_dirty(folio, wb); /* * Pick the slot to use. If there is already a slot for @wb, keep -- cgit v1.2.3 From f70ad448741580bf61cdfbeb02229c581409760a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 28 Jun 2021 17:26:00 -0400 Subject: mm/memcg: Add folio_memcg_lock() and folio_memcg_unlock() These are the folio equivalents of lock_page_memcg() and unlock_page_memcg(). lock_page_memcg() and unlock_page_memcg() have too many callers to be easily replaced in a single patch, so reimplement them as wrappers for now to be cleaned up later when enough callers have been converted to use folios. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 10 ++++++++++ mm/memcontrol.c | 45 +++++++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1c2776c3a223..be85450f066f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -978,6 +978,8 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); extern bool cgroup_memory_noswap; #endif +void folio_memcg_lock(struct folio *folio); +void folio_memcg_unlock(struct folio *folio); void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); @@ -1397,6 +1399,14 @@ static inline void unlock_page_memcg(struct page *page) { } +static inline void folio_memcg_lock(struct folio *folio) +{ +} + +static inline void folio_memcg_unlock(struct folio *folio) +{ +} + static inline void mem_cgroup_handle_over_high(void) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4a04bddefdbc..23fe124fe78d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1933,18 +1933,17 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } /** - * lock_page_memcg - lock a page and memcg binding - * @page: the page + * folio_memcg_lock - Bind a folio to its memcg. + * @folio: The folio. * - * This function protects unlocked LRU pages from being moved to + * This function prevents unlocked LRU folios from being moved to * another cgroup. * - * It ensures lifetime of the locked memcg. Caller is responsible - * for the lifetime of the page. + * It ensures lifetime of the bound memcg. The caller is responsible + * for the lifetime of the folio. */ -void lock_page_memcg(struct page *page) +void folio_memcg_lock(struct folio *folio) { - struct page *head = compound_head(page); /* rmap on tail pages */ struct mem_cgroup *memcg; unsigned long flags; @@ -1958,7 +1957,7 @@ void lock_page_memcg(struct page *page) if (mem_cgroup_disabled()) return; again: - memcg = page_memcg(head); + memcg = folio_memcg(folio); if (unlikely(!memcg)) return; @@ -1972,7 +1971,7 @@ again: return; spin_lock_irqsave(&memcg->move_lock, flags); - if (memcg != page_memcg(head)) { + if (memcg != folio_memcg(folio)) { spin_unlock_irqrestore(&memcg->move_lock, flags); goto again; } @@ -1986,9 +1985,15 @@ again: memcg->move_lock_task = current; memcg->move_lock_flags = flags; } +EXPORT_SYMBOL(folio_memcg_lock); + +void lock_page_memcg(struct page *page) +{ + folio_memcg_lock(page_folio(page)); +} EXPORT_SYMBOL(lock_page_memcg); -static void __unlock_page_memcg(struct mem_cgroup *memcg) +static void __folio_memcg_unlock(struct mem_cgroup *memcg) { if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; @@ -2003,14 +2008,22 @@ static void __unlock_page_memcg(struct mem_cgroup *memcg) } /** - * unlock_page_memcg - unlock a page and memcg binding - * @page: the page + * folio_memcg_unlock - Release the binding between a folio and its memcg. + * @folio: The folio. + * + * This releases the binding created by folio_memcg_lock(). This does + * not change the accounting of this folio to its memcg, but it does + * permit others to change it. */ -void unlock_page_memcg(struct page *page) +void folio_memcg_unlock(struct folio *folio) { - struct page *head = compound_head(page); + __folio_memcg_unlock(folio_memcg(folio)); +} +EXPORT_SYMBOL(folio_memcg_unlock); - __unlock_page_memcg(page_memcg(head)); +void unlock_page_memcg(struct page *page) +{ + folio_memcg_unlock(page_folio(page)); } EXPORT_SYMBOL(unlock_page_memcg); @@ -5643,7 +5656,7 @@ static int mem_cgroup_move_account(struct page *page, page->memcg_data = (unsigned long)to; - __unlock_page_memcg(from); + __folio_memcg_unlock(from); ret = 0; nid = page_to_nid(page); -- cgit v1.2.3 From fcce4672c06ad5f6eb9497607ac942490b25d3af Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 1 Mar 2021 16:34:06 -0500 Subject: mm/memcg: Convert mem_cgroup_move_account() to use a folio This saves dozens of bytes of text by eliminating a lot of calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/memcontrol.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 23fe124fe78d..c17681defeec 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5567,38 +5567,39 @@ static int mem_cgroup_move_account(struct page *page, struct mem_cgroup *from, struct mem_cgroup *to) { + struct folio *folio = page_folio(page); struct lruvec *from_vec, *to_vec; struct pglist_data *pgdat; - unsigned int nr_pages = compound ? thp_nr_pages(page) : 1; + unsigned int nr_pages = compound ? folio_nr_pages(folio) : 1; int nid, ret; VM_BUG_ON(from == to); - VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON(compound && !PageTransHuge(page)); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + VM_BUG_ON(compound && !folio_test_multi(folio)); /* * Prevent mem_cgroup_migrate() from looking at * page's memory cgroup of its source page while we change it. */ ret = -EBUSY; - if (!trylock_page(page)) + if (!folio_trylock(folio)) goto out; ret = -EINVAL; - if (page_memcg(page) != from) + if (folio_memcg(folio) != from) goto out_unlock; - pgdat = page_pgdat(page); + pgdat = folio_pgdat(folio); from_vec = mem_cgroup_lruvec(from, pgdat); to_vec = mem_cgroup_lruvec(to, pgdat); - lock_page_memcg(page); + folio_memcg_lock(folio); - if (PageAnon(page)) { - if (page_mapped(page)) { + if (folio_test_anon(folio)) { + if (folio_mapped(folio)) { __mod_lruvec_state(from_vec, NR_ANON_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_MAPPED, nr_pages); - if (PageTransHuge(page)) { + if (folio_test_transhuge(folio)) { __mod_lruvec_state(from_vec, NR_ANON_THPS, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_THPS, @@ -5609,18 +5610,18 @@ static int mem_cgroup_move_account(struct page *page, __mod_lruvec_state(from_vec, NR_FILE_PAGES, -nr_pages); __mod_lruvec_state(to_vec, NR_FILE_PAGES, nr_pages); - if (PageSwapBacked(page)) { + if (folio_test_swapbacked(folio)) { __mod_lruvec_state(from_vec, NR_SHMEM, -nr_pages); __mod_lruvec_state(to_vec, NR_SHMEM, nr_pages); } - if (page_mapped(page)) { + if (folio_mapped(folio)) { __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages); } - if (PageDirty(page)) { - struct address_space *mapping = page_mapping(page); + if (folio_test_dirty(folio)) { + struct address_space *mapping = folio_mapping(folio); if (mapping_can_writeback(mapping)) { __mod_lruvec_state(from_vec, NR_FILE_DIRTY, @@ -5631,7 +5632,7 @@ static int mem_cgroup_move_account(struct page *page, } } - if (PageWriteback(page)) { + if (folio_test_writeback(folio)) { __mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages); __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages); } @@ -5654,12 +5655,12 @@ static int mem_cgroup_move_account(struct page *page, css_get(&to->css); css_put(&from->css); - page->memcg_data = (unsigned long)to; + folio->memcg_data = (unsigned long)to; __folio_memcg_unlock(from); ret = 0; - nid = page_to_nid(page); + nid = folio_nid(folio); local_irq_disable(); mem_cgroup_charge_statistics(to, nr_pages); @@ -5668,7 +5669,7 @@ static int mem_cgroup_move_account(struct page *page, memcg_check_events(from, nid); local_irq_enable(); out_unlock: - unlock_page(page); + folio_unlock(folio); out: return ret; } -- cgit v1.2.3 From b1baabd995ab8e830dbf647fe731b51e12b8cedd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 28 Jun 2021 20:00:28 -0400 Subject: mm/memcg: Add folio_lruvec() This replaces mem_cgroup_page_lruvec(). All callers converted. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 20 +++++++++----------- mm/compaction.c | 2 +- mm/memcontrol.c | 9 ++++++--- mm/swap.c | 3 ++- mm/workingset.c | 3 ++- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index be85450f066f..35577caf27d9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -784,18 +784,17 @@ out: } /** - * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page - * @page: the page + * folio_lruvec - return lruvec for isolating/putting an LRU folio + * @folio: Pointer to the folio. * - * This function relies on page->mem_cgroup being stable. + * This function relies on folio->mem_cgroup being stable. */ -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec(struct folio *folio) { - pg_data_t *pgdat = page_pgdat(page); - struct mem_cgroup *memcg = page_memcg(page); + struct mem_cgroup *memcg = folio_memcg(folio); - VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); - return mem_cgroup_lruvec(memcg, pgdat); + VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio); + return mem_cgroup_lruvec(memcg, folio_pgdat(folio)); } struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); @@ -1256,10 +1255,9 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, return &pgdat->__lruvec; } -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec(struct folio *folio) { - pg_data_t *pgdat = page_pgdat(page); - + struct pglist_data *pgdat = folio_pgdat(folio); return &pgdat->__lruvec; } diff --git a/mm/compaction.c b/mm/compaction.c index bfc93da1c2c7..37dfdf2b2287 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1022,7 +1022,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (!TestClearPageLRU(page)) goto isolate_fail_put; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(page_folio(page)); /* If we already hold the lock, we can skip some rechecking */ if (lruvec != locked) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c17681defeec..ea4f879d2771 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1154,9 +1154,10 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) */ struct lruvec *lock_page_lruvec(struct page *page) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(folio); spin_lock(&lruvec->lru_lock); lruvec_memcg_debug(lruvec, page); @@ -1166,9 +1167,10 @@ struct lruvec *lock_page_lruvec(struct page *page) struct lruvec *lock_page_lruvec_irq(struct page *page) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(folio); spin_lock_irq(&lruvec->lru_lock); lruvec_memcg_debug(lruvec, page); @@ -1178,9 +1180,10 @@ struct lruvec *lock_page_lruvec_irq(struct page *page) struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(folio); spin_lock_irqsave(&lruvec->lru_lock, *flags); lruvec_memcg_debug(lruvec, page); diff --git a/mm/swap.c b/mm/swap.c index 5679ce5bc362..65a74c89e7cf 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -293,7 +293,8 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) void lru_note_cost_page(struct page *page) { - lru_note_cost(mem_cgroup_page_lruvec(page), + struct folio *folio = page_folio(page); + lru_note_cost(folio_lruvec(folio), page_is_file_lru(page), thp_nr_pages(page)); } diff --git a/mm/workingset.c b/mm/workingset.c index d5b81e4f4cbe..3deb408a240d 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -397,6 +397,7 @@ out: */ void workingset_activation(struct page *page) { + struct folio *folio = page_folio(page); struct mem_cgroup *memcg; struct lruvec *lruvec; @@ -411,7 +412,7 @@ void workingset_activation(struct page *page) memcg = page_memcg_rcu(page); if (!mem_cgroup_disabled() && !memcg) goto out; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(folio); workingset_age_nonresident(lruvec, thp_nr_pages(page)); out: rcu_read_unlock(); -- cgit v1.2.3 From e809c3fedeeb806993349e7bf797b4c2b728be7d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 28 Jun 2021 21:59:47 -0400 Subject: mm/memcg: Add folio_lruvec_lock() and similar functions These are the folio equivalents of lock_page_lruvec() and similar functions. Also convert lruvec_memcg_debug() to take a folio. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 32 ++++++++++--------- mm/compaction.c | 2 +- mm/huge_memory.c | 5 +-- mm/memcontrol.c | 77 +++++++++++++++++++++++++++++----------------- mm/rmap.c | 2 +- mm/swap.c | 8 +++-- mm/vmscan.c | 3 +- 7 files changed, 79 insertions(+), 50 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 35577caf27d9..30d2cd7b5c9e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -801,15 +801,16 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); -struct lruvec *lock_page_lruvec(struct page *page); -struct lruvec *lock_page_lruvec_irq(struct page *page); -struct lruvec *lock_page_lruvec_irqsave(struct page *page, +struct lruvec *folio_lruvec_lock(struct folio *folio); +struct lruvec *folio_lruvec_lock_irq(struct folio *folio); +struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags); #ifdef CONFIG_DEBUG_VM -void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page); +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio); #else -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } #endif @@ -1261,7 +1262,8 @@ static inline struct lruvec *folio_lruvec(struct folio *folio) return &pgdat->__lruvec; } -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } @@ -1291,26 +1293,26 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } -static inline struct lruvec *lock_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec_lock(struct folio *folio) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } -static inline struct lruvec *lock_page_lruvec_irq(struct page *page) +static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irq(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } -static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page, +static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flagsp) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp); return &pgdat->__lruvec; @@ -1576,6 +1578,7 @@ static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec) static inline struct lruvec *relock_page_lruvec_irq(struct page *page, struct lruvec *locked_lruvec) { + struct folio *folio = page_folio(page); if (locked_lruvec) { if (page_matches_lruvec(page, locked_lruvec)) return locked_lruvec; @@ -1583,13 +1586,14 @@ static inline struct lruvec *relock_page_lruvec_irq(struct page *page, unlock_page_lruvec_irq(locked_lruvec); } - return lock_page_lruvec_irq(page); + return folio_lruvec_lock_irq(folio); } /* Don't lock again iff page's lruvec locked */ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, struct lruvec *locked_lruvec, unsigned long *flags) { + struct folio *folio = page_folio(page); if (locked_lruvec) { if (page_matches_lruvec(page, locked_lruvec)) return locked_lruvec; @@ -1597,7 +1601,7 @@ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, unlock_page_lruvec_irqrestore(locked_lruvec, *flags); } - return lock_page_lruvec_irqsave(page, flags); + return folio_lruvec_lock_irqsave(folio, flags); } #ifdef CONFIG_CGROUP_WRITEBACK diff --git a/mm/compaction.c b/mm/compaction.c index 37dfdf2b2287..fbc60f964c38 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1032,7 +1032,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, compact_lock_irqsave(&lruvec->lru_lock, &flags, cc); locked = lruvec; - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, page_folio(page)); /* Try get exclusive access under lock */ if (!skip_updated) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d49986a10d83..e5ea5f775d5c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2405,7 +2405,8 @@ static void __split_huge_page_tail(struct page *head, int tail, static void __split_huge_page(struct page *page, struct list_head *list, pgoff_t end) { - struct page *head = compound_head(page); + struct folio *folio = page_folio(page); + struct page *head = &folio->page; struct lruvec *lruvec; struct address_space *swap_cache = NULL; unsigned long offset = 0; @@ -2424,7 +2425,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, } /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ - lruvec = lock_page_lruvec(head); + lruvec = folio_lruvec_lock(folio); for (i = nr - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ea4f879d2771..8dab23a71fc4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1126,67 +1126,88 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, } #ifdef CONFIG_DEBUG_VM -void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { struct mem_cgroup *memcg; if (mem_cgroup_disabled()) return; - memcg = page_memcg(page); + memcg = folio_memcg(folio); if (!memcg) - VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page); + VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != root_mem_cgroup, folio); else - VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != memcg, page); + VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio); } #endif /** - * lock_page_lruvec - lock and return lruvec for a given page. - * @page: the page + * folio_lruvec_lock - Lock the lruvec for a folio. + * @folio: Pointer to the folio. * * These functions are safe to use under any of the following conditions: - * - page locked - * - PageLRU cleared - * - lock_page_memcg() - * - page->_refcount is zero + * - folio locked + * - folio_test_lru false + * - folio_memcg_lock() + * - folio frozen (refcount of 0) + * + * Return: The lruvec this folio is on with its lock held. */ -struct lruvec *lock_page_lruvec(struct page *page) +struct lruvec *folio_lruvec_lock(struct folio *folio) { - struct folio *folio = page_folio(page); - struct lruvec *lruvec; + struct lruvec *lruvec = folio_lruvec(folio); - lruvec = folio_lruvec(folio); spin_lock(&lruvec->lru_lock); - - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, folio); return lruvec; } -struct lruvec *lock_page_lruvec_irq(struct page *page) +/** + * folio_lruvec_lock_irq - Lock the lruvec for a folio. + * @folio: Pointer to the folio. + * + * These functions are safe to use under any of the following conditions: + * - folio locked + * - folio_test_lru false + * - folio_memcg_lock() + * - folio frozen (refcount of 0) + * + * Return: The lruvec this folio is on with its lock held and interrupts + * disabled. + */ +struct lruvec *folio_lruvec_lock_irq(struct folio *folio) { - struct folio *folio = page_folio(page); - struct lruvec *lruvec; + struct lruvec *lruvec = folio_lruvec(folio); - lruvec = folio_lruvec(folio); spin_lock_irq(&lruvec->lru_lock); - - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, folio); return lruvec; } -struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags) +/** + * folio_lruvec_lock_irqsave - Lock the lruvec for a folio. + * @folio: Pointer to the folio. + * @flags: Pointer to irqsave flags. + * + * These functions are safe to use under any of the following conditions: + * - folio locked + * - folio_test_lru false + * - folio_memcg_lock() + * - folio frozen (refcount of 0) + * + * Return: The lruvec this folio is on with its lock held and interrupts + * disabled. + */ +struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, + unsigned long *flags) { - struct folio *folio = page_folio(page); - struct lruvec *lruvec; + struct lruvec *lruvec = folio_lruvec(folio); - lruvec = folio_lruvec(folio); spin_lock_irqsave(&lruvec->lru_lock, *flags); - - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, folio); return lruvec; } diff --git a/mm/rmap.c b/mm/rmap.c index 6aebd1747251..059556dbefec 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -34,7 +34,7 @@ * mapping->private_lock (in __set_page_dirty_buffers) * lock_page_memcg move_lock (in __set_page_dirty_buffers) * i_pages lock (widely used) - * lruvec->lru_lock (in lock_page_lruvec_irq) + * lruvec->lru_lock (in folio_lruvec_lock_irq) * inode->i_lock (in set_page_dirty's __mark_inode_dirty) * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty) * sb_lock (within inode_lock in fs/fs-writeback.c) diff --git a/mm/swap.c b/mm/swap.c index 65a74c89e7cf..d1fc964def12 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -80,10 +80,11 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = { static void __page_cache_release(struct page *page) { if (PageLRU(page)) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; unsigned long flags; - lruvec = lock_page_lruvec_irqsave(page, &flags); + lruvec = folio_lruvec_lock_irqsave(folio, &flags); del_page_from_lru_list(page, lruvec); __clear_page_lru_flags(page); unlock_page_lruvec_irqrestore(lruvec, flags); @@ -350,11 +351,12 @@ static inline void activate_page_drain(int cpu) static void activate_page(struct page *page) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; - page = compound_head(page); + page = &folio->page; if (TestClearPageLRU(page)) { - lruvec = lock_page_lruvec_irq(page); + lruvec = folio_lruvec_lock_irq(folio); __activate_page(page, lruvec); unlock_page_lruvec_irq(lruvec); SetPageLRU(page); diff --git a/mm/vmscan.c b/mm/vmscan.c index 74296c2d1fed..8694e1549bcd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2090,6 +2090,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, */ int isolate_lru_page(struct page *page) { + struct folio *folio = page_folio(page); int ret = -EBUSY; VM_BUG_ON_PAGE(!page_count(page), page); @@ -2099,7 +2100,7 @@ int isolate_lru_page(struct page *page) struct lruvec *lruvec; get_page(page); - lruvec = lock_page_lruvec_irq(page); + lruvec = folio_lruvec_lock_irq(folio); del_page_from_lru_list(page, lruvec); unlock_page_lruvec_irq(lruvec); ret = 0; -- cgit v1.2.3 From 0de340cbed3359423e38ed49242ac9d6986b5cfd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 29 Jun 2021 22:27:31 -0400 Subject: mm/memcg: Add folio_lruvec_relock_irq() and folio_lruvec_relock_irqsave() These are the folio equivalents of relock_page_lruvec_irq() and folio_lruvec_relock_irqsave(). Also convert page_matches_lruvec() to folio_matches_lruvec(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 17 ++++++++--------- mm/mlock.c | 3 ++- mm/swap.c | 13 ++++++++----- mm/vmscan.c | 5 +++-- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 30d2cd7b5c9e..05094eaf1d61 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1568,19 +1568,19 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, } /* Test requires a stable page->memcg binding, see page_memcg() */ -static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec) +static inline bool folio_matches_lruvec(struct folio *folio, + struct lruvec *lruvec) { - return lruvec_pgdat(lruvec) == page_pgdat(page) && - lruvec_memcg(lruvec) == page_memcg(page); + return lruvec_pgdat(lruvec) == folio_pgdat(folio) && + lruvec_memcg(lruvec) == folio_memcg(folio); } /* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *relock_page_lruvec_irq(struct page *page, +static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio, struct lruvec *locked_lruvec) { - struct folio *folio = page_folio(page); if (locked_lruvec) { - if (page_matches_lruvec(page, locked_lruvec)) + if (folio_matches_lruvec(folio, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irq(locked_lruvec); @@ -1590,12 +1590,11 @@ static inline struct lruvec *relock_page_lruvec_irq(struct page *page, } /* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, +static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio, struct lruvec *locked_lruvec, unsigned long *flags) { - struct folio *folio = page_folio(page); if (locked_lruvec) { - if (page_matches_lruvec(page, locked_lruvec)) + if (folio_matches_lruvec(folio, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irqrestore(locked_lruvec, *flags); diff --git a/mm/mlock.c b/mm/mlock.c index 16d2ee160d43..e263d62ae2d0 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -271,6 +271,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) /* Phase 1: page isolation */ for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); if (TestClearPageMlocked(page)) { /* @@ -278,7 +279,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) * so we can spare the get_page() here. */ if (TestClearPageLRU(page)) { - lruvec = relock_page_lruvec_irq(page, lruvec); + lruvec = folio_lruvec_relock_irq(folio, lruvec); del_page_from_lru_list(page, lruvec); continue; } else diff --git a/mm/swap.c b/mm/swap.c index d1fc964def12..57791ae80f2e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -189,12 +189,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); /* block memcg migration during page moving between lru */ if (!TestClearPageLRU(page)) continue; - lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); + lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); (*move_fn)(page, lruvec); SetPageLRU(page); @@ -893,11 +894,12 @@ void release_pages(struct page **pages, int nr) int i; LIST_HEAD(pages_to_free); struct lruvec *lruvec = NULL; - unsigned long flags; + unsigned long flags = 0; unsigned int lock_batch; for (i = 0; i < nr; i++) { struct page *page = pages[i]; + struct folio *folio = page_folio(page); /* * Make sure the IRQ-safe lock-holding time does not get @@ -909,7 +911,7 @@ void release_pages(struct page **pages, int nr) lruvec = NULL; } - page = compound_head(page); + page = &folio->page; if (is_huge_zero_page(page)) continue; @@ -948,7 +950,7 @@ void release_pages(struct page **pages, int nr) if (PageLRU(page)) { struct lruvec *prev_lruvec = lruvec; - lruvec = relock_page_lruvec_irqsave(page, lruvec, + lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); if (prev_lruvec != lruvec) lock_batch = 0; @@ -1052,8 +1054,9 @@ void __pagevec_lru_add(struct pagevec *pvec) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); - lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); + lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); __pagevec_lru_add_fn(page, lruvec); } if (lruvec) diff --git a/mm/vmscan.c b/mm/vmscan.c index 8694e1549bcd..306229c4313f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2200,7 +2200,7 @@ static unsigned int move_pages_to_lru(struct lruvec *lruvec, * All pages were isolated from the same lruvec (and isolation * inhibits memcg migration). */ - VM_BUG_ON_PAGE(!page_matches_lruvec(page, lruvec), page); + VM_BUG_ON_PAGE(!folio_matches_lruvec(page_folio(page), lruvec), page); add_page_to_lru_list(page, lruvec); nr_pages = thp_nr_pages(page); nr_moved += nr_pages; @@ -4666,6 +4666,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) for (i = 0; i < pvec->nr; i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); int nr_pages; if (PageTransTail(page)) @@ -4678,7 +4679,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) if (!TestClearPageLRU(page)) continue; - lruvec = relock_page_lruvec_irq(page, lruvec); + lruvec = folio_lruvec_relock_irq(folio, lruvec); if (page_evictable(page) && PageUnevictable(page)) { del_page_from_lru_list(page, lruvec); ClearPageUnevictable(page); -- cgit v1.2.3 From c5ce619a77ce00d537ef512e7a823c99ce890a40 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 17:19:13 -0400 Subject: mm/workingset: Convert workingset_activation to take a folio This function already assumed it was being passed a head page. No real change here, except that thp_nr_pages() compiles away on kernels with THP compiled out while folio_nr_pages() is always present. Also convert page_memcg_rcu() to folio_memcg_rcu(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 22 ++++++++++++---------- include/linux/swap.h | 2 +- mm/swap.c | 2 +- mm/workingset.c | 11 ++++------- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 05094eaf1d61..7bd78c13d1fa 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -457,20 +457,22 @@ static inline struct mem_cgroup *page_memcg(struct page *page) return folio_memcg(page_folio(page)); } -/* - * page_memcg_rcu - locklessly get the memory cgroup associated with a page - * @page: a pointer to the page struct +/** + * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages. + * against some type of folios, e.g. slab folios or ex-slab folios. + * + * Return: A pointer to the memory cgroup associated with the folio, + * or NULL. */ -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { - unsigned long memcg_data = READ_ONCE(page->memcg_data); + unsigned long memcg_data = READ_ONCE(folio->memcg_data); - VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); WARN_ON_ONCE(!rcu_read_lock_held()); if (memcg_data & MEMCG_DATA_KMEM) { @@ -1158,7 +1160,7 @@ static inline struct mem_cgroup *page_memcg(struct page *page) return NULL; } -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { WARN_ON_ONCE(!rcu_read_lock_held()); return NULL; diff --git a/include/linux/swap.h b/include/linux/swap.h index c7ecd3ad8e2e..0fc84797623f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -330,7 +330,7 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); void workingset_refault(struct page *page, void *shadow); -void workingset_activation(struct page *page); +void workingset_activation(struct folio *folio); /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); diff --git a/mm/swap.c b/mm/swap.c index 57791ae80f2e..5c688897c013 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -429,7 +429,7 @@ void mark_page_accessed(struct page *page) else __lru_cache_activate_page(page); ClearPageReferenced(page); - workingset_activation(page); + workingset_activation(page_folio(page)); } if (page_is_idle(page)) clear_page_idle(page); diff --git a/mm/workingset.c b/mm/workingset.c index 3deb408a240d..1c96ed525a0e 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -393,13 +393,11 @@ out: /** * workingset_activation - note a page activation - * @page: page that is being activated + * @folio: Folio that is being activated. */ -void workingset_activation(struct page *page) +void workingset_activation(struct folio *folio) { - struct folio *folio = page_folio(page); struct mem_cgroup *memcg; - struct lruvec *lruvec; rcu_read_lock(); /* @@ -409,11 +407,10 @@ void workingset_activation(struct page *page) * XXX: See workingset_refault() - this should return * root_mem_cgroup even for !CONFIG_MEMCG. */ - memcg = page_memcg_rcu(page); + memcg = folio_memcg_rcu(folio); if (!mem_cgroup_disabled() && !memcg) goto out; - lruvec = folio_lruvec(folio); - workingset_age_nonresident(lruvec, thp_nr_pages(page)); + workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio)); out: rcu_read_unlock(); } -- cgit v1.2.3 From bf6bd276b374d44f6e7146d52aa6097eb91384a3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 10:55:27 -0400 Subject: mm: Add folio_pfn() This is the folio equivalent of page_to_pfn(). Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Mike Rapoport Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/mm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 04e41d4d85ea..47143f3e7f0a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1624,6 +1624,20 @@ static inline unsigned long page_to_section(const struct page *page) } #endif +/** + * folio_pfn - Return the Page Frame Number of a folio. + * @folio: The folio. + * + * A folio may contain multiple pages. The pages have consecutive + * Page Frame Numbers. + * + * Return: The Page Frame Number of the first page in the folio. + */ +static inline unsigned long folio_pfn(struct folio *folio) +{ + return page_to_pfn(&folio->page); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) -- cgit v1.2.3 From 646010009d3541b8cb4f803dcb4b8d0da2f22579 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 11:17:34 -0400 Subject: mm: Add folio_raw_mapping() Convert __page_rmapping to folio_raw_mapping and move it to mm/internal.h. It's only a couple of instructions (load and mask), so it's definitely going to be cheaper to inline it than call it. Leave page_rmapping out of line. Change page_anon_vma() to not call folio_raw_mapping() -- it's more efficient to do the subtraction than the mask. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/internal.h | 7 +++++++ mm/util.c | 20 ++++---------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 1a84484f8650..187a032fed4d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -34,6 +34,13 @@ void page_writeback_init(void); +static inline void *folio_raw_mapping(struct folio *folio) +{ + unsigned long mapping = (unsigned long)folio->mapping; + + return (void *)(mapping & ~PAGE_MAPPING_FLAGS); +} + vm_fault_t do_swap_page(struct vm_fault *vmf); void folio_rotate_reclaimable(struct folio *folio); diff --git a/mm/util.c b/mm/util.c index e322a42090e5..96a5e7185f65 100644 --- a/mm/util.c +++ b/mm/util.c @@ -654,21 +654,10 @@ void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) } EXPORT_SYMBOL(kvrealloc); -static inline void *__page_rmapping(struct page *page) -{ - unsigned long mapping; - - mapping = (unsigned long)page->mapping; - mapping &= ~PAGE_MAPPING_FLAGS; - - return (void *)mapping; -} - /* Neutral page->mapping pointer to address_space or anon_vma or other */ void *page_rmapping(struct page *page) { - page = compound_head(page); - return __page_rmapping(page); + return folio_raw_mapping(page_folio(page)); } /** @@ -699,13 +688,12 @@ EXPORT_SYMBOL(folio_mapped); struct anon_vma *page_anon_vma(struct page *page) { - unsigned long mapping; + struct folio *folio = page_folio(page); + unsigned long mapping = (unsigned long)folio->mapping; - page = compound_head(page); - mapping = (unsigned long)page->mapping; if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) return NULL; - return __page_rmapping(page); + return (void *)(mapping - PAGE_MAPPING_ANON); } /** -- cgit v1.2.3 From 08b0b0059bf1c2e8637f724cc7cc4d29b1e808de Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 16 Dec 2020 11:06:33 -0500 Subject: mm: Add flush_dcache_folio() This is a default implementation which calls flush_dcache_page() on each page in the folio. If architectures can do better, they should implement their own version of it. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka --- Documentation/core-api/cachetlb.rst | 6 ++++++ arch/arc/include/asm/cacheflush.h | 1 + arch/arm/include/asm/cacheflush.h | 1 + arch/m68k/include/asm/cacheflush_mm.h | 1 + arch/mips/include/asm/cacheflush.h | 2 ++ arch/nds32/include/asm/cacheflush.h | 1 + arch/nios2/include/asm/cacheflush.h | 3 ++- arch/parisc/include/asm/cacheflush.h | 3 ++- arch/sh/include/asm/cacheflush.h | 3 ++- arch/xtensa/include/asm/cacheflush.h | 5 ++++- include/asm-generic/cacheflush.h | 6 ++++++ mm/util.c | 11 +++++++++++ 12 files changed, 39 insertions(+), 4 deletions(-) diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst index 8aed9103e48a..5c0552e78c58 100644 --- a/Documentation/core-api/cachetlb.rst +++ b/Documentation/core-api/cachetlb.rst @@ -326,6 +326,12 @@ maps this page at its virtual address. dirty. Again, see sparc64 for examples of how to deal with this. + ``void flush_dcache_folio(struct folio *folio)`` + This function is called under the same circumstances as + flush_dcache_page(). It allows the architecture to + optimise for flushing the entire folio of pages instead + of flushing one page at a time. + ``void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long user_vaddr, void *dst, void *src, int len)`` ``void copy_from_user_page(struct vm_area_struct *vma, struct page *page, diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index e201b4b1655a..e8c2c7469e10 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -36,6 +36,7 @@ void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); void dma_cache_wback_inv(phys_addr_t start, unsigned long sz); void dma_cache_inv(phys_addr_t start, unsigned long sz); diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 5e56288e343b..e68fb879e4f9 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -290,6 +290,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +void flush_dcache_folio(struct folio *folio); #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h index 1ac55e7b47f0..8ab46625ddd3 100644 --- a/arch/m68k/include/asm/cacheflush_mm.h +++ b/arch/m68k/include/asm/cacheflush_mm.h @@ -250,6 +250,7 @@ static inline void __flush_page_to_ram(void *vaddr) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) __flush_page_to_ram(page_address(page)) +void flush_dcache_folio(struct folio *folio); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_icache_page(vma, page) __flush_page_to_ram(page_address(page)) diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index b3dc9c589442..f207388541d5 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -61,6 +61,8 @@ static inline void flush_dcache_page(struct page *page) SetPageDcacheDirty(page); } +void flush_dcache_folio(struct folio *folio); + #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index c2a222ebfa2a..3fc0bb7d6487 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -27,6 +27,7 @@ void flush_cache_vunmap(unsigned long start, unsigned long end); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, void *src, int len); void copy_from_user_page(struct vm_area_struct *vma, struct page *page, diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h index 18eb9f69f806..1999561b22aa 100644 --- a/arch/nios2/include/asm/cacheflush.h +++ b/arch/nios2/include/asm/cacheflush.h @@ -28,7 +28,8 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, extern void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); extern void flush_icache_range(unsigned long start, unsigned long end); extern void flush_icache_page(struct vm_area_struct *vma, struct page *page); diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index eef0096db5f8..da0cd4b3a28f 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -49,7 +49,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size); #define flush_cache_vunmap(start, end) flush_cache_all() #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index 372afa82fee6..c7a97f32432f 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -42,7 +42,8 @@ extern void flush_cache_page(struct vm_area_struct *vma, extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); extern void flush_icache_range(unsigned long start, unsigned long end); #define flush_icache_user_range flush_icache_range extern void flush_icache_page(struct vm_area_struct *vma, diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index cf907e5bf2f2..a8a041609c5d 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -120,7 +120,8 @@ void flush_cache_page(struct vm_area_struct*, #define flush_cache_vunmap(start,end) flush_cache_all() #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page*); +void flush_dcache_page(struct page *); +void flush_dcache_folio(struct folio *); void local_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -137,7 +138,9 @@ void local_flush_cache_page(struct vm_area_struct *vma, #define flush_cache_vunmap(start,end) do { } while (0) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO #define flush_dcache_page(page) do { } while (0) +static inline void flush_dcache_folio(struct folio *folio) { } #define flush_icache_range local_flush_icache_range #define flush_cache_page(vma, addr, pfn) do { } while (0) diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h index 4a674db4e1fa..fedc0dfa4877 100644 --- a/include/asm-generic/cacheflush.h +++ b/include/asm-generic/cacheflush.h @@ -49,9 +49,15 @@ static inline void flush_cache_page(struct vm_area_struct *vma, static inline void flush_dcache_page(struct page *page) { } + +static inline void flush_dcache_folio(struct folio *folio) { } #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO #endif +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +void flush_dcache_folio(struct folio *folio); +#endif #ifndef flush_dcache_mmap_lock static inline void flush_dcache_mmap_lock(struct address_space *mapping) diff --git a/mm/util.c b/mm/util.c index 96a5e7185f65..bf860838282c 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1076,3 +1076,14 @@ void page_offline_end(void) up_write(&page_offline_rwsem); } EXPORT_SYMBOL(page_offline_end); + +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +void flush_dcache_folio(struct folio *folio) +{ + long i, nr = folio_nr_pages(folio); + + for (i = 0; i < nr; i++) + flush_dcache_page(folio_page(folio, i)); +} +EXPORT_SYMBOL(flush_dcache_folio); +#endif -- cgit v1.2.3 From 53c36de0701f2fdda6b1d209dc89e2bad4a9c7b8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 30 Dec 2020 10:21:39 -0500 Subject: mm: Add kmap_local_folio() This allows us to map a portion of a folio. Callers can only expect to access up to the next page boundary. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Vlastimil Babka --- include/linux/highmem-internal.h | 11 +++++++++++ include/linux/highmem.h | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 4aa1031d3e4c..0a0b2b09b1b8 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -73,6 +73,12 @@ static inline void *kmap_local_page(struct page *page) return __kmap_local_page_prot(page, kmap_prot); } +static inline void *kmap_local_folio(struct folio *folio, size_t offset) +{ + struct page *page = folio_page(folio, offset / PAGE_SIZE); + return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE; +} + static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) { return __kmap_local_page_prot(page, prot); @@ -171,6 +177,11 @@ static inline void *kmap_local_page(struct page *page) return page_address(page); } +static inline void *kmap_local_folio(struct folio *folio, size_t offset) +{ + return page_address(&folio->page) + offset; +} + static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) { return kmap_local_page(page); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b4c49f9cc379..27cdd715c5f9 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -96,6 +96,43 @@ static inline void kmap_flush_unused(void); */ static inline void *kmap_local_page(struct page *page); +/** + * kmap_local_folio - Map a page in this folio for temporary usage + * @folio: The folio containing the page. + * @offset: The byte offset within the folio which identifies the page. + * + * Requires careful handling when nesting multiple mappings because the map + * management is stack based. The unmap has to be in the reverse order of + * the map operation:: + * + * addr1 = kmap_local_folio(folio1, offset1); + * addr2 = kmap_local_folio(folio2, offset2); + * ... + * kunmap_local(addr2); + * kunmap_local(addr1); + * + * Unmapping addr1 before addr2 is invalid and causes malfunction. + * + * Contrary to kmap() mappings the mapping is only valid in the context of + * the caller and cannot be handed to other contexts. + * + * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the + * virtual address of the direct mapping. Only real highmem pages are + * temporarily mapped. + * + * While it is significantly faster than kmap() for the higmem case it + * comes with restrictions about the pointer validity. Only use when really + * necessary. + * + * On HIGHMEM enabled systems mapping a highmem page has the side effect of + * disabling migration in order to keep the virtual address stable across + * preemption. No caller of kmap_local_folio() can rely on this side effect. + * + * Context: Can be invoked from any context. + * Return: The virtual address of @offset. + */ +static inline void *kmap_local_folio(struct folio *folio, size_t offset); + /** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! * @page: Pointer to the page to be mapped -- cgit v1.2.3 From b424de33c42dbd03e39ca8f521126d39acb6c335 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 24 Apr 2021 14:50:36 -0400 Subject: mm: Add arch_make_folio_accessible() As a default implementation, call arch_make_page_accessible n times. If an architecture can do better, it can override this. Also move the default implementation of arch_make_page_accessible() from gfp.h to mm.h. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/gfp.h | 6 ------ include/linux/mm.h | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 55b2ec1f965a..dc5ff40608ce 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -520,12 +520,6 @@ static inline void arch_free_page(struct page *page, int order) { } #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif -#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -static inline int arch_make_page_accessible(struct page *page) -{ - return 0; -} -#endif struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); diff --git a/include/linux/mm.h b/include/linux/mm.h index 47143f3e7f0a..c28cace6fe6b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1734,6 +1734,29 @@ static inline size_t folio_size(struct folio *folio) return PAGE_SIZE << folio_order(folio); } +#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE +static inline int arch_make_page_accessible(struct page *page) +{ + return 0; +} +#endif + +#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE +static inline int arch_make_folio_accessible(struct folio *folio) +{ + int ret; + long i, nr = folio_nr_pages(folio); + + for (i = 0; i < nr; i++) { + ret = arch_make_page_accessible(folio_page(folio, i)); + if (ret) + break; + } + + return ret; +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ -- cgit v1.2.3 From 35a020ba0802f732ba070e03e50b140aea4373c4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 09:00:12 -0400 Subject: mm: Add folio_young and folio_idle Idle page tracking is handled through page_ext on 32-bit architectures. Add folio equivalents for 32-bit and move all the page compatibility parts to common code. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Reviewed-by: David Howells --- include/linux/page_idle.h | 99 +++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index d8a6aecf99cb..83abf95e9fa7 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -8,46 +8,16 @@ #ifdef CONFIG_PAGE_IDLE_FLAG -#ifdef CONFIG_64BIT -static inline bool page_is_young(struct page *page) -{ - return PageYoung(page); -} - -static inline void set_page_young(struct page *page) -{ - SetPageYoung(page); -} - -static inline bool test_and_clear_page_young(struct page *page) -{ - return TestClearPageYoung(page); -} - -static inline bool page_is_idle(struct page *page) -{ - return PageIdle(page); -} - -static inline void set_page_idle(struct page *page) -{ - SetPageIdle(page); -} - -static inline void clear_page_idle(struct page *page) -{ - ClearPageIdle(page); -} -#else /* !CONFIG_64BIT */ +#ifndef CONFIG_64BIT /* * If there is not enough space to store Idle and Young bits in page flags, use * page ext flags instead. */ extern struct page_ext_operations page_idle_ops; -static inline bool page_is_young(struct page *page) +static inline bool folio_test_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -55,9 +25,9 @@ static inline bool page_is_young(struct page *page) return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline void set_page_young(struct page *page) +static inline void folio_set_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; @@ -65,9 +35,9 @@ static inline void set_page_young(struct page *page) set_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline bool test_and_clear_page_young(struct page *page) +static inline bool folio_test_clear_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -75,9 +45,9 @@ static inline bool test_and_clear_page_young(struct page *page) return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline bool page_is_idle(struct page *page) +static inline bool folio_test_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -85,9 +55,9 @@ static inline bool page_is_idle(struct page *page) return test_bit(PAGE_EXT_IDLE, &page_ext->flags); } -static inline void set_page_idle(struct page *page) +static inline void folio_set_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; @@ -95,46 +65,75 @@ static inline void set_page_idle(struct page *page) set_bit(PAGE_EXT_IDLE, &page_ext->flags); } -static inline void clear_page_idle(struct page *page) +static inline void folio_clear_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; clear_bit(PAGE_EXT_IDLE, &page_ext->flags); } -#endif /* CONFIG_64BIT */ +#endif /* !CONFIG_64BIT */ #else /* !CONFIG_PAGE_IDLE_FLAG */ -static inline bool page_is_young(struct page *page) +static inline bool folio_test_young(struct folio *folio) { return false; } -static inline void set_page_young(struct page *page) +static inline void folio_set_young(struct folio *folio) { } -static inline bool test_and_clear_page_young(struct page *page) +static inline bool folio_test_clear_young(struct folio *folio) { return false; } -static inline bool page_is_idle(struct page *page) +static inline bool folio_test_idle(struct folio *folio) { return false; } -static inline void set_page_idle(struct page *page) +static inline void folio_set_idle(struct folio *folio) { } -static inline void clear_page_idle(struct page *page) +static inline void folio_clear_idle(struct folio *folio) { } #endif /* CONFIG_PAGE_IDLE_FLAG */ +static inline bool page_is_young(struct page *page) +{ + return folio_test_young(page_folio(page)); +} + +static inline void set_page_young(struct page *page) +{ + folio_set_young(page_folio(page)); +} + +static inline bool test_and_clear_page_young(struct page *page) +{ + return folio_test_clear_young(page_folio(page)); +} + +static inline bool page_is_idle(struct page *page) +{ + return folio_test_idle(page_folio(page)); +} + +static inline void set_page_idle(struct page *page) +{ + folio_set_idle(page_folio(page)); +} + +static inline void clear_page_idle(struct page *page) +{ + folio_clear_idle(page_folio(page)); +} #endif /* _LINUX_MM_PAGE_IDLE_H */ -- cgit v1.2.3 From f2d273927ea49e2f85b660aeca26f58b8a15d385 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 10:37:50 -0400 Subject: mm/swap: Add folio_activate() This replaces activate_page() and eliminates lots of calls to compound_head(). Saves net 118 bytes of kernel text. There are still some redundant calls to page_folio() here which will be removed when pagevec_lru_move_fn() is converted to use folios. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/trace/events/pagemap.h | 14 ++++++-------- mm/swap.c | 41 ++++++++++++++++++++++------------------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 92ad176210ff..1fd0185d66e8 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -60,23 +60,21 @@ TRACE_EVENT(mm_lru_insertion, TRACE_EVENT(mm_lru_activate, - TP_PROTO(struct page *page), + TP_PROTO(struct folio *folio), - TP_ARGS(page), + TP_ARGS(folio), TP_STRUCT__entry( - __field(struct page *, page ) + __field(struct folio *, folio ) __field(unsigned long, pfn ) ), TP_fast_assign( - __entry->page = page; - __entry->pfn = page_to_pfn(page); + __entry->folio = folio; + __entry->pfn = folio_pfn(folio); ), - /* Flag format is based on page-types.c formatting for pagemap */ - TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn) - + TP_printk("folio=%p pfn=0x%lx", __entry->folio, __entry->pfn) ); #endif /* _TRACE_PAGEMAP_H */ diff --git a/mm/swap.c b/mm/swap.c index 5c688897c013..3860d6bc8c8a 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -300,15 +300,15 @@ void lru_note_cost_page(struct page *page) page_is_file_lru(page), thp_nr_pages(page)); } -static void __activate_page(struct page *page, struct lruvec *lruvec) +static void __folio_activate(struct folio *folio, struct lruvec *lruvec) { - if (!PageActive(page) && !PageUnevictable(page)) { - int nr_pages = thp_nr_pages(page); + if (!folio_test_active(folio) && !folio_test_unevictable(folio)) { + long nr_pages = folio_nr_pages(folio); - del_page_from_lru_list(page, lruvec); - SetPageActive(page); - add_page_to_lru_list(page, lruvec); - trace_mm_lru_activate(page); + lruvec_del_folio(lruvec, folio); + folio_set_active(folio); + lruvec_add_folio(lruvec, folio); + trace_mm_lru_activate(folio); __count_vm_events(PGACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, @@ -317,6 +317,11 @@ static void __activate_page(struct page *page, struct lruvec *lruvec) } #ifdef CONFIG_SMP +static void __activate_page(struct page *page, struct lruvec *lruvec) +{ + return __folio_activate(page_folio(page), lruvec); +} + static void activate_page_drain(int cpu) { struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu); @@ -330,16 +335,16 @@ static bool need_activate_page_drain(int cpu) return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; } -static void activate_page(struct page *page) +static void folio_activate(struct folio *folio) { - page = compound_head(page); - if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { + if (folio_test_lru(folio) && !folio_test_active(folio) && + !folio_test_unevictable(folio)) { struct pagevec *pvec; + folio_get(folio); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.activate_page); - get_page(page); - if (pagevec_add_and_need_flush(pvec, page)) + if (pagevec_add_and_need_flush(pvec, &folio->page)) pagevec_lru_move_fn(pvec, __activate_page); local_unlock(&lru_pvecs.lock); } @@ -350,17 +355,15 @@ static inline void activate_page_drain(int cpu) { } -static void activate_page(struct page *page) +static void folio_activate(struct folio *folio) { - struct folio *folio = page_folio(page); struct lruvec *lruvec; - page = &folio->page; - if (TestClearPageLRU(page)) { + if (folio_test_clear_lru(folio)) { lruvec = folio_lruvec_lock_irq(folio); - __activate_page(page, lruvec); + __folio_activate(folio, lruvec); unlock_page_lruvec_irq(lruvec); - SetPageLRU(page); + folio_set_lru(folio); } } #endif @@ -425,7 +428,7 @@ void mark_page_accessed(struct page *page) * LRU on the next drain. */ if (PageLRU(page)) - activate_page(page); + folio_activate(page_folio(page)); else __lru_cache_activate_page(page); ClearPageReferenced(page); -- cgit v1.2.3 From 76580b6529db622964b4ffee59ded25efcb73748 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 10:47:39 -0400 Subject: mm/swap: Add folio_mark_accessed() Convert mark_page_accessed() to folio_mark_accessed(). It already operated on the entire compound page, but now we can avoid calling compound_head quite so many times. Shrinks the function from 424 bytes to 295 bytes (shrinking by 129 bytes). The compatibility wrapper is 30 bytes, plus the 8 bytes for the exported symbol means the kernel shrinks by 91 bytes. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/swap.h | 3 ++- mm/folio-compat.c | 7 +++++++ mm/swap.c | 34 ++++++++++++++++------------------ 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 0fc84797623f..067b7af5242c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -352,7 +352,8 @@ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); -extern void mark_page_accessed(struct page *); +void mark_page_accessed(struct page *); +void folio_mark_accessed(struct folio *); extern atomic_t lru_disable_count; diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 7044fcc8a8aa..a374747ae1c6 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -5,6 +5,7 @@ */ #include +#include struct address_space *page_mapping(struct page *page) { @@ -41,3 +42,9 @@ bool page_mapped(struct page *page) return folio_mapped(page_folio(page)); } EXPORT_SYMBOL(page_mapped); + +void mark_page_accessed(struct page *page) +{ + folio_mark_accessed(page_folio(page)); +} +EXPORT_SYMBOL(mark_page_accessed); diff --git a/mm/swap.c b/mm/swap.c index 3860d6bc8c8a..b95fd6d05768 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -368,7 +368,7 @@ static void folio_activate(struct folio *folio) } #endif -static void __lru_cache_activate_page(struct page *page) +static void __lru_cache_activate_folio(struct folio *folio) { struct pagevec *pvec; int i; @@ -389,8 +389,8 @@ static void __lru_cache_activate_page(struct page *page) for (i = pagevec_count(pvec) - 1; i >= 0; i--) { struct page *pagevec_page = pvec->pages[i]; - if (pagevec_page == page) { - SetPageActive(page); + if (pagevec_page == &folio->page) { + folio_set_active(folio); break; } } @@ -408,36 +408,34 @@ static void __lru_cache_activate_page(struct page *page) * When a newly allocated page is not yet visible, so safe for non-atomic ops, * __SetPageReferenced(page) may be substituted for mark_page_accessed(page). */ -void mark_page_accessed(struct page *page) +void folio_mark_accessed(struct folio *folio) { - page = compound_head(page); - - if (!PageReferenced(page)) { - SetPageReferenced(page); - } else if (PageUnevictable(page)) { + if (!folio_test_referenced(folio)) { + folio_set_referenced(folio); + } else if (folio_test_unevictable(folio)) { /* * Unevictable pages are on the "LRU_UNEVICTABLE" list. But, * this list is never rotated or maintained, so marking an * evictable page accessed has no effect. */ - } else if (!PageActive(page)) { + } else if (!folio_test_active(folio)) { /* * If the page is on the LRU, queue it for activation via * lru_pvecs.activate_page. Otherwise, assume the page is on a * pagevec, mark it active and it'll be moved to the active * LRU on the next drain. */ - if (PageLRU(page)) - folio_activate(page_folio(page)); + if (folio_test_lru(folio)) + folio_activate(folio); else - __lru_cache_activate_page(page); - ClearPageReferenced(page); - workingset_activation(page_folio(page)); + __lru_cache_activate_folio(folio); + folio_clear_referenced(folio); + workingset_activation(folio); } - if (page_is_idle(page)) - clear_page_idle(page); + if (folio_test_idle(folio)) + folio_clear_idle(folio); } -EXPORT_SYMBOL(mark_page_accessed); +EXPORT_SYMBOL(folio_mark_accessed); /** * lru_cache_add - add a page to a page list -- cgit v1.2.3 From d9c08e2232fbca4fa56b9350f7f84835c4aa3add Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Feb 2021 16:02:57 -0500 Subject: mm/rmap: Add folio_mkclean() Transform page_mkclean() into folio_mkclean() and add a page_mkclean() wrapper around folio_mkclean(). folio_mkclean is 15 bytes smaller than page_mkclean, but the kernel is enlarged by 33 bytes due to inlining page_folio() into each caller. This will go away once the callers are converted to use folio_mkclean(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/rmap.h | 10 ++++++---- mm/rmap.c | 12 ++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c976cc6de257..e704b1a4c06c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -235,7 +235,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); * * returns the number of cleaned PTEs. */ -int page_mkclean(struct page *); +int folio_mkclean(struct folio *); /* * called in munlock()/munmap() path to check for other vmas holding @@ -295,12 +295,14 @@ static inline void try_to_unmap(struct page *page, enum ttu_flags flags) { } -static inline int page_mkclean(struct page *page) +static inline int folio_mkclean(struct folio *folio) { return 0; } - - #endif /* CONFIG_MMU */ +static inline int page_mkclean(struct page *page) +{ + return folio_mkclean(page_folio(page)); +} #endif /* _LINUX_RMAP_H */ diff --git a/mm/rmap.c b/mm/rmap.c index 059556dbefec..3a1059c284c3 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -981,7 +981,7 @@ static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) return true; } -int page_mkclean(struct page *page) +int folio_mkclean(struct folio *folio) { int cleaned = 0; struct address_space *mapping; @@ -991,20 +991,20 @@ int page_mkclean(struct page *page) .invalid_vma = invalid_mkclean_vma, }; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); - if (!page_mapped(page)) + if (!folio_mapped(folio)) return 0; - mapping = page_mapping(page); + mapping = folio_mapping(folio); if (!mapping) return 0; - rmap_walk(page, &rwc); + rmap_walk(&folio->page, &rwc); return cleaned; } -EXPORT_SYMBOL_GPL(page_mkclean); +EXPORT_SYMBOL_GPL(folio_mkclean); /** * page_move_anon_rmap - move a page to our anon_vma -- cgit v1.2.3 From 3417013e0d183be9b42d794082eec0ec1c5b5f15 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 07:28:40 -0400 Subject: mm/migrate: Add folio_migrate_mapping() Reimplement migrate_page_move_mapping() as a wrapper around folio_migrate_mapping(). Saves 193 bytes of kernel text. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/migrate.h | 2 ++ mm/folio-compat.c | 11 +++++++ mm/migrate.c | 85 +++++++++++++++++++++++++------------------------ 3 files changed, 57 insertions(+), 41 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c8077e936691..58b3af645e20 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -57,6 +57,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); +int folio_migrate_mapping(struct address_space *mapping, + struct folio *newfolio, struct folio *folio, int extra_count); #else static inline void putback_movable_pages(struct list_head *l) {} diff --git a/mm/folio-compat.c b/mm/folio-compat.c index a374747ae1c6..d883d964fd52 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -4,6 +4,7 @@ * eventually. */ +#include #include #include @@ -48,3 +49,13 @@ void mark_page_accessed(struct page *page) folio_mark_accessed(page_folio(page)); } EXPORT_SYMBOL(mark_page_accessed); + +#ifdef CONFIG_MIGRATION +int migrate_page_move_mapping(struct address_space *mapping, + struct page *newpage, struct page *page, int extra_count) +{ + return folio_migrate_mapping(mapping, page_folio(newpage), + page_folio(page), extra_count); +} +EXPORT_SYMBOL(migrate_page_move_mapping); +#endif diff --git a/mm/migrate.c b/mm/migrate.c index bfb8ba490479..0e408ee4b7b2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -364,7 +364,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) */ expected_count += is_device_private_page(page); if (mapping) - expected_count += thp_nr_pages(page) + page_has_private(page); + expected_count += compound_nr(page) + page_has_private(page); return expected_count; } @@ -377,74 +377,75 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) * 2 for pages with a mapping * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. */ -int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count) +int folio_migrate_mapping(struct address_space *mapping, + struct folio *newfolio, struct folio *folio, int extra_count) { - XA_STATE(xas, &mapping->i_pages, page_index(page)); + XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct zone *oldzone, *newzone; int dirty; - int expected_count = expected_page_refs(mapping, page) + extra_count; - int nr = thp_nr_pages(page); + int expected_count = expected_page_refs(mapping, &folio->page) + extra_count; + long nr = folio_nr_pages(folio); if (!mapping) { /* Anonymous page without mapping */ - if (page_count(page) != expected_count) + if (folio_ref_count(folio) != expected_count) return -EAGAIN; /* No turning back from here */ - newpage->index = page->index; - newpage->mapping = page->mapping; - if (PageSwapBacked(page)) - __SetPageSwapBacked(newpage); + newfolio->index = folio->index; + newfolio->mapping = folio->mapping; + if (folio_test_swapbacked(folio)) + __folio_set_swapbacked(newfolio); return MIGRATEPAGE_SUCCESS; } - oldzone = page_zone(page); - newzone = page_zone(newpage); + oldzone = folio_zone(folio); + newzone = folio_zone(newfolio); xas_lock_irq(&xas); - if (page_count(page) != expected_count || xas_load(&xas) != page) { + if (folio_ref_count(folio) != expected_count || + xas_load(&xas) != folio) { xas_unlock_irq(&xas); return -EAGAIN; } - if (!page_ref_freeze(page, expected_count)) { + if (!folio_ref_freeze(folio, expected_count)) { xas_unlock_irq(&xas); return -EAGAIN; } /* - * Now we know that no one else is looking at the page: + * Now we know that no one else is looking at the folio: * no turning back from here. */ - newpage->index = page->index; - newpage->mapping = page->mapping; - page_ref_add(newpage, nr); /* add cache reference */ - if (PageSwapBacked(page)) { - __SetPageSwapBacked(newpage); - if (PageSwapCache(page)) { - SetPageSwapCache(newpage); - set_page_private(newpage, page_private(page)); + newfolio->index = folio->index; + newfolio->mapping = folio->mapping; + folio_ref_add(newfolio, nr); /* add cache reference */ + if (folio_test_swapbacked(folio)) { + __folio_set_swapbacked(newfolio); + if (folio_test_swapcache(folio)) { + folio_set_swapcache(newfolio); + newfolio->private = folio_get_private(folio); } } else { - VM_BUG_ON_PAGE(PageSwapCache(page), page); + VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); } /* Move dirty while page refs frozen and newpage not yet exposed */ - dirty = PageDirty(page); + dirty = folio_test_dirty(folio); if (dirty) { - ClearPageDirty(page); - SetPageDirty(newpage); + folio_clear_dirty(folio); + folio_set_dirty(newfolio); } - xas_store(&xas, newpage); - if (PageTransHuge(page)) { + xas_store(&xas, newfolio); + if (nr > 1) { int i; for (i = 1; i < nr; i++) { xas_next(&xas); - xas_store(&xas, newpage); + xas_store(&xas, newfolio); } } @@ -453,7 +454,7 @@ int migrate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. */ - page_ref_unfreeze(page, expected_count - nr); + folio_ref_unfreeze(folio, expected_count - nr); xas_unlock(&xas); /* Leave irq disabled to prevent preemption while updating stats */ @@ -472,18 +473,18 @@ int migrate_page_move_mapping(struct address_space *mapping, struct lruvec *old_lruvec, *new_lruvec; struct mem_cgroup *memcg; - memcg = page_memcg(page); + memcg = folio_memcg(folio); old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat); new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat); __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr); __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr); - if (PageSwapBacked(page) && !PageSwapCache(page)) { + if (folio_test_swapbacked(folio) && !folio_test_swapcache(folio)) { __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr); __mod_lruvec_state(new_lruvec, NR_SHMEM, nr); } #ifdef CONFIG_SWAP - if (PageSwapCache(page)) { + if (folio_test_swapcache(folio)) { __mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr); __mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr); } @@ -499,11 +500,11 @@ int migrate_page_move_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } -EXPORT_SYMBOL(migrate_page_move_mapping); +EXPORT_SYMBOL(folio_migrate_mapping); /* * The expected number of remaining references is the same as that - * of migrate_page_move_mapping(). + * of folio_migrate_mapping(). */ int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page) @@ -564,7 +565,7 @@ void migrate_page_states(struct page *newpage, struct page *page) if (PageMappedToDisk(page)) SetPageMappedToDisk(newpage); - /* Move dirty on pages not done by migrate_page_move_mapping() */ + /* Move dirty on pages not done by folio_migrate_mapping() */ if (PageDirty(page)) SetPageDirty(newpage); @@ -640,11 +641,13 @@ int migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode) { + struct folio *newfolio = page_folio(newpage); + struct folio *folio = page_folio(page); int rc; - BUG_ON(PageWriteback(page)); /* Writeback must be complete */ + BUG_ON(folio_test_writeback(folio)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, 0); + rc = folio_migrate_mapping(mapping, newfolio, folio, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -2470,7 +2473,7 @@ static void migrate_vma_collect(struct migrate_vma *migrate) * @page: struct page to check * * Pinned pages cannot be migrated. This is the same test as in - * migrate_page_move_mapping(), except that here we allow migration of a + * folio_migrate_mapping(), except that here we allow migration of a * ZONE_DEVICE page. */ static bool migrate_vma_check_page(struct page *page) -- cgit v1.2.3 From 19138349ed59b90ce58aca319b873eca2e04ad43 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 15:26:29 -0400 Subject: mm/migrate: Add folio_migrate_flags() Turn migrate_page_states() into a wrapper around folio_migrate_flags(). Also convert two functions only called from folio_migrate_flags() to be folio-based. ksm_migrate_page() becomes folio_migrate_ksm() and copy_page_owner() becomes folio_copy_owner(). folio_migrate_flags() alone shrinks by two thirds -- 1967 bytes down to 642 bytes. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/ksm.h | 4 +-- include/linux/migrate.h | 1 + include/linux/page_owner.h | 8 ++--- mm/folio-compat.c | 6 ++++ mm/ksm.c | 31 ++++++++++------- mm/migrate.c | 84 ++++++++++++++++++++++------------------------ mm/page_owner.c | 10 +++--- 7 files changed, 77 insertions(+), 67 deletions(-) diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 161e8164abcf..a38a5bca1ba5 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -52,7 +52,7 @@ struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); -void ksm_migrate_page(struct page *newpage, struct page *oldpage); +void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); #else /* !CONFIG_KSM */ @@ -83,7 +83,7 @@ static inline void rmap_walk_ksm(struct page *page, { } -static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) +static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old) { } #endif /* CONFIG_MMU */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 58b3af645e20..aa875b83f7ba 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -57,6 +57,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); +void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); #else diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 719bfe5108c5..43c638c51c1f 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -12,7 +12,7 @@ extern void __reset_page_owner(struct page *page, unsigned int order); extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); extern void __split_page_owner(struct page *page, unsigned int nr); -extern void __copy_page_owner(struct page *oldpage, struct page *newpage); +extern void __folio_copy_owner(struct folio *newfolio, struct folio *old); extern void __set_page_owner_migrate_reason(struct page *page, int reason); extern void __dump_page_owner(const struct page *page); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, @@ -36,10 +36,10 @@ static inline void split_page_owner(struct page *page, unsigned int nr) if (static_branch_unlikely(&page_owner_inited)) __split_page_owner(page, nr); } -static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +static inline void folio_copy_owner(struct folio *newfolio, struct folio *old) { if (static_branch_unlikely(&page_owner_inited)) - __copy_page_owner(oldpage, newpage); + __folio_copy_owner(newfolio, old); } static inline void set_page_owner_migrate_reason(struct page *page, int reason) { @@ -63,7 +63,7 @@ static inline void split_page_owner(struct page *page, unsigned int order) { } -static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio) { } static inline void set_page_owner_migrate_reason(struct page *page, int reason) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index d883d964fd52..3f00ad92d1ff 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -58,4 +58,10 @@ int migrate_page_move_mapping(struct address_space *mapping, page_folio(page), extra_count); } EXPORT_SYMBOL(migrate_page_move_mapping); + +void migrate_page_states(struct page *newpage, struct page *page) +{ + folio_migrate_flags(page_folio(newpage), page_folio(page)); +} +EXPORT_SYMBOL(migrate_page_states); #endif diff --git a/mm/ksm.c b/mm/ksm.c index c246a0b0ac75..0662093237e4 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -751,7 +751,7 @@ stale: /* * We come here from above when page->mapping or !PageSwapCache * suggests that the node is stale; but it might be under migration. - * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(), + * We need smp_rmb(), matching the smp_wmb() in folio_migrate_ksm(), * before checking whether node->kpfn has been changed. */ smp_rmb(); @@ -852,9 +852,14 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma, return err; } +static inline struct stable_node *folio_stable_node(struct folio *folio) +{ + return folio_test_ksm(folio) ? folio_raw_mapping(folio) : NULL; +} + static inline struct stable_node *page_stable_node(struct page *page) { - return PageKsm(page) ? page_rmapping(page) : NULL; + return folio_stable_node(page_folio(page)); } static inline void set_page_stable_node(struct page *page, @@ -2659,26 +2664,26 @@ again: } #ifdef CONFIG_MIGRATION -void ksm_migrate_page(struct page *newpage, struct page *oldpage) +void folio_migrate_ksm(struct folio *newfolio, struct folio *folio) { struct stable_node *stable_node; - VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); - VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); - VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio); + VM_BUG_ON_FOLIO(newfolio->mapping != folio->mapping, newfolio); - stable_node = page_stable_node(newpage); + stable_node = folio_stable_node(folio); if (stable_node) { - VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage); - stable_node->kpfn = page_to_pfn(newpage); + VM_BUG_ON_FOLIO(stable_node->kpfn != folio_pfn(folio), folio); + stable_node->kpfn = folio_pfn(newfolio); /* - * newpage->mapping was set in advance; now we need smp_wmb() + * newfolio->mapping was set in advance; now we need smp_wmb() * to make sure that the new stable_node->kpfn is visible - * to get_ksm_page() before it can see that oldpage->mapping - * has gone stale (or that PageSwapCache has been cleared). + * to get_ksm_page() before it can see that folio->mapping + * has gone stale (or that folio_test_swapcache has been cleared). */ smp_wmb(); - set_page_stable_node(oldpage, NULL); + set_page_stable_node(&folio->page, NULL); } } #endif /* CONFIG_MIGRATION */ diff --git a/mm/migrate.c b/mm/migrate.c index 0e408ee4b7b2..f6e0017ef0bf 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -539,82 +539,80 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, } /* - * Copy the page to its new location + * Copy the flags and some other ancillary information */ -void migrate_page_states(struct page *newpage, struct page *page) +void folio_migrate_flags(struct folio *newfolio, struct folio *folio) { - struct folio *folio = page_folio(page); - struct folio *newfolio = page_folio(newpage); int cpupid; - if (PageError(page)) - SetPageError(newpage); - if (PageReferenced(page)) - SetPageReferenced(newpage); - if (PageUptodate(page)) - SetPageUptodate(newpage); - if (TestClearPageActive(page)) { - VM_BUG_ON_PAGE(PageUnevictable(page), page); - SetPageActive(newpage); - } else if (TestClearPageUnevictable(page)) - SetPageUnevictable(newpage); - if (PageWorkingset(page)) - SetPageWorkingset(newpage); - if (PageChecked(page)) - SetPageChecked(newpage); - if (PageMappedToDisk(page)) - SetPageMappedToDisk(newpage); + if (folio_test_error(folio)) + folio_set_error(newfolio); + if (folio_test_referenced(folio)) + folio_set_referenced(newfolio); + if (folio_test_uptodate(folio)) + folio_mark_uptodate(newfolio); + if (folio_test_clear_active(folio)) { + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + folio_set_active(newfolio); + } else if (folio_test_clear_unevictable(folio)) + folio_set_unevictable(newfolio); + if (folio_test_workingset(folio)) + folio_set_workingset(newfolio); + if (folio_test_checked(folio)) + folio_set_checked(newfolio); + if (folio_test_mappedtodisk(folio)) + folio_set_mappedtodisk(newfolio); /* Move dirty on pages not done by folio_migrate_mapping() */ - if (PageDirty(page)) - SetPageDirty(newpage); + if (folio_test_dirty(folio)) + folio_set_dirty(newfolio); - if (page_is_young(page)) - set_page_young(newpage); - if (page_is_idle(page)) - set_page_idle(newpage); + if (folio_test_young(folio)) + folio_set_young(newfolio); + if (folio_test_idle(folio)) + folio_set_idle(newfolio); /* * Copy NUMA information to the new page, to prevent over-eager * future migrations of this same page. */ - cpupid = page_cpupid_xchg_last(page, -1); - page_cpupid_xchg_last(newpage, cpupid); + cpupid = page_cpupid_xchg_last(&folio->page, -1); + page_cpupid_xchg_last(&newfolio->page, cpupid); - ksm_migrate_page(newpage, page); + folio_migrate_ksm(newfolio, folio); /* * Please do not reorder this without considering how mm/ksm.c's * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache(). */ - if (PageSwapCache(page)) - ClearPageSwapCache(page); - ClearPagePrivate(page); + if (folio_test_swapcache(folio)) + folio_clear_swapcache(folio); + folio_clear_private(folio); /* page->private contains hugetlb specific flags */ - if (!PageHuge(page)) - set_page_private(page, 0); + if (!folio_test_hugetlb(folio)) + folio->private = NULL; /* * If any waiters have accumulated on the new page then * wake them up. */ - if (PageWriteback(newpage)) - end_page_writeback(newpage); + if (folio_test_writeback(newfolio)) + folio_end_writeback(newfolio); /* * PG_readahead shares the same bit with PG_reclaim. The above * end_page_writeback() may clear PG_readahead mistakenly, so set the * bit after that. */ - if (PageReadahead(page)) - SetPageReadahead(newpage); + if (folio_test_readahead(folio)) + folio_set_readahead(newfolio); - copy_page_owner(page, newpage); + folio_copy_owner(newfolio, folio); - if (!PageHuge(page)) + if (!folio_test_hugetlb(folio)) mem_cgroup_migrate(folio, newfolio); } -EXPORT_SYMBOL(migrate_page_states); +EXPORT_SYMBOL(folio_migrate_flags); void migrate_page_copy(struct page *newpage, struct page *page) { @@ -655,7 +653,7 @@ int migrate_page(struct address_space *mapping, if (mode != MIGRATE_SYNC_NO_COPY) migrate_page_copy(newpage, page); else - migrate_page_states(newpage, page); + folio_migrate_flags(newfolio, folio); return MIGRATEPAGE_SUCCESS; } EXPORT_SYMBOL(migrate_page); diff --git a/mm/page_owner.c b/mm/page_owner.c index 62402d22539b..d24ed221357c 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -210,10 +210,10 @@ void __split_page_owner(struct page *page, unsigned int nr) } } -void __copy_page_owner(struct page *oldpage, struct page *newpage) +void __folio_copy_owner(struct folio *newfolio, struct folio *old) { - struct page_ext *old_ext = lookup_page_ext(oldpage); - struct page_ext *new_ext = lookup_page_ext(newpage); + struct page_ext *old_ext = lookup_page_ext(&old->page); + struct page_ext *new_ext = lookup_page_ext(&newfolio->page); struct page_owner *old_page_owner, *new_page_owner; if (unlikely(!old_ext || !new_ext)) @@ -231,11 +231,11 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) new_page_owner->free_ts_nsec = old_page_owner->ts_nsec; /* - * We don't clear the bit on the oldpage as it's going to be freed + * We don't clear the bit on the old folio as it's going to be freed * after migration. Until then, the info can be useful in case of * a bug, and the overall stats will be off a bit only temporarily. * Also, migrate_misplaced_transhuge_page() can still fail the - * migration and then we want the oldpage to retain the info. But + * migration and then we want the old folio to retain the info. But * in that case we also don't need to explicitly clear the info from * the new page, which will be freed. */ -- cgit v1.2.3 From 715cbfd6c5c595bc8b7a6f9ad1fe9fec0122bb20 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 15:05:06 -0400 Subject: mm/migrate: Add folio_migrate_copy() This is the folio equivalent of migrate_page_copy(), which is retained as a wrapper for filesystems which are not yet converted to folios. Also convert copy_huge_page() to folio_copy(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Acked-by: Vlastimil Babka --- include/linux/migrate.h | 1 + include/linux/mm.h | 2 +- mm/folio-compat.c | 6 ++++++ mm/hugetlb.c | 2 +- mm/migrate.c | 14 +++++--------- mm/util.c | 21 +++++++++++++++++---- 6 files changed, 31 insertions(+), 15 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index aa875b83f7ba..0d2aeb9b0f66 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -58,6 +58,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); +void folio_migrate_copy(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); #else diff --git a/include/linux/mm.h b/include/linux/mm.h index c28cace6fe6b..93d5fbe2e4e3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -912,7 +912,7 @@ void __put_page(struct page *page); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); -void copy_huge_page(struct page *dst, struct page *src); +void folio_copy(struct folio *dst, struct folio *src); /* * Compound pages have a destructor function. Provide a diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 3f00ad92d1ff..2ccd8f213fc4 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -64,4 +64,10 @@ void migrate_page_states(struct page *newpage, struct page *page) folio_migrate_flags(page_folio(newpage), page_folio(page)); } EXPORT_SYMBOL(migrate_page_states); + +void migrate_page_copy(struct page *newpage, struct page *page) +{ + folio_migrate_copy(page_folio(newpage), page_folio(page)); +} +EXPORT_SYMBOL(migrate_page_copy); #endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 95dc7b83381f..6378c1066459 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5302,7 +5302,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, *pagep = NULL; goto out; } - copy_huge_page(page, *pagep); + folio_copy(page_folio(page), page_folio(*pagep)); put_page(*pagep); *pagep = NULL; } diff --git a/mm/migrate.c b/mm/migrate.c index f6e0017ef0bf..433c453b47f9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -614,16 +614,12 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio) } EXPORT_SYMBOL(folio_migrate_flags); -void migrate_page_copy(struct page *newpage, struct page *page) +void folio_migrate_copy(struct folio *newfolio, struct folio *folio) { - if (PageHuge(page) || PageTransHuge(page)) - copy_huge_page(newpage, page); - else - copy_highpage(newpage, page); - - migrate_page_states(newpage, page); + folio_copy(newfolio, folio); + folio_migrate_flags(newfolio, folio); } -EXPORT_SYMBOL(migrate_page_copy); +EXPORT_SYMBOL(folio_migrate_copy); /************************************************************ * Migration functions @@ -651,7 +647,7 @@ int migrate_page(struct address_space *mapping, return rc; if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(newfolio, folio); else folio_migrate_flags(newfolio, folio); return MIGRATEPAGE_SUCCESS; diff --git a/mm/util.c b/mm/util.c index bf860838282c..e58151a61255 100644 --- a/mm/util.c +++ b/mm/util.c @@ -747,13 +747,26 @@ int __page_mapcount(struct page *page) } EXPORT_SYMBOL_GPL(__page_mapcount); -void copy_huge_page(struct page *dst, struct page *src) +/** + * folio_copy - Copy the contents of one folio to another. + * @dst: Folio to copy to. + * @src: Folio to copy from. + * + * The bytes in the folio represented by @src are copied to @dst. + * Assumes the caller has validated that @dst is at least as large as @src. + * Can be called in atomic context for order-0 folios, but if the folio is + * larger, it may sleep. + */ +void folio_copy(struct folio *dst, struct folio *src) { - unsigned i, nr = compound_nr(src); + long i = 0; + long nr = folio_nr_pages(src); - for (i = 0; i < nr; i++) { + for (;;) { + copy_highpage(folio_page(dst, i), folio_page(src, i)); + if (++i == nr) + break; cond_resched(); - copy_highpage(nth_page(dst, i), nth_page(src, i)); } } -- cgit v1.2.3 From bd3488e7b4d61780eb3dfaca1cc6f4026bcffd48 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 19 Mar 2021 08:39:50 -0400 Subject: mm/writeback: Rename __add_wb_stat() to wb_stat_mod() Make this look like the newly renamed vmstat functions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/backing-dev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ac7f231b8825..a62e72dd829f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -64,7 +64,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) return atomic_long_read(&bdi->tot_write_bandwidth); } -static inline void __add_wb_stat(struct bdi_writeback *wb, +static inline void wb_stat_mod(struct bdi_writeback *wb, enum wb_stat_item item, s64 amount) { percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); @@ -72,12 +72,12 @@ static inline void __add_wb_stat(struct bdi_writeback *wb, static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - __add_wb_stat(wb, item, 1); + wb_stat_mod(wb, item, 1); } static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - __add_wb_stat(wb, item, -1); + wb_stat_mod(wb, item, -1); } static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) -- cgit v1.2.3 From be5f1797523004d0d9aaee81a523d86e3b890007 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 20 Mar 2021 16:34:54 -0400 Subject: flex_proportions: Allow N events instead of 1 When batching events (such as writing back N pages in a single I/O), it is better to do one flex_proportion operation instead of N. There is only one caller of __fprop_inc_percpu_max(), and it's the one we're going to change in the next patch, so rename it instead of adding a compatibility wrapper. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara --- include/linux/flex_proportions.h | 9 +++++---- lib/flex_proportions.c | 28 +++++++++++++++++++--------- mm/page-writeback.c | 4 ++-- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index c12df59d3f5f..3e378b1fb0bc 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -83,9 +83,10 @@ struct fprop_local_percpu { int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp); void fprop_local_destroy_percpu(struct fprop_local_percpu *pl); -void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl); -void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl, - int max_frac); +void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, + long nr); +void __fprop_add_percpu_max(struct fprop_global *p, + struct fprop_local_percpu *pl, int max_frac, long nr); void fprop_fraction_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, unsigned long *numerator, unsigned long *denominator); @@ -96,7 +97,7 @@ void fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) unsigned long flags; local_irq_save(flags); - __fprop_inc_percpu(p, pl); + __fprop_add_percpu(p, pl, 1); local_irq_restore(flags); } diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 451543937524..53e7eb1dd76c 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -217,11 +217,12 @@ static void fprop_reflect_period_percpu(struct fprop_global *p, } /* Event of type pl happened */ -void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) +void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, + long nr) { fprop_reflect_period_percpu(p, pl); - percpu_counter_add_batch(&pl->events, 1, PROP_BATCH); - percpu_counter_add(&p->events, 1); + percpu_counter_add_batch(&pl->events, nr, PROP_BATCH); + percpu_counter_add(&p->events, nr); } void fprop_fraction_percpu(struct fprop_global *p, @@ -253,20 +254,29 @@ void fprop_fraction_percpu(struct fprop_global *p, } /* - * Like __fprop_inc_percpu() except that event is counted only if the given + * Like __fprop_add_percpu() except that event is counted only if the given * type has fraction smaller than @max_frac/FPROP_FRAC_BASE */ -void __fprop_inc_percpu_max(struct fprop_global *p, - struct fprop_local_percpu *pl, int max_frac) +void __fprop_add_percpu_max(struct fprop_global *p, + struct fprop_local_percpu *pl, int max_frac, long nr) { if (unlikely(max_frac < FPROP_FRAC_BASE)) { unsigned long numerator, denominator; + s64 tmp; fprop_fraction_percpu(p, pl, &numerator, &denominator); - if (numerator > - (((u64)denominator) * max_frac) >> FPROP_FRAC_SHIFT) + /* Adding 'nr' to fraction exceeds max_frac/FPROP_FRAC_BASE? */ + tmp = (u64)denominator * max_frac - + ((u64)numerator << FPROP_FRAC_SHIFT); + if (tmp < 0) { + /* Maximum fraction already exceeded? */ return; + } else if (tmp < nr * (FPROP_FRAC_BASE - max_frac)) { + /* Add just enough for the fraction to saturate */ + nr = div_u64(tmp + FPROP_FRAC_BASE - max_frac - 1, + FPROP_FRAC_BASE - max_frac); + } } - __fprop_inc_percpu(p, pl); + __fprop_add_percpu(p, pl, nr); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 1d8f2ee2e065..cb7387d0e77d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -566,8 +566,8 @@ static void wb_domain_writeout_inc(struct wb_domain *dom, struct fprop_local_percpu *completions, unsigned int max_prop_frac) { - __fprop_inc_percpu_max(&dom->completions, completions, - max_prop_frac); + __fprop_add_percpu_max(&dom->completions, completions, + max_prop_frac, 1); /* First event after period switching was turned off? */ if (unlikely(!dom->period_time)) { /* -- cgit v1.2.3 From cc24df4cd15f197676e8ba43383ba80e78a5b865 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 20 Mar 2021 16:45:15 -0400 Subject: mm/writeback: Change __wb_writeout_inc() to __wb_writeout_add() Allow for accounting N pages at once instead of one page at a time. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/page-writeback.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cb7387d0e77d..943cf74a76ed 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -562,12 +562,12 @@ static unsigned long wp_next_time(unsigned long cur_time) return cur_time; } -static void wb_domain_writeout_inc(struct wb_domain *dom, +static void wb_domain_writeout_add(struct wb_domain *dom, struct fprop_local_percpu *completions, - unsigned int max_prop_frac) + unsigned int max_prop_frac, long nr) { __fprop_add_percpu_max(&dom->completions, completions, - max_prop_frac, 1); + max_prop_frac, nr); /* First event after period switching was turned off? */ if (unlikely(!dom->period_time)) { /* @@ -585,18 +585,18 @@ static void wb_domain_writeout_inc(struct wb_domain *dom, * Increment @wb's writeout completion count and the global writeout * completion count. Called from test_clear_page_writeback(). */ -static inline void __wb_writeout_inc(struct bdi_writeback *wb) +static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr) { struct wb_domain *cgdom; - inc_wb_stat(wb, WB_WRITTEN); - wb_domain_writeout_inc(&global_wb_domain, &wb->completions, - wb->bdi->max_prop_frac); + wb_stat_mod(wb, WB_WRITTEN, nr); + wb_domain_writeout_add(&global_wb_domain, &wb->completions, + wb->bdi->max_prop_frac, nr); cgdom = mem_cgroup_wb_domain(wb); if (cgdom) - wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb), - wb->bdi->max_prop_frac); + wb_domain_writeout_add(cgdom, wb_memcg_completions(wb), + wb->bdi->max_prop_frac, nr); } void wb_writeout_inc(struct bdi_writeback *wb) @@ -604,7 +604,7 @@ void wb_writeout_inc(struct bdi_writeback *wb) unsigned long flags; local_irq_save(flags); - __wb_writeout_inc(wb); + __wb_writeout_add(wb, 1); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(wb_writeout_inc); @@ -2786,7 +2786,7 @@ int test_clear_page_writeback(struct page *page) struct bdi_writeback *wb = inode_to_wb(inode); dec_wb_stat(wb, WB_WRITEBACK); - __wb_writeout_inc(wb); + __wb_writeout_add(wb, 1); if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) wb_inode_writeback_end(wb); -- cgit v1.2.3 From 269ccca3899f6bce49e004f50f623e0b161fb027 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Jan 2021 23:34:16 -0500 Subject: mm/writeback: Add __folio_end_writeback() test_clear_page_writeback() is actually an mm-internal function, although it's named as if it's a pagecache function. Move it to mm/internal.h, rename it to __folio_end_writeback() and change the return type to bool. The conversion from page to folio is mostly about accounting the number of pages being written back, although it does eliminate a couple of calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/page-flags.h | 1 - mm/filemap.c | 2 +- mm/internal.h | 1 + mm/page-writeback.c | 29 +++++++++++++++-------------- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2491e84b8e52..3aebb2060a26 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -657,7 +657,6 @@ static __always_inline void SetPageUptodate(struct page *page) CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -int test_clear_page_writeback(struct page *page); int __test_set_page_writeback(struct page *page, bool keep_write); #define test_set_page_writeback(page) \ diff --git a/mm/filemap.c b/mm/filemap.c index 5368a4dcc35e..5cb18399a1b6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1590,7 +1590,7 @@ void folio_end_writeback(struct folio *folio) * reused before the folio_wake(). */ folio_get(folio); - if (!test_clear_page_writeback(&folio->page)) + if (!__folio_end_writeback(folio)) BUG(); smp_mb__after_atomic(); diff --git a/mm/internal.h b/mm/internal.h index 187a032fed4d..08fb03b16d3d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -43,6 +43,7 @@ static inline void *folio_raw_mapping(struct folio *folio) vm_fault_t do_swap_page(struct vm_fault *vmf); void folio_rotate_reclaimable(struct folio *folio); +bool __folio_end_writeback(struct folio *folio); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 943cf74a76ed..9ff9a33bced1 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -583,7 +583,7 @@ static void wb_domain_writeout_add(struct wb_domain *dom, /* * Increment @wb's writeout completion count and the global writeout - * completion count. Called from test_clear_page_writeback(). + * completion count. Called from __folio_end_writeback(). */ static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr) { @@ -2766,27 +2766,28 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb) queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); } -int test_clear_page_writeback(struct page *page) +bool __folio_end_writeback(struct folio *folio) { - struct address_space *mapping = page_mapping(page); - int ret; + long nr = folio_nr_pages(folio); + struct address_space *mapping = folio_mapping(folio); + bool ret; - lock_page_memcg(page); + folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - ret = TestClearPageWriteback(page); + ret = folio_test_clear_writeback(folio); if (ret) { - __xa_clear_mark(&mapping->i_pages, page_index(page), + __xa_clear_mark(&mapping->i_pages, folio_index(folio), PAGECACHE_TAG_WRITEBACK); if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); - dec_wb_stat(wb, WB_WRITEBACK); - __wb_writeout_add(wb, 1); + wb_stat_mod(wb, WB_WRITEBACK, -nr); + __wb_writeout_add(wb, nr); if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) wb_inode_writeback_end(wb); @@ -2799,14 +2800,14 @@ int test_clear_page_writeback(struct page *page) xa_unlock_irqrestore(&mapping->i_pages, flags); } else { - ret = TestClearPageWriteback(page); + ret = folio_test_clear_writeback(folio); } if (ret) { - dec_lruvec_page_state(page, NR_WRITEBACK); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); - inc_node_page_state(page, NR_WRITTEN); + lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + node_stat_mod_folio(folio, NR_WRITTEN, nr); } - unlock_page_memcg(page); + folio_memcg_unlock(folio); return ret; } -- cgit v1.2.3 From f143f1ea5a5380b2682e6836fcd24338a2aa82c7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 24 Apr 2021 12:00:48 -0400 Subject: mm/writeback: Add folio_start_writeback() Rename set_page_writeback() to folio_start_writeback() to match folio_end_writeback(). Do not bother with wrappers that return void; callers are perfectly capable of ignoring return values. Add wrappers for set_page_writeback(), set_page_writeback_keepwrite() and test_set_page_writeback() for compatibililty with existing filesystems. The main advantage of this patch is getting the statistics right, although it does eliminate a couple of calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/page-flags.h | 19 ++++++++++--------- mm/folio-compat.c | 6 ++++++ mm/page-writeback.c | 39 ++++++++++++++++++++------------------- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 3aebb2060a26..a68af80649a4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -657,21 +657,22 @@ static __always_inline void SetPageUptodate(struct page *page) CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -int __test_set_page_writeback(struct page *page, bool keep_write); +bool __folio_start_writeback(struct folio *folio, bool keep_write); +bool set_page_writeback(struct page *page); -#define test_set_page_writeback(page) \ - __test_set_page_writeback(page, false) -#define test_set_page_writeback_keepwrite(page) \ - __test_set_page_writeback(page, true) +#define folio_start_writeback(folio) \ + __folio_start_writeback(folio, false) +#define folio_start_writeback_keepwrite(folio) \ + __folio_start_writeback(folio, true) -static inline void set_page_writeback(struct page *page) +static inline void set_page_writeback_keepwrite(struct page *page) { - test_set_page_writeback(page); + folio_start_writeback_keepwrite(page_folio(page)); } -static inline void set_page_writeback_keepwrite(struct page *page) +static inline bool test_set_page_writeback(struct page *page) { - test_set_page_writeback_keepwrite(page); + return set_page_writeback(page); } __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 2ccd8f213fc4..10ce5582d869 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -71,3 +71,9 @@ void migrate_page_copy(struct page *newpage, struct page *page) } EXPORT_SYMBOL(migrate_page_copy); #endif + +bool set_page_writeback(struct page *page) +{ + return folio_start_writeback(page_folio(page)); +} +EXPORT_SYMBOL(set_page_writeback); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 9ff9a33bced1..82938b037103 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2811,21 +2811,23 @@ bool __folio_end_writeback(struct folio *folio) return ret; } -int __test_set_page_writeback(struct page *page, bool keep_write) +bool __folio_start_writeback(struct folio *folio, bool keep_write) { - struct address_space *mapping = page_mapping(page); - int ret, access_ret; + long nr = folio_nr_pages(folio); + struct address_space *mapping = folio_mapping(folio); + bool ret; + int access_ret; - lock_page_memcg(page); + folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { - XA_STATE(xas, &mapping->i_pages, page_index(page)); + XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; xas_lock_irqsave(&xas, flags); xas_load(&xas); - ret = TestSetPageWriteback(page); + ret = folio_test_set_writeback(folio); if (!ret) { bool on_wblist; @@ -2836,43 +2838,42 @@ int __test_set_page_writeback(struct page *page, bool keep_write) if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); - inc_wb_stat(wb, WB_WRITEBACK); + wb_stat_mod(wb, WB_WRITEBACK, nr); if (!on_wblist) wb_inode_writeback_start(wb); } /* - * We can come through here when swapping anonymous - * pages, so we don't necessarily have an inode to track - * for sync. + * We can come through here when swapping + * anonymous folios, so we don't necessarily + * have an inode to track for sync. */ if (mapping->host && !on_wblist) sb_mark_inode_writeback(mapping->host); } - if (!PageDirty(page)) + if (!folio_test_dirty(folio)) xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); if (!keep_write) xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); xas_unlock_irqrestore(&xas, flags); } else { - ret = TestSetPageWriteback(page); + ret = folio_test_set_writeback(folio); } if (!ret) { - inc_lruvec_page_state(page, NR_WRITEBACK); - inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); + lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); } - unlock_page_memcg(page); - access_ret = arch_make_page_accessible(page); + folio_memcg_unlock(folio); + access_ret = arch_make_folio_accessible(folio); /* * If writeback has been triggered on a page that cannot be made * accessible, it is too late to recover here. */ - VM_BUG_ON_PAGE(access_ret != 0, page); + VM_BUG_ON_FOLIO(access_ret != 0, folio); return ret; - } -EXPORT_SYMBOL(__test_set_page_writeback); +EXPORT_SYMBOL(__folio_start_writeback); /** * folio_wait_writeback - Wait for a folio to finish writeback. -- cgit v1.2.3 From b5e84594cafb934e023ee7d4ea4208b6c6b65fcc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 26 Apr 2021 23:53:10 -0400 Subject: mm/writeback: Add folio_mark_dirty() Reimplement set_page_dirty() as a wrapper around folio_mark_dirty(). There is no change to filesystems as they were already being called with the compound_head of the page being marked dirty. We avoid several calls to compound_head(), both statically (through using folio_test_dirty() instead of PageDirty() and dynamically by calling folio_mapping() instead of page_mapping(). Also return bool instead of int to show the range of values actually returned, and add kernel-doc. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/mm.h | 3 ++- mm/folio-compat.c | 6 ++++++ mm/page-writeback.c | 35 +++++++++++++++++++---------------- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 93d5fbe2e4e3..00510b02ffe1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2008,7 +2008,8 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); void account_page_cleaned(struct page *page, struct address_space *mapping, struct bdi_writeback *wb); -int set_page_dirty(struct page *page); +bool folio_mark_dirty(struct folio *folio); +bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); void __cancel_dirty_page(struct page *page); static inline void cancel_dirty_page(struct page *page) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 10ce5582d869..2c2b3917b5dc 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -77,3 +77,9 @@ bool set_page_writeback(struct page *page) return folio_start_writeback(page_folio(page)); } EXPORT_SYMBOL(set_page_writeback); + +bool set_page_dirty(struct page *page) +{ + return folio_mark_dirty(page_folio(page)); +} +EXPORT_SYMBOL(set_page_dirty); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 82938b037103..c79801656f5b 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2581,18 +2581,21 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) } EXPORT_SYMBOL(redirty_page_for_writepage); -/* - * Dirty a page. +/** + * folio_mark_dirty - Mark a folio as being modified. + * @folio: The folio. + * + * For folios with a mapping this should be done under the page lock + * for the benefit of asynchronous memory errors who prefer a consistent + * dirty state. This rule can be broken in some special cases, + * but should be better not to. * - * For pages with a mapping this should be done under the page lock for the - * benefit of asynchronous memory errors who prefer a consistent dirty state. - * This rule can be broken in some special cases, but should be better not to. + * Return: True if the folio was newly dirtied, false if it was already dirty. */ -int set_page_dirty(struct page *page) +bool folio_mark_dirty(struct folio *folio) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_mapping(folio); - page = compound_head(page); if (likely(mapping)) { /* * readahead/lru_deactivate_page could remain @@ -2604,17 +2607,17 @@ int set_page_dirty(struct page *page) * it will confuse readahead and make it restart the size rampup * process. But it's a trivial problem. */ - if (PageReclaim(page)) - ClearPageReclaim(page); - return mapping->a_ops->set_page_dirty(page); + if (folio_test_reclaim(folio)) + folio_clear_reclaim(folio); + return mapping->a_ops->set_page_dirty(&folio->page); } - if (!PageDirty(page)) { - if (!TestSetPageDirty(page)) - return 1; + if (!folio_test_dirty(folio)) { + if (!folio_test_set_dirty(folio)) + return true; } - return 0; + return false; } -EXPORT_SYMBOL(set_page_dirty); +EXPORT_SYMBOL(folio_mark_dirty); /* * set_page_dirty() is racy if the caller has no reference against -- cgit v1.2.3 From 203a31516616111b8eaaf00c16fa3fcaa25e6f81 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 11:01:10 -0400 Subject: mm/writeback: Add __folio_mark_dirty() Turn __set_page_dirty() into a wrapper around __folio_mark_dirty(). Convert account_page_dirtied() into folio_account_dirtied() and account the number of pages in the folio to support multi-page folios. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 5 ++--- include/linux/pagemap.h | 7 ++++++- mm/page-writeback.c | 41 +++++++++++++++++++++-------------------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7bd78c13d1fa..e34bf0cbdf55 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1615,10 +1615,9 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb); -static inline void mem_cgroup_track_foreign_dirty(struct page *page, +static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { - struct folio *folio = page_folio(page); if (mem_cgroup_disabled()) return; @@ -1643,7 +1642,7 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, { } -static inline void mem_cgroup_track_foreign_dirty(struct page *page, +static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bdbd7be67812..aa9a083083df 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -772,8 +772,13 @@ void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); void folio_wait_stable(struct folio *folio); +void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); +static inline void __set_page_dirty(struct page *page, + struct address_space *mapping, int warn) +{ + __folio_mark_dirty(page_folio(page), mapping, warn); +} -void __set_page_dirty(struct page *, struct address_space *, int warn); int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c79801656f5b..40d77597385f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2438,29 +2438,30 @@ EXPORT_SYMBOL(__set_page_dirty_no_writeback); * * NOTE: This relies on being atomic wrt interrupts. */ -static void account_page_dirtied(struct page *page, +static void folio_account_dirtied(struct folio *folio, struct address_space *mapping) { struct inode *inode = mapping->host; - trace_writeback_dirty_page(page, mapping); + trace_writeback_dirty_page(&folio->page, mapping); if (mapping_can_writeback(mapping)) { struct bdi_writeback *wb; + long nr = folio_nr_pages(folio); - inode_attach_wb(inode, page); + inode_attach_wb(inode, &folio->page); wb = inode_to_wb(inode); - __inc_lruvec_page_state(page, NR_FILE_DIRTY); - __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); - __inc_node_page_state(page, NR_DIRTIED); - inc_wb_stat(wb, WB_RECLAIMABLE); - inc_wb_stat(wb, WB_DIRTIED); - task_io_account_write(PAGE_SIZE); - current->nr_dirtied++; - __this_cpu_inc(bdp_ratelimits); + __lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr); + __zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); + __node_stat_mod_folio(folio, NR_DIRTIED, nr); + wb_stat_mod(wb, WB_RECLAIMABLE, nr); + wb_stat_mod(wb, WB_DIRTIED, nr); + task_io_account_write(nr * PAGE_SIZE); + current->nr_dirtied += nr; + __this_cpu_add(bdp_ratelimits, nr); - mem_cgroup_track_foreign_dirty(page, wb); + mem_cgroup_track_foreign_dirty(folio, wb); } } @@ -2481,24 +2482,24 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, } /* - * Mark the page dirty, and set it dirty in the page cache, and mark the inode - * dirty. + * Mark the folio dirty, and set it dirty in the page cache, and mark + * the inode dirty. * - * If warn is true, then emit a warning if the page is not uptodate and has + * If warn is true, then emit a warning if the folio is not uptodate and has * not been truncated. * * The caller must hold lock_page_memcg(). */ -void __set_page_dirty(struct page *page, struct address_space *mapping, +void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, int warn) { unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - if (page->mapping) { /* Race with truncate? */ - WARN_ON_ONCE(warn && !PageUptodate(page)); - account_page_dirtied(page, mapping); - __xa_set_mark(&mapping->i_pages, page_index(page), + if (folio->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(warn && !folio_test_uptodate(folio)); + folio_account_dirtied(folio, mapping); + __xa_set_mark(&mapping->i_pages, folio_index(folio), PAGECACHE_TAG_DIRTY); } xa_unlock_irqrestore(&mapping->i_pages, flags); -- cgit v1.2.3 From b9b0ff61eef51469eae962b80bae094ce2c8c2a5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Jul 2021 22:45:50 -0400 Subject: mm/writeback: Convert tracing writeback_page_template to folios Rename writeback_dirty_page() to writeback_dirty_folio() and wait_on_page_writeback() to folio_wait_writeback(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/trace/events/writeback.h | 20 ++++++++++---------- mm/page-writeback.c | 6 +++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 297871ca0004..7dccb66474f7 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -52,11 +52,11 @@ WB_WORK_REASON struct wb_writeback_work; -DECLARE_EVENT_CLASS(writeback_page_template, +DECLARE_EVENT_CLASS(writeback_folio_template, - TP_PROTO(struct page *page, struct address_space *mapping), + TP_PROTO(struct folio *folio, struct address_space *mapping), - TP_ARGS(page, mapping), + TP_ARGS(folio, mapping), TP_STRUCT__entry ( __array(char, name, 32) @@ -69,7 +69,7 @@ DECLARE_EVENT_CLASS(writeback_page_template, bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : NULL), 32); __entry->ino = mapping ? mapping->host->i_ino : 0; - __entry->index = page->index; + __entry->index = folio->index; ), TP_printk("bdi %s: ino=%lu index=%lu", @@ -79,18 +79,18 @@ DECLARE_EVENT_CLASS(writeback_page_template, ) ); -DEFINE_EVENT(writeback_page_template, writeback_dirty_page, +DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio, - TP_PROTO(struct page *page, struct address_space *mapping), + TP_PROTO(struct folio *folio, struct address_space *mapping), - TP_ARGS(page, mapping) + TP_ARGS(folio, mapping) ); -DEFINE_EVENT(writeback_page_template, wait_on_page_writeback, +DEFINE_EVENT(writeback_folio_template, folio_wait_writeback, - TP_PROTO(struct page *page, struct address_space *mapping), + TP_PROTO(struct folio *folio, struct address_space *mapping), - TP_ARGS(page, mapping) + TP_ARGS(folio, mapping) ); DECLARE_EVENT_CLASS(writeback_dirty_inode_template, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 40d77597385f..ebbd15f0d841 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2443,7 +2443,7 @@ static void folio_account_dirtied(struct folio *folio, { struct inode *inode = mapping->host; - trace_writeback_dirty_page(&folio->page, mapping); + trace_writeback_dirty_folio(folio, mapping); if (mapping_can_writeback(mapping)) { struct bdi_writeback *wb; @@ -2894,7 +2894,7 @@ EXPORT_SYMBOL(__folio_start_writeback); void folio_wait_writeback(struct folio *folio) { while (folio_test_writeback(folio)) { - trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); + trace_folio_wait_writeback(folio, folio_mapping(folio)); folio_wait_bit(folio, PG_writeback); } } @@ -2916,7 +2916,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback); int folio_wait_writeback_killable(struct folio *folio) { while (folio_test_writeback(folio)) { - trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); + trace_folio_wait_writeback(folio, folio_mapping(folio)); if (folio_wait_bit_killable(folio, PG_writeback)) return -EINTR; } -- cgit v1.2.3 From 85d4d2ebc86f02740c5f5f72ec43cc47d3560720 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 May 2021 23:30:44 -0400 Subject: mm/writeback: Add filemap_dirty_folio() Reimplement __set_page_dirty_nobuffers() as a wrapper around filemap_dirty_folio(). Eventually folio_mark_dirty() will pass the folio's mapping to the address space's ->dirty_folio() operation, so add the parameter to filemap_dirty_folio() now. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/writeback.h | 1 + mm/folio-compat.c | 6 +++++ mm/page-writeback.c | 60 ++++++++++++++++++++++++----------------------- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d1f65adf6a26..d4f24eba066f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -393,6 +393,7 @@ void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); +bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); void account_page_redirty(struct page *page); void sb_mark_inode_writeback(struct inode *inode); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 2c2b3917b5dc..dad962b920e5 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -83,3 +83,9 @@ bool set_page_dirty(struct page *page) return folio_mark_dirty(page_folio(page)); } EXPORT_SYMBOL(set_page_dirty); + +int __set_page_dirty_nobuffers(struct page *page) +{ + return filemap_dirty_folio(page_mapping(page), page_folio(page)); +} +EXPORT_SYMBOL(__set_page_dirty_nobuffers); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index ebbd15f0d841..a501dad430af 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2505,41 +2505,43 @@ void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, xa_unlock_irqrestore(&mapping->i_pages, flags); } -/* - * For address_spaces which do not use buffers. Just tag the page as dirty in - * the xarray. - * - * This is also used when a single buffer is being dirtied: we want to set the - * page dirty in that case, but not all the buffers. This is a "bottom-up" - * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. - * - * The caller must ensure this doesn't race with truncation. Most will simply - * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and - * the pte lock held, which also locks out truncation. +/** + * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads. + * @mapping: Address space this folio belongs to. + * @folio: Folio to be marked as dirty. + * + * Filesystems which do not use buffer heads should call this function + * from their set_page_dirty address space operation. It ignores the + * contents of folio_get_private(), so if the filesystem marks individual + * blocks as dirty, the filesystem should handle that itself. + * + * This is also sometimes used by filesystems which use buffer_heads when + * a single buffer is being dirtied: we want to set the folio dirty in + * that case, but not all the buffers. This is a "bottom-up" dirtying, + * whereas __set_page_dirty_buffers() is a "top-down" dirtying. + * + * The caller must ensure this doesn't race with truncation. Most will + * simply hold the folio lock, but e.g. zap_pte_range() calls with the + * folio mapped and the pte lock held, which also locks out truncation. */ -int __set_page_dirty_nobuffers(struct page *page) +bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio) { - lock_page_memcg(page); - if (!TestSetPageDirty(page)) { - struct address_space *mapping = page_mapping(page); + folio_memcg_lock(folio); + if (folio_test_set_dirty(folio)) { + folio_memcg_unlock(folio); + return false; + } - if (!mapping) { - unlock_page_memcg(page); - return 1; - } - __set_page_dirty(page, mapping, !PagePrivate(page)); - unlock_page_memcg(page); + __folio_mark_dirty(folio, mapping, !folio_test_private(folio)); + folio_memcg_unlock(folio); - if (mapping->host) { - /* !PageAnon && !swapper_space */ - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - } - return 1; + if (mapping->host) { + /* !PageAnon && !swapper_space */ + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } - unlock_page_memcg(page); - return 0; + return true; } -EXPORT_SYMBOL(__set_page_dirty_nobuffers); +EXPORT_SYMBOL(filemap_dirty_folio); /* * Call this whenever redirtying a page, to de-account the dirty counters -- cgit v1.2.3 From fc9b6a538b222eaeb4d1e3f93e716b6626d9b653 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 16:12:09 -0400 Subject: mm/writeback: Add folio_account_cleaned() Get the statistics right; compound pages were being accounted as a single page. This didn't matter before now as no filesystem which supported compound pages did writeback. Also move the declaration to pagemap.h since this is part of the page cache. Add a wrapper for account_page_cleaned(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/mm.h | 3 --- include/linux/pagemap.h | 7 +++++++ mm/page-writeback.c | 11 ++++++----- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 00510b02ffe1..f47af4ca4873 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -39,7 +39,6 @@ struct anon_vma_chain; struct file_ra_state; struct user_struct; struct writeback_control; -struct bdi_writeback; struct pt_regs; extern int sysctl_page_lock_unfairness; @@ -2006,8 +2005,6 @@ extern void do_invalidatepage(struct page *page, unsigned int offset, int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); -void account_page_cleaned(struct page *page, struct address_space *mapping, - struct bdi_writeback *wb); bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index aa9a083083df..8ba7da513ac4 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -778,6 +778,13 @@ static inline void __set_page_dirty(struct page *page, { __folio_mark_dirty(page_folio(page), mapping, warn); } +void folio_account_cleaned(struct folio *folio, struct address_space *mapping, + struct bdi_writeback *wb); +static inline void account_page_cleaned(struct page *page, + struct address_space *mapping, struct bdi_writeback *wb) +{ + return folio_account_cleaned(page_folio(page), mapping, wb); +} int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a501dad430af..82e3bc3d4eae 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2470,14 +2470,15 @@ static void folio_account_dirtied(struct folio *folio, * * Caller must hold lock_page_memcg(). */ -void account_page_cleaned(struct page *page, struct address_space *mapping, +void folio_account_cleaned(struct folio *folio, struct address_space *mapping, struct bdi_writeback *wb) { if (mapping_can_writeback(mapping)) { - dec_lruvec_page_state(page, NR_FILE_DIRTY); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); - dec_wb_stat(wb, WB_RECLAIMABLE); - task_io_account_cancelled_write(PAGE_SIZE); + long nr = folio_nr_pages(folio); + lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + wb_stat_mod(wb, WB_RECLAIMABLE, -nr); + task_io_account_cancelled_write(nr * PAGE_SIZE); } } -- cgit v1.2.3 From fdaf532a23795e320c47e1caab50a53c202135c5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Mar 2021 16:43:04 -0500 Subject: mm/writeback: Add folio_cancel_dirty() Turn __cancel_dirty_page() into __folio_cancel_dirty() and add wrappers. Move the prototypes into pagemap.h since this is page cache functionality. Saves 44 bytes of kernel text in total; 33 bytes from __folio_cancel_dirty and 11 from two callers of cancel_dirty_page(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/mm.h | 7 ------- include/linux/pagemap.h | 11 +++++++++++ mm/page-writeback.c | 16 ++++++++-------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f47af4ca4873..3bc394aafbc0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2008,13 +2008,6 @@ int redirty_page_for_writepage(struct writeback_control *wbc, bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); -void __cancel_dirty_page(struct page *page); -static inline void cancel_dirty_page(struct page *page) -{ - /* Avoid atomic ops, locking, etc. when not actually needed. */ - if (PageDirty(page)) - __cancel_dirty_page(page); -} int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8ba7da513ac4..8f4a9ba0bfb0 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -785,6 +785,17 @@ static inline void account_page_cleaned(struct page *page, { return folio_account_cleaned(page_folio(page), mapping, wb); } +void __folio_cancel_dirty(struct folio *folio); +static inline void folio_cancel_dirty(struct folio *folio) +{ + /* Avoid atomic ops, locking, etc. when not actually needed. */ + if (folio_test_dirty(folio)) + __folio_cancel_dirty(folio); +} +static inline void cancel_dirty_page(struct page *page) +{ + folio_cancel_dirty(page_folio(page)); +} int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 82e3bc3d4eae..16c4ab0cc1c9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2657,28 +2657,28 @@ EXPORT_SYMBOL(set_page_dirty_lock); * page without actually doing it through the VM. Can you say "ext3 is * horribly ugly"? Thought you could. */ -void __cancel_dirty_page(struct page *page) +void __folio_cancel_dirty(struct folio *folio) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_mapping(folio); if (mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; - lock_page_memcg(page); + folio_memcg_lock(folio); wb = unlocked_inode_to_wb_begin(inode, &cookie); - if (TestClearPageDirty(page)) - account_page_cleaned(page, mapping, wb); + if (folio_test_clear_dirty(folio)) + folio_account_cleaned(folio, mapping, wb); unlocked_inode_to_wb_end(inode, &cookie); - unlock_page_memcg(page); + folio_memcg_unlock(folio); } else { - ClearPageDirty(page); + folio_clear_dirty(folio); } } -EXPORT_SYMBOL(__cancel_dirty_page); +EXPORT_SYMBOL(__folio_cancel_dirty); /* * Clear a page's dirty flag, while caring for dirty memory accounting. -- cgit v1.2.3 From 9350f20a070d27a6c6a292a2b6f6091e7ea90a65 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Feb 2021 16:21:20 -0500 Subject: mm/writeback: Add folio_clear_dirty_for_io() Transform clear_page_dirty_for_io() into folio_clear_dirty_for_io() and add a compatibility wrapper. Also move the declaration to pagemap.h as this is page cache functionality that doesn't need to be used by the rest of the kernel. Increases the size of the kernel by 79 bytes. While we remove a few calls to compound_head(), we add a call to folio_nr_pages() to get the stats correct for the eventual support of multi-page folios. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/mm.h | 1 - include/linux/pagemap.h | 2 ++ mm/folio-compat.c | 6 +++++ mm/page-writeback.c | 63 +++++++++++++++++++++++++------------------------ 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3bc394aafbc0..dd5c6970ba67 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2008,7 +2008,6 @@ int redirty_page_for_writepage(struct writeback_control *wbc, bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); -int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8f4a9ba0bfb0..0dda6459d2d1 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -796,6 +796,8 @@ static inline void cancel_dirty_page(struct page *page) { folio_cancel_dirty(page_folio(page)); } +bool folio_clear_dirty_for_io(struct folio *folio); +bool clear_page_dirty_for_io(struct page *page); int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index dad962b920e5..39f5a8d963b1 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -89,3 +89,9 @@ int __set_page_dirty_nobuffers(struct page *page) return filemap_dirty_folio(page_mapping(page), page_folio(page)); } EXPORT_SYMBOL(__set_page_dirty_nobuffers); + +bool clear_page_dirty_for_io(struct page *page) +{ + return folio_clear_dirty_for_io(page_folio(page)); +} +EXPORT_SYMBOL(clear_page_dirty_for_io); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 16c4ab0cc1c9..85c04445de97 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2681,25 +2681,25 @@ void __folio_cancel_dirty(struct folio *folio) EXPORT_SYMBOL(__folio_cancel_dirty); /* - * Clear a page's dirty flag, while caring for dirty memory accounting. - * Returns true if the page was previously dirty. - * - * This is for preparing to put the page under writeout. We leave the page - * tagged as dirty in the xarray so that a concurrent write-for-sync - * can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage - * implementation will run either set_page_writeback() or set_page_dirty(), - * at which stage we bring the page's dirty flag and xarray dirty tag - * back into sync. - * - * This incoherency between the page's dirty flag and xarray tag is - * unfortunate, but it only exists while the page is locked. + * Clear a folio's dirty flag, while caring for dirty memory accounting. + * Returns true if the folio was previously dirty. + * + * This is for preparing to put the folio under writeout. We leave + * the folio tagged as dirty in the xarray so that a concurrent + * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk. + * The ->writepage implementation will run either folio_start_writeback() + * or folio_mark_dirty(), at which stage we bring the folio's dirty flag + * and xarray dirty tag back into sync. + * + * This incoherency between the folio's dirty flag and xarray tag is + * unfortunate, but it only exists while the folio is locked. */ -int clear_page_dirty_for_io(struct page *page) +bool folio_clear_dirty_for_io(struct folio *folio) { - struct address_space *mapping = page_mapping(page); - int ret = 0; + struct address_space *mapping = folio_mapping(folio); + bool ret = false; - VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; @@ -2712,48 +2712,49 @@ int clear_page_dirty_for_io(struct page *page) * We use this sequence to make sure that * (a) we account for dirty stats properly * (b) we tell the low-level filesystem to - * mark the whole page dirty if it was + * mark the whole folio dirty if it was * dirty in a pagetable. Only to then - * (c) clean the page again and return 1 to + * (c) clean the folio again and return 1 to * cause the writeback. * * This way we avoid all nasty races with the * dirty bit in multiple places and clearing * them concurrently from different threads. * - * Note! Normally the "set_page_dirty(page)" + * Note! Normally the "folio_mark_dirty(folio)" * has no effect on the actual dirty bit - since * that will already usually be set. But we * need the side effects, and it can help us * avoid races. * - * We basically use the page "master dirty bit" + * We basically use the folio "master dirty bit" * as a serialization point for all the different * threads doing their things. */ - if (page_mkclean(page)) - set_page_dirty(page); + if (folio_mkclean(folio)) + folio_mark_dirty(folio); /* * We carefully synchronise fault handlers against - * installing a dirty pte and marking the page dirty + * installing a dirty pte and marking the folio dirty * at this point. We do this by having them hold the - * page lock while dirtying the page, and pages are + * page lock while dirtying the folio, and folios are * always locked coming in here, so we get the desired * exclusion. */ wb = unlocked_inode_to_wb_begin(inode, &cookie); - if (TestClearPageDirty(page)) { - dec_lruvec_page_state(page, NR_FILE_DIRTY); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); - dec_wb_stat(wb, WB_RECLAIMABLE); - ret = 1; + if (folio_test_clear_dirty(folio)) { + long nr = folio_nr_pages(folio); + lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + wb_stat_mod(wb, WB_RECLAIMABLE, -nr); + ret = true; } unlocked_inode_to_wb_end(inode, &cookie); return ret; } - return TestClearPageDirty(page); + return folio_test_clear_dirty(folio); } -EXPORT_SYMBOL(clear_page_dirty_for_io); +EXPORT_SYMBOL(folio_clear_dirty_for_io); static void wb_inode_writeback_start(struct bdi_writeback *wb) { -- cgit v1.2.3 From 25ff8b15537dfa0e1a62d55cfcc48f3c8bd8a76c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 May 2021 10:06:55 -0400 Subject: mm/writeback: Add folio_account_redirty() Account the number of pages in the folio that we're redirtying. Turn account_page_dirty() into a wrapper around it. Also turn the comment on folio_account_redirty() into kernel-doc and edit it slightly so it makes sense to its potential callers. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/writeback.h | 6 +++++- mm/page-writeback.c | 32 +++++++++++++++++++------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d4f24eba066f..852100bea351 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -394,7 +394,11 @@ void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); -void account_page_redirty(struct page *page); +void folio_account_redirty(struct folio *folio); +static inline void account_page_redirty(struct page *page) +{ + folio_account_redirty(page_folio(page)); +} void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 85c04445de97..7ef904363fb7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1084,7 +1084,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb, * write_bandwidth = --------------------------------------------------- * period * - * @written may have decreased due to account_page_redirty(). + * @written may have decreased due to folio_account_redirty(). * Avoid underflowing @bw calculation. */ bw = written - min(written, wb->written_stamp); @@ -2544,30 +2544,36 @@ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio) } EXPORT_SYMBOL(filemap_dirty_folio); -/* - * Call this whenever redirtying a page, to de-account the dirty counters - * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written - * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to - * systematic errors in balanced_dirty_ratelimit and the dirty pages position - * control. +/** + * folio_account_redirty - Manually account for redirtying a page. + * @folio: The folio which is being redirtied. + * + * Most filesystems should call folio_redirty_for_writepage() instead + * of this fuction. If your filesystem is doing writeback outside the + * context of a writeback_control(), it can call this when redirtying + * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED, + * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN, + * WB_WRITTEN) in long term. The mismatches will lead to systematic errors + * in balanced_dirty_ratelimit and the dirty pages position control. */ -void account_page_redirty(struct page *page) +void folio_account_redirty(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; + long nr = folio_nr_pages(folio); wb = unlocked_inode_to_wb_begin(inode, &cookie); - current->nr_dirtied--; - dec_node_page_state(page, NR_DIRTIED); - dec_wb_stat(wb, WB_DIRTIED); + current->nr_dirtied -= nr; + node_stat_mod_folio(folio, NR_DIRTIED, -nr); + wb_stat_mod(wb, WB_DIRTIED, -nr); unlocked_inode_to_wb_end(inode, &cookie); } } -EXPORT_SYMBOL(account_page_redirty); +EXPORT_SYMBOL(folio_account_redirty); /* * When a writepage implementation decides that it doesn't want to write this -- cgit v1.2.3 From cd78ab11a8810dd297f4751d17cc53e3dce36024 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 2 May 2021 23:22:52 -0400 Subject: mm/writeback: Add folio_redirty_for_writepage() Reimplement redirty_page_for_writepage() as a wrapper around folio_redirty_for_writepage(). Account the number of pages in the folio, add kernel-doc and move the prototype to writeback.h. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- fs/jfs/jfs_metapage.c | 1 + include/linux/mm.h | 4 ---- include/linux/writeback.h | 2 ++ mm/folio-compat.c | 7 +++++++ mm/page-writeback.c | 30 ++++++++++++++++++++---------- 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 176580f54af9..104ae698443e 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" diff --git a/include/linux/mm.h b/include/linux/mm.h index dd5c6970ba67..23ab040aa65a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -36,9 +36,7 @@ struct mempolicy; struct anon_vma; struct anon_vma_chain; -struct file_ra_state; struct user_struct; -struct writeback_control; struct pt_regs; extern int sysctl_page_lock_unfairness; @@ -2003,8 +2001,6 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned int offset, unsigned int length); -int redirty_page_for_writepage(struct writeback_control *wbc, - struct page *page); bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 852100bea351..3571f383dfb6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -399,6 +399,8 @@ static inline void account_page_redirty(struct page *page) { folio_account_redirty(page_folio(page)); } +bool folio_redirty_for_writepage(struct writeback_control *, struct folio *); +bool redirty_page_for_writepage(struct writeback_control *, struct page *); void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 39f5a8d963b1..c1e01bc36d32 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -95,3 +95,10 @@ bool clear_page_dirty_for_io(struct page *page) return folio_clear_dirty_for_io(page_folio(page)); } EXPORT_SYMBOL(clear_page_dirty_for_io); + +bool redirty_page_for_writepage(struct writeback_control *wbc, + struct page *page) +{ + return folio_redirty_for_writepage(wbc, page_folio(page)); +} +EXPORT_SYMBOL(redirty_page_for_writepage); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7ef904363fb7..a0c35a9d8029 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2575,21 +2575,31 @@ void folio_account_redirty(struct folio *folio) } EXPORT_SYMBOL(folio_account_redirty); -/* - * When a writepage implementation decides that it doesn't want to write this - * page for some reason, it should redirty the locked page via - * redirty_page_for_writepage() and it should then unlock the page and return 0 +/** + * folio_redirty_for_writepage - Decline to write a dirty folio. + * @wbc: The writeback control. + * @folio: The folio. + * + * When a writepage implementation decides that it doesn't want to write + * @folio for some reason, it should call this function, unlock @folio and + * return 0. + * + * Return: True if we redirtied the folio. False if someone else dirtied + * it first. */ -int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) +bool folio_redirty_for_writepage(struct writeback_control *wbc, + struct folio *folio) { - int ret; + bool ret; + long nr = folio_nr_pages(folio); + + wbc->pages_skipped += nr; + ret = filemap_dirty_folio(folio->mapping, folio); + folio_account_redirty(folio); - wbc->pages_skipped++; - ret = __set_page_dirty_nobuffers(page); - account_page_redirty(page); return ret; } -EXPORT_SYMBOL(redirty_page_for_writepage); +EXPORT_SYMBOL(folio_redirty_for_writepage); /** * folio_mark_dirty - Mark a folio as being modified. -- cgit v1.2.3 From 9eb7c76dd31a53331bec795faaf7daa7ffb80071 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 23:11:28 -0400 Subject: mm/filemap: Add i_blocks_per_folio() Reimplement i_blocks_per_page() as a wrapper around i_blocks_per_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/pagemap.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0dda6459d2d1..5431d9920dc0 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1149,19 +1149,25 @@ static inline int page_mkwrite_check_truncate(struct page *page, } /** - * i_blocks_per_page - How many blocks fit in this page. + * i_blocks_per_folio - How many blocks fit in this folio. * @inode: The inode which contains the blocks. - * @page: The page (head page if the page is a THP). + * @folio: The folio. * - * If the block size is larger than the size of this page, return zero. + * If the block size is larger than the size of this folio, return zero. * - * Context: The caller should hold a refcount on the page to prevent it + * Context: The caller should hold a refcount on the folio to prevent it * from being split. - * Return: The number of filesystem blocks covered by this page. + * Return: The number of filesystem blocks covered by this folio. */ +static inline +unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) +{ + return folio_size(folio) >> inode->i_blkbits; +} + static inline unsigned int i_blocks_per_page(struct inode *inode, struct page *page) { - return thp_size(page) >> inode->i_blkbits; + return i_blocks_per_folio(inode, page_folio(page)); } #endif /* _LINUX_PAGEMAP_H */ -- cgit v1.2.3 From f705bf84eab2aa3b9842974b8443587727ccb23a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Apr 2021 22:30:06 -0400 Subject: mm/filemap: Add folio_mkwrite_check_truncate() This is the folio equivalent of page_mkwrite_check_truncate(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells --- include/linux/pagemap.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 5431d9920dc0..4b74d83c5571 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1120,6 +1120,34 @@ static inline unsigned long dir_pages(struct inode *inode) PAGE_SHIFT; } +/** + * folio_mkwrite_check_truncate - check if folio was truncated + * @folio: the folio to check + * @inode: the inode to check the folio against + * + * Return: the number of bytes in the folio up to EOF, + * or -EFAULT if the folio was truncated. + */ +static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, + struct inode *inode) +{ + loff_t size = i_size_read(inode); + pgoff_t index = size >> PAGE_SHIFT; + size_t offset = offset_in_folio(folio, size); + + if (!folio->mapping) + return -EFAULT; + + /* folio is wholly inside EOF */ + if (folio_next_index(folio) - 1 < index) + return folio_size(folio); + /* folio is wholly past EOF */ + if (folio->index > index || !offset) + return -EFAULT; + /* folio is partially inside EOF */ + return offset; +} + /** * page_mkwrite_check_truncate - check if page was truncated * @page: the page to check -- cgit v1.2.3 From 9bf70167e3c61473b95f40771decc3778bf0fb9f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 16:37:09 -0400 Subject: mm/filemap: Add readahead_folio() The pointers stored in the page cache are folios, by definition. This change comes with a behaviour change -- callers of readahead_folio() are no longer required to put the page reference themselves. This matches how readpage works, rather than matching how readpages used to work. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/pagemap.h | 54 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b74d83c5571..fea9615bab35 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -987,33 +987,57 @@ void page_cache_async_readahead(struct address_space *mapping, page_cache_async_ra(&ractl, page, req_count); } +static inline struct folio *__readahead_folio(struct readahead_control *ractl) +{ + struct folio *folio; + + BUG_ON(ractl->_batch_count > ractl->_nr_pages); + ractl->_nr_pages -= ractl->_batch_count; + ractl->_index += ractl->_batch_count; + + if (!ractl->_nr_pages) { + ractl->_batch_count = 0; + return NULL; + } + + folio = xa_load(&ractl->mapping->i_pages, ractl->_index); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + ractl->_batch_count = folio_nr_pages(folio); + + return folio; +} + /** * readahead_page - Get the next page to read. - * @rac: The current readahead request. + * @ractl: The current readahead request. * * Context: The page is locked and has an elevated refcount. The caller * should decreases the refcount once the page has been submitted for I/O * and unlock the page once all I/O to that page has completed. * Return: A pointer to the next page, or %NULL if we are done. */ -static inline struct page *readahead_page(struct readahead_control *rac) +static inline struct page *readahead_page(struct readahead_control *ractl) { - struct page *page; + struct folio *folio = __readahead_folio(ractl); - BUG_ON(rac->_batch_count > rac->_nr_pages); - rac->_nr_pages -= rac->_batch_count; - rac->_index += rac->_batch_count; - - if (!rac->_nr_pages) { - rac->_batch_count = 0; - return NULL; - } + return &folio->page; +} - page = xa_load(&rac->mapping->i_pages, rac->_index); - VM_BUG_ON_PAGE(!PageLocked(page), page); - rac->_batch_count = thp_nr_pages(page); +/** + * readahead_folio - Get the next folio to read. + * @ractl: The current readahead request. + * + * Context: The folio is locked. The caller should unlock the folio once + * all I/O to that folio has completed. + * Return: A pointer to the next folio, or %NULL if we are done. + */ +static inline struct folio *readahead_folio(struct readahead_control *ractl) +{ + struct folio *folio = __readahead_folio(ractl); - return page; + if (folio) + folio_put(folio); + return folio; } static inline unsigned int __readahead_batch(struct readahead_control *rac, -- cgit v1.2.3 From 0995d7e568141226f10f8216aa4965e06ab5db8a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 10:27:16 -0400 Subject: mm/workingset: Convert workingset_refault() to take a folio This nets us 178 bytes of savings from removing calls to compound_head. The three callers all grow a little, but each of them will be converted to use folios soon, so that's fine. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/swap.h | 4 ++-- include/linux/vmstat.h | 6 ------ mm/filemap.c | 2 +- mm/memory.c | 3 ++- mm/swap.c | 7 +++---- mm/swap_state.c | 2 +- mm/workingset.c | 42 ++++++++++++++++++++++-------------------- 7 files changed, 31 insertions(+), 35 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 067b7af5242c..892faac942da 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -329,7 +329,7 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); -void workingset_refault(struct page *page, void *shadow); +void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); /* Only track the nodes of mappings with shadow entries */ @@ -350,7 +350,7 @@ extern unsigned long nr_free_buffer_pages(void); /* linux/mm/swap.c */ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); -extern void lru_note_cost_page(struct page *); +extern void lru_note_cost_folio(struct folio *); extern void lru_cache_add(struct page *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 241bd0f53fb9..bfe38869498d 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -597,12 +597,6 @@ static inline void mod_lruvec_page_state(struct page *page, #endif /* CONFIG_MEMCG */ -static inline void inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, 1); -} - static inline void __inc_lruvec_page_state(struct page *page, enum node_stat_item idx) { diff --git a/mm/filemap.c b/mm/filemap.c index 5cb18399a1b6..c4a7f3fdca12 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -998,7 +998,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, */ WARN_ON_ONCE(PageActive(page)); if (!(gfp_mask & __GFP_WRITE) && shadow) - workingset_refault(page, shadow); + workingset_refault(page_folio(page), shadow); lru_cache_add(page); } return ret; diff --git a/mm/memory.c b/mm/memory.c index b67d80526bee..6308eeaed136 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3539,7 +3539,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) shadow = get_shadow_from_swap_cache(entry); if (shadow) - workingset_refault(page, shadow); + workingset_refault(page_folio(page), + shadow); lru_cache_add(page); diff --git a/mm/swap.c b/mm/swap.c index b95fd6d05768..5c1674c98f82 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -293,11 +293,10 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) } while ((lruvec = parent_lruvec(lruvec))); } -void lru_note_cost_page(struct page *page) +void lru_note_cost_folio(struct folio *folio) { - struct folio *folio = page_folio(page); - lru_note_cost(folio_lruvec(folio), - page_is_file_lru(page), thp_nr_pages(page)); + lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio), + folio_nr_pages(folio)); } static void __folio_activate(struct folio *folio, struct lruvec *lruvec) diff --git a/mm/swap_state.c b/mm/swap_state.c index bc7cee6b2ec5..8d4104242100 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -498,7 +498,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, mem_cgroup_swapin_uncharge_swap(entry); if (shadow) - workingset_refault(page, shadow); + workingset_refault(page_folio(page), shadow); /* Caller will initiate read into locked page */ lru_cache_add(page); diff --git a/mm/workingset.c b/mm/workingset.c index 1c96ed525a0e..109ab978251a 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -273,17 +273,17 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) } /** - * workingset_refault - evaluate the refault of a previously evicted page - * @page: the freshly allocated replacement page - * @shadow: shadow entry of the evicted page + * workingset_refault - Evaluate the refault of a previously evicted folio. + * @folio: The freshly allocated replacement folio. + * @shadow: Shadow entry of the evicted folio. * * Calculates and evaluates the refault distance of the previously - * evicted page in the context of the node and the memcg whose memory + * evicted folio in the context of the node and the memcg whose memory * pressure caused the eviction. */ -void workingset_refault(struct page *page, void *shadow) +void workingset_refault(struct folio *folio, void *shadow) { - bool file = page_is_file_lru(page); + bool file = folio_is_file_lru(folio); struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; @@ -295,16 +295,17 @@ void workingset_refault(struct page *page, void *shadow) unsigned long refault; bool workingset; int memcgid; + long nr; unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); rcu_read_lock(); /* * Look up the memcg associated with the stored ID. It might - * have been deleted since the page's eviction. + * have been deleted since the folio's eviction. * * Note that in rare events the ID could have been recycled - * for a new cgroup that refaults a shared page. This is + * for a new cgroup that refaults a shared folio. This is * impossible to tell from the available data. However, this * should be a rare and limited disturbance, and activations * are always speculative anyway. Ultimately, it's the aging @@ -340,17 +341,18 @@ void workingset_refault(struct page *page, void *shadow) refault_distance = (refault - eviction) & EVICTION_MASK; /* - * The activation decision for this page is made at the level + * The activation decision for this folio is made at the level * where the eviction occurred, as that is where the LRU order - * during page reclaim is being determined. + * during folio reclaim is being determined. * - * However, the cgroup that will own the page is the one that + * However, the cgroup that will own the folio is the one that * is actually experiencing the refault event. */ - memcg = page_memcg(page); + nr = folio_nr_pages(folio); + memcg = folio_memcg(folio); lruvec = mem_cgroup_lruvec(memcg, pgdat); - inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); + mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); mem_cgroup_flush_stats(); /* @@ -376,16 +378,16 @@ void workingset_refault(struct page *page, void *shadow) if (refault_distance > workingset_size) goto out; - SetPageActive(page); - workingset_age_nonresident(lruvec, thp_nr_pages(page)); - inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); + folio_set_active(folio); + workingset_age_nonresident(lruvec, nr); + mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, nr); - /* Page was active prior to eviction */ + /* Folio was active prior to eviction */ if (workingset) { - SetPageWorkingset(page); + folio_set_workingset(folio); /* XXX: Move to lru_cache_add() when it supports new vs putback */ - lru_note_cost_page(page); - inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); + lru_note_cost_folio(folio); + mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); } out: rcu_read_unlock(); -- cgit v1.2.3 From 3eed3ef55c83ec718fae676fd59699816223215f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 May 2021 15:04:28 -0400 Subject: mm: Add folio_evictable() This is the folio equivalent of page_evictable(). Unfortunately, it's different from !folio_test_unevictable(), but I think it's used in places where you have to be a VM expert and can reasonably be expected to know the difference. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Vlastimil Babka --- mm/internal.h | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 08fb03b16d3d..b1001ebeb286 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -72,17 +72,28 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, pgoff_t end, struct pagevec *pvec, pgoff_t *indices); /** - * page_evictable - test whether a page is evictable - * @page: the page to test + * folio_evictable - Test whether a folio is evictable. + * @folio: The folio to test. * - * Test whether page is evictable--i.e., should be placed on active/inactive - * lists vs unevictable list. - * - * Reasons page might not be evictable: - * (1) page's mapping marked unevictable - * (2) page is part of an mlocked VMA + * Test whether @folio is evictable -- i.e., should be placed on + * active/inactive lists vs unevictable list. * + * Reasons folio might not be evictable: + * 1. folio's mapping marked unevictable + * 2. One of the pages in the folio is part of an mlocked VMA */ +static inline bool folio_evictable(struct folio *folio) +{ + bool ret; + + /* Prevent address_space of inode and swap cache from being freed */ + rcu_read_lock(); + ret = !mapping_unevictable(folio_mapping(folio)) && + !folio_test_mlocked(folio); + rcu_read_unlock(); + return ret; +} + static inline bool page_evictable(struct page *page) { bool ret; -- cgit v1.2.3 From 934387c99f1ce3c21b4f1fa67005dd95fbcee5e9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 May 2021 15:08:29 -0400 Subject: mm/lru: Convert __pagevec_lru_add_fn to take a folio This saves five calls to compound_head(), totalling 60 bytes of text. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/trace/events/pagemap.h | 32 +++++++++++++-------------- mm/swap.c | 49 +++++++++++++++++++++--------------------- 2 files changed, 41 insertions(+), 40 deletions(-) diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 1fd0185d66e8..171524d3526d 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -16,38 +16,38 @@ #define PAGEMAP_MAPPEDDISK 0x0020u #define PAGEMAP_BUFFERS 0x0040u -#define trace_pagemap_flags(page) ( \ - (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ - (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \ - (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \ - (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \ - (PageMappedToDisk(page) ? PAGEMAP_MAPPEDDISK : 0) | \ - (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \ +#define trace_pagemap_flags(folio) ( \ + (folio_test_anon(folio) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ + (folio_mapped(folio) ? PAGEMAP_MAPPED : 0) | \ + (folio_test_swapcache(folio) ? PAGEMAP_SWAPCACHE : 0) | \ + (folio_test_swapbacked(folio) ? PAGEMAP_SWAPBACKED : 0) | \ + (folio_test_mappedtodisk(folio) ? PAGEMAP_MAPPEDDISK : 0) | \ + (folio_test_private(folio) ? PAGEMAP_BUFFERS : 0) \ ) TRACE_EVENT(mm_lru_insertion, - TP_PROTO(struct page *page), + TP_PROTO(struct folio *folio), - TP_ARGS(page), + TP_ARGS(folio), TP_STRUCT__entry( - __field(struct page *, page ) + __field(struct folio *, folio ) __field(unsigned long, pfn ) __field(enum lru_list, lru ) __field(unsigned long, flags ) ), TP_fast_assign( - __entry->page = page; - __entry->pfn = page_to_pfn(page); - __entry->lru = folio_lru_list(page_folio(page)); - __entry->flags = trace_pagemap_flags(page); + __entry->folio = folio; + __entry->pfn = folio_pfn(folio); + __entry->lru = folio_lru_list(folio); + __entry->flags = trace_pagemap_flags(folio); ), /* Flag format is based on page-types.c formatting for pagemap */ - TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", - __entry->page, + TP_printk("folio=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", + __entry->folio, __entry->pfn, __entry->lru, __entry->flags & PAGEMAP_MAPPED ? "M" : " ", diff --git a/mm/swap.c b/mm/swap.c index 5c1674c98f82..858b4a8220ca 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -992,17 +992,18 @@ void __pagevec_release(struct pagevec *pvec) } EXPORT_SYMBOL(__pagevec_release); -static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec) +static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec) { - int was_unevictable = TestClearPageUnevictable(page); - int nr_pages = thp_nr_pages(page); + int was_unevictable = folio_test_clear_unevictable(folio); + long nr_pages = folio_nr_pages(folio); - VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* - * Page becomes evictable in two ways: + * A folio becomes evictable in two ways: * 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()]. - * 2) Before acquiring LRU lock to put the page to correct LRU and then + * 2) Before acquiring LRU lock to put the folio on the correct LRU + * and then * a) do PageLRU check with lock [check_move_unevictable_pages] * b) do PageLRU check before lock [clear_page_mlock] * @@ -1011,35 +1012,36 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec) * * #0: __pagevec_lru_add_fn #1: clear_page_mlock * - * SetPageLRU() TestClearPageMlocked() + * folio_set_lru() folio_test_clear_mlocked() * smp_mb() // explicit ordering // above provides strict * // ordering - * PageMlocked() PageLRU() + * folio_test_mlocked() folio_test_lru() * * - * if '#1' does not observe setting of PG_lru by '#0' and fails - * isolation, the explicit barrier will make sure that page_evictable - * check will put the page in correct LRU. Without smp_mb(), SetPageLRU - * can be reordered after PageMlocked check and can make '#1' to fail - * the isolation of the page whose Mlocked bit is cleared (#0 is also - * looking at the same page) and the evictable page will be stranded - * in an unevictable LRU. + * if '#1' does not observe setting of PG_lru by '#0' and + * fails isolation, the explicit barrier will make sure that + * folio_evictable check will put the folio on the correct + * LRU. Without smp_mb(), folio_set_lru() can be reordered + * after folio_test_mlocked() check and can make '#1' fail the + * isolation of the folio whose mlocked bit is cleared (#0 is + * also looking at the same folio) and the evictable folio will + * be stranded on an unevictable LRU. */ - SetPageLRU(page); + folio_set_lru(folio); smp_mb__after_atomic(); - if (page_evictable(page)) { + if (folio_evictable(folio)) { if (was_unevictable) __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } else { - ClearPageActive(page); - SetPageUnevictable(page); + folio_clear_active(folio); + folio_set_unevictable(folio); if (!was_unevictable) __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); } - add_page_to_lru_list(page, lruvec); - trace_mm_lru_insertion(page); + lruvec_add_folio(lruvec, folio); + trace_mm_lru_insertion(folio); } /* @@ -1053,11 +1055,10 @@ void __pagevec_lru_add(struct pagevec *pvec) unsigned long flags = 0; for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - struct folio *folio = page_folio(page); + struct folio *folio = page_folio(pvec->pages[i]); lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); - __pagevec_lru_add_fn(page, lruvec); + __pagevec_lru_add_fn(folio, lruvec); } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); -- cgit v1.2.3 From 0d31125d2d3267ec349796418a16761bbda20387 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 11:09:31 -0400 Subject: mm/lru: Add folio_add_lru() Reimplement lru_cache_add() as a wrapper around folio_add_lru(). Saves 159 bytes of kernel text due to removing calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/swap.h | 1 + mm/folio-compat.c | 6 ++++++ mm/swap.c | 22 +++++++++++----------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 892faac942da..cdf0957a88a4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -351,6 +351,7 @@ extern unsigned long nr_free_buffer_pages(void); extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); extern void lru_note_cost_folio(struct folio *); +extern void folio_add_lru(struct folio *); extern void lru_cache_add(struct page *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index c1e01bc36d32..6de3cd78a4ae 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -102,3 +102,9 @@ bool redirty_page_for_writepage(struct writeback_control *wbc, return folio_redirty_for_writepage(wbc, page_folio(page)); } EXPORT_SYMBOL(redirty_page_for_writepage); + +void lru_cache_add(struct page *page) +{ + folio_add_lru(page_folio(page)); +} +EXPORT_SYMBOL(lru_cache_add); diff --git a/mm/swap.c b/mm/swap.c index 858b4a8220ca..8ff9ba7cf2de 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -437,29 +437,29 @@ void folio_mark_accessed(struct folio *folio) EXPORT_SYMBOL(folio_mark_accessed); /** - * lru_cache_add - add a page to a page list - * @page: the page to be added to the LRU. + * folio_add_lru - Add a folio to an LRU list. + * @folio: The folio to be added to the LRU. * - * Queue the page for addition to the LRU via pagevec. The decision on whether + * Queue the folio for addition to the LRU. The decision on whether * to add the page to the [in]active [file|anon] list is deferred until the - * pagevec is drained. This gives a chance for the caller of lru_cache_add() - * have the page added to the active list using mark_page_accessed(). + * pagevec is drained. This gives a chance for the caller of folio_add_lru() + * have the folio added to the active list using folio_mark_accessed(). */ -void lru_cache_add(struct page *page) +void folio_add_lru(struct folio *folio) { struct pagevec *pvec; - VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); - VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - get_page(page); + folio_get(folio); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.lru_add); - if (pagevec_add_and_need_flush(pvec, page)) + if (pagevec_add_and_need_flush(pvec, &folio->page)) __pagevec_lru_add(pvec); local_unlock(&lru_pvecs.lock); } -EXPORT_SYMBOL(lru_cache_add); +EXPORT_SYMBOL(folio_add_lru); /** * lru_cache_add_inactive_or_unevictable -- cgit v1.2.3 From cc09cb134124a42fbe3bdcebefdc54e286d8f3e5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 15 Dec 2020 22:55:54 -0500 Subject: mm/page_alloc: Add folio allocation functions The __folio_alloc(), __folio_alloc_node() and folio_alloc() functions are mostly for type safety, but they also ensure that the page allocator allocates a compound page and initialises the deferred list if the page is large enough to have one. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Vlastimil Babka --- include/linux/gfp.h | 16 ++++++++++++++++ mm/mempolicy.c | 10 ++++++++++ mm/page_alloc.c | 12 ++++++++++++ 3 files changed, 38 insertions(+) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index dc5ff40608ce..3745efd21cf6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -523,6 +523,8 @@ static inline void arch_alloc_page(struct page *page, int order) { } struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); +struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, + nodemask_t *nodemask); unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, @@ -564,6 +566,15 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) return __alloc_pages(gfp_mask, order, nid, NULL); } +static inline +struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) +{ + VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); + VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + + return __folio_alloc(gfp, order, nid, NULL); +} + /* * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, * prefer the current CPU's closest node. Otherwise node must be valid and @@ -580,6 +591,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); +struct folio *folio_alloc(gfp_t gfp, unsigned order); extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, int node, bool hugepage); @@ -590,6 +602,10 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node(numa_node_id(), gfp_mask, order); } +static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) +{ + return __folio_alloc_node(gfp, order, numa_node_id()); +} #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1592b081c58e..251df91ddc80 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2202,6 +2202,16 @@ struct page *alloc_pages(gfp_t gfp, unsigned order) } EXPORT_SYMBOL(alloc_pages); +struct folio *folio_alloc(gfp_t gfp, unsigned order) +{ + struct page *page = alloc_pages(gfp | __GFP_COMP, order); + + if (page && order > 1) + prep_transhuge_page(page); + return (struct folio *)page; +} +EXPORT_SYMBOL(folio_alloc); + int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { struct mempolicy *pol = mpol_dup(vma_policy(src)); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 869d0b06e1ef..544ff5a11cb8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5400,6 +5400,18 @@ out: } EXPORT_SYMBOL(__alloc_pages); +struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, + nodemask_t *nodemask) +{ + struct page *page = __alloc_pages(gfp | __GFP_COMP, order, + preferred_nid, nodemask); + + if (page && order > 1) + prep_transhuge_page(page); + return (struct folio *)page; +} +EXPORT_SYMBOL(__folio_alloc); + /* * Common helper functions. Never use with __GFP_HIGHMEM because the returned * address cannot represent highmem pages. Use alloc_pages and then kmap if -- cgit v1.2.3 From bb3c579e25e5757bc5bac1333f4a56dfebf7cb91 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 15 Dec 2020 23:11:07 -0500 Subject: mm/filemap: Add filemap_alloc_folio Reimplement __page_cache_alloc as a wrapper around filemap_alloc_folio to allow filesystems to be converted at our leisure. Increases kernel text size by 133 bytes, mostly in cachefiles_read_backing_file(). pagecache_get_page() shrinks by 32 bytes, though. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/pagemap.h | 11 ++++++++--- mm/filemap.c | 14 +++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index fea9615bab35..51c190ae3b0b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -262,14 +262,19 @@ static inline void *detach_page_private(struct page *page) } #ifdef CONFIG_NUMA -extern struct page *__page_cache_alloc(gfp_t gfp); +struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order); #else -static inline struct page *__page_cache_alloc(gfp_t gfp) +static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) { - return alloc_pages(gfp, 0); + return folio_alloc(gfp, order); } #endif +static inline struct page *__page_cache_alloc(gfp_t gfp) +{ + return &filemap_alloc_folio(gfp, 0)->page; +} + static inline struct page *page_cache_alloc(struct address_space *x) { return __page_cache_alloc(mapping_gfp_mask(x)); diff --git a/mm/filemap.c b/mm/filemap.c index c4a7f3fdca12..6976fb4af390 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1006,24 +1006,24 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, EXPORT_SYMBOL_GPL(add_to_page_cache_lru); #ifdef CONFIG_NUMA -struct page *__page_cache_alloc(gfp_t gfp) +struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) { int n; - struct page *page; + struct folio *folio; if (cpuset_do_page_mem_spread()) { unsigned int cpuset_mems_cookie; do { cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); - page = __alloc_pages_node(n, gfp, 0); - } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); + folio = __folio_alloc_node(gfp, order, n); + } while (!folio && read_mems_allowed_retry(cpuset_mems_cookie)); - return page; + return folio; } - return alloc_pages(gfp, 0); + return folio_alloc(gfp, order); } -EXPORT_SYMBOL(__page_cache_alloc); +EXPORT_SYMBOL(filemap_alloc_folio); #endif /* -- cgit v1.2.3 From 9dd3d069406cea073fc633e77bc59abbfde8c6c4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Dec 2020 08:56:28 -0500 Subject: mm/filemap: Add filemap_add_folio() Convert __add_to_page_cache_locked() into __filemap_add_folio(). Add an assertion to it that (for !hugetlbfs), the folio is naturally aligned within the file. Move the prototype from mm.h to pagemap.h. Convert add_to_page_cache_lru() into filemap_add_folio(). Add a compatibility wrapper for unconverted callers. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/mm.h | 7 ----- include/linux/pagemap.h | 10 +++++-- kernel/bpf/verifier.c | 2 +- mm/filemap.c | 70 ++++++++++++++++++++++++------------------------- mm/folio-compat.c | 7 +++++ 5 files changed, 50 insertions(+), 46 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 23ab040aa65a..efcb06fc6116 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -213,13 +213,6 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, loff_t *); -/* - * Any attempt to mark this function as static leads to build failure - * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() - * is referred to by BPF code. This must be visible for error injection. - */ -int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp, void **shadowp); #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 51c190ae3b0b..a10eb6dc3866 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -876,9 +876,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, size_t size) } int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); + pgoff_t index, gfp_t gfp); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); + pgoff_t index, gfp_t gfp); +int filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp); extern void delete_from_page_cache(struct page *page); extern void __delete_from_page_cache(struct page *page, void *shadow); void replace_page_cache_page(struct page *old, struct page *new); @@ -903,6 +905,10 @@ static inline int add_to_page_cache(struct page *page, return error; } +/* Must be non-static for BPF error injection */ +int __filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp, void **shadowp); + /** * struct readahead_control - Describes a readahead request. * diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e76b55917905..de006552be8a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13319,7 +13319,7 @@ BTF_SET_START(btf_non_sleepable_error_inject) /* Three functions below can be called from sleepable and non-sleepable context. * Assume non-sleepable from bpf safety point of view. */ -BTF_ID(func, __add_to_page_cache_locked) +BTF_ID(func, __filemap_add_folio) BTF_ID(func, should_fail_alloc_page) BTF_ID(func, should_failslab) BTF_SET_END(btf_non_sleepable_error_inject) diff --git a/mm/filemap.c b/mm/filemap.c index 6976fb4af390..3902e7e8877d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -872,26 +872,25 @@ void replace_page_cache_page(struct page *old, struct page *new) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -noinline int __add_to_page_cache_locked(struct page *page, - struct address_space *mapping, - pgoff_t offset, gfp_t gfp, - void **shadowp) +noinline int __filemap_add_folio(struct address_space *mapping, + struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp) { - XA_STATE(xas, &mapping->i_pages, offset); - int huge = PageHuge(page); + XA_STATE(xas, &mapping->i_pages, index); + int huge = folio_test_hugetlb(folio); int error; bool charged = false; - VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(PageSwapBacked(page), page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio); mapping_set_update(&xas, mapping); - get_page(page); - page->mapping = mapping; - page->index = offset; + folio_get(folio); + folio->mapping = mapping; + folio->index = index; if (!huge) { - error = mem_cgroup_charge(page_folio(page), NULL, gfp); + error = mem_cgroup_charge(folio, NULL, gfp); + VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio); if (error) goto error; charged = true; @@ -903,7 +902,7 @@ noinline int __add_to_page_cache_locked(struct page *page, unsigned int order = xa_get_order(xas.xa, xas.xa_index); void *entry, *old = NULL; - if (order > thp_order(page)) + if (order > folio_order(folio)) xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), order, gfp); xas_lock_irq(&xas); @@ -920,13 +919,13 @@ noinline int __add_to_page_cache_locked(struct page *page, *shadowp = old; /* entry may have been split before we acquired lock */ order = xa_get_order(xas.xa, xas.xa_index); - if (order > thp_order(page)) { + if (order > folio_order(folio)) { xas_split(&xas, old, order); xas_reset(&xas); } } - xas_store(&xas, page); + xas_store(&xas, folio); if (xas_error(&xas)) goto unlock; @@ -934,7 +933,7 @@ noinline int __add_to_page_cache_locked(struct page *page, /* hugetlb pages do not participate in page cache accounting */ if (!huge) - __inc_lruvec_page_state(page, NR_FILE_PAGES); + __lruvec_stat_add_folio(folio, NR_FILE_PAGES); unlock: xas_unlock_irq(&xas); } while (xas_nomem(&xas, gfp)); @@ -942,19 +941,19 @@ unlock: if (xas_error(&xas)) { error = xas_error(&xas); if (charged) - mem_cgroup_uncharge(page_folio(page)); + mem_cgroup_uncharge(folio); goto error; } - trace_mm_filemap_add_to_page_cache(page); + trace_mm_filemap_add_to_page_cache(&folio->page); return 0; error: - page->mapping = NULL; + folio->mapping = NULL; /* Leave page->index set: truncation relies upon it */ - put_page(page); + folio_put(folio); return error; } -ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); +ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO); /** * add_to_page_cache_locked - add a locked page to the pagecache @@ -971,39 +970,38 @@ ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); int add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) { - return __add_to_page_cache_locked(page, mapping, offset, + return __filemap_add_folio(mapping, page_folio(page), offset, gfp_mask, NULL); } EXPORT_SYMBOL(add_to_page_cache_locked); -int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t offset, gfp_t gfp_mask) +int filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp) { void *shadow = NULL; int ret; - __SetPageLocked(page); - ret = __add_to_page_cache_locked(page, mapping, offset, - gfp_mask, &shadow); + __folio_set_locked(folio); + ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow); if (unlikely(ret)) - __ClearPageLocked(page); + __folio_clear_locked(folio); else { /* - * The page might have been evicted from cache only + * The folio might have been evicted from cache only * recently, in which case it should be activated like - * any other repeatedly accessed page. - * The exception is pages getting rewritten; evicting other + * any other repeatedly accessed folio. + * The exception is folios getting rewritten; evicting other * data from the working set, only to cache data that will * get overwritten with something else, is a waste of memory. */ - WARN_ON_ONCE(PageActive(page)); - if (!(gfp_mask & __GFP_WRITE) && shadow) - workingset_refault(page_folio(page), shadow); - lru_cache_add(page); + WARN_ON_ONCE(folio_test_active(folio)); + if (!(gfp & __GFP_WRITE) && shadow) + workingset_refault(folio, shadow); + folio_add_lru(folio); } return ret; } -EXPORT_SYMBOL_GPL(add_to_page_cache_lru); +EXPORT_SYMBOL_GPL(filemap_add_folio); #ifdef CONFIG_NUMA struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 6de3cd78a4ae..6b19bc4ed6b0 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -108,3 +108,10 @@ void lru_cache_add(struct page *page) folio_add_lru(page_folio(page)); } EXPORT_SYMBOL(lru_cache_add); + +int add_to_page_cache_lru(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp) +{ + return filemap_add_folio(mapping, page_folio(page), index, gfp); +} +EXPORT_SYMBOL(add_to_page_cache_lru); -- cgit v1.2.3 From bca65eeab1db74f83661a49ebdaf87414fc4938e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 15 Dec 2020 23:22:38 -0500 Subject: mm/filemap: Convert mapping_get_entry to return a folio The pagecache only contains folios, so indicate that this is definitely not a tail page. Shrinks mapping_get_entry() by 56 bytes, but grows pagecache_get_page() by 21 bytes as gcc makes slightly different hot/cold code decisions. A net reduction of 35 bytes of text. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- mm/filemap.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 3902e7e8877d..5c99fd9a789d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1810,49 +1810,42 @@ EXPORT_SYMBOL(page_cache_prev_miss); * @mapping: the address_space to search * @index: The page cache index. * - * Looks up the page cache slot at @mapping & @index. If there is a - * page cache page, the head page is returned with an increased refcount. + * Looks up the page cache entry at @mapping & @index. If it is a folio, + * it is returned with an increased refcount. If it is a shadow entry + * of a previously evicted folio, or a swap entry from shmem/tmpfs, + * it is returned without further action. * - * If the slot holds a shadow entry of a previously evicted page, or a - * swap entry from shmem/tmpfs, it is returned. - * - * Return: The head page or shadow entry, %NULL if nothing is found. + * Return: The folio, swap or shadow entry, %NULL if nothing is found. */ -static struct page *mapping_get_entry(struct address_space *mapping, - pgoff_t index) +static void *mapping_get_entry(struct address_space *mapping, pgoff_t index) { XA_STATE(xas, &mapping->i_pages, index); - struct page *page; + struct folio *folio; rcu_read_lock(); repeat: xas_reset(&xas); - page = xas_load(&xas); - if (xas_retry(&xas, page)) + folio = xas_load(&xas); + if (xas_retry(&xas, folio)) goto repeat; /* * A shadow entry of a recently evicted page, or a swap entry from * shmem/tmpfs. Return it without attempting to raise page count. */ - if (!page || xa_is_value(page)) + if (!folio || xa_is_value(folio)) goto out; - if (!page_cache_get_speculative(page)) + if (!folio_try_get_rcu(folio)) goto repeat; - /* - * Has the page moved or been split? - * This is part of the lockless pagecache protocol. See - * include/linux/pagemap.h for details. - */ - if (unlikely(page != xas_reload(&xas))) { - put_page(page); + if (unlikely(folio != xas_reload(&xas))) { + folio_put(folio); goto repeat; } out: rcu_read_unlock(); - return page; + return folio; } /** -- cgit v1.2.3 From 3f0c6a07fee6a1c2618a2e12ffb8e9aa24384fde Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Mar 2021 11:45:35 -0500 Subject: mm/filemap: Add filemap_get_folio filemap_get_folio() is a replacement for find_get_page(). Turn pagecache_get_page() into a wrapper around __filemap_get_folio(). Remove find_lock_head() as this use case is now covered by filemap_get_folio(). Reduces overall kernel size by 209 bytes. __filemap_get_folio() is 316 bytes shorter than pagecache_get_page() was, but the new pagecache_get_page() wrapper is 99 bytes. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/pagemap.h | 41 +++++++++++----------- mm/filemap.c | 92 +++++++++++++++++++++++-------------------------- mm/folio-compat.c | 12 +++++++ 3 files changed, 76 insertions(+), 69 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a10eb6dc3866..401a90336a2c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -302,8 +302,26 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_HEAD 0x00000080 #define FGP_ENTRY 0x00000100 -struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, - int fgp_flags, gfp_t cache_gfp_mask); +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp); +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp); + +/** + * filemap_get_folio - Find and get a folio. + * @mapping: The address_space to search. + * @index: The page index. + * + * Looks up the page cache entry at @mapping & @index. If a folio is + * present, it is returned with an increased refcount. + * + * Otherwise, %NULL is returned. + */ +static inline struct folio *filemap_get_folio(struct address_space *mapping, + pgoff_t index) +{ + return __filemap_get_folio(mapping, index, 0, 0); +} /** * find_get_page - find and get a page reference @@ -346,25 +364,6 @@ static inline struct page *find_lock_page(struct address_space *mapping, return pagecache_get_page(mapping, index, FGP_LOCK, 0); } -/** - * find_lock_head - Locate, pin and lock a pagecache page. - * @mapping: The address_space to search. - * @index: The page index. - * - * Looks up the page cache entry at @mapping & @index. If there is a - * page cache page, its head page is returned locked and with an increased - * refcount. - * - * Context: May sleep. - * Return: A struct page which is !PageTail, or %NULL if there is no page - * in the cache for this index. - */ -static inline struct page *find_lock_head(struct address_space *mapping, - pgoff_t index) -{ - return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0); -} - /** * find_or_create_page - locate or add a pagecache page * @mapping: the page's address_space diff --git a/mm/filemap.c b/mm/filemap.c index 5c99fd9a789d..de4cc1b9aa9b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1849,93 +1849,89 @@ out: } /** - * pagecache_get_page - Find and get a reference to a page. + * __filemap_get_folio - Find and get a reference to a folio. * @mapping: The address_space to search. * @index: The page index. - * @fgp_flags: %FGP flags modify how the page is returned. - * @gfp_mask: Memory allocation flags to use if %FGP_CREAT is specified. + * @fgp_flags: %FGP flags modify how the folio is returned. + * @gfp: Memory allocation flags to use if %FGP_CREAT is specified. * * Looks up the page cache entry at @mapping & @index. * * @fgp_flags can be zero or more of these flags: * - * * %FGP_ACCESSED - The page will be marked accessed. - * * %FGP_LOCK - The page is returned locked. - * * %FGP_HEAD - If the page is present and a THP, return the head page - * rather than the exact page specified by the index. + * * %FGP_ACCESSED - The folio will be marked accessed. + * * %FGP_LOCK - The folio is returned locked. * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it - * instead of allocating a new page to replace it. + * instead of allocating a new folio to replace it. * * %FGP_CREAT - If no page is present then a new page is allocated using - * @gfp_mask and added to the page cache and the VM's LRU list. + * @gfp and added to the page cache and the VM's LRU list. * The page is returned locked and with an increased refcount. * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the * page is already in cache. If the page was allocated, unlock it before * returning so the caller can do the same dance. - * * %FGP_WRITE - The page will be written - * * %FGP_NOFS - __GFP_FS will get cleared in gfp mask - * * %FGP_NOWAIT - Don't get blocked by page lock + * * %FGP_WRITE - The page will be written to by the caller. + * * %FGP_NOFS - __GFP_FS will get cleared in gfp. + * * %FGP_NOWAIT - Don't get blocked by page lock. * * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. * * If there is a page cache page, it is returned with an increased refcount. * - * Return: The found page or %NULL otherwise. + * Return: The found folio or %NULL otherwise. */ -struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp_mask) +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp) { - struct page *page; + struct folio *folio; repeat: - page = mapping_get_entry(mapping, index); - if (xa_is_value(page)) { + folio = mapping_get_entry(mapping, index); + if (xa_is_value(folio)) { if (fgp_flags & FGP_ENTRY) - return page; - page = NULL; + return folio; + folio = NULL; } - if (!page) + if (!folio) goto no_page; if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) { - if (!trylock_page(page)) { - put_page(page); + if (!folio_trylock(folio)) { + folio_put(folio); return NULL; } } else { - lock_page(page); + folio_lock(folio); } /* Has the page been truncated? */ - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - put_page(page); + if (unlikely(folio->mapping != mapping)) { + folio_unlock(folio); + folio_put(folio); goto repeat; } - VM_BUG_ON_PAGE(!thp_contains(page, index), page); + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); } if (fgp_flags & FGP_ACCESSED) - mark_page_accessed(page); + folio_mark_accessed(folio); else if (fgp_flags & FGP_WRITE) { /* Clear idle flag for buffer write */ - if (page_is_idle(page)) - clear_page_idle(page); + if (folio_test_idle(folio)) + folio_clear_idle(folio); } - if (!(fgp_flags & FGP_HEAD)) - page = find_subpage(page, index); no_page: - if (!page && (fgp_flags & FGP_CREAT)) { + if (!folio && (fgp_flags & FGP_CREAT)) { int err; if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) - gfp_mask |= __GFP_WRITE; + gfp |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) - gfp_mask &= ~__GFP_FS; + gfp &= ~__GFP_FS; - page = __page_cache_alloc(gfp_mask); - if (!page) + folio = filemap_alloc_folio(gfp, 0); + if (!folio) return NULL; if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP)))) @@ -1943,27 +1939,27 @@ no_page: /* Init accessed so avoid atomic mark_page_accessed later */ if (fgp_flags & FGP_ACCESSED) - __SetPageReferenced(page); + __folio_set_referenced(folio); - err = add_to_page_cache_lru(page, mapping, index, gfp_mask); + err = filemap_add_folio(mapping, folio, index, gfp); if (unlikely(err)) { - put_page(page); - page = NULL; + folio_put(folio); + folio = NULL; if (err == -EEXIST) goto repeat; } /* - * add_to_page_cache_lru locks the page, and for mmap we expect - * an unlocked page. + * filemap_add_folio locks the page, and for mmap + * we expect an unlocked page. */ - if (page && (fgp_flags & FGP_FOR_MMAP)) - unlock_page(page); + if (folio && (fgp_flags & FGP_FOR_MMAP)) + folio_unlock(folio); } - return page; + return folio; } -EXPORT_SYMBOL(pagecache_get_page); +EXPORT_SYMBOL(__filemap_get_folio); static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max, xa_mark_t mark) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 6b19bc4ed6b0..e833e680e944 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -115,3 +115,15 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, return filemap_add_folio(mapping, page_folio(page), index, gfp); } EXPORT_SYMBOL(add_to_page_cache_lru); + +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp) +{ + struct folio *folio; + + folio = __filemap_get_folio(mapping, index, fgp_flags, gfp); + if ((fgp_flags & FGP_HEAD) || !folio || xa_is_value(folio)) + return &folio->page; + return folio_file_page(folio, index); +} +EXPORT_SYMBOL(pagecache_get_page); -- cgit v1.2.3 From b27652d935f41793c5e229a1e8b3a8bb3afe3cc1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 24 Dec 2020 12:55:56 -0500 Subject: mm/filemap: Add FGP_STABLE Allow filemap_get_folio() to wait for writeback to complete (if the filesystem wants that behaviour). This is the folio equivalent of grab_cache_page_write_begin(), which is moved into the folio-compat file as a reminder to migrate all the code using it. This paves the way for getting rid of AOP_FLAG_NOFS once grab_cache_page_write_begin() is removed. Kernel grows by 11 bytes. filemap_get_folio() grows by 33 bytes but grab_cache_page_write_begin() shrinks by 22 bytes to make up for it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/pagemap.h | 1 + mm/filemap.c | 25 +++---------------------- mm/folio-compat.c | 13 +++++++++++++ 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 401a90336a2c..b68b053e581f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -301,6 +301,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_FOR_MMAP 0x00000040 #define FGP_HEAD 0x00000080 #define FGP_ENTRY 0x00000100 +#define FGP_STABLE 0x00000200 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp); diff --git a/mm/filemap.c b/mm/filemap.c index de4cc1b9aa9b..3e9feb5cc570 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1872,6 +1872,7 @@ out: * * %FGP_WRITE - The page will be written to by the caller. * * %FGP_NOFS - __GFP_FS will get cleared in gfp. * * %FGP_NOWAIT - Don't get blocked by page lock. + * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) * * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. @@ -1922,6 +1923,8 @@ repeat: folio_clear_idle(folio); } + if (fgp_flags & FGP_STABLE) + folio_wait_stable(folio); no_page: if (!folio && (fgp_flags & FGP_CREAT)) { int err; @@ -3704,28 +3707,6 @@ out: } EXPORT_SYMBOL(generic_file_direct_write); -/* - * Find or create a page at the given pagecache position. Return the locked - * page. This function is specifically for buffered writes. - */ -struct page *grab_cache_page_write_begin(struct address_space *mapping, - pgoff_t index, unsigned flags) -{ - struct page *page; - int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT; - - if (flags & AOP_FLAG_NOFS) - fgp_flags |= FGP_NOFS; - - page = pagecache_get_page(mapping, index, fgp_flags, - mapping_gfp_mask(mapping)); - if (page) - wait_for_stable_page(page); - - return page; -} -EXPORT_SYMBOL(grab_cache_page_write_begin); - ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos) { diff --git a/mm/folio-compat.c b/mm/folio-compat.c index e833e680e944..5b6ae1da314e 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -116,6 +116,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, } EXPORT_SYMBOL(add_to_page_cache_lru); +noinline struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp) { @@ -127,3 +128,15 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, return folio_file_page(folio, index); } EXPORT_SYMBOL(pagecache_get_page); + +struct page *grab_cache_page_write_begin(struct address_space *mapping, + pgoff_t index, unsigned flags) +{ + unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; + + if (flags & AOP_FLAG_NOFS) + fgp_flags |= FGP_NOFS; + return pagecache_get_page(mapping, index, fgp_flags, + mapping_gfp_mask(mapping)); +} +EXPORT_SYMBOL(grab_cache_page_write_begin); -- cgit v1.2.3 From 121703c1c817b3c77f61002466d0bfca7e39f25d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 9 Mar 2021 13:48:03 -0500 Subject: mm/writeback: Add folio_write_one Transform write_one_page() into folio_write_one() and add a compatibility wrapper. Also move the declaration to pagemap.h as this is page cache functionality that doesn't need to be used by the rest of the kernel. Saves 58 bytes of kernel text. While folio_write_one() is 101 bytes smaller than write_one_page(), the inlined call to page_folio() expands each caller. There are fewer than ten callers so it doesn't seem worth putting a wrapper in the core. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells --- include/linux/mm.h | 4 ---- include/linux/pagemap.h | 5 +++++ mm/page-writeback.c | 30 +++++++++++++++--------------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index efcb06fc6116..40ff114aaf9e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2788,10 +2788,6 @@ extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); -/* mm/page-writeback.c */ -int __must_check write_one_page(struct page *page); -void task_dirty_inc(struct task_struct *tsk); - extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b68b053e581f..013cdc90f5fd 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -803,6 +803,11 @@ static inline void cancel_dirty_page(struct page *page) } bool folio_clear_dirty_for_io(struct folio *folio); bool clear_page_dirty_for_io(struct page *page); +int __must_check folio_write_one(struct folio *folio); +static inline int __must_check write_one_page(struct page *page) +{ + return folio_write_one(page_folio(page)); +} int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a0c35a9d8029..9c64490171e0 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2381,44 +2381,44 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) } /** - * write_one_page - write out a single page and wait on I/O - * @page: the page to write + * folio_write_one - write out a single folio and wait on I/O. + * @folio: The folio to write. * - * The page must be locked by the caller and will be unlocked upon return. + * The folio must be locked by the caller and will be unlocked upon return. * * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this * function returns. * * Return: %0 on success, negative error code otherwise */ -int write_one_page(struct page *page) +int folio_write_one(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; int ret = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, + .nr_to_write = folio_nr_pages(folio), }; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); - wait_on_page_writeback(page); + folio_wait_writeback(folio); - if (clear_page_dirty_for_io(page)) { - get_page(page); - ret = mapping->a_ops->writepage(page, &wbc); + if (folio_clear_dirty_for_io(folio)) { + folio_get(folio); + ret = mapping->a_ops->writepage(&folio->page, &wbc); if (ret == 0) - wait_on_page_writeback(page); - put_page(page); + folio_wait_writeback(folio); + folio_put(folio); } else { - unlock_page(page); + folio_unlock(folio); } if (!ret) ret = filemap_check_errors(mapping); return ret; } -EXPORT_SYMBOL(write_one_page); +EXPORT_SYMBOL(folio_write_one); /* * For address_spaces which do not use buffers nor write back. -- cgit v1.2.3