summaryrefslogtreecommitdiff
path: root/include/linux/swap.h
diff options
context:
space:
mode:
authorRyan Roberts <ryan.roberts@arm.com>2024-04-08 21:39:41 +0300
committerAndrew Morton <akpm@linux-foundation.org>2024-04-26 06:56:37 +0300
commita62fb92ac12ed39df4930dca599a3b427552882a (patch)
treec0f9ffe693552fb8c89aae858db0186976579e50 /include/linux/swap.h
parentd7d0d389ff90644546ffcb8e15ea3ccaf6138958 (diff)
downloadlinux-a62fb92ac12ed39df4930dca599a3b427552882a.tar.xz
mm: swap: free_swap_and_cache_nr() as batched free_swap_and_cache()
Now that we no longer have a convenient flag in the cluster to determine if a folio is large, free_swap_and_cache() will take a reference and lock a large folio much more often, which could lead to contention and (e.g.) failure to split large folios, etc. Let's solve that problem by batch freeing swap and cache with a new function, free_swap_and_cache_nr(), to free a contiguous range of swap entries together. This allows us to first drop a reference to each swap slot before we try to release the cache folio. This means we only try to release the folio once, only taking the reference and lock once - much better than the previous 512 times for the 2M THP case. Contiguous swap entries are gathered in zap_pte_range() and madvise_free_pte_range() in a similar way to how present ptes are already gathered in zap_pte_range(). While we are at it, let's simplify by converting the return type of both functions to void. The return value was used only by zap_pte_range() to print a bad pte, and was ignored by everyone else, so the extra reporting wasn't exactly guaranteed. We will still get the warning with most of the information from get_swap_device(). With the batch version, we wouldn't know which pte was bad anyway so could print the wrong one. [ryan.roberts@arm.com: fix a build warning on parisc] Link: https://lkml.kernel.org/r/20240409111840.3173122-1-ryan.roberts@arm.com Link: https://lkml.kernel.org/r/20240408183946.2991168-3-ryan.roberts@arm.com Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> Acked-by: David Hildenbrand <david@redhat.com> Cc: Barry Song <21cnbao@gmail.com> Cc: Barry Song <v-songbaohua@oppo.com> Cc: Chris Li <chrisl@kernel.org> Cc: Gao Xiang <xiang@kernel.org> Cc: "Huang, Ying" <ying.huang@intel.com> Cc: Kefeng Wang <wangkefeng.wang@huawei.com> Cc: Lance Yang <ioworker0@gmail.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Michal Hocko <mhocko@suse.com> Cc: Yang Shi <shy828301@gmail.com> Cc: Yu Zhao <yuzhao@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'include/linux/swap.h')
-rw-r--r--include/linux/swap.h12
1 files changed, 9 insertions, 3 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a803de0ac24f..2d8f2b950ddf 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -468,7 +468,7 @@ extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
extern void swapcache_free_entries(swp_entry_t *entries, int n);
-extern int free_swap_and_cache(swp_entry_t);
+extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);
int swap_type_of(dev_t device, sector_t offset);
int find_first_swap(dev_t *device);
extern unsigned int count_swap_pages(int, int);
@@ -517,8 +517,9 @@ static inline void put_swap_device(struct swap_info_struct *si)
#define free_pages_and_swap_cache(pages, nr) \
release_pages((pages), (nr));
-/* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
-#define free_swap_and_cache(e) is_pfn_swap_entry(e)
+static inline void free_swap_and_cache_nr(swp_entry_t entry, int nr)
+{
+}
static inline void free_swap_cache(struct folio *folio)
{
@@ -586,6 +587,11 @@ static inline int add_swap_extent(struct swap_info_struct *sis,
}
#endif /* CONFIG_SWAP */
+static inline void free_swap_and_cache(swp_entry_t entry)
+{
+ free_swap_and_cache_nr(entry, 1);
+}
+
#ifdef CONFIG_MEMCG
static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
{