summaryrefslogtreecommitdiff
path: root/include/linux/mm_types.h
diff options
context:
space:
mode:
authorHugh Dickins <hughd@google.com>2022-11-03 04:51:38 +0300
committerAndrew Morton <akpm@linux-foundation.org>2022-12-01 02:58:46 +0300
commitcb67f4282bf9693658dbda934a441ddbbb1446df (patch)
tree799f5d9f3b36ff8d844e90ace1f09a94a7ec3a5c /include/linux/mm_types.h
parentdad6a5eb55564845aa17b8b20fa834af21e46c48 (diff)
downloadlinux-cb67f4282bf9693658dbda934a441ddbbb1446df.tar.xz
mm,thp,rmap: simplify compound page mapcount handling
Compound page (folio) mapcount calculations have been different for anon and file (or shmem) THPs, and involved the obscure PageDoubleMap flag. And each huge mapping and unmapping of a file (or shmem) THP involved atomically incrementing and decrementing the mapcount of every subpage of that huge page, dirtying many struct page cachelines. Add subpages_mapcount field to the struct folio and first tail page, so that the total of subpage mapcounts is available in one place near the head: then page_mapcount() and total_mapcount() and page_mapped(), and their folio equivalents, are so quick that anon and file and hugetlb don't need to be optimized differently. Delete the unloved PageDoubleMap. page_add and page_remove rmap functions must now maintain the subpages_mapcount as well as the subpage _mapcount, when dealing with pte mappings of huge pages; and correct maintenance of NR_ANON_MAPPED and NR_FILE_MAPPED statistics still needs reading through the subpages, using nr_subpages_unmapped() - but only when first or last pmd mapping finds subpages_mapcount raised (double-map case, not the common case). But are those counts (used to decide when to split an anon THP, and in vmscan's pagecache_reclaimable heuristic) correctly maintained? Not quite: since page_remove_rmap() (and also split_huge_pmd()) is often called without page lock, there can be races when a subpage pte mapcount 0<->1 while compound pmd mapcount 0<->1 is scanning - races which the previous implementation had prevented. The statistics might become inaccurate, and even drift down until they underflow through 0. That is not good enough, but is better dealt with in a followup patch. Update a few comments on first and second tail page overlaid fields. hugepage_add_new_anon_rmap() has to "increment" compound_mapcount, but subpages_mapcount and compound_pincount are already correctly at 0, so delete its reinitialization of compound_pincount. A simple 100 X munmap(mmap(2GB, MAP_SHARED|MAP_POPULATE, tmpfs), 2GB) took 18 seconds on small pages, and used to take 1 second on huge pages, but now takes 119 milliseconds on huge pages. Mapping by pmds a second time used to take 860ms and now takes 92ms; mapping by pmds after mapping by ptes (when the scan is needed) used to take 870ms and now takes 495ms. But there might be some benchmarks which would show a slowdown, because tail struct pages now fall out of cache until final freeing checks them. Link: https://lkml.kernel.org/r/47ad693-717-79c8-e1ba-46c3a6602e48@google.com Signed-off-by: Hugh Dickins <hughd@google.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: James Houghton <jthoughton@google.com> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Mina Almasry <almasrymina@google.com> Cc: Muchun Song <songmuchun@bytedance.com> Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev> Cc: Peter Xu <peterx@redhat.com> Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Yang Shi <shy828301@gmail.com> Cc: Zach O'Keefe <zokeefe@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'include/linux/mm_types.h')
-rw-r--r--include/linux/mm_types.h21
1 files changed, 18 insertions, 3 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 44f1f8b6be02..44a1a699b5ad 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -142,6 +142,7 @@ struct page {
unsigned char compound_dtor;
unsigned char compound_order;
atomic_t compound_mapcount;
+ atomic_t subpages_mapcount;
atomic_t compound_pincount;
#ifdef CONFIG_64BIT
unsigned int compound_nr; /* 1 << compound_order */
@@ -270,7 +271,8 @@ struct page {
* @_head_1: Points to the folio. Do not use.
* @_folio_dtor: Which destructor to use for this folio.
* @_folio_order: Do not use directly, call folio_order().
- * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
+ * @_compound_mapcount: Do not use directly, call folio_entire_mapcount().
+ * @_subpages_mapcount: Do not use directly, call folio_mapcount().
* @_pincount: Do not use directly, call folio_maybe_dma_pinned().
* @_folio_nr_pages: Do not use directly, call folio_nr_pages().
* @_flags_2: For alignment. Do not use.
@@ -323,7 +325,8 @@ struct folio {
unsigned long _head_1;
unsigned char _folio_dtor;
unsigned char _folio_order;
- atomic_t _total_mapcount;
+ atomic_t _compound_mapcount;
+ atomic_t _subpages_mapcount;
atomic_t _pincount;
#ifdef CONFIG_64BIT
unsigned int _folio_nr_pages;
@@ -365,7 +368,8 @@ FOLIO_MATCH(flags, _flags_1);
FOLIO_MATCH(compound_head, _head_1);
FOLIO_MATCH(compound_dtor, _folio_dtor);
FOLIO_MATCH(compound_order, _folio_order);
-FOLIO_MATCH(compound_mapcount, _total_mapcount);
+FOLIO_MATCH(compound_mapcount, _compound_mapcount);
+FOLIO_MATCH(subpages_mapcount, _subpages_mapcount);
FOLIO_MATCH(compound_pincount, _pincount);
#ifdef CONFIG_64BIT
FOLIO_MATCH(compound_nr, _folio_nr_pages);
@@ -388,11 +392,22 @@ static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
return &tail->compound_mapcount;
}
+static inline atomic_t *folio_subpages_mapcount_ptr(struct folio *folio)
+{
+ struct page *tail = &folio->page + 1;
+ return &tail->subpages_mapcount;
+}
+
static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
return &page[1].compound_mapcount;
}
+static inline atomic_t *subpages_mapcount_ptr(struct page *page)
+{
+ return &page[1].subpages_mapcount;
+}
+
static inline atomic_t *compound_pincount_ptr(struct page *page)
{
return &page[1].compound_pincount;