summaryrefslogtreecommitdiff
path: root/include/linux/hugetlb.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-05-19 19:21:03 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-05-19 19:21:03 +0300
commit61307b7be41a1f1039d1d1368810a1d92cb97b44 (patch)
tree639e233e177f8618cd5f86daeb7efc6b095890f0 /include/linux/hugetlb.h
parent0450d2083be6bdcd18c9535ac50c55266499b2df (diff)
parent76edc534cc289308130272a2ac28694fc9b72a03 (diff)
downloadlinux-61307b7be41a1f1039d1d1368810a1d92cb97b44.tar.xz
Merge tag 'mm-stable-2024-05-17-19-19' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull mm updates from Andrew Morton: "The usual shower of singleton fixes and minor series all over MM, documented (hopefully adequately) in the respective changelogs. Notable series include: - Lucas Stach has provided some page-mapping cleanup/consolidation/ maintainability work in the series "mm/treewide: Remove pXd_huge() API". - In the series "Allow migrate on protnone reference with MPOL_PREFERRED_MANY policy", Donet Tom has optimized mempolicy's MPOL_PREFERRED_MANY mode, yielding almost doubled performance in one test. - In their series "Memory allocation profiling" Kent Overstreet and Suren Baghdasaryan have contributed a means of determining (via /proc/allocinfo) whereabouts in the kernel memory is being allocated: number of calls and amount of memory. - Matthew Wilcox has provided the series "Various significant MM patches" which does a number of rather unrelated things, but in largely similar code sites. - In his series "mm: page_alloc: freelist migratetype hygiene" Johannes Weiner has fixed the page allocator's handling of migratetype requests, with resulting improvements in compaction efficiency. - In the series "make the hugetlb migration strategy consistent" Baolin Wang has fixed a hugetlb migration issue, which should improve hugetlb allocation reliability. - Liu Shixin has hit an I/O meltdown caused by readahead in a memory-tight memcg. Addressed in the series "Fix I/O high when memory almost met memcg limit". - In the series "mm/filemap: optimize folio adding and splitting" Kairui Song has optimized pagecache insertion, yielding ~10% performance improvement in one test. - Baoquan He has cleaned up and consolidated the early zone initialization code in the series "mm/mm_init.c: refactor free_area_init_core()". - Baoquan has also redone some MM initializatio code in the series "mm/init: minor clean up and improvement". - MM helper cleanups from Christoph Hellwig in his series "remove follow_pfn". - More cleanups from Matthew Wilcox in the series "Various page->flags cleanups". - Vlastimil Babka has contributed maintainability improvements in the series "memcg_kmem hooks refactoring". - More folio conversions and cleanups in Matthew Wilcox's series: "Convert huge_zero_page to huge_zero_folio" "khugepaged folio conversions" "Remove page_idle and page_young wrappers" "Use folio APIs in procfs" "Clean up __folio_put()" "Some cleanups for memory-failure" "Remove page_mapping()" "More folio compat code removal" - David Hildenbrand chipped in with "fs/proc/task_mmu: convert hugetlb functions to work on folis". - Code consolidation and cleanup work related to GUP's handling of hugetlbs in Peter Xu's series "mm/gup: Unify hugetlb, part 2". - Rick Edgecombe has developed some fixes to stack guard gaps in the series "Cover a guard gap corner case". - Jinjiang Tu has fixed KSM's behaviour after a fork+exec in the series "mm/ksm: fix ksm exec support for prctl". - Baolin Wang has implemented NUMA balancing for multi-size THPs. This is a simple first-cut implementation for now. The series is "support multi-size THP numa balancing". - Cleanups to vma handling helper functions from Matthew Wilcox in the series "Unify vma_address and vma_pgoff_address". - Some selftests maintenance work from Dev Jain in the series "selftests/mm: mremap_test: Optimizations and style fixes". - Improvements to the swapping of multi-size THPs from Ryan Roberts in the series "Swap-out mTHP without splitting". - Kefeng Wang has significantly optimized the handling of arm64's permission page faults in the series "arch/mm/fault: accelerate pagefault when badaccess" "mm: remove arch's private VM_FAULT_BADMAP/BADACCESS" - GUP cleanups from David Hildenbrand in "mm/gup: consistently call it GUP-fast". - hugetlb fault code cleanups from Vishal Moola in "Hugetlb fault path to use struct vm_fault". - selftests build fixes from John Hubbard in the series "Fix selftests/mm build without requiring "make headers"". - Memory tiering fixes/improvements from Ho-Ren (Jack) Chuang in the series "Improved Memory Tier Creation for CPUless NUMA Nodes". Fixes the initialization code so that migration between different memory types works as intended. - David Hildenbrand has improved follow_pte() and fixed an errant driver in the series "mm: follow_pte() improvements and acrn follow_pte() fixes". - David also did some cleanup work on large folio mapcounts in his series "mm: mapcount for large folios + page_mapcount() cleanups". - Folio conversions in KSM in Alex Shi's series "transfer page to folio in KSM". - Barry Song has added some sysfs stats for monitoring multi-size THP's in the series "mm: add per-order mTHP alloc and swpout counters". - Some zswap cleanups from Yosry Ahmed in the series "zswap same-filled and limit checking cleanups". - Matthew Wilcox has been looking at buffer_head code and found the documentation to be lacking. The series is "Improve buffer head documentation". - Multi-size THPs get more work, this time from Lance Yang. His series "mm/madvise: enhance lazyfreeing with mTHP in madvise_free" optimizes the freeing of these things. - Kemeng Shi has added more userspace-visible writeback instrumentation in the series "Improve visibility of writeback". - Kemeng Shi then sent some maintenance work on top in the series "Fix and cleanups to page-writeback". - Matthew Wilcox reduces mmap_lock traffic in the anon vma code in the series "Improve anon_vma scalability for anon VMAs". Intel's test bot reported an improbable 3x improvement in one test. - SeongJae Park adds some DAMON feature work in the series "mm/damon: add a DAMOS filter type for page granularity access recheck" "selftests/damon: add DAMOS quota goal test" - Also some maintenance work in the series "mm/damon/paddr: simplify page level access re-check for pageout" "mm/damon: misc fixes and improvements" - David Hildenbrand has disabled some known-to-fail selftests ni the series "selftests: mm: cow: flag vmsplice() hugetlb tests as XFAIL". - memcg metadata storage optimizations from Shakeel Butt in "memcg: reduce memory consumption by memcg stats". - DAX fixes and maintenance work from Vishal Verma in the series "dax/bus.c: Fixups for dax-bus locking"" * tag 'mm-stable-2024-05-17-19-19' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (426 commits) memcg, oom: cleanup unused memcg_oom_gfp_mask and memcg_oom_order selftests/mm: hugetlb_madv_vs_map: avoid test skipping by querying hugepage size at runtime mm/hugetlb: add missing VM_FAULT_SET_HINDEX in hugetlb_wp mm/hugetlb: add missing VM_FAULT_SET_HINDEX in hugetlb_fault selftests: cgroup: add tests to verify the zswap writeback path mm: memcg: make alloc_mem_cgroup_per_node_info() return bool mm/damon/core: fix return value from damos_wmark_metric_value mm: do not update memcg stats for NR_{FILE/SHMEM}_PMDMAPPED selftests: cgroup: remove redundant enabling of memory controller Docs/mm/damon/maintainer-profile: allow posting patches based on damon/next tree Docs/mm/damon/maintainer-profile: change the maintainer's timezone from PST to PT Docs/mm/damon/design: use a list for supported filters Docs/admin-guide/mm/damon/usage: fix wrong schemes effective quota update command Docs/admin-guide/mm/damon/usage: fix wrong example of DAMOS filter matching sysfs file selftests/damon: classify tests for functionalities and regressions selftests/damon/_damon_sysfs: use 'is' instead of '==' for 'None' selftests/damon/_damon_sysfs: find sysfs mount point from /proc/mounts selftests/damon/_damon_sysfs: check errors from nr_schemes file reads mm/damon/core: initialize ->esz_bp from damos_quota_init_priv() selftests/damon: add a test for DAMOS quota goal ...
Diffstat (limited to 'include/linux/hugetlb.h')
-rw-r--r--include/linux/hugetlb.h95
1 files changed, 52 insertions, 43 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b06f7c426d38..2b3c3a404769 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -174,8 +174,11 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pud_t *pud);
+bool hugetlbfs_pagecache_present(struct hstate *h,
+ struct vm_area_struct *vma,
+ unsigned long address);
-struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
+struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);
extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages[MAX_NUMNODES];
@@ -272,13 +275,9 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
void hugetlb_vma_lock_release(struct kref *kref);
-
-int pmd_huge(pmd_t pmd);
-int pud_huge(pud_t pud);
long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot,
unsigned long cp_flags);
-
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
@@ -298,8 +297,8 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}
-static inline struct address_space *hugetlb_page_mapping_lock_write(
- struct page *hpage)
+static inline struct address_space *hugetlb_folio_mapping_lock_write(
+ struct folio *folio)
{
return NULL;
}
@@ -329,13 +328,6 @@ static inline void hugetlb_zap_end(
{
}
-static inline struct page *hugetlb_follow_page_mask(
- struct vm_area_struct *vma, unsigned long address, unsigned int flags,
- unsigned int *page_mask)
-{
- BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
-}
-
static inline int copy_hugetlb_page_range(struct mm_struct *dst,
struct mm_struct *src,
struct vm_area_struct *dst_vma,
@@ -399,16 +391,6 @@ static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}
-static inline int pmd_huge(pmd_t pmd)
-{
- return 0;
-}
-
-static inline int pud_huge(pud_t pud)
-{
- return 0;
-}
-
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr, unsigned long len)
{
@@ -493,16 +475,6 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
#endif /* !CONFIG_HUGETLB_PAGE */
-/*
- * hugepages at page global directory. If arch support
- * hugepages at pgd level, they need to define this.
- */
-#ifndef pgd_huge
-#define pgd_huge(x) 0
-#endif
-#ifndef p4d_huge
-#define p4d_huge(x) 0
-#endif
#ifndef pgd_write
static inline int pgd_write(pgd_t pgd)
@@ -743,7 +715,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask, gfp_t gfp_mask);
+ nodemask_t *nmask, gfp_t gfp_mask,
+ bool allow_alloc_fallback);
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
@@ -855,9 +828,9 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
#define is_hugepage_only_range is_hugepage_only_range
#endif
-#ifndef arch_clear_hugepage_flags
-static inline void arch_clear_hugepage_flags(struct page *page) { }
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#ifndef arch_clear_hugetlb_flags
+static inline void arch_clear_hugetlb_flags(struct folio *folio) { }
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
#endif
#ifndef arch_make_huge_pte
@@ -884,8 +857,8 @@ static inline int hstate_index(struct hstate *h)
return h - hstates;
}
-extern int dissolve_free_huge_page(struct page *page);
-extern int dissolve_free_huge_pages(unsigned long start_pfn,
+int dissolve_free_hugetlb_folio(struct folio *folio);
+int dissolve_free_hugetlb_folios(unsigned long start_pfn,
unsigned long end_pfn);
#ifdef CONFIG_MEMORY_FAILURE
@@ -966,6 +939,30 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
return modified_mask;
}
+static inline bool htlb_allow_alloc_fallback(int reason)
+{
+ bool allowed_fallback = false;
+
+ /*
+ * Note: the memory offline, memory failure and migration syscalls will
+ * be allowed to fallback to other nodes due to lack of a better chioce,
+ * that might break the per-node hugetlb pool. While other cases will
+ * set the __GFP_THISNODE to avoid breaking the per-node hugetlb pool.
+ */
+ switch (reason) {
+ case MR_MEMORY_HOTPLUG:
+ case MR_MEMORY_FAILURE:
+ case MR_SYSCALL:
+ case MR_MEMPOLICY_MBIND:
+ allowed_fallback = true;
+ break;
+ default:
+ break;
+ }
+
+ return allowed_fallback;
+}
+
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
@@ -1061,7 +1058,8 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask, gfp_t gfp_mask)
+ nodemask_t *nmask, gfp_t gfp_mask,
+ bool allow_alloc_fallback)
{
return NULL;
}
@@ -1146,12 +1144,12 @@ static inline int hstate_index(struct hstate *h)
return 0;
}
-static inline int dissolve_free_huge_page(struct page *page)
+static inline int dissolve_free_hugetlb_folio(struct folio *folio)
{
return 0;
}
-static inline int dissolve_free_huge_pages(unsigned long start_pfn,
+static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn,
unsigned long end_pfn)
{
return 0;
@@ -1177,6 +1175,11 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
return 0;
}
+static inline bool htlb_allow_alloc_fallback(int reason)
+{
+ return false;
+}
+
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
@@ -1217,6 +1220,12 @@ static inline void hugetlb_register_node(struct node *node)
static inline void hugetlb_unregister_node(struct node *node)
{
}
+
+static inline bool hugetlbfs_pagecache_present(
+ struct hstate *h, struct vm_area_struct *vma, unsigned long address)
+{
+ return false;
+}
#endif /* CONFIG_HUGETLB_PAGE */
static inline spinlock_t *huge_pte_lock(struct hstate *h,