summaryrefslogtreecommitdiff
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c166
1 files changed, 79 insertions, 87 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bdbfeb6fb393..ab35b1cc9927 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -260,12 +260,6 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
/*
* hugetlb vma_lock helper routines
*/
-static bool __vma_shareable_lock(struct vm_area_struct *vma)
-{
- return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
- vma->vm_private_data;
-}
-
void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
if (__vma_shareable_lock(vma)) {
@@ -1480,9 +1474,9 @@ static void __destroy_compound_gigantic_folio(struct folio *folio,
int nr_pages = 1 << order;
struct page *p;
- atomic_set(folio_mapcount_ptr(folio), 0);
- atomic_set(folio_subpages_mapcount_ptr(folio), 0);
- atomic_set(folio_pincount_ptr(folio), 0);
+ atomic_set(&folio->_entire_mapcount, 0);
+ atomic_set(&folio->_nr_pages_mapped, 0);
+ atomic_set(&folio->_pincount, 0);
for (i = 1; i < nr_pages; i++) {
p = folio_page(folio, i);
@@ -1492,7 +1486,7 @@ static void __destroy_compound_gigantic_folio(struct folio *folio,
set_page_refcounted(p);
}
- folio_set_compound_order(folio, 0);
+ folio_set_order(folio, 0);
__folio_clear_head(folio);
}
@@ -1737,7 +1731,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
* which makes any healthy subpages reusable.
*/
if (unlikely(folio_test_hwpoison(folio)))
- hugetlb_clear_page_hwpoison(&folio->page);
+ folio_clear_hugetlb_hwpoison(folio);
for (i = 0; i < pages_per_huge_page(h); i++) {
subpage = folio_page(folio, i);
@@ -1956,7 +1950,7 @@ static bool __prep_compound_gigantic_folio(struct folio *folio,
__folio_clear_reserved(folio);
__folio_set_head(folio);
/* we rely on prep_new_hugetlb_folio to set the destructor */
- folio_set_compound_order(folio, order);
+ folio_set_order(folio, order);
for (i = 0; i < nr_pages; i++) {
p = folio_page(folio, i);
@@ -2002,9 +1996,9 @@ static bool __prep_compound_gigantic_folio(struct folio *folio,
if (i != 0)
set_compound_head(p, &folio->page);
}
- atomic_set(folio_mapcount_ptr(folio), -1);
- atomic_set(folio_subpages_mapcount_ptr(folio), 0);
- atomic_set(folio_pincount_ptr(folio), 0);
+ atomic_set(&folio->_entire_mapcount, -1);
+ atomic_set(&folio->_nr_pages_mapped, 0);
+ atomic_set(&folio->_pincount, 0);
return true;
out_error:
@@ -2020,7 +2014,7 @@ out_error:
p = folio_page(folio, j);
__ClearPageReserved(p);
}
- folio_set_compound_order(folio, 0);
+ folio_set_order(folio, 0);
__folio_clear_head(folio);
return false;
}
@@ -2044,11 +2038,12 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
*/
int PageHuge(struct page *page)
{
+ struct folio *folio;
+
if (!PageCompound(page))
return 0;
-
- page = compound_head(page);
- return page[1].compound_dtor == HUGETLB_PAGE_DTOR;
+ folio = page_folio(page);
+ return folio->_folio_dtor == HUGETLB_PAGE_DTOR;
}
EXPORT_SYMBOL_GPL(PageHuge);
@@ -2058,10 +2053,11 @@ EXPORT_SYMBOL_GPL(PageHuge);
*/
int PageHeadHuge(struct page *page_head)
{
- if (!PageHead(page_head))
+ struct folio *folio = (struct folio *)page_head;
+ if (!folio_test_large(folio))
return 0;
- return page_head[1].compound_dtor == HUGETLB_PAGE_DTOR;
+ return folio->_folio_dtor == HUGETLB_PAGE_DTOR;
}
EXPORT_SYMBOL_GPL(PageHeadHuge);
@@ -4972,7 +4968,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
int ret = 0;
if (cow) {
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src_vma, src,
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src,
src_vma->vm_start,
src_vma->vm_end);
mmu_notifier_invalidate_range_start(&range);
@@ -4981,7 +4977,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
} else {
/*
* For shared mappings the vma lock must be held before
- * calling huge_pte_offset in the src vma. Otherwise, the
+ * calling hugetlb_walk() in the src vma. Otherwise, the
* returned ptep could go away if part of a shared pmd and
* another thread calls huge_pmd_unshare.
*/
@@ -4991,7 +4987,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
last_addr_mask = hugetlb_mask_last_page(h);
for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
spinlock_t *src_ptl, *dst_ptl;
- src_pte = huge_pte_offset(src, addr, sz);
+ src_pte = hugetlb_walk(src_vma, addr, sz);
if (!src_pte) {
addr |= last_addr_mask;
continue;
@@ -5183,7 +5179,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
struct mmu_notifier_range range;
bool shared_pmd = false;
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, old_addr,
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, old_addr,
old_end);
adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
/*
@@ -5198,7 +5194,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
hugetlb_vma_lock_write(vma);
i_mmap_lock_write(mapping);
for (; old_addr < old_end; old_addr += sz, new_addr += sz) {
- src_pte = huge_pte_offset(mm, old_addr, sz);
+ src_pte = hugetlb_walk(vma, old_addr, sz);
if (!src_pte) {
old_addr |= last_addr_mask;
new_addr |= last_addr_mask;
@@ -5261,7 +5257,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
last_addr_mask = hugetlb_mask_last_page(h);
address = start;
for (; address < end; address += sz) {
- ptep = huge_pte_offset(mm, address, sz);
+ ptep = hugetlb_walk(vma, address, sz);
if (!ptep) {
address |= last_addr_mask;
continue;
@@ -5397,7 +5393,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
struct mmu_notifier_range range;
struct mmu_gather tlb;
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
start, end);
adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
mmu_notifier_invalidate_range_start(&range);
@@ -5574,7 +5570,7 @@ retry_avoidcopy:
mutex_lock(&hugetlb_fault_mutex_table[hash]);
hugetlb_vma_lock_read(vma);
spin_lock(ptl);
- ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
+ ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
if (likely(ptep &&
pte_same(huge_ptep_get(ptep), pte)))
goto retry_avoidcopy;
@@ -5603,7 +5599,7 @@ retry_avoidcopy:
pages_per_huge_page(h));
__SetPageUptodate(new_page);
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, haddr,
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr,
haddr + huge_page_size(h));
mmu_notifier_invalidate_range_start(&range);
@@ -5612,7 +5608,7 @@ retry_avoidcopy:
* before the page tables are altered
*/
spin_lock(ptl);
- ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
+ ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
/* Break COW or unshare */
huge_ptep_clear_flush(vma, haddr, ptep);
@@ -5919,7 +5915,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* if populated.
*/
if (unlikely(pte_marker_uffd_wp(old_pte)))
- new_pte = huge_pte_wrprotect(huge_pte_mkuffd_wp(new_pte));
+ new_pte = huge_pte_mkuffd_wp(new_pte);
set_huge_pte_at(mm, haddr, ptep, new_pte);
hugetlb_count_add(pages_per_huge_page(h), mm);
@@ -5994,22 +5990,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int need_wait_lock = 0;
unsigned long haddr = address & huge_page_mask(h);
- ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
- if (ptep) {
- /*
- * Since we hold no locks, ptep could be stale. That is
- * OK as we are only making decisions based on content and
- * not actually modifying content here.
- */
- entry = huge_ptep_get(ptep);
- if (unlikely(is_hugetlb_entry_migration(entry))) {
- migration_entry_wait_huge(vma, ptep);
- return 0;
- } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
- return VM_FAULT_HWPOISON_LARGE |
- VM_FAULT_SET_HINDEX(hstate_index(h));
- }
-
/*
* Serialize hugepage allocation and instantiation, so that we don't
* get spurious allocation failures if two CPUs race to instantiate
@@ -6024,10 +6004,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* Acquire vma lock before calling huge_pte_alloc and hold
* until finished with ptep. This prevents huge_pmd_unshare from
* being called elsewhere and making the ptep no longer valid.
- *
- * ptep could have already be assigned via huge_pte_offset. That
- * is OK, as huge_pte_alloc will return the same value unless
- * something has changed.
*/
hugetlb_vma_lock_read(vma);
ptep = huge_pte_alloc(mm, vma, haddr, huge_page_size(h));
@@ -6056,8 +6032,23 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* fault, and is_hugetlb_entry_(migration|hwpoisoned) check will
* properly handle it.
*/
- if (!pte_present(entry))
+ if (!pte_present(entry)) {
+ if (unlikely(is_hugetlb_entry_migration(entry))) {
+ /*
+ * Release the hugetlb fault lock now, but retain
+ * the vma lock, because it is needed to guard the
+ * huge_pte_lockptr() later in
+ * migration_entry_wait_huge(). The vma lock will
+ * be released there.
+ */
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ migration_entry_wait_huge(vma, ptep);
+ return 0;
+ } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
+ ret = VM_FAULT_HWPOISON_LARGE |
+ VM_FAULT_SET_HINDEX(hstate_index(h));
goto out_mutex;
+ }
/*
* If we are going to COW/unshare the mapping later, we examine the
@@ -6402,10 +6393,10 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
if (WARN_ON_ONCE(flags & FOLL_PIN))
return NULL;
-retry:
- pte = huge_pte_offset(mm, haddr, huge_page_size(h));
+ hugetlb_vma_lock_read(vma);
+ pte = hugetlb_walk(vma, haddr, huge_page_size(h));
if (!pte)
- return NULL;
+ goto out_unlock;
ptl = huge_pte_lock(h, mm, pte);
entry = huge_ptep_get(pte);
@@ -6425,19 +6416,11 @@ retry:
page = NULL;
goto out;
}
- } else {
- if (is_hugetlb_entry_migration(entry)) {
- spin_unlock(ptl);
- __migration_entry_wait_huge(pte, ptl);
- goto retry;
- }
- /*
- * hwpoisoned entry is treated as no_page_table in
- * follow_page_mask().
- */
}
out:
spin_unlock(ptl);
+out_unlock:
+ hugetlb_vma_unlock_read(vma);
return page;
}
@@ -6468,6 +6451,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
break;
}
+ hugetlb_vma_lock_read(vma);
/*
* Some archs (sparc64, sh*) have multiple pte_ts to
* each hugepage. We have to make sure we get the
@@ -6475,8 +6459,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
*
* Note that page table lock is not held when pte is null.
*/
- pte = huge_pte_offset(mm, vaddr & huge_page_mask(h),
- huge_page_size(h));
+ pte = hugetlb_walk(vma, vaddr & huge_page_mask(h),
+ huge_page_size(h));
if (pte)
ptl = huge_pte_lock(h, mm, pte);
absent = !pte || huge_pte_none(huge_ptep_get(pte));
@@ -6492,6 +6476,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
!hugetlbfs_pagecache_present(h, vma, vaddr)) {
if (pte)
spin_unlock(ptl);
+ hugetlb_vma_unlock_read(vma);
remainder = 0;
break;
}
@@ -6513,6 +6498,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte)
spin_unlock(ptl);
+ hugetlb_vma_unlock_read(vma);
+
if (flags & FOLL_WRITE)
fault_flags |= FAULT_FLAG_WRITE;
else if (unshare)
@@ -6575,6 +6562,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
remainder -= pages_per_huge_page(h);
i += pages_per_huge_page(h);
spin_unlock(ptl);
+ hugetlb_vma_unlock_read(vma);
continue;
}
@@ -6604,6 +6592,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs,
flags))) {
spin_unlock(ptl);
+ hugetlb_vma_unlock_read(vma);
remainder = 0;
err = -ENOMEM;
break;
@@ -6615,6 +6604,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
i += refs;
spin_unlock(ptl);
+ hugetlb_vma_unlock_read(vma);
}
*nr_pages = remainder;
/*
@@ -6627,7 +6617,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
return i ? i : err;
}
-unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end,
pgprot_t newprot, unsigned long cp_flags)
{
@@ -6636,7 +6626,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
pte_t *ptep;
pte_t pte;
struct hstate *h = hstate_vma(vma);
- unsigned long pages = 0, psize = huge_page_size(h);
+ long pages = 0, psize = huge_page_size(h);
bool shared_pmd = false;
struct mmu_notifier_range range;
unsigned long last_addr_mask;
@@ -6649,7 +6639,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* range if PMD sharing is possible.
*/
mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA,
- 0, vma, mm, start, end);
+ 0, mm, start, end);
adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
BUG_ON(address >= end);
@@ -6661,7 +6651,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
last_addr_mask = hugetlb_mask_last_page(h);
for (; address < end; address += psize) {
spinlock_t *ptl;
- ptep = huge_pte_offset(mm, address, psize);
+ ptep = hugetlb_walk(vma, address, psize);
if (!ptep) {
if (!uffd_wp) {
address |= last_addr_mask;
@@ -6672,8 +6662,10 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* pre-allocations to install pte markers.
*/
ptep = huge_pte_alloc(mm, vma, address, psize);
- if (!ptep)
+ if (!ptep) {
+ pages = -ENOMEM;
break;
+ }
}
ptl = huge_pte_lock(h, mm, ptep);
if (huge_pmd_unshare(mm, vma, address, ptep)) {
@@ -6728,7 +6720,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
pte = huge_pte_modify(old_pte, newprot);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
if (uffd_wp)
- pte = huge_pte_mkuffd_wp(huge_pte_wrprotect(pte));
+ pte = huge_pte_mkuffd_wp(pte);
else if (uffd_wp_resolve)
pte = huge_pte_clear_uffd_wp(pte);
huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
@@ -6763,7 +6755,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
hugetlb_vma_unlock_write(vma);
mmu_notifier_invalidate_range_end(&range);
- return pages << h->order;
+ return pages > 0 ? (pages << h->order) : pages;
}
/* Return true if reservation was successful, false otherwise. */
@@ -6772,7 +6764,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
- long chg, add = -1;
+ long chg = -1, add = -1;
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
@@ -6977,8 +6969,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
unsigned long s_end = sbase + PUD_SIZE;
/* Allow segments to share if only one is marked locked */
- unsigned long vm_flags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
- unsigned long svm_flags = svma->vm_flags & VM_LOCKED_CLEAR_MASK;
+ unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED_MASK;
+ unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED_MASK;
/*
* match the virtual addresses, permission and the alignment of the
@@ -7071,8 +7063,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
saddr = page_table_shareable(svma, vma, addr, idx);
if (saddr) {
- spte = huge_pte_offset(svma->vm_mm, saddr,
- vma_mmu_pagesize(svma));
+ spte = hugetlb_walk(svma, saddr,
+ vma_mmu_pagesize(svma));
if (spte) {
get_page(virt_to_page(spte));
break;
@@ -7276,18 +7268,18 @@ unlock:
return ret;
}
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
+int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
{
int ret = 0;
*hugetlb = false;
spin_lock_irq(&hugetlb_lock);
- if (PageHeadHuge(page)) {
+ if (folio_test_hugetlb(folio)) {
*hugetlb = true;
- if (HPageFreed(page))
+ if (folio_test_hugetlb_freed(folio))
ret = 0;
- else if (HPageMigratable(page) || unpoison)
- ret = get_page_unless_zero(page);
+ else if (folio_test_hugetlb_migratable(folio) || unpoison)
+ ret = folio_try_get(folio);
else
ret = -EBUSY;
}
@@ -7378,13 +7370,13 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
* No need to call adjust_range_if_pmd_sharing_possible(), because
* we have already done the PUD_SIZE alignment.
*/
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
start, end);
mmu_notifier_invalidate_range_start(&range);
hugetlb_vma_lock_write(vma);
i_mmap_lock_write(vma->vm_file->f_mapping);
for (address = start; address < end; address += PUD_SIZE) {
- ptep = huge_pte_offset(mm, address, sz);
+ ptep = hugetlb_walk(vma, address, sz);
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);