Diffstat (limited to 'mm/userfaultfd.c')
-rw-r--r-- | mm/userfaultfd.c | 69 |
1 files changed, 51 insertions, 18 deletions
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 712160cd41ec..defa5109cc62 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -56,17 +56,16 @@ struct vm_area_struct *find_vma_and_prepare_anon(struct mm_struct *mm,
 
 #ifdef CONFIG_PER_VMA_LOCK
 /*
- * lock_vma() - Lookup and lock vma corresponding to @address.
+ * uffd_lock_vma() - Lookup and lock vma corresponding to @address.
  * @mm: mm to search vma in.
  * @address: address that the vma should contain.
  *
- * Should be called without holding mmap_lock. vma should be unlocked after use
- * with unlock_vma().
+ * Should be called without holding mmap_lock.
 *
 * Return: A locked vma containing @address, -ENOENT if no vma is found, or
 * -ENOMEM if anon_vma couldn't be allocated.
 */
-static struct vm_area_struct *lock_vma(struct mm_struct *mm,
+static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
 				       unsigned long address)
 {
 	struct vm_area_struct *vma;
@@ -74,9 +73,8 @@ static struct vm_area_struct *lock_vma(struct mm_struct *mm,
 	vma = lock_vma_under_rcu(mm, address);
 	if (vma) {
 		/*
-		 * lock_vma_under_rcu() only checks anon_vma for private
-		 * anonymous mappings. But we need to ensure it is assigned in
-		 * private file-backed vmas as well.
+		 * We know we're going to need to use anon_vma, so check
+		 * that early.
 		 */
 		if (!(vma->vm_flags & VM_SHARED) && unlikely(!vma->anon_vma))
 			vma_end_read(vma);
@@ -107,7 +105,7 @@ static struct vm_area_struct *uffd_mfill_lock(struct mm_struct *dst_mm,
 {
 	struct vm_area_struct *dst_vma;
 
-	dst_vma = lock_vma(dst_mm, dst_start);
+	dst_vma = uffd_lock_vma(dst_mm, dst_start);
 	if (IS_ERR(dst_vma) || validate_dst_vma(dst_vma, dst_start + len))
 		return dst_vma;
 
@@ -180,9 +178,9 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
 	pte_t _dst_pte, *dst_pte;
 	bool writable = dst_vma->vm_flags & VM_WRITE;
 	bool vm_shared = dst_vma->vm_flags & VM_SHARED;
-	bool page_in_cache = page_mapping(page);
 	spinlock_t *ptl;
-	struct folio *folio;
+	struct folio *folio = page_folio(page);
+	bool page_in_cache = folio_mapping(folio);
 
 	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
 	_dst_pte = pte_mkdirty(_dst_pte);
@@ -212,7 +210,6 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
 	if (!pte_none_mostly(ptep_get(dst_pte)))
 		goto out_unlock;
 
-	folio = page_folio(page);
 	if (page_in_cache) {
 		/* Usually, cache pages are already added to LRU */
 		if (newly_allocated)
@@ -316,6 +313,38 @@ out_release:
 	goto out;
 }
 
+static int mfill_atomic_pte_zeroed_folio(pmd_t *dst_pmd,
+					 struct vm_area_struct *dst_vma,
+					 unsigned long dst_addr)
+{
+	struct folio *folio;
+	int ret = -ENOMEM;
+
+	folio = vma_alloc_zeroed_movable_folio(dst_vma, dst_addr);
+	if (!folio)
+		return ret;
+
+	if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL))
+		goto out_put;
+
+	/*
+	 * The memory barrier inside __folio_mark_uptodate makes sure that
+	 * zeroing out the folio become visible before mapping the page
+	 * using set_pte_at(). See do_anonymous_page().
+	 */
+	__folio_mark_uptodate(folio);
+
+	ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
+				       &folio->page, true, 0);
+	if (ret)
+		goto out_put;
+
+	return 0;
+out_put:
+	folio_put(folio);
+	return ret;
+}
+
 static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
 				     struct vm_area_struct *dst_vma,
 				     unsigned long dst_addr)
@@ -324,6 +353,9 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
 	spinlock_t *ptl;
 	int ret;
 
+	if (mm_forbids_zeropage(dst_vma->vm_mm))
+		return mfill_atomic_pte_zeroed_folio(dst_pmd, dst_vma, dst_addr);
+
 	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
 					 dst_vma->vm_page_prot));
 	ret = -EAGAIN;
@@ -1026,7 +1058,7 @@ static int move_present_pte(struct mm_struct *mm,
 	}
 
 	folio_move_anon_rmap(src_folio, dst_vma);
-	WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+	src_folio->index = linear_page_index(dst_vma, dst_addr);
 
 	orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
 	/* Follow mremap() behavior and treat the entry dirty after the move */
@@ -1402,7 +1434,7 @@ static int uffd_move_lock(struct mm_struct *mm,
 	struct vm_area_struct *vma;
 	int err;
 
-	vma = lock_vma(mm, dst_start);
+	vma = uffd_lock_vma(mm, dst_start);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
@@ -1417,7 +1449,7 @@ static int uffd_move_lock(struct mm_struct *mm,
 	}
 
 	/*
-	 * Using lock_vma() to get src_vma can lead to following deadlock:
+	 * Using uffd_lock_vma() to get src_vma can lead to following deadlock:
 	 *
 	 * Thread1				Thread2
 	 * -------				-------
@@ -1439,12 +1471,13 @@ static int uffd_move_lock(struct mm_struct *mm,
 	err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
 	if (!err) {
 		/*
-		 * See comment in lock_vma() as to why not using
+		 * See comment in uffd_lock_vma() as to why not using
 		 * vma_start_read() here.
 		 */
 		down_read(&(*dst_vmap)->vm_lock->lock);
 		if (*dst_vmap != *src_vmap)
-			down_read(&(*src_vmap)->vm_lock->lock);
+			down_read_nested(&(*src_vmap)->vm_lock->lock,
+					 SINGLE_DEPTH_NESTING);
 	}
 	mmap_read_unlock(mm);
 	return err;
@@ -1661,9 +1694,9 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
 		/* Check if we can move the pmd without splitting it. */
 		if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) ||
 		    !pmd_none(dst_pmdval)) {
			struct folio *folio = pfn_folio(pmd_pfn(*src_pmd));
-			struct folio *folio = pfn_folio(pmd_pfn(*src_pmd));
+			struct folio *folio = pmd_folio(*src_pmd);
 
-			if (!folio || (!is_huge_zero_page(&folio->page) &&
+			if (!folio || (!is_huge_zero_folio(folio) &&
 				       !PageAnonExclusive(&folio->page))) {
 				spin_unlock(ptl);
 				err = -EBUSY;
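
Note (not part of the commit): the mm_forbids_zeropage() branch added above keeps UFFDIO_ZEROPAGE working on mms that cannot map the shared zero page (s390 KVM guest mms, for example) by allocating and mapping a freshly zeroed folio instead of a zero-page PTE. The userspace-visible API is unchanged; a minimal, hypothetical caller is sketched below for illustration only (uffd_zero_range is not from this patch).

/*
 * Hypothetical userspace sketch: resolve a missing-page fault range with
 * zeroes via UFFDIO_ZEROPAGE. With the fallback above, this also succeeds
 * on an mm where mm_forbids_zeropage() is true.
 */
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

static int uffd_zero_range(int uffd, unsigned long start, unsigned long len)
{
	struct uffdio_zeropage zp = {
		.range = { .start = start, .len = len },
		.mode  = 0,		/* or UFFDIO_ZEROPAGE_MODE_DONTWAKE */
	};

	if (ioctl(uffd, UFFDIO_ZEROPAGE, &zp) == -1)
		return -1;	/* zp.zeropage holds bytes zeroed or -errno */
	return 0;
}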