Diffstat (limited to 'mm/shmem.c')
 mm/shmem.c | 185 +++++++++++++++----------------------
 1 file changed, 74 insertions(+), 111 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index a08cedefbfaa..70d9ce294bb4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1695,8 +1695,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
- struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
- struct page *page;
+ struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
+ struct swap_info_struct *si;
+ struct page *page = NULL;
swp_entry_t swap;
int error;
@@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
swap = radix_to_swp_entry(*pagep);
*pagep = NULL;
+ /* Prevent swapoff from happening to us. */
+ si = get_swap_device(swap);
+ if (!si) {
+ error = -EINVAL;
+ goto failed;
+ }
/* Look it up and read it in.. */
page = lookup_swap_cache(swap, NULL, 0);
if (!page) {
@@ -1765,6 +1772,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
swap_free(swap);
*pagep = page;
+ if (si)
+ put_swap_device(si);
return 0;
failed:
if (!shmem_confirm_swap(mapping, index, swap))
@@ -1775,6 +1784,9 @@ unlock:
put_page(page);
}
+ if (si)
+ put_swap_device(si);
+
return error;
}
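
The three hunks above close a race with swapoff: the swap entry read out of the mapping is only safe to use while the backing swap device is pinned. In isolation the pattern is roughly the sketch below; the function name is made up for illustration and error handling is trimmed to the essentials.

	/* Sketch: stabilise a swap entry against a concurrent swapoff. */
	static int example_swapin_stable(swp_entry_t swap, struct page **pagep)
	{
		struct swap_info_struct *si;
		struct page *page;

		si = get_swap_device(swap);	/* NULL if swapoff already won */
		if (!si)
			return -EINVAL;

		/* Safe window: the device and its swap cache cannot go away. */
		page = lookup_swap_cache(swap, NULL, 0);

		put_swap_device(si);		/* let swapoff proceed again */
		*pagep = page;
		return page ? 0 : -ENOENT;
	}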
@@ -1785,7 +1797,7 @@ unlock:
* vm. If we swap it in we mark it dirty since we also free the swap
* entry since a page cannot live in both the swap and page cache.
*
- * vmf and fault_type are only supplied by shmem_fault:
+ * vma, vmf, and fault_type are only supplied by shmem_fault:
* otherwise they are NULL.
*/
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
@@ -1816,10 +1828,20 @@ repeat:
}
sbinfo = SHMEM_SB(inode->i_sb);
- charge_mm = vma ? vma->vm_mm : current->mm;
+ charge_mm = vma ? vma->vm_mm : NULL;
page = pagecache_get_page(mapping, index,
FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
+
+ if (page && vma && userfaultfd_minor(vma)) {
+ if (!xa_is_value(page)) {
+ unlock_page(page);
+ put_page(page);
+ }
+ *fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
+ return 0;
+ }
+
if (xa_is_value(page)) {
error = shmem_swapin_page(inode, index, &page,
sgp, gfp, vma, fault_type);
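
The userfaultfd_minor() branch above is what gives UFFDIO_REGISTER_MODE_MINOR meaning on shmem: the page cache is already populated, but the first touch still traps to the monitor, which resolves the fault with UFFDIO_CONTINUE once it is satisfied with the contents. A compressed userspace sketch of that flow follows; the uffd is assumed to have been opened and handshaken with UFFD_FEATURE_MINOR_SHMEM already, and the helper name is illustrative.

	#include <stddef.h>
	#include <sys/ioctl.h>
	#include <linux/userfaultfd.h>

	/* Sketch: register a shmem mapping for minor faults, then resolve one. */
	static void minor_fault_demo(int uffd, void *addr, size_t len,
				     unsigned long fault_addr, long page_size)
	{
		struct uffdio_register reg = {
			.range = { .start = (unsigned long)addr, .len = len },
			.mode  = UFFDIO_REGISTER_MODE_MINOR,
		};
		struct uffdio_continue cont = {
			.range = { .start = fault_addr & ~(page_size - 1),
				   .len   = page_size },
		};

		ioctl(uffd, UFFDIO_REGISTER, &reg);

		/* ... read a UFFD_EVENT_PAGEFAULT carrying
		 * UFFD_PAGEFAULT_FLAG_MINOR, check the page cache holds the
		 * right data, then ask the kernel to map the existing page: */
		ioctl(uffd, UFFDIO_CONTINUE, &cont);
	}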
@@ -2227,7 +2249,7 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
}
#endif
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
+int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
{
struct inode *inode = file_inode(file);
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -2239,13 +2261,13 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
* no serialization needed when called from shm_destroy().
*/
if (lock && !(info->flags & VM_LOCKED)) {
- if (!user_shm_lock(inode->i_size, user))
+ if (!user_shm_lock(inode->i_size, ucounts))
goto out_nomem;
info->flags |= VM_LOCKED;
mapping_set_unevictable(file->f_mapping);
}
- if (!lock && (info->flags & VM_LOCKED) && user) {
- user_shm_unlock(inode->i_size, user);
+ if (!lock && (info->flags & VM_LOCKED) && ucounts) {
+ user_shm_unlock(inode->i_size, ucounts);
info->flags &= ~VM_LOCKED;
mapping_clear_unevictable(file->f_mapping);
}
@@ -2258,25 +2280,11 @@ out_nomem:
static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{
struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+ int ret;
- if (info->seals & F_SEAL_FUTURE_WRITE) {
- /*
- * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
- * "future write" seal active.
- */
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
- return -EPERM;
-
- /*
- * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
- * MAP_SHARED and read-only, take care to not allow mprotect to
- * revert protections on such mappings. Do this only for shared
- * mappings. For private mappings, don't need to mask
- * VM_MAYWRITE as we still want them to be COW-writable.
- */
- if (vma->vm_flags & VM_SHARED)
- vma->vm_flags &= ~(VM_MAYWRITE);
- }
+ ret = seal_check_future_write(info->seals, vma);
+ if (ret)
+ return ret;
/* arm64 - allow memory tagging on RAM-based files */
vma->vm_flags |= VM_MTE_ALLOWED;
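
The open-coded F_SEAL_FUTURE_WRITE handling moves into the common seal_check_future_write() helper with no intended change in behaviour: writable MAP_SHARED mappings of a sealed memfd are refused, read-only shared mappings lose VM_MAYWRITE, and private mappings stay CoW-writable. From userspace that still looks roughly like this:

	#define _GNU_SOURCE
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#ifndef F_SEAL_FUTURE_WRITE
	#define F_SEAL_FUTURE_WRITE 0x0010	/* older libc headers lack this */
	#endif

	int main(void)
	{
		int fd = memfd_create("sealed", MFD_CLOEXEC | MFD_ALLOW_SEALING);
		void *p;

		ftruncate(fd, 4096);
		fcntl(fd, F_ADD_SEALS, F_SEAL_FUTURE_WRITE);

		/* Rejected by the seal check: writable shared mapping. */
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		printf("rw shared: %s\n", p == MAP_FAILED && errno == EPERM ?
		       "EPERM (expected)" : "unexpected");

		/* Still allowed: read-only shared, or private (CoW) mappings. */
		p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
		return p == MAP_FAILED;
	}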
@@ -2354,36 +2362,45 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
return inode;
}
-static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
- pmd_t *dst_pmd,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- unsigned long src_addr,
- bool zeropage,
- struct page **pagep)
+#ifdef CONFIG_USERFAULTFD
+int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
+ pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ bool zeropage,
+ struct page **pagep)
{
struct inode *inode = file_inode(dst_vma->vm_file);
struct shmem_inode_info *info = SHMEM_I(inode);
struct address_space *mapping = inode->i_mapping;
gfp_t gfp = mapping_gfp_mask(mapping);
pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
- spinlock_t *ptl;
void *page_kaddr;
struct page *page;
- pte_t _dst_pte, *dst_pte;
int ret;
- pgoff_t offset, max_off;
+ pgoff_t max_off;
- ret = -ENOMEM;
- if (!shmem_inode_acct_block(inode, 1))
- goto out;
+ if (!shmem_inode_acct_block(inode, 1)) {
+ /*
+ * We may have got a page, returned -ENOENT triggering a retry,
+ * and now we find ourselves with -ENOMEM. Release the page, to
+ * avoid a BUG_ON in our caller.
+ */
+ if (unlikely(*pagep)) {
+ put_page(*pagep);
+ *pagep = NULL;
+ }
+ return -ENOMEM;
+ }
if (!*pagep) {
+ ret = -ENOMEM;
page = shmem_alloc_page(gfp, info, pgoff);
if (!page)
goto out_unacct_blocks;
- if (!zeropage) { /* mcopy_atomic */
+ if (!zeropage) { /* COPY */
page_kaddr = kmap_atomic(page);
ret = copy_from_user(page_kaddr,
(const void __user *)src_addr,
@@ -2393,11 +2410,11 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
*pagep = page;
- shmem_inode_unacct_blocks(inode, 1);
+ ret = -ENOENT;
/* don't free the page */
- return -ENOENT;
+ goto out_unacct_blocks;
}
- } else { /* mfill_zeropage_atomic */
+ } else { /* ZEROPAGE */
clear_highpage(page);
}
} else {
@@ -2405,15 +2422,15 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
*pagep = NULL;
}
- VM_BUG_ON(PageLocked(page) || PageSwapBacked(page));
+ VM_BUG_ON(PageLocked(page));
+ VM_BUG_ON(PageSwapBacked(page));
__SetPageLocked(page);
__SetPageSwapBacked(page);
__SetPageUptodate(page);
ret = -EFAULT;
- offset = linear_page_index(dst_vma, dst_addr);
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
- if (unlikely(offset >= max_off))
+ if (unlikely(pgoff >= max_off))
goto out_release;
ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL,
@@ -2421,32 +2438,10 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
if (ret)
goto out_release;
- _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
- if (dst_vma->vm_flags & VM_WRITE)
- _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
- else {
- /*
- * We don't set the pte dirty if the vma has no
- * VM_WRITE permission, so mark the page dirty or it
- * could be freed from under us. We could do it
- * unconditionally before unlock_page(), but doing it
- * only if VM_WRITE is not set is faster.
- */
- set_page_dirty(page);
- }
-
- dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
-
- ret = -EFAULT;
- max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
- if (unlikely(offset >= max_off))
- goto out_release_unlock;
-
- ret = -EEXIST;
- if (!pte_none(*dst_pte))
- goto out_release_unlock;
-
- lru_cache_add(page);
+ ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
+ page, true, false);
+ if (ret)
+ goto out_delete_from_cache;
spin_lock_irq(&info->lock);
info->alloced++;
@@ -2454,50 +2449,19 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
- inc_mm_counter(dst_mm, mm_counter_file(page));
- page_add_file_rmap(page, false);
- set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
-
- /* No need to invalidate - it was non-present before */
- update_mmu_cache(dst_vma, dst_addr, dst_pte);
- pte_unmap_unlock(dst_pte, ptl);
+ SetPageDirty(page);
unlock_page(page);
- ret = 0;
-out:
- return ret;
-out_release_unlock:
- pte_unmap_unlock(dst_pte, ptl);
- ClearPageDirty(page);
+ return 0;
+out_delete_from_cache:
delete_from_page_cache(page);
out_release:
unlock_page(page);
put_page(page);
out_unacct_blocks:
shmem_inode_unacct_blocks(inode, 1);
- goto out;
-}
-
-int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
- pmd_t *dst_pmd,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- unsigned long src_addr,
- struct page **pagep)
-{
- return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
- dst_addr, src_addr, false, pagep);
-}
-
-int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
- pmd_t *dst_pmd,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr)
-{
- struct page *page = NULL;
-
- return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
- dst_addr, 0, true, &page);
+ return ret;
}
+#endif /* CONFIG_USERFAULTFD */
#ifdef CONFIG_TMPFS
static const struct inode_operations shmem_symlink_inode_operations;
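
The reworked error paths keep the long-standing retry contract with mm/userfaultfd.c: if copy_from_user() faults while mmap_lock is held, -ENOENT comes back with *pagep still set, the caller redoes the copy without the lock and calls in again with the same page; any other error must not leave a page behind. A simplified sketch of that caller-side loop (not verbatim; labels and locals belong to the surrounding function):

	err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				     src_addr, zeropage, &page);
	if (err == -ENOENT) {
		/* The page was allocated but the copy faulted under mmap_lock. */
		mmap_read_unlock(dst_mm);
		BUG_ON(!page);

		page_kaddr = kmap(page);
		err = copy_from_user(page_kaddr,
				     (const void __user *)src_addr, PAGE_SIZE);
		kunmap(page);
		if (err)
			err = -EFAULT;
		else
			goto retry;	/* re-take mmap_lock, revalidate, retry */
	} else {
		/* On any other error the callee must not leave *pagep set;
		 * this is the caller-side BUG_ON that the new comment about
		 * "-ENOENT then -ENOMEM" is avoiding. */
		BUG_ON(page);
	}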
@@ -4032,8 +3996,7 @@ bool shmem_huge_enabled(struct vm_area_struct *vma)
loff_t i_size;
pgoff_t off;
- if ((vma->vm_flags & VM_NOHUGEPAGE) ||
- test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+ if (!transhuge_vma_enabled(vma, vma->vm_flags))
return false;
if (shmem_huge == SHMEM_HUGE_FORCE)
return true;
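
transhuge_vma_enabled() folds the two open-coded conditions (madvise(MADV_NOHUGEPAGE) on the VMA, prctl(PR_SET_THP_DISABLE) on the mm) into one helper; its shape is roughly:

	static inline bool transhuge_vma_enabled(struct vm_area_struct *vma,
						 unsigned long vm_flags)
	{
		/* Explicitly disabled through madvise or prctl. */
		if ((vm_flags & VM_NOHUGEPAGE) ||
		    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
			return false;
		return true;
	}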
@@ -4096,7 +4059,7 @@ int shmem_unuse(unsigned int type, bool frontswap,
return 0;
}
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
+int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
{
return 0;
}