summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorMike Kravetz <mike.kravetz@oracle.com>2022-09-15 01:18:10 +0300
committerAndrew Morton <akpm@linux-foundation.org>2022-10-04 00:03:17 +0300
commitfa27759af4a6d7494c986c44695b13bcd6eaf46b (patch)
treed20a218f389abd1703c69b002c7d9ae3a5830407 /mm
parent40549ba8f8e0ed1f8b235979563f619e9aa34fdf (diff)
downloadlinux-fa27759af4a6d7494c986c44695b13bcd6eaf46b.tar.xz
hugetlb: clean up code checking for fault/truncation races
With the new hugetlb vma lock in place, it can also be used to handle page fault races with file truncation. The lock is taken at the beginning of the code fault path in read mode. During truncation, it is taken in write mode for each vma which has the file mapped. The file's size (i_size) is modified before taking the vma lock to unmap. How are races handled? The page fault code checks i_size early in processing after taking the vma lock. If the fault is beyond i_size, the fault is aborted. If the fault is not beyond i_size the fault will continue and a new page will be added to the file. It could be that truncation code modifies i_size after the check in fault code. That is OK, as truncation code will soon remove the page. The truncation code will wait until the fault is finished, as it must obtain the vma lock in write mode. This patch cleans up/removes late checks in the fault paths that try to back out pages racing with truncation. As noted above, we just let the truncation code remove the pages. [mike.kravetz@oracle.com: fix reserve_alloc set but not used compiler warning] Link: https://lkml.kernel.org/r/Yyj7HsJWfHDoU24U@monkey Link: https://lkml.kernel.org/r/20220914221810.95771-10-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: James Houghton <jthoughton@google.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Mina Almasry <almasrymina@google.com> Cc: Muchun Song <songmuchun@bytedance.com> Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev> Cc: Pasha Tatashin <pasha.tatashin@soleen.com> Cc: Peter Xu <peterx@redhat.com> Cc: Prakash Sangappa <prakash.sangappa@oracle.com> Cc: Sven Schnelle <svens@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/hugetlb.c24
1 files changed, 3 insertions, 21 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d78504959df7..b0e39045a7a8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5680,10 +5680,6 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
}
ptl = huge_pte_lock(h, mm, ptep);
- size = i_size_read(mapping->host) >> huge_page_shift(h);
- if (idx >= size)
- goto backout;
-
ret = 0;
/* If pte changed from under us, retry */
if (!pte_same(huge_ptep_get(ptep), old_pte))
@@ -5727,10 +5723,10 @@ out:
backout:
spin_unlock(ptl);
backout_unlocked:
- unlock_page(page);
- /* restore reserve for newly allocated pages not in page cache */
if (new_page && !new_pagecache_page)
restore_reserve_on_error(h, vma, haddr, page);
+
+ unlock_page(page);
put_page(page);
goto out;
}
@@ -6063,25 +6059,11 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ptl = huge_pte_lock(h, dst_mm, dst_pte);
/*
- * Recheck the i_size after holding PT lock to make sure not
- * to leave any page mapped (as page_mapped()) beyond the end
- * of the i_size (remove_inode_hugepages() is strict about
- * enforcing that). If we bail out here, we'll also leave a
- * page in the radix tree in the vm_shared case beyond the end
- * of the i_size, but remove_inode_hugepages() will take care
- * of it as soon as we drop the hugetlb_fault_mutex_table.
- */
- size = i_size_read(mapping->host) >> huge_page_shift(h);
- ret = -EFAULT;
- if (idx >= size)
- goto out_release_unlock;
-
- ret = -EEXIST;
- /*
* We allow to overwrite a pte marker: consider when both MISSING|WP
* registered, we firstly wr-protect a none pte which has no page cache
* page backing it, then access the page.
*/
+ ret = -EEXIST;
if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
goto out_release_unlock;