summaryrefslogtreecommitdiff
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c207
1 files changed, 190 insertions, 17 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 482f7f357f75..f44b79998ac2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -91,6 +91,8 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
+static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
+static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static inline bool subpool_is_free(struct hugepage_subpool *spool)
{
@@ -859,7 +861,7 @@ __weak unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
* faults in a MAP_PRIVATE mapping. Only the process that called mmap()
* is guaranteed to have their future faults succeed.
*
- * With the exception of reset_vma_resv_huge_pages() which is called at fork(),
+ * With the exception of hugetlb_dup_vma_private() which is called at fork(),
* the reserve counters are updated with the hugetlb_lock held. It is safe
* to reset the VMA at fork() time as it is not in use yet and there is no
* chance of the global counters getting corrupted as a result of the values.
@@ -1006,12 +1008,20 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
return (get_vma_private_data(vma) & flag) != 0;
}
-/* Reset counters to 0 and clear all HPAGE_RESV_* flags */
-void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
+ /*
+ * Clear vm_private_data
+ * - For MAP_PRIVATE mappings, this is the reserve map which does
+ * not apply to children. Faults generated by the children are
+ * not guaranteed to succeed, even if read-only.
+ * - For shared mappings this is a per-vma semaphore that may be
+ * allocated in a subsequent call to hugetlb_vm_op_open.
+ */
+ vma->vm_private_data = (void *)0;
if (!(vma->vm_flags & VM_MAYSHARE))
- vma->vm_private_data = (void *)0;
+ return;
}
/*
@@ -1042,7 +1052,7 @@ void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
kref_put(&reservations->refs, resv_map_release);
}
- reset_vma_resv_huge_pages(vma);
+ hugetlb_dup_vma_private(vma);
}
/* Returns true if the VMA has associated reserve pages */
@@ -4623,16 +4633,21 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
resv_map_dup_hugetlb_cgroup_uncharge_info(resv);
kref_get(&resv->refs);
}
+
+ hugetlb_vma_lock_alloc(vma);
}
static void hugetlb_vm_op_close(struct vm_area_struct *vma)
{
struct hstate *h = hstate_vma(vma);
- struct resv_map *resv = vma_resv_map(vma);
+ struct resv_map *resv;
struct hugepage_subpool *spool = subpool_vma(vma);
unsigned long reserve, start, end;
long gbl_reserve;
+ hugetlb_vma_lock_free(vma);
+
+ resv = vma_resv_map(vma);
if (!resv || !is_vma_resv_set(vma, HPAGE_RESV_OWNER))
return;
@@ -6440,6 +6455,11 @@ bool hugetlb_reserve_pages(struct inode *inode,
}
/*
+ * vma specific semaphore used for pmd sharing synchronization
+ */
+ hugetlb_vma_lock_alloc(vma);
+
+ /*
* Only apply hugepage reservation if asked. At fault time, an
* attempt will be made for VM_NORESERVE to allocate a page
* without using reserves
@@ -6462,12 +6482,11 @@ bool hugetlb_reserve_pages(struct inode *inode,
resv_map = inode_resv_map(inode);
chg = region_chg(resv_map, from, to, &regions_needed);
-
} else {
/* Private mapping. */
resv_map = resv_map_alloc();
if (!resv_map)
- return false;
+ goto out_err;
chg = to - from;
@@ -6562,6 +6581,7 @@ out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
out_err:
+ hugetlb_vma_lock_free(vma);
if (!vma || vma->vm_flags & VM_MAYSHARE)
/* Only call region_abort if the region_chg succeeded but the
* region_add failed or didn't run.
@@ -6641,14 +6661,34 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
}
static bool __vma_aligned_range_pmd_shareable(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ bool check_vma_lock)
{
+#ifdef CONFIG_USERFAULTFD
+ if (uffd_disable_huge_pmd_share(vma))
+ return false;
+#endif
/*
* check on proper vm_flags and page table alignment
*/
- if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, start, end))
- return true;
- return false;
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ return false;
+ if (check_vma_lock && !vma->vm_private_data)
+ return false;
+ if (!range_in_vma(vma, start, end))
+ return false;
+ return true;
+}
+
+static bool vma_pmd_shareable(struct vm_area_struct *vma)
+{
+ unsigned long start = ALIGN(vma->vm_start, PUD_SIZE),
+ end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
+
+ if (start >= end)
+ return false;
+
+ return __vma_aligned_range_pmd_shareable(vma, start, end, false);
}
static bool vma_addr_pmd_shareable(struct vm_area_struct *vma,
@@ -6657,15 +6697,11 @@ static bool vma_addr_pmd_shareable(struct vm_area_struct *vma,
unsigned long start = addr & PUD_MASK;
unsigned long end = start + PUD_SIZE;
- return __vma_aligned_range_pmd_shareable(vma, start, end);
+ return __vma_aligned_range_pmd_shareable(vma, start, end, true);
}
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr)
{
-#ifdef CONFIG_USERFAULTFD
- if (uffd_disable_huge_pmd_share(vma))
- return false;
-#endif
return vma_addr_pmd_shareable(vma, addr);
}
@@ -6696,6 +6732,130 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
*end = ALIGN(*end, PUD_SIZE);
}
+static bool __vma_shareable_flags_pmd(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
+ vma->vm_private_data;
+}
+
+void hugetlb_vma_lock_read(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ down_read(&vma_lock->rw_sema);
+ }
+}
+
+void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ up_read(&vma_lock->rw_sema);
+ }
+}
+
+void hugetlb_vma_lock_write(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ down_write(&vma_lock->rw_sema);
+ }
+}
+
+void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ up_write(&vma_lock->rw_sema);
+ }
+}
+
+int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
+{
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ if (!__vma_shareable_flags_pmd(vma))
+ return 1;
+
+ return down_write_trylock(&vma_lock->rw_sema);
+}
+
+void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ lockdep_assert_held(&vma_lock->rw_sema);
+ }
+}
+
+void hugetlb_vma_lock_release(struct kref *kref)
+{
+ struct hugetlb_vma_lock *vma_lock = container_of(kref,
+ struct hugetlb_vma_lock, refs);
+
+ kfree(vma_lock);
+}
+
+static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
+{
+ /*
+ * Only present in sharable vmas. See comment in
+ * __unmap_hugepage_range_final about how VM_SHARED could
+ * be set without VM_MAYSHARE. As a result, we need to
+ * check if either is set in the free path.
+ */
+ if (!vma || !(vma->vm_flags & (VM_MAYSHARE | VM_SHARED)))
+ return;
+
+ if (vma->vm_private_data) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ /*
+ * vma_lock structure may or not be released, but it
+ * certainly will no longer be attached to vma so clear
+ * pointer.
+ */
+ vma_lock->vma = NULL;
+ kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
+ vma->vm_private_data = NULL;
+ }
+}
+
+static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
+{
+ struct hugetlb_vma_lock *vma_lock;
+
+ /* Only establish in (flags) sharable vmas */
+ if (!vma || !(vma->vm_flags & VM_MAYSHARE))
+ return;
+
+ /* Should never get here with non-NULL vm_private_data */
+ if (vma->vm_private_data)
+ return;
+
+ /* Check size/alignment for pmd sharing possible */
+ if (!vma_pmd_shareable(vma))
+ return;
+
+ vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL);
+ if (!vma_lock)
+ /*
+ * If we can not allocate structure, then vma can not
+ * participate in pmd sharing.
+ */
+ return;
+
+ kref_init(&vma_lock->refs);
+ init_rwsem(&vma_lock->rw_sema);
+ vma_lock->vma = vma;
+ vma->vm_private_data = vma_lock;
+}
+
/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
@@ -6782,6 +6942,19 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
}
#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+
+void hugetlb_vma_lock_release(struct kref *kref)
+{
+}
+
+static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
+{
+}
+
+static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
+{
+}
+
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pud_t *pud)
{