summaryrefslogtreecommitdiff
path: root/mm/gup.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/gup.c')
-rw-r--r--mm/gup.c131
1 files changed, 114 insertions, 17 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 3ded6a5f26b2..728d996767cb 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -44,6 +44,23 @@ static void hpage_pincount_sub(struct page *page, int refs)
atomic_sub(refs, compound_pincount_ptr(page));
}
+/* Equivalent to calling put_page() @refs times. */
+static void put_page_refs(struct page *page, int refs)
+{
+#ifdef CONFIG_DEBUG_VM
+ if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page))
+ return;
+#endif
+
+ /*
+ * Calling put_page() for each ref is unnecessarily slow. Only the last
+ * ref needs a put_page().
+ */
+ if (refs > 1)
+ page_ref_sub(page, refs - 1);
+ put_page(page);
+}
+
/*
* Return the compound head page with ref appropriately incremented,
* or NULL if that failed.
@@ -56,6 +73,21 @@ static inline struct page *try_get_compound_head(struct page *page, int refs)
return NULL;
if (unlikely(!page_cache_add_speculative(head, refs)))
return NULL;
+
+ /*
+ * At this point we have a stable reference to the head page; but it
+ * could be that between the compound_head() lookup and the refcount
+ * increment, the compound page was split, in which case we'd end up
+ * holding a reference on a page that has nothing to do with the page
+ * we were given anymore.
+ * So now that the head page is stable, recheck that the pages still
+ * belong together.
+ */
+ if (unlikely(compound_head(page) != head)) {
+ put_page_refs(head, refs);
+ return NULL;
+ }
+
return head;
}
@@ -96,6 +128,14 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
return NULL;
/*
+ * CAUTION: Don't use compound_head() on the page before this
+ * point, the result won't be stable.
+ */
+ page = try_get_compound_head(page, refs);
+ if (!page)
+ return NULL;
+
+ /*
* When pinning a compound page of order > 1 (which is what
* hpage_pincount_available() checks for), use an exact count to
* track it, via hpage_pincount_add/_sub().
@@ -103,15 +143,10 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
* However, be sure to *also* increment the normal page refcount
* field at least once, so that the page really is pinned.
*/
- if (!hpage_pincount_available(page))
- refs *= GUP_PIN_COUNTING_BIAS;
-
- page = try_get_compound_head(page, refs);
- if (!page)
- return NULL;
-
if (hpage_pincount_available(page))
hpage_pincount_add(page, refs);
+ else
+ page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));
mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
orig_refs);
@@ -135,14 +170,7 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
refs *= GUP_PIN_COUNTING_BIAS;
}
- VM_BUG_ON_PAGE(page_ref_count(page) < refs, page);
- /*
- * Calling put_page() for each ref is unnecessarily slow. Only the last
- * ref needs a put_page().
- */
- if (refs > 1)
- page_ref_sub(page, refs - 1);
- put_page(page);
+ put_page_refs(page, refs);
}
/**
@@ -392,6 +420,17 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
}
EXPORT_SYMBOL(unpin_user_pages);
+/*
+ * Set the MMF_HAS_PINNED if not set yet; after set it'll be there for the mm's
+ * lifecycle. Avoid setting the bit unless necessary, or it might cause write
+ * cache bouncing on large SMP machines for concurrent pinned gups.
+ */
+static inline void mm_set_has_pinned_flag(unsigned long *mm_flags)
+{
+ if (!test_bit(MMF_HAS_PINNED, mm_flags))
+ set_bit(MMF_HAS_PINNED, mm_flags);
+}
+
#ifdef CONFIG_MMU
static struct page *no_page_table(struct vm_area_struct *vma,
unsigned int flags)
@@ -1293,7 +1332,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
}
if (flags & FOLL_PIN)
- atomic_set(&mm->has_pinned, 1);
+ mm_set_has_pinned_flag(&mm->flags);
/*
* FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior
@@ -1462,6 +1501,64 @@ long populate_vma_page_range(struct vm_area_struct *vma,
}
/*
+ * faultin_vma_page_range() - populate (prefault) page tables inside the
+ * given VMA range readable/writable
+ *
+ * This takes care of mlocking the pages, too, if VM_LOCKED is set.
+ *
+ * @vma: target vma
+ * @start: start address
+ * @end: end address
+ * @write: whether to prefault readable or writable
+ * @locked: whether the mmap_lock is still held
+ *
+ * Returns either number of processed pages in the vma, or a negative error
+ * code on error (see __get_user_pages()).
+ *
+ * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
+ * covered by the VMA.
+ *
+ * If @locked is NULL, it may be held for read or write and will be unperturbed.
+ *
+ * If @locked is non-NULL, it must held for read only and may be released. If
+ * it's released, *@locked will be set to 0.
+ */
+long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, bool write, int *locked)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long nr_pages = (end - start) / PAGE_SIZE;
+ int gup_flags;
+
+ VM_BUG_ON(!PAGE_ALIGNED(start));
+ VM_BUG_ON(!PAGE_ALIGNED(end));
+ VM_BUG_ON_VMA(start < vma->vm_start, vma);
+ VM_BUG_ON_VMA(end > vma->vm_end, vma);
+ mmap_assert_locked(mm);
+
+ /*
+ * FOLL_TOUCH: Mark page accessed and thereby young; will also mark
+ * the page dirty with FOLL_WRITE -- which doesn't make a
+ * difference with !FOLL_FORCE, because the page is writable
+ * in the page table.
+ * FOLL_HWPOISON: Return -EHWPOISON instead of -EFAULT when we hit
+ * a poisoned page.
+ * FOLL_POPULATE: Always populate memory with VM_LOCKONFAULT.
+ * !FOLL_FORCE: Require proper access permissions.
+ */
+ gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK | FOLL_HWPOISON;
+ if (write)
+ gup_flags |= FOLL_WRITE;
+
+ /*
+ * See check_vma_flags(): Will return -EFAULT on incompatible mappings
+ * or with insufficient permissions.
+ */
+ return __get_user_pages(mm, start, nr_pages, gup_flags,
+ NULL, NULL, locked);
+}
+
+/*
* __mm_populate - populate and/or mlock pages within a range of address space.
*
* This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
@@ -2614,7 +2711,7 @@ static int internal_get_user_pages_fast(unsigned long start,
return -EINVAL;
if (gup_flags & FOLL_PIN)
- atomic_set(&current->mm->has_pinned, 1);
+ mm_set_has_pinned_flag(&current->mm->flags);
if (!(gup_flags & FOLL_FAST_ONLY))
might_lock_read(&current->mm->mmap_lock);