summaryrefslogtreecommitdiff
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c93
1 files changed, 63 insertions, 30 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 30991f83d0bf..81dca0083fcd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -65,6 +65,7 @@
#include <linux/userfaultfd_k.h>
#include <asm/io.h>
+#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
@@ -562,8 +563,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
}
-int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
- pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
spinlock_t *ptl;
pgtable_t new = pte_alloc_one(mm, address);
@@ -661,9 +661,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
return;
}
if (nr_unshown) {
- printk(KERN_ALERT
- "BUG: Bad page map: %lu messages suppressed\n",
- nr_unshown);
+ pr_alert("BUG: Bad page map: %lu messages suppressed\n",
+ nr_unshown);
nr_unshown = 0;
}
nr_shown = 0;
@@ -674,15 +673,13 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
index = linear_page_index(vma, addr);
- printk(KERN_ALERT
- "BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n",
- current->comm,
- (long long)pte_val(pte), (long long)pmd_val(*pmd));
+ pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n",
+ current->comm,
+ (long long)pte_val(pte), (long long)pmd_val(*pmd));
if (page)
dump_page(page, "bad pte");
- printk(KERN_ALERT
- "addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
- (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
+ pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
+ (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
/*
* Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y
*/
@@ -1551,8 +1548,29 @@ out:
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn)
{
+ return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_pfn);
+
+/**
+ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ * @pgprot: pgprot flags for the inserted page
+ *
+ * This is exactly like vm_insert_pfn, except that it allows drivers to
+ * to override pgprot on a per-page basis.
+ *
+ * This only makes sense for IO mappings, and it makes no sense for
+ * cow mappings. In general, using multiple vmas is preferable;
+ * vm_insert_pfn_prot should only be used if using multiple VMAs is
+ * impractical.
+ */
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, pgprot_t pgprot)
+{
int ret;
- pgprot_t pgprot = vma->vm_page_prot;
/*
* Technically, architectures with pte_special can avoid all these
* restrictions (same for remap_pfn_range). However we would like
@@ -1574,7 +1592,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
return ret;
}
-EXPORT_SYMBOL(vm_insert_pfn);
+EXPORT_SYMBOL(vm_insert_pfn_prot);
int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn)
@@ -1591,10 +1609,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
* than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
* without pte special, it would there be refcounted as a normal page.
*/
- if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
+ if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
struct page *page;
- page = pfn_t_to_page(pfn);
+ /*
+ * At this point we are committed to insert_page()
+ * regardless of whether the caller specified flags that
+ * result in pfn_t_has_page() == false.
+ */
+ page = pfn_to_page(pfn_t_to_pfn(pfn));
return insert_page(vma, addr, page, vma->vm_page_prot);
}
return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
@@ -1871,7 +1894,9 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
unsigned long end = addr + size;
int err;
- BUG_ON(addr >= end);
+ if (WARN_ON(addr >= end))
+ return -EINVAL;
+
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
@@ -2232,11 +2257,6 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
page_cache_get(old_page);
- /*
- * Only catch write-faults on shared writable pages,
- * read-only shared pages can get COWed by
- * get_user_pages(.write=1, .force=1).
- */
if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
int tmp;
@@ -3122,8 +3142,7 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
{
- pgoff_t pgoff = (((address & PAGE_MASK)
- - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ pgoff_t pgoff = linear_page_index(vma, address);
pte_unmap(page_table);
/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
@@ -3357,6 +3376,11 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pmd_t *pmd;
pte_t *pte;
+ if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+ flags & FAULT_FLAG_INSTRUCTION,
+ flags & FAULT_FLAG_REMOTE))
+ return VM_FAULT_SIGSEGV;
+
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, flags);
@@ -3397,15 +3421,24 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
/*
- * Use __pte_alloc instead of pte_alloc_map, because we can't
+ * Use pte_alloc() instead of pte_alloc_map, because we can't
* run pte_offset_map on the pmd, if an huge pmd could
* materialize from under us from a different thread.
*/
- if (unlikely(pmd_none(*pmd)) &&
- unlikely(__pte_alloc(mm, vma, pmd, address)))
+ if (unlikely(pte_alloc(mm, pmd, address)))
return VM_FAULT_OOM;
- /* if an huge pmd materialized from under us just retry later */
- if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
+ /*
+ * If a huge pmd materialized under us just retry later. Use
+ * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
+ * didn't become pmd_trans_huge under us and then back to pmd_none, as
+ * a result of MADV_DONTNEED running immediately after a huge pmd fault
+ * in a different thread of this mm, in turn leading to a misleading
+ * pmd_trans_huge() retval. All we have to ensure is that it is a
+ * regular pmd that we can walk with pte_offset_map() and we can do that
+ * through an atomic read in C, which is what pmd_trans_unstable()
+ * provides.
+ */
+ if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd)))
return 0;
/*
* A regular pmd is established and it can't morph into a huge pmd
@@ -3664,7 +3697,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
void *maddr;
struct page *page = NULL;
- ret = get_user_pages(tsk, mm, addr, 1,
+ ret = get_user_pages_remote(tsk, mm, addr, 1,
write, 1, &page, &vma);
if (ret <= 0) {
#ifndef CONFIG_HAVE_IOREMAP_PROT