diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 92 |
1 files changed, 66 insertions, 26 deletions
diff --git a/mm/memory.c b/mm/memory.c index c32318dc11d4..550405fc3b5e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -166,7 +166,7 @@ static int __init init_zero_pfn(void) zero_pfn = page_to_pfn(ZERO_PAGE(0)); return 0; } -core_initcall(init_zero_pfn); +early_initcall(init_zero_pfn); void mm_trace_rss_stat(struct mm_struct *mm, int member, long count) { @@ -809,12 +809,8 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss, struct page **prealloc, pte_t pte, struct page *page) { - struct mm_struct *src_mm = src_vma->vm_mm; struct page *new_page; - if (!is_cow_mapping(src_vma->vm_flags)) - return 1; - /* * What we want to do is to check whether this page may * have been pinned by the parent process. If so, @@ -828,9 +824,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma * the page count. That might give false positives for * for pinning, but it will work correctly. */ - if (likely(!atomic_read(&src_mm->has_pinned))) - return 1; - if (likely(!page_maybe_dma_pinned(page))) + if (likely(!page_needs_cow_for_dma(src_vma, page))) return 1; new_page = *prealloc; @@ -2177,11 +2171,11 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { - pte_t *pte; + pte_t *pte, *mapped_pte; spinlock_t *ptl; int err = 0; - pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); + mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; arch_enter_lazy_mmu_mode(); @@ -2195,7 +2189,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - pte_unmap_unlock(pte - 1, ptl); + pte_unmap_unlock(mapped_pte, ptl); return err; } @@ -2394,18 +2388,18 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, pte_fn_t fn, void *data, bool create, pgtbl_mod_mask *mask) { - pte_t *pte; + pte_t *pte, *mapped_pte; int err = 0; spinlock_t *ptl; if (create) { - pte = (mm == &init_mm) ? + mapped_pte = pte = (mm == &init_mm) ? pte_alloc_kernel_track(pmd, addr, mask) : pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; } else { - pte = (mm == &init_mm) ? + mapped_pte = pte = (mm == &init_mm) ? pte_offset_kernel(pmd, addr) : pte_offset_map_lock(mm, pmd, addr, &ptl); } @@ -2428,7 +2422,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, arch_leave_lazy_mmu_mode(); if (mm != &init_mm) - pte_unmap_unlock(pte-1, ptl); + pte_unmap_unlock(mapped_pte, ptl); return err; } @@ -2902,7 +2896,6 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); - entry = pte_sw_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* @@ -3104,6 +3097,14 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) return handle_userfault(vmf, VM_UFFD_WP); } + /* + * Userfaultfd write-protect can defer flushes. Ensure the TLB + * is flushed in this case before copying. + */ + if (unlikely(userfaultfd_wp(vmf->vma) && + mm_tlb_flush_pending(vmf->vma->vm_mm))) + flush_tlb_page(vmf->vma, vmf->address); + vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); if (!vmf->page) { /* @@ -3560,7 +3561,6 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) __SetPageUptodate(page); entry = mk_pte(page, vma->vm_page_prot); - entry = pte_sw_mkyoung(entry); if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); @@ -3745,8 +3745,6 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr) if (prefault && arch_wants_old_prefaulted_pte()) entry = pte_mkold(entry); - else - entry = pte_sw_mkyoung(entry); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); @@ -4798,28 +4796,68 @@ out: return ret; } +/** + * generic_access_phys - generic implementation for iomem mmap access + * @vma: the vma to access + * @addr: userspace addres, not relative offset within @vma + * @buf: buffer to read/write + * @len: length of transfer + * @write: set to FOLL_WRITE when writing, otherwise reading + * + * This is a generic implementation for &vm_operations_struct.access for an + * iomem mapping. This callback is used by access_process_vm() when the @vma is + * not page based. + */ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { resource_size_t phys_addr; unsigned long prot = 0; void __iomem *maddr; - int offset = addr & (PAGE_SIZE-1); + pte_t *ptep, pte; + spinlock_t *ptl; + int offset = offset_in_page(addr); + int ret = -EINVAL; - if (follow_phys(vma, addr, write, &prot, &phys_addr)) + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + return -EINVAL; + +retry: + if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) + return -EINVAL; + pte = *ptep; + pte_unmap_unlock(ptep, ptl); + + prot = pgprot_val(pte_pgprot(pte)); + phys_addr = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; + + if ((write & FOLL_WRITE) && !pte_write(pte)) return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); if (!maddr) return -ENOMEM; + if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) + goto out_unmap; + + if (!pte_same(pte, *ptep)) { + pte_unmap_unlock(ptep, ptl); + iounmap(maddr); + + goto retry; + } + if (write) memcpy_toio(maddr + offset, buf, len); else memcpy_fromio(buf, maddr + offset, len); + ret = len; + pte_unmap_unlock(ptep, ptl); +out_unmap: iounmap(maddr); - return len; + return ret; } EXPORT_SYMBOL_GPL(generic_access_phys); #endif @@ -5137,17 +5175,19 @@ long copy_huge_page_from_user(struct page *dst_page, void *page_kaddr; unsigned long i, rc = 0; unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; + struct page *subpage = dst_page; - for (i = 0; i < pages_per_huge_page; i++) { + for (i = 0; i < pages_per_huge_page; + i++, subpage = mem_map_next(subpage, dst_page, i)) { if (allow_pagefault) - page_kaddr = kmap(dst_page + i); + page_kaddr = kmap(subpage); else - page_kaddr = kmap_atomic(dst_page + i); + page_kaddr = kmap_atomic(subpage); rc = copy_from_user(page_kaddr, (const void __user *)(src + i * PAGE_SIZE), PAGE_SIZE); if (allow_pagefault) - kunmap(dst_page + i); + kunmap(subpage); else kunmap_atomic(page_kaddr); |