summary | refs | log | tree | commit | diff
path: root/mm
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-11-02 22:40:55 +0300
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2018-11-10 18:39:21 +0300
commit: 0f1490a7573919a27dfc370c29a87caf142db993 (patch)
tree: 5121f3c2b39b0019e54564fab8657519c2247932 /mm
parent: 01393bd29dfb2f8060e33ba599bd76eebe5f45b3 (diff)
download: linux-0f1490a7573919a27dfc370c29a87caf142db993.tar.xz
mremap: properly flush TLB before releasing the page
Commit eb66ae030829605d61fbef1909ce310e29f78821 upstream.

This is a backport to stable 3.18.y, based on Will Deacon's 4.4.y backport.

Jann Horn points out that our TLB flushing was subtly wrong for the mremap() case. What makes mremap() special is that we don't follow the usual "add page to list of pages to be freed, then flush tlb, and then free pages". No, mremap() obviously just _moves_ the page from one page table location to another.

That matters, because mremap() thus doesn't directly control the lifetime of the moved page with a freelist: instead, the lifetime of the page is controlled by the page table locking, that serializes access to the entry.

As a result, we need to flush the TLB not just before releasing the lock for the source location (to avoid any concurrent accesses to the entry), but also before we release the destination page table lock (to avoid the TLB being flushed after somebody else has already done something to that page).

This also makes the whole "need_flush" logic unnecessary, since we now always end up flushing the TLB for every valid entry.

Reported-and-tested-by: Jann Horn <jannh@google.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Tested-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[will: backport to 4.4 stable]
Signed-off-by: Will Deacon <will.deacon@arm.com>
[ghackmann@google.com: adjust context]
Signed-off-by: Greg Hackmann <ghackmann@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c   6
-rw-r--r--  mm/mremap.c       21
2 files changed, 21 insertions, 6 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bc402f39ac48..977437ee859b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1462,7 +1462,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
spinlock_t *old_ptl, *new_ptl;
int ret = 0;
pmd_t pmd;
-
+ bool force_flush = false;
struct mm_struct *mm = vma->vm_mm;
if ((old_addr & ~HPAGE_PMD_MASK) ||
@@ -1490,6 +1490,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
+ if (pmd_present(pmd))
+ force_flush = true;
VM_BUG_ON(!pmd_none(*new_pmd));
if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
@@ -1498,6 +1500,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
}
set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
+ if (force_flush)
+ flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
spin_unlock(old_ptl);
diff --git a/mm/mremap.c b/mm/mremap.c
index b147f66f4c40..62199110babc 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -97,6 +97,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
spinlock_t *old_ptl, *new_ptl;
+ bool force_flush = false;
+ unsigned long len = old_end - old_addr;
/*
* When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
@@ -143,12 +145,26 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
if (pte_none(*old_pte))
continue;
pte = ptep_get_and_clear(mm, old_addr, old_pte);
+ /*
+ * If we are remapping a valid PTE, make sure
+ * to flush TLB before we drop the PTL for the PTE.
+ *
+ * NOTE! Both old and new PTL matter: the old one
+ * for racing with page_mkclean(), the new one to
+ * make sure the physical page stays valid until
+ * the TLB entry for the old mapping has been
+ * flushed.
+ */
+ if (pte_present(pte))
+ force_flush = true;
pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
set_pte_at(mm, new_addr, new_pte, pte);
}
arch_leave_lazy_mmu_mode();
+ if (force_flush)
+ flush_tlb_range(vma, old_end - len, old_end);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
pte_unmap(new_pte - 1);
@@ -168,7 +184,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
{
unsigned long extent, next, old_end;
pmd_t *old_pmd, *new_pmd;
- bool need_flush = false;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -207,7 +222,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
anon_vma_unlock_write(vma->anon_vma);
}
if (err > 0) {
- need_flush = true;
continue;
} else if (!err) {
split_huge_page_pmd(vma, old_addr, old_pmd);
@@ -224,10 +238,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
extent = LATENCY_LIMIT;
move_ptes(vma, old_pmd, old_addr, old_addr + extent,
new_vma, new_pmd, new_addr, need_rmap_locks);
- need_flush = true;
}
- if (likely(need_flush))
- flush_tlb_range(vma, old_end-len, old_addr);
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);