summaryrefslogtreecommitdiff
path: root/mm/memory.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-27 06:00:28 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-27 06:00:28 +0300
commit478a1469a7d27fe6b2f85fc801ecdeb8afc836e6 (patch)
tree9b1eb10e1a0567413443281387b09d02b514b5ec /mm/memory.c
parent315227f6da389f3a560f27f7777080857278e1b4 (diff)
parent4d9a2c8746671efbb0c27d3ae28c7474597a7aad (diff)
downloadlinux-478a1469a7d27fe6b2f85fc801ecdeb8afc836e6.tar.xz
Merge tag 'dax-locking-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull DAX locking updates from Ross Zwisler: "Filesystem DAX locking for 4.7 - We use a bit in an exceptional radix tree entry as a lock bit and use it similarly to how page lock is used for normal faults. This fixes races between hole instantiation and read faults of the same index. - Filesystem DAX PMD faults are disabled, and will be re-enabled when PMD locking is implemented" * tag 'dax-locking-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: dax: Remove i_mmap_lock protection dax: Use radix tree entry lock to protect cow faults dax: New fault locking dax: Allow DAX code to replace exceptional entries dax: Define DAX lock bit for radix tree exceptional entry dax: Make huge page handling depend of CONFIG_BROKEN dax: Fix condition for filling of PMD holes
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c40
1 files changed, 18 insertions, 22 deletions
diff --git a/mm/memory.c b/mm/memory.c
index a1b93d9e4449..15322b73636b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -63,6 +63,7 @@
#include <linux/dma-debug.h>
#include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
+#include <linux/dax.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
@@ -2492,8 +2493,6 @@ void unmap_mapping_range(struct address_space *mapping,
if (details.last_index < details.first_index)
details.last_index = ULONG_MAX;
-
- /* DAX uses i_mmap_lock to serialise file truncate vs page fault */
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
@@ -2825,7 +2824,8 @@ oom:
*/
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
pgoff_t pgoff, unsigned int flags,
- struct page *cow_page, struct page **page)
+ struct page *cow_page, struct page **page,
+ void **entry)
{
struct vm_fault vmf;
int ret;
@@ -2840,8 +2840,10 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
ret = vma->vm_ops->fault(vma, &vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
- if (!vmf.page)
- goto out;
+ if (ret & VM_FAULT_DAX_LOCKED) {
+ *entry = vmf.entry;
+ return ret;
+ }
if (unlikely(PageHWPoison(vmf.page))) {
if (ret & VM_FAULT_LOCKED)
@@ -2855,7 +2857,6 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
else
VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
- out:
*page = vmf.page;
return ret;
}
@@ -3048,7 +3049,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pte_unmap_unlock(pte, ptl);
}
- ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+ ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
@@ -3071,6 +3072,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{
struct page *fault_page, *new_page;
+ void *fault_entry;
struct mem_cgroup *memcg;
spinlock_t *ptl;
pte_t *pte;
@@ -3088,26 +3090,24 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM;
}
- ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page);
+ ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page,
+ &fault_entry);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
goto uncharge_out;
- if (fault_page)
+ if (!(ret & VM_FAULT_DAX_LOCKED))
copy_user_highpage(new_page, fault_page, address, vma);
__SetPageUptodate(new_page);
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
if (unlikely(!pte_same(*pte, orig_pte))) {
pte_unmap_unlock(pte, ptl);
- if (fault_page) {
+ if (!(ret & VM_FAULT_DAX_LOCKED)) {
unlock_page(fault_page);
put_page(fault_page);
} else {
- /*
- * The fault handler has no page to lock, so it holds
- * i_mmap_lock for read to protect against truncate.
- */
- i_mmap_unlock_read(vma->vm_file->f_mapping);
+ dax_unlock_mapping_entry(vma->vm_file->f_mapping,
+ pgoff);
}
goto uncharge_out;
}
@@ -3115,15 +3115,11 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
mem_cgroup_commit_charge(new_page, memcg, false, false);
lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
- if (fault_page) {
+ if (!(ret & VM_FAULT_DAX_LOCKED)) {
unlock_page(fault_page);
put_page(fault_page);
} else {
- /*
- * The fault handler has no page to lock, so it holds
- * i_mmap_lock for read to protect against truncate.
- */
- i_mmap_unlock_read(vma->vm_file->f_mapping);
+ dax_unlock_mapping_entry(vma->vm_file->f_mapping, pgoff);
}
return ret;
uncharge_out:
@@ -3143,7 +3139,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int dirtied = 0;
int ret, tmp;
- ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+ ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;