diff options
author | David S. Miller <davem@davemloft.net> | 2020-03-13 07:29:30 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-03-13 08:34:48 +0300 |
commit | 1d343579312311aa9875b34d5a921f5e2ec69f0a (patch) | |
tree | 64b636b5557d92f5266187f035dac34862db86d5 /mm | |
parent | a8eceea84a3a3504e42f6495cf462027c5d19cb0 (diff) | |
parent | 0d81a3f29c0afb18ba2b1275dcccf21e0dd4da38 (diff) | |
download | linux-1d343579312311aa9875b34d5a921f5e2ec69f0a.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Minor overlapping changes, nothing serious.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 3 | ||||
-rw-r--r-- | mm/memcontrol.c | 14 | ||||
-rw-r--r-- | mm/memory.c | 35 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 8 | ||||
-rw-r--r-- | mm/mprotect.c | 38 | ||||
-rw-r--r-- | mm/z3fold.c | 1 |
6 files changed, 73 insertions, 26 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b08b199f9a11..24ad53b4dfc0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3043,8 +3043,7 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, return; flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); - pmdval = *pvmw->pmd; - pmdp_invalidate(vma, address, pvmw->pmd); + pmdval = pmdp_invalidate(vma, address, pvmw->pmd); if (pmd_dirty(pmdval)) set_page_dirty(page); entry = make_migration_entry(page, pmd_write(pmdval)); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d09776cd6e10..2058b8da18db 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6682,19 +6682,9 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; - /* - * Socket cloning can throw us here with sk_memcg already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. - */ - if (sk->sk_memcg) { - css_get(&sk->sk_memcg->css); + /* Do not associate the sock with unrelated interrupted task's memcg. */ + if (in_interrupt()) return; - } rcu_read_lock(); memcg = mem_cgroup_from_task(current); diff --git a/mm/memory.c b/mm/memory.c index 0bccc622e482..e8bfdf0d9d1d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2257,7 +2257,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src, bool ret; void *kaddr; void __user *uaddr; - bool force_mkyoung; + bool locked = false; struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; unsigned long addr = vmf->address; @@ -2282,11 +2282,11 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. */ - force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte); - if (force_mkyoung) { + if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + locked = true; if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { /* * Other thread has already handled the fault @@ -2310,18 +2310,37 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * zeroes. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { + if (locked) + goto warn; + + /* Re-validate under PTL if the page is still mapped */ + vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + locked = true; + if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { + /* The PTE changed under us. Retry page fault. */ + ret = false; + goto pte_unlock; + } + /* - * Give a warn in case there can be some obscure - * use-case + * The same page can be mapped back since last copy attampt. + * Try to copy again under PTL. */ - WARN_ON_ONCE(1); - clear_page(kaddr); + if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { + /* + * Give a warn in case there can be some obscure + * use-case + */ +warn: + WARN_ON_ONCE(1); + clear_page(kaddr); + } } ret = true; pte_unlock: - if (force_mkyoung) + if (locked) pte_unmap_unlock(vmf->pte, vmf->ptl); kunmap_atomic(kaddr); flush_dcache_page(dst); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0a54ffac8c68..19389cdc16a5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -574,7 +574,13 @@ EXPORT_SYMBOL_GPL(restore_online_page_callback); void generic_online_page(struct page *page, unsigned int order) { - kernel_map_pages(page, 1 << order, 1); + /* + * Freeing the page with debug_pagealloc enabled will try to unmap it, + * so we should map it first. This is better than introducing a special + * case in page freeing fast path. + */ + if (debug_pagealloc_enabled_static()) + kernel_map_pages(page, 1 << order, 1); __free_pages_core(page, order); totalram_pages_add(1UL << order); #ifdef CONFIG_HIGHMEM diff --git a/mm/mprotect.c b/mm/mprotect.c index 7a8e84f86831..311c0dadf71c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -161,6 +161,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, return pages; } +/* + * Used when setting automatic NUMA hinting protection where it is + * critical that a numa hinting PMD is not confused with a bad PMD. + */ +static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) +{ + pmd_t pmdval = pmd_read_atomic(pmd); + + /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + barrier(); +#endif + + if (pmd_none(pmdval)) + return 1; + if (pmd_trans_huge(pmdval)) + return 0; + if (unlikely(pmd_bad(pmdval))) { + pmd_clear_bad(pmd); + return 1; + } + + return 0; +} + static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -178,8 +203,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, unsigned long this_pages; next = pmd_addr_end(addr, end); - if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) - && pmd_none_or_clear_bad(pmd)) + + /* + * Automatic NUMA balancing walks the tables with mmap_sem + * held for read. It's possible a parallel update to occur + * between pmd_trans_huge() and a pmd_none_or_clear_bad() + * check leading to a false positive and clearing. + * Hence, it's necessary to atomically read the PMD value + * for all the checks. + */ + if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && + pmd_none_or_clear_bad_unless_trans_huge(pmd)) goto next; /* invoke the mmu notifier if the pmd is populated */ diff --git a/mm/z3fold.c b/mm/z3fold.c index 43754d8ebce8..42f31c4b53ad 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -41,7 +41,6 @@ #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/rwlock.h> #include <linux/zpool.h> #include <linux/magic.h> |