summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig3
-rw-r--r--mm/gup.c57
-rw-r--r--mm/highmem.c272
-rw-r--r--mm/mprotect.c7
4 files changed, 268 insertions, 71 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 5829d31649b9..cf04bc3c866c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -874,4 +874,7 @@ config ARCH_HAS_HUGEPD
config MAPPING_DIRTY_HELPERS
bool
+config KMAP_LOCAL
+ bool
+
endmenu
diff --git a/mm/gup.c b/mm/gup.c
index b3d852b4a60c..e4c224cd9661 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1977,61 +1977,6 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* This code is based heavily on the PowerPC implementation by Nick Piggin.
*/
#ifdef CONFIG_HAVE_FAST_GUP
-#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
-
-/*
- * WARNING: only to be used in the get_user_pages_fast() implementation.
- *
- * With get_user_pages_fast(), we walk down the pagetables without taking any
- * locks. For this we would like to load the pointers atomically, but sometimes
- * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
- * we do have is the guarantee that a PTE will only either go from not present
- * to present, or present to not present or both -- it will not switch to a
- * completely different present page without a TLB flush in between; something
- * that we are blocking by holding interrupts off.
- *
- * Setting ptes from not present to present goes:
- *
- * ptep->pte_high = h;
- * smp_wmb();
- * ptep->pte_low = l;
- *
- * And present to not present goes:
- *
- * ptep->pte_low = 0;
- * smp_wmb();
- * ptep->pte_high = 0;
- *
- * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
- * We load pte_high *after* loading pte_low, which ensures we don't see an older
- * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't
- * picked up a changed pte high. We might have gotten rubbish values from
- * pte_low and pte_high, but we are guaranteed that pte_low will not have the
- * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
- * operates on present ptes we're safe.
- */
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
- pte_t pte;
-
- do {
- pte.pte_low = ptep->pte_low;
- smp_rmb();
- pte.pte_high = ptep->pte_high;
- smp_rmb();
- } while (unlikely(pte.pte_low != ptep->pte_low));
-
- return pte;
-}
-#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
-/*
- * We require that the PTE can be read atomically.
- */
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
- return ptep_get(ptep);
-}
-#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
unsigned int flags,
@@ -2058,7 +2003,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
ptem = ptep = pte_offset_map(&pmd, addr);
do {
- pte_t pte = gup_get_pte(ptep);
+ pte_t pte = ptep_get_lockless(ptep);
struct page *head, *page;
/*
diff --git a/mm/highmem.c b/mm/highmem.c
index 0ee87a9e0cbf..c3a9ea7875ef 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -31,10 +31,6 @@
#include <asm/tlbflush.h>
#include <linux/vmalloc.h>
-#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
-DEFINE_PER_CPU(int, __kmap_atomic_idx);
-#endif
-
/*
* Virtual_count is not a pure "count".
* 0 means that it is not mapped, and has not been mapped
@@ -108,9 +104,7 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
atomic_long_t _totalhigh_pages __read_mostly;
EXPORT_SYMBOL(_totalhigh_pages);
-EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
-
-unsigned int nr_free_highpages (void)
+unsigned int __nr_free_highpages (void)
{
struct zone *zone;
unsigned int pages = 0;
@@ -147,7 +141,7 @@ pte_t * pkmap_page_table;
do { spin_unlock(&kmap_lock); (void)(flags); } while (0)
#endif
-struct page *kmap_to_page(void *vaddr)
+struct page *__kmap_to_page(void *vaddr)
{
unsigned long addr = (unsigned long)vaddr;
@@ -158,7 +152,7 @@ struct page *kmap_to_page(void *vaddr)
return virt_to_page(addr);
}
-EXPORT_SYMBOL(kmap_to_page);
+EXPORT_SYMBOL(__kmap_to_page);
static void flush_all_zero_pkmaps(void)
{
@@ -200,10 +194,7 @@ static void flush_all_zero_pkmaps(void)
flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
}
-/**
- * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings
- */
-void kmap_flush_unused(void)
+void __kmap_flush_unused(void)
{
lock_kmap();
flush_all_zero_pkmaps();
@@ -367,7 +358,6 @@ void kunmap_high(struct page *page)
if (need_wakeup)
wake_up(pkmap_map_wait);
}
-
EXPORT_SYMBOL(kunmap_high);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -421,7 +411,259 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
}
EXPORT_SYMBOL(zero_user_segments);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#endif /* CONFIG_HIGHMEM */
+#endif /* CONFIG_HIGHMEM */
+
+#ifdef CONFIG_KMAP_LOCAL
+
+#include <asm/kmap_size.h>
+
+/*
+ * With DEBUG_KMAP_LOCAL the stack depth is doubled and every second
+ * slot is unused which acts as a guard page
+ */
+#ifdef CONFIG_DEBUG_KMAP_LOCAL
+# define KM_INCR 2
+#else
+# define KM_INCR 1
+#endif
+
+static inline int kmap_local_idx_push(void)
+{
+ WARN_ON_ONCE(in_irq() && !irqs_disabled());
+ current->kmap_ctrl.idx += KM_INCR;
+ BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX);
+ return current->kmap_ctrl.idx - 1;
+}
+
+static inline int kmap_local_idx(void)
+{
+ return current->kmap_ctrl.idx - 1;
+}
+
+static inline void kmap_local_idx_pop(void)
+{
+ current->kmap_ctrl.idx -= KM_INCR;
+ BUG_ON(current->kmap_ctrl.idx < 0);
+}
+
+#ifndef arch_kmap_local_post_map
+# define arch_kmap_local_post_map(vaddr, pteval) do { } while (0)
+#endif
+
+#ifndef arch_kmap_local_pre_unmap
+# define arch_kmap_local_pre_unmap(vaddr) do { } while (0)
+#endif
+
+#ifndef arch_kmap_local_post_unmap
+# define arch_kmap_local_post_unmap(vaddr) do { } while (0)
+#endif
+
+#ifndef arch_kmap_local_map_idx
+#define arch_kmap_local_map_idx(idx, pfn) kmap_local_calc_idx(idx)
+#endif
+
+#ifndef arch_kmap_local_unmap_idx
+#define arch_kmap_local_unmap_idx(idx, vaddr) kmap_local_calc_idx(idx)
+#endif
+
+#ifndef arch_kmap_local_high_get
+static inline void *arch_kmap_local_high_get(struct page *page)
+{
+ return NULL;
+}
+#endif
+
+/* Unmap a local mapping which was obtained by kmap_high_get() */
+static inline bool kmap_high_unmap_local(unsigned long vaddr)
+{
+#ifdef ARCH_NEEDS_KMAP_HIGH_GET
+ if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
+ kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
+ return true;
+ }
+#endif
+ return false;
+}
+
+static inline int kmap_local_calc_idx(int idx)
+{
+ return idx + KM_MAX_IDX * smp_processor_id();
+}
+
+static pte_t *__kmap_pte;
+
+static pte_t *kmap_get_pte(void)
+{
+ if (!__kmap_pte)
+ __kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
+ return __kmap_pte;
+}
+
+void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
+{
+ pte_t pteval, *kmap_pte = kmap_get_pte();
+ unsigned long vaddr;
+ int idx;
+
+ /*
+ * Disable migration so resulting virtual address is stable
+ * accross preemption.
+ */
+ migrate_disable();
+ preempt_disable();
+ idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn);
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ BUG_ON(!pte_none(*(kmap_pte - idx)));
+ pteval = pfn_pte(pfn, prot);
+ set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval);
+ arch_kmap_local_post_map(vaddr, pteval);
+ current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
+ preempt_enable();
+
+ return (void *)vaddr;
+}
+EXPORT_SYMBOL_GPL(__kmap_local_pfn_prot);
+
+void *__kmap_local_page_prot(struct page *page, pgprot_t prot)
+{
+ void *kmap;
+
+ /*
+ * To broaden the usage of the actual kmap_local() machinery always map
+ * pages when debugging is enabled and the architecture has no problems
+ * with alias mappings.
+ */
+ if (!IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) && !PageHighMem(page))
+ return page_address(page);
+
+ /* Try kmap_high_get() if architecture has it enabled */
+ kmap = arch_kmap_local_high_get(page);
+ if (kmap)
+ return kmap;
+
+ return __kmap_local_pfn_prot(page_to_pfn(page), prot);
+}
+EXPORT_SYMBOL(__kmap_local_page_prot);
+
+void kunmap_local_indexed(void *vaddr)
+{
+ unsigned long addr = (unsigned long) vaddr & PAGE_MASK;
+ pte_t *kmap_pte = kmap_get_pte();
+ int idx;
+
+ if (addr < __fix_to_virt(FIX_KMAP_END) ||
+ addr > __fix_to_virt(FIX_KMAP_BEGIN)) {
+ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP)) {
+ /* This _should_ never happen! See above. */
+ WARN_ON_ONCE(1);
+ return;
+ }
+ /*
+ * Handle mappings which were obtained by kmap_high_get()
+ * first as the virtual address of such mappings is below
+ * PAGE_OFFSET. Warn for all other addresses which are in
+ * the user space part of the virtual address space.
+ */
+ if (!kmap_high_unmap_local(addr))
+ WARN_ON_ONCE(addr < PAGE_OFFSET);
+ return;
+ }
+
+ preempt_disable();
+ idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr);
+ WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+
+ arch_kmap_local_pre_unmap(addr);
+ pte_clear(&init_mm, addr, kmap_pte - idx);
+ arch_kmap_local_post_unmap(addr);
+ current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
+ kmap_local_idx_pop();
+ preempt_enable();
+ migrate_enable();
+}
+EXPORT_SYMBOL(kunmap_local_indexed);
+
+/*
+ * Invoked before switch_to(). This is safe even when during or after
+ * clearing the maps an interrupt which needs a kmap_local happens because
+ * the task::kmap_ctrl.idx is not modified by the unmapping code so a
+ * nested kmap_local will use the next unused index and restore the index
+ * on unmap. The already cleared kmaps of the outgoing task are irrelevant
+ * because the interrupt context does not know about them. The same applies
+ * when scheduling back in for an interrupt which happens before the
+ * restore is complete.
+ */
+void __kmap_local_sched_out(void)
+{
+ struct task_struct *tsk = current;
+ pte_t *kmap_pte = kmap_get_pte();
+ int i;
+
+ /* Clear kmaps */
+ for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
+ pte_t pteval = tsk->kmap_ctrl.pteval[i];
+ unsigned long addr;
+ int idx;
+
+ /* With debug all even slots are unmapped and act as guard */
+ if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+ WARN_ON_ONCE(!pte_none(pteval));
+ continue;
+ }
+ if (WARN_ON_ONCE(pte_none(pteval)))
+ continue;
+
+ /*
+ * This is a horrible hack for XTENSA to calculate the
+ * coloured PTE index. Uses the PFN encoded into the pteval
+ * and the map index calculation because the actual mapped
+ * virtual address is not stored in task::kmap_ctrl.
+ * For any sane architecture this is optimized out.
+ */
+ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+
+ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ arch_kmap_local_pre_unmap(addr);
+ pte_clear(&init_mm, addr, kmap_pte - idx);
+ arch_kmap_local_post_unmap(addr);
+ }
+}
+
+void __kmap_local_sched_in(void)
+{
+ struct task_struct *tsk = current;
+ pte_t *kmap_pte = kmap_get_pte();
+ int i;
+
+ /* Restore kmaps */
+ for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
+ pte_t pteval = tsk->kmap_ctrl.pteval[i];
+ unsigned long addr;
+ int idx;
+
+ /* With debug all even slots are unmapped and act as guard */
+ if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+ WARN_ON_ONCE(!pte_none(pteval));
+ continue;
+ }
+ if (WARN_ON_ONCE(pte_none(pteval)))
+ continue;
+
+ /* See comment in __kmap_local_sched_out() */
+ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
+ arch_kmap_local_post_map(addr, pteval);
+ }
+}
+
+void kmap_local_fork(struct task_struct *tsk)
+{
+ if (WARN_ON_ONCE(tsk->kmap_ctrl.idx))
+ memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl));
+}
+
+#endif
#if defined(HASHED_PAGE_VIRTUAL)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 56c02beb6041..ab709023e9aa 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -616,9 +616,16 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
tmp = vma->vm_end;
if (tmp > end)
tmp = end;
+
+ if (vma->vm_ops && vma->vm_ops->mprotect)
+ error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags);
+ if (error)
+ goto out;
+
error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
if (error)
goto out;
+
nstart = tmp;
if (nstart < prev->vm_end)