Diffstat (limited to 'include/linux/pgtable.h')
-rw-r--r--  include/linux/pgtable.h | 123
1 file changed, 88 insertions(+), 35 deletions(-)
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 5063b482e34f..f49abcfe5eda 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -5,6 +5,9 @@
#include <linux/pfn.h>
#include <asm/pgtable.h>
+#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
+#define PUD_ORDER (PUD_SHIFT - PAGE_SHIFT)
+
#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU
@@ -63,7 +66,6 @@ static inline unsigned long pte_index(unsigned long address)
{
return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}
-#define pte_index pte_index
#ifndef pmd_index
static inline unsigned long pmd_index(unsigned long address)
@@ -99,7 +101,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address)))
#define pte_unmap(pte) do { \
kunmap_local((pte)); \
- /* rcu_read_unlock() to be added later */ \
+ rcu_read_unlock(); \
} while (0)
#else
static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
@@ -108,10 +110,12 @@ static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
}
static inline void pte_unmap(pte_t *pte)
{
- /* rcu_read_unlock() to be added later */
+ rcu_read_unlock();
}
#endif
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
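
As an aside (illustration only, not part of this patch): the rcu_read_unlock()
in pte_unmap() pairs with an RCU read lock taken when the page table is mapped,
and pte_free_defer() is the write side that frees a detached PTE table only
after such readers have finished. A minimal sketch of a reader, using the
existing pte_offset_map()/pte_unmap() pair; the helper itself is hypothetical:

/* Hypothetical helper: peek at one PTE without taking the PTE lock. */
static pte_t peek_pte(pmd_t *pmd, unsigned long addr)
{
	pte_t *ptep, pte = __pte(0);

	ptep = pte_offset_map(pmd, addr);	/* may fail if the table went away */
	if (ptep) {
		pte = ptep_get(ptep);
		pte_unmap(ptep);	/* kunmap_local() on HIGHPTE, plus rcu_read_unlock() */
	}
	return pte;
}
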
/* Find an entry in the second-level page table.. */
#ifndef pmd_offset
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
@@ -180,6 +184,60 @@ static inline int pmd_young(pmd_t pmd)
}
#endif
+/*
+ * A facility to provide lazy MMU batching. This allows PTE updates and
+ * page invalidations to be delayed until a call to leave lazy MMU mode
+ * is issued. Some architectures may benefit from doing this, and it is
+ * beneficial for both shadow and direct mode hypervisors, which may batch
+ * the PTE updates which happen during this window. Note that using this
+ * interface requires that read hazards be removed from the code. A read
+ * hazard could result in the direct mode hypervisor case, since the actual
+ * write to the page tables may not yet have taken place, so reads through
+ * a raw PTE pointer after it has been modified are not guaranteed to be
+ * up to date. This mode can only be entered and left under the protection of
+ * the page table locks for all page tables which may be modified. In the UP
+ * case, this is required so that preemption is disabled, and in the SMP case,
+ * it must synchronize the delayed page table writes properly on other CPUs.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+#define arch_enter_lazy_mmu_mode() do {} while (0)
+#define arch_leave_lazy_mmu_mode() do {} while (0)
+#define arch_flush_lazy_mmu_mode() do {} while (0)
+#endif
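
For illustration only (hypothetical caller, not from this patch): the expected
pattern is to enter lazy MMU mode with the page table lock held, batch the PTE
updates, and leave the mode again before the lock is dropped, roughly:

/* Hypothetical helper: batch PTE updates over [addr, end) within one PMD. */
static void update_pte_range(struct mm_struct *mm, pmd_t *pmd,
			     unsigned long addr, unsigned long end)
{
	spinlock_t *ptl;
	pte_t *start, *pte;

	start = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte)
		return;
	arch_enter_lazy_mmu_mode();
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		/* ... read ptep_get(pte), compute a new entry, set it ... */
	}
	arch_leave_lazy_mmu_mode();	/* flush any batched updates */
	pte_unmap_unlock(start, ptl);
}
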
+
+#ifndef set_ptes
+/**
+ * set_ptes - Map consecutive pages to a contiguous range of addresses.
+ * @mm: Address space to map the pages into.
+ * @addr: Address to map the first page at.
+ * @ptep: Page table pointer for the first entry.
+ * @pte: Page table entry for the first page.
+ * @nr: Number of pages to map.
+ *
+ * May be overridden by the architecture, or the architecture can define
+ * set_pte() and PFN_PTE_SHIFT.
+ *
+ * Context: The caller holds the page table lock. The pages all belong
+ * to the same folio. The PTEs are all in the same PMD.
+ */
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ page_table_check_ptes_set(mm, ptep, pte, nr);
+
+ arch_enter_lazy_mmu_mode();
+ for (;;) {
+ set_pte(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
+ }
+ arch_leave_lazy_mmu_mode();
+}
+#endif
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
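
For illustration only (hypothetical caller, not from this patch): a caller
mapping several pages of one folio hands set_ptes() the PTE for the first page
and the number of pages; the loop above advances the PFN for each later entry.

/* Hypothetical helper: map the first nr pages of a folio at addr. */
static void map_folio_pages(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, struct folio *folio,
			    unsigned int nr, pgprot_t prot)
{
	pte_t pte = mk_pte(folio_page(folio, 0), prot);

	/* PTL held by the caller; all nr entries lie in the same PMD. */
	set_ptes(mm, addr, ptep, pte, nr);
}
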
+
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
@@ -320,7 +378,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
{
pte_t pte = ptep_get(ptep);
pte_clear(mm, address, ptep);
- page_table_check_pte_clear(mm, address, pte);
+ page_table_check_pte_clear(mm, pte);
return pte;
}
#endif
@@ -390,6 +448,7 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
return pmd;
}
#define pmdp_get_lockless pmdp_get_lockless
+#define pmdp_get_lockless_sync() tlb_remove_table_sync_one()
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */
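
For illustration only (hypothetical caller, not from this patch): the new
pmdp_get_lockless_sync() lets the side that clears a pmd wait for any CPU that
is still inside an interrupts-disabled pmdp_get_lockless() read before the old
page table is torn down; it maps to tlb_remove_table_sync_one() here and to an
empty stub in the non-CONFIG_GUP_GET_PXX_LOW_HIGH case below.

/* Hypothetical writer-side sequence; locking and TLB flushing elided. */
static void clear_pmd_and_sync(struct mm_struct *mm, unsigned long addr,
			       pmd_t *pmdp)
{
	pmd_t old = pmdp_get(pmdp);

	pmd_clear(pmdp);
	/* ... TLB flush for the range as the caller requires ... */

	/* Wait out any interrupts-disabled lockless reader of the old pmd. */
	pmdp_get_lockless_sync();

	/* Only now free or reuse the page table that 'old' pointed to. */
	WARN_ON_ONCE(pmd_none(old));
}
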
@@ -408,6 +467,9 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
{
return pmdp_get(pmdp);
}
+static inline void pmdp_get_lockless_sync(void)
+{
+}
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -419,7 +481,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
pmd_t pmd = *pmdp;
pmd_clear(pmdp);
- page_table_check_pmd_clear(mm, address, pmd);
+ page_table_check_pmd_clear(mm, pmd);
return pmd;
}
@@ -432,7 +494,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
pud_t pud = *pudp;
pud_clear(pudp);
- page_table_check_pud_clear(mm, address, pud);
+ page_table_check_pud_clear(mm, pud);
return pud;
}
@@ -450,11 +512,11 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
#endif
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
-static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp,
int full)
{
- return pudp_huge_get_and_clear(mm, address, pudp);
+ return pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -558,6 +620,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
#endif
#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pudp_set_wrprotect(struct mm_struct *mm,
unsigned long address, pud_t *pudp)
{
@@ -571,6 +634,7 @@ static inline void pudp_set_wrprotect(struct mm_struct *mm,
{
BUILD_BUG();
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif
@@ -693,11 +757,14 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
return pmd_val(pmd_a) == pmd_val(pmd_b);
}
+#endif
+#ifndef pud_same
static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
return pud_val(pud_a) == pud_val(pud_b);
}
+#define pud_same pud_same
#endif
#ifndef __HAVE_ARCH_P4D_SAME
@@ -1041,27 +1108,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#endif
/*
- * A facility to provide lazy MMU batching. This allows PTE updates and
- * page invalidations to be delayed until a call to leave lazy MMU mode
- * is issued. Some architectures may benefit from doing this, and it is
- * beneficial for both shadow and direct mode hypervisors, which may batch
- * the PTE updates which happen during this window. Note that using this
- * interface requires that read hazards be removed from the code. A read
- * hazard could result in the direct mode hypervisor case, since the actual
- * write to the page tables may not yet have taken place, so reads though
- * a raw PTE pointer after it has been modified are not guaranteed to be
- * up to date. This mode can only be entered and left under the protection of
- * the page table locks for all page tables which may be modified. In the UP
- * case, this is required so that preemption is disabled, and in the SMP case,
- * it must synchronize the delayed page table writes properly on other CPUs.
- */
-#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-#define arch_enter_lazy_mmu_mode() do {} while (0)
-#define arch_leave_lazy_mmu_mode() do {} while (0)
-#define arch_flush_lazy_mmu_mode() do {} while (0)
-#endif
-
-/*
* A facility to provide batching of the reload of page tables and
* other process state with the actual context switch code for
* paravirtualized guests. By convention, only one of the batched
@@ -1322,12 +1368,16 @@ static inline int pud_trans_unstable(pud_t *pud)
#ifndef CONFIG_NUMA_BALANCING
/*
- * Technically a PTE can be PROTNONE even when not doing NUMA balancing but
- * the only case the kernel cares is for NUMA balancing and is only ever set
- * when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked
- * _PAGE_PROTNONE so by default, implement the helper as "always no". It
- * is the responsibility of the caller to distinguish between PROT_NONE
- * protections and NUMA hinting fault protections.
+ * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is
+ * perfectly valid to indicate "no" in that case, which is why the default
+ * implementation is "always no".
+ *
+ * In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE
+ * page protection due to NUMA hinting. NUMA hinting faults only apply in
+ * accessible VMAs.
+ *
+ * So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault,
+ * looking at the VMA accessibility is sufficient.
*/
static inline int pte_protnone(pte_t pte)
{
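
For illustration only (hypothetical helper, not from this patch): per the
comment above, a caller identifies a NUMA-hinting PROT_NONE entry by combining
the PTE check with the VMA's accessibility.

/* Hypothetical helper: is this PTE a PROT_NONE entry placed for NUMA hinting? */
static bool pte_is_numa_hint(struct vm_area_struct *vma, pte_t pte)
{
	/* Callers typically also check pte_present() before this. */
	return vma_is_accessible(vma) && pte_protnone(pte);
}
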
@@ -1499,6 +1549,9 @@ typedef unsigned int pgtbl_mod_mask;
#define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
#endif
+#ifndef has_transparent_pud_hugepage
+#define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+#endif
/*
* On some architectures it depends on the mm if the p4d/pud or pmd
* layer of the page table hierarchy is folded or not.