summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/44x_mmu.c14
-rw-r--r--arch/powerpc/mm/8xx_mmu.c10
-rw-r--r--arch/powerpc/mm/Makefile11
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c15
-rw-r--r--arch/powerpc/mm/dump_bats.c173
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables-generic.c2
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.c1
-rw-r--r--arch/powerpc/mm/dump_sr.c64
-rw-r--r--arch/powerpc/mm/fault.c50
-rw-r--r--arch/powerpc/mm/hash_low_32.S33
-rw-r--r--arch/powerpc/mm/hugetlbpage.c42
-rw-r--r--arch/powerpc/mm/init-common.c56
-rw-r--r--arch/powerpc/mm/init_64.c19
-rw-r--r--arch/powerpc/mm/mem.c54
-rw-r--r--arch/powerpc/mm/mmu_context.c10
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c15
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c110
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c4
-rw-r--r--arch/powerpc/mm/mmu_decl.h2
-rw-r--r--arch/powerpc/mm/numa.c4
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c88
-rw-r--r--arch/powerpc/mm/pgtable-frag.c119
-rw-r--r--arch/powerpc/mm/pgtable.c26
-rw-r--r--arch/powerpc/mm/pgtable_32.c29
-rw-r--r--arch/powerpc/mm/pkeys.c25
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c51
-rw-r--r--arch/powerpc/mm/slb.c35
-rw-r--r--arch/powerpc/mm/tlb_low_64e.S7
28 files changed, 704 insertions, 365 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 12d92518e898..ea2b9af08a48 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -29,6 +29,7 @@
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
#include "mmu_decl.h"
@@ -43,22 +44,13 @@ unsigned long tlb_47x_boltmap[1024/8];
static void ppc44x_update_tlb_hwater(void)
{
- extern unsigned int tlb_44x_patch_hwater_D[];
- extern unsigned int tlb_44x_patch_hwater_I[];
-
/* The TLB miss handlers hard codes the watermark in a cmpli
* instruction to improve performances rather than loading it
* from the global variable. Thus, we patch the instructions
* in the 2 TLB miss handlers when updating the value
*/
- tlb_44x_patch_hwater_D[0] = (tlb_44x_patch_hwater_D[0] & 0xffff0000) |
- tlb_44x_hwater;
- flush_icache_range((unsigned long)&tlb_44x_patch_hwater_D[0],
- (unsigned long)&tlb_44x_patch_hwater_D[1]);
- tlb_44x_patch_hwater_I[0] = (tlb_44x_patch_hwater_I[0] & 0xffff0000) |
- tlb_44x_hwater;
- flush_icache_range((unsigned long)&tlb_44x_patch_hwater_I[0],
- (unsigned long)&tlb_44x_patch_hwater_I[1]);
+ modify_instruction_site(&patch__tlb_44x_hwater_D, 0xffff, tlb_44x_hwater);
+ modify_instruction_site(&patch__tlb_44x_hwater_I, 0xffff, tlb_44x_hwater);
}
/*
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 01b7f5107c3a..bfa503cff351 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -100,11 +100,7 @@ static void __init mmu_mapin_immr(void)
static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
{
- unsigned int instr = *(unsigned int *)patch_site_addr(site);
-
- instr &= 0xffff0000;
- instr |= (unsigned long)__va(mapped) >> 16;
- patch_instruction_site(site, instr);
+ modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16);
}
unsigned long __init mmu_mapin_ram(unsigned long top)
@@ -175,12 +171,12 @@ void set_context(unsigned long id, pgd_t *pgd)
*(ptr + 1) = pgd;
#endif
- /* Register M_TW will contain base address of level 1 table minus the
+ /* Register M_TWB will contain base address of level 1 table minus the
* lower part of the kernel PGDIR base address, so that all accesses to
* level 1 table are done relative to lower part of kernel PGDIR base
* address.
*/
- mtspr(SPRN_M_TW, __pa(pgd) - offset);
+ mtspr(SPRN_M_TWB, __pa(pgd) - offset);
/* Update context */
mtspr(SPRN_M_CASID, id - 1);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index ca96e7be4d0e..f965fc33a8b7 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -15,10 +15,13 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
-obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
+obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o \
+ $(hash64-y) mmu_context_book3s64.o \
+ pgtable-book3s64.o pgtable-frag.o
+obj-$(CONFIG_PPC32) += pgtable-frag.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
-obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
-obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
+obj-$(CONFIG_PPC_BOOK3S_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
+obj-$(CONFIG_PPC_BOOK3S) += tlb_hash$(BITS).o
ifdef CONFIG_PPC_BOOK3S_64
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
@@ -47,7 +50,7 @@ ifdef CONFIG_PPC_PTDUMP
obj-$(CONFIG_4xx) += dump_linuxpagetables-generic.o
obj-$(CONFIG_PPC_8xx) += dump_linuxpagetables-8xx.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += dump_linuxpagetables-generic.o
-obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o dump_bats.o dump_sr.o
obj-$(CONFIG_PPC_BOOK3S_64) += dump_linuxpagetables-book3s64.o
endif
obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index b6e7b5952ab5..e955539686a4 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -29,7 +29,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/highmem.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
@@ -151,8 +151,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi
* Allocate DMA-coherent memory space and return both the kernel remapped
* virtual and bus address for that space.
*/
-void *
-__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
+void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
struct page *page;
struct ppc_vm_region *c;
@@ -223,7 +223,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
/*
* Set the "dma handle"
*/
- *handle = page_to_phys(page);
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
do {
SetPageReserved(page);
@@ -249,12 +249,12 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
no_page:
return NULL;
}
-EXPORT_SYMBOL(__dma_alloc_coherent);
/*
* free a page as defined by the above mapping.
*/
-void __dma_free_coherent(size_t size, void *vaddr)
+void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, unsigned long attrs)
{
struct ppc_vm_region *c;
unsigned long flags, addr;
@@ -309,7 +309,6 @@ void __dma_free_coherent(size_t size, void *vaddr)
__func__, vaddr);
dump_stack();
}
-EXPORT_SYMBOL(__dma_free_coherent);
/*
* make an area consistent.
@@ -401,7 +400,7 @@ EXPORT_SYMBOL(__dma_sync_page);
/*
* Return the PFN for a given cpu virtual address returned by
- * __dma_alloc_coherent. This is used by dma_mmap_coherent()
+ * __dma_nommu_alloc_coherent. This is used by dma_mmap_coherent()
*/
unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
{
diff --git a/arch/powerpc/mm/dump_bats.c b/arch/powerpc/mm/dump_bats.c
new file mode 100644
index 000000000000..a0d23e96e841
--- /dev/null
+++ b/arch/powerpc/mm/dump_bats.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Christophe Leroy CS S.I.
+ * <christophe.leroy@c-s.fr>
+ *
+ * This dumps the content of BATS
+ */
+
+#include <asm/debugfs.h>
+#include <asm/pgtable.h>
+#include <asm/cpu_has_feature.h>
+
+static char *pp_601(int k, int pp)
+{
+ if (pp == 0)
+ return k ? "NA" : "RWX";
+ if (pp == 1)
+ return k ? "ROX" : "RWX";
+ if (pp == 2)
+ return k ? "RWX" : "RWX";
+ return k ? "ROX" : "ROX";
+}
+
+static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper)
+{
+ u32 blpi = upper & 0xfffe0000;
+ u32 k = (upper >> 2) & 3;
+ u32 pp = upper & 3;
+ phys_addr_t pbn = PHYS_BAT_ADDR(lower);
+ u32 bsm = lower & 0x3ff;
+ u32 size = (bsm + 1) << 17;
+
+ seq_printf(m, "%d: ", idx);
+ if (!(lower & 0x40)) {
+ seq_puts(m, " -\n");
+ return;
+ }
+
+ seq_printf(m, "0x%08x-0x%08x ", blpi, blpi + size - 1);
+#ifdef CONFIG_PHYS_64BIT
+ seq_printf(m, "0x%016llx ", pbn);
+#else
+ seq_printf(m, "0x%08x ", pbn);
+#endif
+
+ seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp));
+
+ if (lower & _PAGE_WRITETHRU)
+ seq_puts(m, "write through ");
+ if (lower & _PAGE_NO_CACHE)
+ seq_puts(m, "no cache ");
+ if (lower & _PAGE_COHERENT)
+ seq_puts(m, "coherent ");
+ seq_puts(m, "\n");
+}
+
+#define BAT_SHOW_601(_m, _n, _l, _u) bat_show_601(_m, _n, mfspr(_l), mfspr(_u))
+
+static int bats_show_601(struct seq_file *m, void *v)
+{
+ seq_puts(m, "---[ Block Address Translation ]---\n");
+
+ BAT_SHOW_601(m, 0, SPRN_IBAT0L, SPRN_IBAT0U);
+ BAT_SHOW_601(m, 1, SPRN_IBAT1L, SPRN_IBAT1U);
+ BAT_SHOW_601(m, 2, SPRN_IBAT2L, SPRN_IBAT2U);
+ BAT_SHOW_601(m, 3, SPRN_IBAT3L, SPRN_IBAT3U);
+
+ return 0;
+}
+
+static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool is_d)
+{
+ u32 bepi = upper & 0xfffe0000;
+ u32 bl = (upper >> 2) & 0x7ff;
+ u32 k = upper & 3;
+ phys_addr_t brpn = PHYS_BAT_ADDR(lower);
+ u32 size = (bl + 1) << 17;
+
+ seq_printf(m, "%d: ", idx);
+ if (k == 0) {
+ seq_puts(m, " -\n");
+ return;
+ }
+
+ seq_printf(m, "0x%08x-0x%08x ", bepi, bepi + size - 1);
+#ifdef CONFIG_PHYS_64BIT
+ seq_printf(m, "0x%016llx ", brpn);
+#else
+ seq_printf(m, "0x%08x ", brpn);
+#endif
+
+ if (k == 1)
+ seq_puts(m, "User ");
+ else if (k == 2)
+ seq_puts(m, "Kernel ");
+ else
+ seq_puts(m, "Kernel/User ");
+
+ if (lower & BPP_RX)
+ seq_puts(m, is_d ? "RO " : "EXEC ");
+ else if (lower & BPP_RW)
+ seq_puts(m, is_d ? "RW " : "EXEC ");
+ else
+ seq_puts(m, is_d ? "NA " : "NX ");
+
+ if (lower & _PAGE_WRITETHRU)
+ seq_puts(m, "write through ");
+ if (lower & _PAGE_NO_CACHE)
+ seq_puts(m, "no cache ");
+ if (lower & _PAGE_COHERENT)
+ seq_puts(m, "coherent ");
+ if (lower & _PAGE_GUARDED)
+ seq_puts(m, "guarded ");
+ seq_puts(m, "\n");
+}
+
+#define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d)
+
+static int bats_show_603(struct seq_file *m, void *v)
+{
+ seq_puts(m, "---[ Instruction Block Address Translation ]---\n");
+
+ BAT_SHOW_603(m, 0, SPRN_IBAT0L, SPRN_IBAT0U, false);
+ BAT_SHOW_603(m, 1, SPRN_IBAT1L, SPRN_IBAT1U, false);
+ BAT_SHOW_603(m, 2, SPRN_IBAT2L, SPRN_IBAT2U, false);
+ BAT_SHOW_603(m, 3, SPRN_IBAT3L, SPRN_IBAT3U, false);
+ if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) {
+ BAT_SHOW_603(m, 4, SPRN_IBAT4L, SPRN_IBAT4U, false);
+ BAT_SHOW_603(m, 5, SPRN_IBAT5L, SPRN_IBAT5U, false);
+ BAT_SHOW_603(m, 6, SPRN_IBAT6L, SPRN_IBAT6U, false);
+ BAT_SHOW_603(m, 7, SPRN_IBAT7L, SPRN_IBAT7U, false);
+ }
+
+ seq_puts(m, "\n---[ Data Block Address Translation ]---\n");
+
+ BAT_SHOW_603(m, 0, SPRN_DBAT0L, SPRN_DBAT0U, true);
+ BAT_SHOW_603(m, 1, SPRN_DBAT1L, SPRN_DBAT1U, true);
+ BAT_SHOW_603(m, 2, SPRN_DBAT2L, SPRN_DBAT2U, true);
+ BAT_SHOW_603(m, 3, SPRN_DBAT3L, SPRN_DBAT3U, true);
+ if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) {
+ BAT_SHOW_603(m, 4, SPRN_DBAT4L, SPRN_DBAT4U, true);
+ BAT_SHOW_603(m, 5, SPRN_DBAT5L, SPRN_DBAT5U, true);
+ BAT_SHOW_603(m, 6, SPRN_DBAT6L, SPRN_DBAT6U, true);
+ BAT_SHOW_603(m, 7, SPRN_DBAT7L, SPRN_DBAT7U, true);
+ }
+
+ return 0;
+}
+
+static int bats_open(struct inode *inode, struct file *file)
+{
+ if (cpu_has_feature(CPU_FTR_601))
+ return single_open(file, bats_show_601, NULL);
+
+ return single_open(file, bats_show_603, NULL);
+}
+
+static const struct file_operations bats_fops = {
+ .open = bats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init bats_init(void)
+{
+ struct dentry *debugfs_file;
+
+ debugfs_file = debugfs_create_file("block_address_translation", 0400,
+ powerpc_debugfs_root, NULL, &bats_fops);
+ return debugfs_file ? 0 : -ENOMEM;
+}
+device_initcall(bats_init);
diff --git a/arch/powerpc/mm/dump_linuxpagetables-generic.c b/arch/powerpc/mm/dump_linuxpagetables-generic.c
index 1e3829ec1348..3fe98a0974c6 100644
--- a/arch/powerpc/mm/dump_linuxpagetables-generic.c
+++ b/arch/powerpc/mm/dump_linuxpagetables-generic.c
@@ -21,13 +21,11 @@ static const struct flag_info flag_array[] = {
.set = "rw",
.clear = "r ",
}, {
-#ifndef CONFIG_PPC_BOOK3S_32
.mask = _PAGE_EXEC,
.val = _PAGE_EXEC,
.set = " X ",
.clear = " ",
}, {
-#endif
.mask = _PAGE_PRESENT,
.val = _PAGE_PRESENT,
.set = "present",
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index 2b74f8adf4d0..6aa41669ac1a 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -19,6 +19,7 @@
#include <linux/hugetlb.h>
#include <linux/io.h>
#include <linux/mm.h>
+#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/fixmap.h>
diff --git a/arch/powerpc/mm/dump_sr.c b/arch/powerpc/mm/dump_sr.c
new file mode 100644
index 000000000000..501843664bb9
--- /dev/null
+++ b/arch/powerpc/mm/dump_sr.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Christophe Leroy CS S.I.
+ * <christophe.leroy@c-s.fr>
+ *
+ * This dumps the content of Segment Registers
+ */
+
+#include <asm/debugfs.h>
+
+static void seg_show(struct seq_file *m, int i)
+{
+ u32 val = mfsrin(i << 28);
+
+ seq_printf(m, "0x%01x0000000-0x%01xfffffff ", i, i);
+ seq_printf(m, "Kern key %d ", (val >> 30) & 1);
+ seq_printf(m, "User key %d ", (val >> 29) & 1);
+ if (val & 0x80000000) {
+ seq_printf(m, "Device 0x%03x", (val >> 20) & 0x1ff);
+ seq_printf(m, "-0x%05x", val & 0xfffff);
+ } else {
+ if (val & 0x10000000)
+ seq_puts(m, "No Exec ");
+ seq_printf(m, "VSID 0x%06x", val & 0xffffff);
+ }
+ seq_puts(m, "\n");
+}
+
+static int sr_show(struct seq_file *m, void *v)
+{
+ int i;
+
+ seq_puts(m, "---[ User Segments ]---\n");
+ for (i = 0; i < TASK_SIZE >> 28; i++)
+ seg_show(m, i);
+
+ seq_puts(m, "\n---[ Kernel Segments ]---\n");
+ for (; i < 16; i++)
+ seg_show(m, i);
+
+ return 0;
+}
+
+static int sr_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, sr_show, NULL);
+}
+
+static const struct file_operations sr_fops = {
+ .open = sr_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init sr_init(void)
+{
+ struct dentry *debugfs_file;
+
+ debugfs_file = debugfs_create_file("segment_registers", 0400,
+ powerpc_debugfs_root, NULL, &sr_fops);
+ return debugfs_file ? 0 : -ENOMEM;
+}
+device_initcall(sr_init);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 1697e903bbf2..a6dcfda3e11e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -226,7 +226,9 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
unsigned long address)
{
- if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) {
+ /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
+ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
+ DSISR_PROTFAULT))) {
printk_ratelimited(KERN_CRIT "kernel tried to execute"
" exec-protected page (%lx) -"
"exploit attempt? (uid: %d)\n",
@@ -341,10 +343,21 @@ static inline void cmo_account_page_fault(void)
static inline void cmo_account_page_fault(void) { }
#endif /* CONFIG_PPC_SMLPAR */
-#ifdef CONFIG_PPC_STD_MMU
-static void sanity_check_fault(bool is_write, unsigned long error_code)
+#ifdef CONFIG_PPC_BOOK3S
+static void sanity_check_fault(bool is_write, bool is_user,
+ unsigned long error_code, unsigned long address)
{
/*
+ * Userspace trying to access kernel address, we get PROTFAULT for that.
+ */
+ if (is_user && address >= TASK_SIZE) {
+ pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
+ current->comm, current->pid, address,
+ from_kuid(&init_user_ns, current_uid()));
+ return;
+ }
+
+ /*
* For hash translation mode, we should never get a
* PROTFAULT. Any update to pte to reduce access will result in us
* removing the hash page table entry, thus resulting in a DSISR_NOHPTE
@@ -373,12 +386,15 @@ static void sanity_check_fault(bool is_write, unsigned long error_code)
* For radix, we can get prot fault for autonuma case, because radix
* page table will have them marked noaccess for user.
*/
- if (!radix_enabled() && !is_write)
- WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
+ if (radix_enabled() || is_write)
+ return;
+
+ WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}
#else
-static void sanity_check_fault(bool is_write, unsigned long error_code) { }
-#endif /* CONFIG_PPC_STD_MMU */
+static void sanity_check_fault(bool is_write, bool is_user,
+ unsigned long error_code, unsigned long address) { }
+#endif /* CONFIG_PPC_BOOK3S */
/*
* Define the correct "is_write" bit in error_code based
@@ -435,7 +451,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
}
/* Additional sanity check(s) */
- sanity_check_fault(is_write, error_code);
+ sanity_check_fault(is_write, is_user, error_code, address);
/*
* The kernel should never take an execute fault nor should it
@@ -636,21 +652,23 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
switch (TRAP(regs)) {
case 0x300:
case 0x380:
- printk(KERN_ALERT "Unable to handle kernel paging request for "
- "data at address 0x%08lx\n", regs->dar);
+ case 0xe00:
+ pr_alert("BUG: %s at 0x%08lx\n",
+ regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
+ "Unable to handle kernel data access", regs->dar);
break;
case 0x400:
case 0x480:
- printk(KERN_ALERT "Unable to handle kernel paging request for "
- "instruction fetch\n");
+ pr_alert("BUG: Unable to handle kernel instruction fetch%s",
+ regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n");
break;
case 0x600:
- printk(KERN_ALERT "Unable to handle kernel paging request for "
- "unaligned access at address 0x%08lx\n", regs->dar);
+ pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n",
+ regs->dar);
break;
default:
- printk(KERN_ALERT "Unable to handle kernel paging request for "
- "unknown fault\n");
+ pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n",
+ regs->dar);
break;
}
printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 26acf6c8c20c..1e2df3e9f9ea 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -28,6 +28,7 @@
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/feature-fixups.h>
+#include <asm/code-patching-asm.h>
#ifdef CONFIG_SMP
.section .bss
@@ -337,11 +338,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */
SET_V(r5) /* set V (valid) bit */
+ patch_site 0f, patch__hash_page_A0
+ patch_site 1f, patch__hash_page_A1
+ patch_site 2f, patch__hash_page_A2
/* Get the address of the primary PTE group in the hash table (r3) */
-_GLOBAL(hash_page_patch_A)
- addis r0,r7,Hash_base@h /* base address of hash table */
- rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
- rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+0: addis r0,r7,Hash_base@h /* base address of hash table */
+1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
+2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r3,r3,r0 /* make primary hash */
li r0,8 /* PTEs/group */
@@ -366,10 +369,10 @@ _GLOBAL(hash_page_patch_A)
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
beq+ found_slot
+ patch_site 0f, patch__hash_page_B
/* Search the secondary PTEG for a matching PTE */
ori r5,r5,PTE_H /* set H (secondary hash) bit */
-_GLOBAL(hash_page_patch_B)
- xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
+0: xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
xori r4,r4,(-PTEG_SIZE & 0xffff)
addi r4,r4,-HPTE_SIZE
mtctr r0
@@ -393,10 +396,10 @@ _GLOBAL(hash_page_patch_B)
addi r6,r6,1
stw r6,primary_pteg_full@l(r4)
+ patch_site 0f, patch__hash_page_C
/* Search the secondary PTEG for an empty slot */
ori r5,r5,PTE_H /* set H (secondary hash) bit */
-_GLOBAL(hash_page_patch_C)
- xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
+0: xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
xori r4,r4,(-PTEG_SIZE & 0xffff)
addi r4,r4,-HPTE_SIZE
mtctr r0
@@ -577,11 +580,13 @@ _GLOBAL(flush_hash_pages)
stwcx. r8,0,r5 /* update the pte */
bne- 33b
+ patch_site 0f, patch__flush_hash_A0
+ patch_site 1f, patch__flush_hash_A1
+ patch_site 2f, patch__flush_hash_A2
/* Get the address of the primary PTE group in the hash table (r3) */
-_GLOBAL(flush_hash_patch_A)
- addis r8,r7,Hash_base@h /* base address of hash table */
- rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
- rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+0: addis r8,r7,Hash_base@h /* base address of hash table */
+1: rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
+2: rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r8,r0,r8 /* make primary hash */
/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
@@ -593,11 +598,11 @@ _GLOBAL(flush_hash_patch_A)
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
beq+ 3f
+ patch_site 0f, patch__flush_hash_B
/* Search the secondary PTEG for a matching PTE */
ori r11,r11,PTE_H /* set H (secondary hash) bit */
li r0,8 /* PTEs/group */
-_GLOBAL(flush_hash_patch_B)
- xoris r12,r8,Hash_msk>>16 /* compute secondary hash */
+0: xoris r12,r8,Hash_msk>>16 /* compute secondary hash */
xori r12,r12,(-PTEG_SIZE & 0xffff)
addi r12,r12,-HPTE_SIZE
mtctr r0
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 8cf035e68378..9e732bb2c84a 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -42,6 +42,8 @@ EXPORT_SYMBOL(HPAGE_SHIFT);
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
+#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *)))
+
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
/*
@@ -61,14 +63,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
int num_hugepd;
if (pshift >= pdshift) {
- cachep = hugepte_cache;
+ cachep = PGT_CACHE(PTE_T_ORDER);
num_hugepd = 1 << (pshift - pdshift);
+ } else if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ cachep = PGT_CACHE(PTE_INDEX_SIZE);
+ num_hugepd = 1;
} else {
cachep = PGT_CACHE(pdshift - pshift);
num_hugepd = 1;
}
- new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
+ new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -264,7 +269,7 @@ static void hugepd_free_rcu_callback(struct rcu_head *head)
unsigned int i;
for (i = 0; i < batch->index; i++)
- kmem_cache_free(hugepte_cache, batch->ptes[i]);
+ kmem_cache_free(PGT_CACHE(PTE_T_ORDER), batch->ptes[i]);
free_page((unsigned long)batch);
}
@@ -277,7 +282,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
if (atomic_read(&tlb->mm->mm_users) < 2 ||
mm_is_thread_local(tlb->mm)) {
- kmem_cache_free(hugepte_cache, hugepte);
+ kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte);
put_cpu_var(hugepd_freelist_cur);
return;
}
@@ -289,7 +294,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
(*batchp)->ptes[(*batchp)->index++] = hugepte;
if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
- call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
+ call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback);
*batchp = NULL;
}
put_cpu_var(hugepd_freelist_cur);
@@ -329,6 +334,9 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
if (shift >= pdshift)
hugepd_free(tlb, hugepte);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ pgtable_free_tlb(tlb, hugepte,
+ get_hugepd_cache_index(PTE_INDEX_SIZE));
else
pgtable_free_tlb(tlb, hugepte,
get_hugepd_cache_index(pdshift - shift));
@@ -652,7 +660,6 @@ static int __init hugepage_setup_sz(char *str)
}
__setup("hugepagesz=", hugepage_setup_sz);
-struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{
int psize;
@@ -699,24 +706,13 @@ static int __init hugetlbpage_init(void)
* if we have pdshift and shift value same, we don't
* use pgt cache for hugepd.
*/
- if (pdshift > shift)
- pgtable_cache_add(pdshift - shift, NULL);
+ if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx))
+ pgtable_cache_add(PTE_INDEX_SIZE);
+ else if (pdshift > shift)
+ pgtable_cache_add(pdshift - shift);
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
- else if (!hugepte_cache) {
- /*
- * Create a kmem cache for hugeptes. The bottom bits in
- * the pte have size information encoded in them, so
- * align them to allow this
- */
- hugepte_cache = kmem_cache_create("hugepte-cache",
- sizeof(pte_t),
- HUGEPD_SHIFT_MASK + 1,
- 0, NULL);
- if (hugepte_cache == NULL)
- panic("%s: Unable to create kmem cache "
- "for hugeptes\n", __func__);
-
- }
+ else
+ pgtable_cache_add(PTE_T_ORDER);
#endif
}
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 2b656e67f2ea..1e6910eb70ed 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -25,22 +25,40 @@
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
-static void pgd_ctor(void *addr)
-{
- memset(addr, 0, PGD_TABLE_SIZE);
+#define CTOR(shift) static void ctor_##shift(void *addr) \
+{ \
+ memset(addr, 0, sizeof(void *) << (shift)); \
}
-static void pud_ctor(void *addr)
-{
- memset(addr, 0, PUD_TABLE_SIZE);
-}
+CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7);
+CTOR(8); CTOR(9); CTOR(10); CTOR(11); CTOR(12); CTOR(13); CTOR(14); CTOR(15);
-static void pmd_ctor(void *addr)
+static inline void (*ctor(int shift))(void *)
{
- memset(addr, 0, PMD_TABLE_SIZE);
+ BUILD_BUG_ON(MAX_PGTABLE_INDEX_SIZE != 15);
+
+ switch (shift) {
+ case 0: return ctor_0;
+ case 1: return ctor_1;
+ case 2: return ctor_2;
+ case 3: return ctor_3;
+ case 4: return ctor_4;
+ case 5: return ctor_5;
+ case 6: return ctor_6;
+ case 7: return ctor_7;
+ case 8: return ctor_8;
+ case 9: return ctor_9;
+ case 10: return ctor_10;
+ case 11: return ctor_11;
+ case 12: return ctor_12;
+ case 13: return ctor_13;
+ case 14: return ctor_14;
+ case 15: return ctor_15;
+ }
+ return NULL;
}
-struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
+struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE + 1];
EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */
/*
@@ -50,7 +68,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */
* everything else. Caches created by this function are used for all
* the higher level pagetables, and for hugepage pagetables.
*/
-void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
+void pgtable_cache_add(unsigned int shift)
{
char *name;
unsigned long table_size = sizeof(void *) << shift;
@@ -71,19 +89,19 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
* moment, gcc doesn't seem to recognize is_power_of_2 as a
* constant expression, so so much for that. */
BUG_ON(!is_power_of_2(minalign));
- BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
if (PGT_CACHE(shift))
return; /* Already have a cache of this size */
align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
- new = kmem_cache_create(name, table_size, align, 0, ctor);
+ new = kmem_cache_create(name, table_size, align, 0, ctor(shift));
if (!new)
panic("Could not allocate pgtable cache for order %d", shift);
kfree(name);
- pgtable_cache[shift - 1] = new;
+ pgtable_cache[shift] = new;
pr_debug("Allocated pgtable cache for order %d\n", shift);
}
@@ -91,15 +109,15 @@ EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */
void pgtable_cache_init(void)
{
- pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
+ pgtable_cache_add(PGD_INDEX_SIZE);
- if (PMD_CACHE_INDEX && !PGT_CACHE(PMD_CACHE_INDEX))
- pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+ if (PMD_CACHE_INDEX)
+ pgtable_cache_add(PMD_CACHE_INDEX);
/*
* In all current configs, when the PUD index exists it's the
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
- if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
- pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
+ if (PUD_CACHE_INDEX)
+ pgtable_cache_add(PUD_CACHE_INDEX);
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 7a9886f98b0c..a5091c034747 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -188,15 +188,20 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) {
- void *p;
+ void *p = NULL;
int rc;
if (vmemmap_populated(start, page_size))
continue;
+ /*
+ * Allocate from the altmap first if we have one. This may
+ * fail due to alignment issues when using 16MB hugepages, so
+ * fall back to system memory if the altmap allocation fail.
+ */
if (altmap)
p = altmap_alloc_block_buf(page_size, altmap);
- else
+ if (!p)
p = vmemmap_alloc_block_buf(page_size, node);
if (!p)
return -ENOMEM;
@@ -255,8 +260,15 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
unsigned long page_order = get_order(page_size);
+ unsigned long alt_start = ~0, alt_end = ~0;
+ unsigned long base_pfn;
start = _ALIGN_DOWN(start, page_size);
+ if (altmap) {
+ alt_start = altmap->base_pfn;
+ alt_end = altmap->base_pfn + altmap->reserve +
+ altmap->free + altmap->alloc + altmap->align;
+ }
pr_debug("vmemmap_free %lx...%lx\n", start, end);
@@ -280,8 +292,9 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
page = pfn_to_page(addr >> PAGE_SHIFT);
section_base = pfn_to_page(vmemmap_section_start(start));
nr_pages = 1 << page_order;
+ base_pfn = PHYS_PFN(addr);
- if (altmap) {
+ if (base_pfn >= alt_start && base_pfn < alt_end) {
vmem_altmap_free(altmap, nr_pages);
} else if (PageReserved(page)) {
/* allocated from bootmem */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0a64fffabee1..33cc6f676fa6 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -139,7 +139,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+int __meminit arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -246,35 +247,19 @@ static int __init mark_nonram_nosave(void)
}
#endif
-static bool zone_limits_final;
-
/*
- * The memory zones past TOP_ZONE are managed by generic mm code.
- * These should be set to zero since that's what every other
- * architecture does.
+ * Zones usage:
+ *
+ * We setup ZONE_DMA to be 31-bits on all platforms and ZONE_NORMAL to be
+ * everything else. GFP_DMA32 page allocations automatically fall back to
+ * ZONE_DMA.
+ *
+ * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
+ * inform the generic DMA mapping code. 32-bit only devices (if not handled
+ * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
+ * otherwise served by ZONE_DMA.
*/
-static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
- [0 ... TOP_ZONE ] = ~0UL,
- [TOP_ZONE + 1 ... MAX_NR_ZONES - 1] = 0
-};
-
-/*
- * Restrict the specified zone and all more restrictive zones
- * to be below the specified pfn. May not be called after
- * paging_init().
- */
-void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit)
-{
- int i;
-
- if (WARN_ON(zone_limits_final))
- return;
-
- for (i = zone; i >= 0; i--) {
- if (max_zone_pfns[i] > pfn_limit)
- max_zone_pfns[i] = pfn_limit;
- }
-}
+static unsigned long max_zone_pfns[MAX_NR_ZONES];
/*
* Find the least restrictive zone that is entirely below the
@@ -324,11 +309,14 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(long int)((top_of_ram - total_ram) >> 20));
+#ifdef CONFIG_ZONE_DMA
+ max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> PAGE_SHIFT);
+#endif
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
- limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
+ max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
#endif
- limit_zone_pfn(TOP_ZONE, top_of_ram >> PAGE_SHIFT);
- zone_limits_final = true;
+
free_area_init_nodes(max_zone_pfns);
mark_nonram_nosave();
@@ -503,7 +491,7 @@ EXPORT_SYMBOL(flush_icache_user_range);
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep)
{
-#ifdef CONFIG_PPC_STD_MMU
+#ifdef CONFIG_PPC_BOOK3S
/*
* We don't need to worry about _PAGE_PRESENT here because we are
* called with either mm->page_table_lock held or ptl lock held
@@ -541,7 +529,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
}
hash_preload(vma->vm_mm, address, is_exec, trap);
-#endif /* CONFIG_PPC_STD_MMU */
+#endif /* CONFIG_PPC_BOOK3S */
#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
&& defined(CONFIG_HUGETLB_PAGE)
if (is_vm_hugetlb_page(vma))
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index f84e14f23e50..bb52320b7369 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -15,6 +15,7 @@
#include <linux/sched/mm.h>
#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
#if defined(CONFIG_PPC32)
static inline void switch_mm_pgdir(struct task_struct *tsk,
@@ -97,3 +98,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_mmu_context(prev, next, tsk);
}
+#ifdef CONFIG_PPC32
+void arch_exit_mmap(struct mm_struct *mm)
+{
+ void *frag = pte_frag_get(&mm->context);
+
+ if (frag)
+ pte_frag_destroy(frag);
+}
+#endif
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 510f103d7813..f720c5cc0b5e 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -164,21 +164,6 @@ static void destroy_contexts(mm_context_t *ctx)
}
}
-static void pte_frag_destroy(void *pte_frag)
-{
- int count;
- struct page *page;
-
- page = virt_to_page(pte_frag);
- /* drop all the pending references */
- count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
- /* We allow PTE_FRAG_NR fragments from a PTE page */
- if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
- pgtable_page_dtor(page);
- __free_page(page);
- }
-}
-
static void pmd_frag_destroy(void *pmd_frag)
{
int count;
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 56c2234cc6ae..a712a650a8b6 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -36,6 +36,8 @@ struct mm_iommu_table_group_mem_t {
u64 ua; /* userspace address */
u64 entries; /* number of entries in hpas[] */
u64 *hpas; /* vmalloc'ed */
+#define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1)
+ u64 dev_hpa; /* Device memory base address */
};
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
@@ -126,7 +128,8 @@ static int mm_iommu_move_page_from_cma(struct page *page)
return 0;
}
-long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
@@ -140,12 +143,6 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
next) {
- if ((mem->ua == ua) && (mem->entries == entries)) {
- ++mem->used;
- *pmem = mem;
- goto unlock_exit;
- }
-
/* Overlap? */
if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
(ua < (mem->ua +
@@ -156,11 +153,13 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
- ret = mm_iommu_adjust_locked_vm(mm, entries, true);
- if (ret)
- goto unlock_exit;
+ if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+ ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+ if (ret)
+ goto unlock_exit;
- locked_entries = entries;
+ locked_entries = entries;
+ }
mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem) {
@@ -168,6 +167,13 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
goto unlock_exit;
}
+ if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
+ mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
+ mem->dev_hpa = dev_hpa;
+ goto good_exit;
+ }
+ mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
+
/*
* For a starting point for a maximum page size calculation
* we use @ua and @entries natural alignment to allow IOMMU pages
@@ -236,6 +242,7 @@ populate:
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
+good_exit:
atomic64_set(&mem->mapped, 1);
mem->used = 1;
mem->ua = ua;
@@ -252,13 +259,31 @@ unlock_exit:
return ret;
}
-EXPORT_SYMBOL_GPL(mm_iommu_get);
+
+long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+ struct mm_iommu_table_group_mem_t **pmem)
+{
+ return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
+ pmem);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_new);
+
+long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem)
+{
+ return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_newdev);
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
long i;
struct page *page = NULL;
+ if (!mem->hpas)
+ return;
+
for (i = 0; i < mem->entries; ++i) {
if (!mem->hpas[i])
continue;
@@ -300,6 +325,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
long ret = 0;
+ unsigned long entries, dev_hpa;
mutex_lock(&mem_list_mutex);
@@ -321,9 +347,12 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
}
/* @mapped became 0 so now mappings are disabled, release the region */
+ entries = mem->entries;
+ dev_hpa = mem->dev_hpa;
mm_iommu_release(mem);
- mm_iommu_adjust_locked_vm(mm, mem->entries, false);
+ if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ mm_iommu_adjust_locked_vm(mm, entries, false);
unlock_exit:
mutex_unlock(&mem_list_mutex);
@@ -368,27 +397,32 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
return ret;
}
-struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
unsigned long ua, unsigned long entries)
{
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+ mutex_lock(&mem_list_mutex);
+
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
ret = mem;
+ ++mem->used;
break;
}
}
+ mutex_unlock(&mem_list_mutex);
+
return ret;
}
-EXPORT_SYMBOL_GPL(mm_iommu_find);
+EXPORT_SYMBOL_GPL(mm_iommu_get);
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
- u64 *va = &mem->hpas[entry];
+ u64 *va;
if (entry >= mem->entries)
return -EFAULT;
@@ -396,6 +430,12 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;
+ if (!mem->hpas) {
+ *hpa = mem->dev_hpa + (ua - mem->ua);
+ return 0;
+ }
+
+ va = &mem->hpas[entry];
*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
return 0;
@@ -406,7 +446,6 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
- void *va = &mem->hpas[entry];
unsigned long *pa;
if (entry >= mem->entries)
@@ -415,7 +454,12 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;
- pa = (void *) vmalloc_to_phys(va);
+ if (!mem->hpas) {
+ *hpa = mem->dev_hpa + (ua - mem->ua);
+ return 0;
+ }
+
+ pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
if (!pa)
return -EFAULT;
@@ -435,6 +479,9 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
if (!mem)
return;
+ if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
+ return;
+
entry = (ua - mem->ua) >> PAGE_SHIFT;
va = &mem->hpas[entry];
@@ -445,6 +492,33 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}
+bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ unsigned int pageshift, unsigned long *size)
+{
+ struct mm_iommu_table_group_mem_t *mem;
+ unsigned long end;
+
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+ if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ continue;
+
+ end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
+ if ((mem->dev_hpa <= hpa) && (hpa < end)) {
+ /*
+ * Since the IOMMU page size might be bigger than
+ * PAGE_SIZE, the amount of preregistered memory
+ * starting from @hpa might be smaller than 1<<pageshift
+ * and the caller needs to distinguish this situation.
+ */
+ *size = min(1UL << pageshift, end - hpa);
+ return true;
+ }
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
+
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
if (atomic64_inc_not_zero(&mem->mapped))
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 2faca46ad720..22d71a58167f 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -372,7 +372,6 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
pr_hard("initing context for mm @%p\n", mm);
-#ifdef CONFIG_PPC_MM_SLICES
/*
* We have MMU_NO_CONTEXT set to be ~0. Hence check
* explicitly against context.id == 0. This ensures that we properly
@@ -382,9 +381,9 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
*/
if (mm->context.id == 0)
slice_init_new_context_exec(mm);
-#endif
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
+ pte_frag_set(&mm->context, NULL);
return 0;
}
@@ -487,4 +486,3 @@ void __init mmu_context_init(void)
next_context = FIRST_CONTEXT;
nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
}
-
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 8574fbbc45e0..c4a717da65eb 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -155,7 +155,7 @@ struct tlbcam {
};
#endif
-#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx)
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx)
/* 6xx have BATS */
/* FSL_BOOKE have TLBCAM */
/* 8xx have LTLB */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 3a048e98a132..87f0dd004295 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1178,7 +1178,7 @@ static long vphn_get_associativity(unsigned long cpu,
switch (rc) {
case H_FUNCTION:
- printk(KERN_INFO
+ printk_once(KERN_INFO
"VPHN is not supported. Disabling polling...\n");
stop_topology_update();
break;
@@ -1475,7 +1475,7 @@ static int dt_update_callback(struct notifier_block *nb,
switch (action) {
case OF_RECONFIG_UPDATE_PROPERTY:
- if (!of_prop_cmp(update->dn->type, "cpu") &&
+ if (of_node_is_type(update->dn, "cpu") &&
!of_prop_cmp(update->prop->name, "ibm,associativity")) {
u32 core_id;
of_property_read_u32(update->dn, "reg", &core_id);
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 9f93c9f985c5..f3c31f5e1026 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -244,6 +244,9 @@ static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
{
void *pmd_frag, *ret;
+ if (PMD_FRAG_NR == 1)
+ return NULL;
+
spin_lock(&mm->page_table_lock);
ret = mm->context.pmd_frag;
if (ret) {
@@ -322,91 +325,6 @@ void pmd_fragment_free(unsigned long *pmd)
}
}
-static pte_t *get_pte_from_cache(struct mm_struct *mm)
-{
- void *pte_frag, *ret;
-
- spin_lock(&mm->page_table_lock);
- ret = mm->context.pte_frag;
- if (ret) {
- pte_frag = ret + PTE_FRAG_SIZE;
- /*
- * If we have taken up all the fragments mark PTE page NULL
- */
- if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
- pte_frag = NULL;
- mm->context.pte_frag = pte_frag;
- }
- spin_unlock(&mm->page_table_lock);
- return (pte_t *)ret;
-}
-
-static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
-{
- void *ret = NULL;
- struct page *page;
-
- if (!kernel) {
- page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- } else {
- page = alloc_page(PGALLOC_GFP);
- if (!page)
- return NULL;
- }
-
- atomic_set(&page->pt_frag_refcount, 1);
-
- ret = page_address(page);
- /*
- * if we support only one fragment just return the
- * allocated page.
- */
- if (PTE_FRAG_NR == 1)
- return ret;
- spin_lock(&mm->page_table_lock);
- /*
- * If we find pgtable_page set, we return
- * the allocated page with single fragement
- * count.
- */
- if (likely(!mm->context.pte_frag)) {
- atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
- mm->context.pte_frag = ret + PTE_FRAG_SIZE;
- }
- spin_unlock(&mm->page_table_lock);
-
- return (pte_t *)ret;
-}
-
-pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
-{
- pte_t *pte;
-
- pte = get_pte_from_cache(mm);
- if (pte)
- return pte;
-
- return __alloc_for_ptecache(mm, kernel);
-}
-
-void pte_fragment_free(unsigned long *table, int kernel)
-{
- struct page *page = virt_to_page(table);
-
- BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
- if (atomic_dec_and_test(&page->pt_frag_refcount)) {
- if (!kernel)
- pgtable_page_dtor(page);
- __free_page(page);
- }
-}
-
static inline void pgtable_free(void *table, int index)
{
switch (index) {
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
new file mode 100644
index 000000000000..af23a587f019
--- /dev/null
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Handling Page Tables through page fragments
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/hugetlb.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+void pte_frag_destroy(void *pte_frag)
+{
+ int count;
+ struct page *page;
+
+ page = virt_to_page(pte_frag);
+ /* drop all the pending references */
+ count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
+ /* We allow PTE_FRAG_NR fragments from a PTE page */
+ if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
+ pgtable_page_dtor(page);
+ __free_page(page);
+ }
+}
+
+static pte_t *get_pte_from_cache(struct mm_struct *mm)
+{
+ void *pte_frag, *ret;
+
+ if (PTE_FRAG_NR == 1)
+ return NULL;
+
+ spin_lock(&mm->page_table_lock);
+ ret = pte_frag_get(&mm->context);
+ if (ret) {
+ pte_frag = ret + PTE_FRAG_SIZE;
+ /*
+ * If we have taken up all the fragments mark PTE page NULL
+ */
+ if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
+ pte_frag = NULL;
+ pte_frag_set(&mm->context, pte_frag);
+ }
+ spin_unlock(&mm->page_table_lock);
+ return (pte_t *)ret;
+}
+
+static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
+{
+ void *ret = NULL;
+ struct page *page;
+
+ if (!kernel) {
+ page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ } else {
+ page = alloc_page(PGALLOC_GFP);
+ if (!page)
+ return NULL;
+ }
+
+ atomic_set(&page->pt_frag_refcount, 1);
+
+ ret = page_address(page);
+ /*
+ * if we support only one fragment just return the
+ * allocated page.
+ */
+ if (PTE_FRAG_NR == 1)
+ return ret;
+ spin_lock(&mm->page_table_lock);
+ /*
+ * If we find pgtable_page set, we return
+ * the allocated page with single fragement
+ * count.
+ */
+ if (likely(!pte_frag_get(&mm->context))) {
+ atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
+ pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ return (pte_t *)ret;
+}
+
+pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+{
+ pte_t *pte;
+
+ pte = get_pte_from_cache(mm);
+ if (pte)
+ return pte;
+
+ return __alloc_for_ptecache(mm, kernel);
+}
+
+void pte_fragment_free(unsigned long *table, int kernel)
+{
+ struct page *page = virt_to_page(table);
+
+ BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&page->pt_frag_refcount)) {
+ if (!kernel)
+ pgtable_page_dtor(page);
+ __free_page(page);
+ }
+}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 010e1c616cb2..d3d61d29b4f1 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -74,7 +74,7 @@ static struct page *maybe_pte_to_page(pte_t pte)
* support falls into the same category.
*/
-static pte_t set_pte_filter(pte_t pte)
+static pte_t set_pte_filter_hash(pte_t pte)
{
if (radix_enabled())
return pte;
@@ -93,14 +93,12 @@ static pte_t set_pte_filter(pte_t pte)
return pte;
}
-static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
- int dirty)
-{
- return pte;
-}
-
#else /* CONFIG_PPC_BOOK3S */
+static pte_t set_pte_filter_hash(pte_t pte) { return pte; }
+
+#endif /* CONFIG_PPC_BOOK3S */
+
/* Embedded type MMU with HW exec support. This is a bit more complicated
* as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
* instead we "filter out" the exec permission for non clean pages.
@@ -109,6 +107,9 @@ static pte_t set_pte_filter(pte_t pte)
{
struct page *pg;
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return set_pte_filter_hash(pte);
+
/* No exec permission in the first place, move on */
if (!pte_exec(pte) || !pte_looks_normal(pte))
return pte;
@@ -138,6 +139,9 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
{
struct page *pg;
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return pte;
+
/* So here, we only care about exec faults, as we use them
* to recover lost _PAGE_EXEC and perform I$/D$ coherency
* if necessary. Also if _PAGE_EXEC is already set, same deal,
@@ -172,8 +176,6 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
return pte_mkexec(pte);
}
-#endif /* CONFIG_PPC_BOOK3S */
-
/*
* set_pte stores a linux PTE into the linux page table.
*/
@@ -221,9 +223,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
}
#ifdef CONFIG_HUGETLB_PAGE
-extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep,
- pte_t pte, int dirty)
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
{
#ifdef HUGETLB_NEED_PRELOAD
/*
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index bda3c6f1bd32..d67215248d82 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -45,32 +45,15 @@ extern char etext[], _stext[], _sinittext[], _einittext[];
__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte;
+ if (!slab_is_available())
+ return memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
- if (slab_is_available()) {
- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
- } else {
- pte = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
- if (pte)
- clear_page(pte);
- }
- return pte;
+ return (pte_t *)pte_fragment_alloc(mm, address, 1);
}
pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- struct page *ptepage;
-
- gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT;
-
- ptepage = alloc_pages(flags, 0);
- if (!ptepage)
- return NULL;
- if (!pgtable_page_ctor(ptepage)) {
- __free_page(ptepage);
- return NULL;
- }
- return ptepage;
+ return (pgtable_t)pte_fragment_alloc(mm, address, 0);
}
void __iomem *
@@ -160,7 +143,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
* Don't allow anybody to remap normal RAM that we're using.
* mem_init() sets high_memory so only do the check after that.
*/
- if (slab_is_available() && (p < virt_to_phys(high_memory)) &&
+ if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
page_is_ram(__phys_to_pfn(p))) {
printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
(unsigned long long)p, __builtin_return_address(0));
@@ -260,7 +243,7 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
ktext = ((char *)v >= _stext && (char *)v < etext) ||
((char *)v >= _sinittext && (char *)v < _einittext);
map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
-#ifdef CONFIG_PPC_STD_MMU_32
+#ifdef CONFIG_PPC_BOOK3S_32
if (ktext)
hash_preload(&init_mm, v, false, 0x300);
#endif
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index b271b283c785..587807763737 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -6,20 +6,21 @@
*/
#include <asm/mman.h>
+#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <linux/pkeys.h>
#include <linux/of_device.h>
DEFINE_STATIC_KEY_TRUE(pkey_disabled);
-bool pkey_execute_disable_supported;
int pkeys_total; /* Total pkeys as per device tree */
-bool pkeys_devtree_defined; /* pkey property exported by device tree */
u32 initial_allocation_mask; /* Bits set for the initially allocated keys */
u32 reserved_allocation_mask; /* Bits set for reserved keys */
-u64 pkey_amr_mask; /* Bits in AMR not to be touched */
-u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
-u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */
-int execute_only_key = 2;
+static bool pkey_execute_disable_supported;
+static bool pkeys_devtree_defined; /* property exported by device tree */
+static u64 pkey_amr_mask; /* Bits in AMR not to be touched */
+static u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
+static u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */
+static int execute_only_key = 2;
#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
@@ -57,7 +58,7 @@ static inline bool pkey_mmu_enabled(void)
return cpu_has_feature(CPU_FTR_PKEY);
}
-int pkey_initialize(void)
+static int pkey_initialize(void)
{
int os_reserved, i;
@@ -414,3 +415,13 @@ bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
return pkey_access_permitted(vma_pkey(vma), write, execute);
}
+
+void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ /* Duplicate the oldmm pkey state in mm: */
+ mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm);
+ mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
+}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index f6f575bae3bc..3f4193201ee7 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -31,6 +31,7 @@
#include <asm/prom.h>
#include <asm/mmu.h>
#include <asm/machdep.h>
+#include <asm/code-patching.h>
#include "mmu_decl.h"
@@ -52,7 +53,7 @@ struct batrange { /* stores address ranges mapped by BATs */
phys_addr_t v_block_mapped(unsigned long va)
{
int b;
- for (b = 0; b < 4; ++b)
+ for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
return bat_addrs[b].phys + (va - bat_addrs[b].start);
return 0;
@@ -64,7 +65,7 @@ phys_addr_t v_block_mapped(unsigned long va)
unsigned long p_block_mapped(phys_addr_t pa)
{
int b;
- for (b = 0; b < 4; ++b)
+ for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
if (pa >= bat_addrs[b].phys
&& pa < (bat_addrs[b].limit-bat_addrs[b].start)
+bat_addrs[b].phys)
@@ -182,22 +183,8 @@ void __init MMU_init_hw(void)
unsigned int hmask, mb, mb2;
unsigned int n_hpteg, lg_n_hpteg;
- extern unsigned int hash_page_patch_A[];
- extern unsigned int hash_page_patch_B[], hash_page_patch_C[];
- extern unsigned int hash_page[];
- extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[];
-
- if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
- /*
- * Put a blr (procedure return) instruction at the
- * start of hash_page, since we can still get DSI
- * exceptions on a 603.
- */
- hash_page[0] = 0x4e800020;
- flush_icache_range((unsigned long) &hash_page[0],
- (unsigned long) &hash_page[1]);
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
return;
- }
if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
@@ -244,31 +231,19 @@ void __init MMU_init_hw(void)
if (lg_n_hpteg > 16)
mb2 = 16 - LG_HPTEG_SIZE;
- hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff)
- | ((unsigned int)(Hash) >> 16);
- hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6);
- hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6);
- hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask;
- hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask;
-
- /*
- * Ensure that the locations we've patched have been written
- * out from the data cache and invalidated in the instruction
- * cache, on those machines with split caches.
- */
- flush_icache_range((unsigned long) &hash_page_patch_A[0],
- (unsigned long) &hash_page_patch_C[1]);
+ modify_instruction_site(&patch__hash_page_A0, 0xffff, (unsigned int)Hash >> 16);
+ modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6);
+ modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6);
+ modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
+ modify_instruction_site(&patch__hash_page_C, 0xffff, hmask);
/*
* Patch up the instructions in hashtable.S:flush_hash_page
*/
- flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff)
- | ((unsigned int)(Hash) >> 16);
- flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6);
- flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6);
- flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask;
- flush_icache_range((unsigned long) &flush_hash_patch_A[0],
- (unsigned long) &flush_hash_patch_B[1]);
+ modify_instruction_site(&patch__flush_hash_A0, 0xffff, (unsigned int)Hash >> 16);
+ modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6);
+ modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6);
+ modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
}
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index c3fdf2969d9f..bc3914d54e26 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -19,6 +19,7 @@
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
+#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/smp.h>
@@ -58,27 +59,19 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
}
-static void assert_slb_exists(unsigned long ea)
+static void assert_slb_presence(bool present, unsigned long ea)
{
#ifdef CONFIG_DEBUG_VM
unsigned long tmp;
WARN_ON_ONCE(mfmsr() & MSR_EE);
- asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
- WARN_ON(tmp == 0);
-#endif
-}
-
-static void assert_slb_notexists(unsigned long ea)
-{
-#ifdef CONFIG_DEBUG_VM
- unsigned long tmp;
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ return;
- WARN_ON_ONCE(mfmsr() & MSR_EE);
+ asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
- asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
- WARN_ON(tmp != 0);
+ WARN_ON(present == (tmp == 0));
#endif
}
@@ -114,7 +107,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
*/
slb_shadow_update(ea, ssize, flags, index);
- assert_slb_notexists(ea);
+ assert_slb_presence(false, ea);
asm volatile("slbmte %0,%1" :
: "r" (mk_vsid_data(ea, ssize, flags)),
"r" (mk_esid_data(ea, ssize, index))
@@ -137,7 +130,7 @@ void __slb_restore_bolted_realmode(void)
"r" (be64_to_cpu(p->save_area[index].esid)));
}
- assert_slb_exists(local_paca->kstack);
+ assert_slb_presence(true, local_paca->kstack);
}
/*
@@ -185,7 +178,7 @@ void slb_flush_and_restore_bolted(void)
:: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)),
"r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid))
: "memory");
- assert_slb_exists(get_paca()->kstack);
+ assert_slb_presence(true, get_paca()->kstack);
get_paca()->slb_cache_ptr = 0;
@@ -443,9 +436,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
ea = (unsigned long)
get_paca()->slb_cache[i] << SID_SHIFT;
/*
- * Could assert_slb_exists here, but hypervisor
- * or machine check could have come in and
- * removed the entry at this point.
+ * Could assert_slb_presence(true) here, but
+ * hypervisor or machine check could have come
+ * in and removed the entry at this point.
*/
slbie_data = ea;
@@ -676,7 +669,7 @@ static long slb_insert_entry(unsigned long ea, unsigned long context,
* User preloads should add isync afterwards in case the kernel
* accesses user memory before it returns to userspace with rfid.
*/
- assert_slb_notexists(ea);
+ assert_slb_presence(false, ea);
asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
barrier();
@@ -715,7 +708,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
return -EFAULT;
if (ea < H_VMALLOC_END)
- flags = get_paca()->vmalloc_sllp;
+ flags = local_paca->vmalloc_sllp;
else
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
} else {
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 7fd20c52a8ec..9ed90064f542 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -70,6 +70,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
std r15,EX_TLB_R15(r12)
std r10,EX_TLB_CR(r12)
#ifdef CONFIG_PPC_FSL_BOOK3E
+START_BTB_FLUSH_SECTION
+ mfspr r11, SPRN_SRR1
+ andi. r10,r11,MSR_PR
+ beq 1f
+ BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
std r7,EX_TLB_R7(r12)
#endif
TLB_MISS_PROLOG_STATS