summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S2
-rw-r--r--arch/powerpc/mm/book3s32/kuap.c20
-rw-r--r--arch/powerpc/mm/book3s32/mmu_context.c2
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c1
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c2
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c65
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c272
-rw-r--r--arch/powerpc/mm/book3s64/slb.c1
-rw-r--r--arch/powerpc/mm/init_32.c2
-rw-r--r--arch/powerpc/mm/init_64.c127
-rw-r--r--arch/powerpc/mm/mmu_context.c8
-rw-r--r--arch/powerpc/mm/mmu_decl.h1
-rw-r--r--arch/powerpc/mm/nohash/kup.c8
-rw-r--r--arch/powerpc/mm/nohash/tlb.c19
-rw-r--r--arch/powerpc/mm/numa.c1
15 files changed, 304 insertions, 227 deletions
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index a5a21d444e72..8b804e1a9fa4 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -14,6 +14,7 @@
* hash table, so this file is not used on them.)
*/
+#include <linux/export.h>
#include <linux/pgtable.h>
#include <linux/init.h>
#include <asm/reg.h>
@@ -22,7 +23,6 @@
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
#include <asm/code-patching-asm.h>
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
index 28676cabb005..3a8815555a48 100644
--- a/arch/powerpc/mm/book3s32/kuap.c
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -3,25 +3,11 @@
#include <asm/kup.h>
#include <asm/smp.h>
-struct static_key_false disable_kuap_key;
-EXPORT_SYMBOL(disable_kuap_key);
-
-void kuap_lock_all_ool(void)
-{
- kuap_lock_all();
-}
-EXPORT_SYMBOL(kuap_lock_all_ool);
-
-void kuap_unlock_all_ool(void)
-{
- kuap_unlock_all();
-}
-EXPORT_SYMBOL(kuap_unlock_all_ool);
-
void setup_kuap(bool disabled)
{
if (!disabled) {
- kuap_lock_all_ool();
+ update_user_segments(mfsr(0) | SR_KS);
+ isync(); /* Context sync required after mtsr() */
init_mm.context.sr0 |= SR_KS;
current->thread.sr0 |= SR_KS;
}
@@ -30,7 +16,7 @@ void setup_kuap(bool disabled)
return;
if (disabled)
- static_branch_enable(&disable_kuap_key);
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
else
pr_info("Activating Kernel Userspace Access Protection\n");
}
diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c
index 269a3eb25a73..1922f9a6b058 100644
--- a/arch/powerpc/mm/book3s32/mmu_context.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -71,7 +71,7 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
mm->context.id = __init_new_context();
mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0);
- if (!kuep_is_disabled())
+ if (IS_ENABLED(CONFIG_PPC_KUEP))
mm->context.sr0 |= SR_NX;
if (!kuap_is_disabled())
mm->context.sr0 |= SR_KS;
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 1498ccd08367..8f8a62d3ff4d 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -9,6 +9,7 @@
#include <linux/memremap.h>
#include <linux/pkeys.h>
#include <linux/debugfs.h>
+#include <linux/proc_fs.h>
#include <misc/cxl-base.h>
#include <asm/pgalloc.h>
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 1d2675ab6711..125733962033 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -291,7 +291,7 @@ void setup_kuap(bool disabled)
if (smp_processor_id() == boot_cpuid) {
pr_info("Activating Kernel Userspace Access Prevention\n");
- cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUAP;
+ cur_cpu_spec->mmu_features |= MMU_FTR_KUAP;
}
/*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 96679018e7fb..c6a4ac766b2b 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -37,7 +37,6 @@
#include <mm/mmu_decl.h>
unsigned int mmu_base_pid;
-unsigned long radix_mem_block_size __ro_after_init;
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
@@ -300,7 +299,7 @@ static int __meminit create_physical_mapping(unsigned long start,
bool prev_exec, exec = false;
pgprot_t prot;
int psize;
- unsigned long max_mapping_size = radix_mem_block_size;
+ unsigned long max_mapping_size = memory_block_size;
if (debug_pagealloc_enabled_or_kfence())
max_mapping_size = PAGE_SIZE;
@@ -502,58 +501,6 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
return 1;
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int __init probe_memory_block_size(unsigned long node, const char *uname, int
- depth, void *data)
-{
- unsigned long *mem_block_size = (unsigned long *)data;
- const __be32 *prop;
- int len;
-
- if (depth != 1)
- return 0;
-
- if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
- return 0;
-
- prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
-
- if (!prop || len < dt_root_size_cells * sizeof(__be32))
- /*
- * Nothing in the device tree
- */
- *mem_block_size = MIN_MEMORY_BLOCK_SIZE;
- else
- *mem_block_size = of_read_number(prop, dt_root_size_cells);
- return 1;
-}
-
-static unsigned long __init radix_memory_block_size(void)
-{
- unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-
- /*
- * OPAL firmware feature is set by now. Hence we are ok
- * to test OPAL feature.
- */
- if (firmware_has_feature(FW_FEATURE_OPAL))
- mem_block_size = 1UL * 1024 * 1024 * 1024;
- else
- of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
-
- return mem_block_size;
-}
-
-#else /* CONFIG_MEMORY_HOTPLUG */
-
-static unsigned long __init radix_memory_block_size(void)
-{
- return 1UL * 1024 * 1024 * 1024;
-}
-
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
-
void __init radix__early_init_devtree(void)
{
int rc;
@@ -577,16 +524,6 @@ void __init radix__early_init_devtree(void)
mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
psize_to_rpti_pgsize(MMU_PAGE_64K);
}
-
- /*
- * Max mapping size used when mapping pages. We don't use
- * ppc_md.memory_block_size() here because this get called
- * early and we don't have machine probe called yet. Also
- * the pseries implementation only check for ibm,lmb-size.
- * All hypervisor supporting radix do expose that device
- * tree node.
- */
- radix_mem_block_size = radix_memory_block_size();
return;
}
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 3020a8b38572..39acc2cbab4c 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -127,21 +127,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static __always_inline void __tlbie_pid_lpid(unsigned long pid,
- unsigned long lpid,
- unsigned long ric)
-{
- unsigned long rb, rs, prs, r;
-
- rb = PPC_BIT(53); /* IS = 1 */
- rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
- prs = 1; /* process scoped */
- r = 1; /* radix format */
-
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -202,23 +187,6 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
- unsigned long lpid,
- unsigned long ap, unsigned long ric)
-{
- unsigned long rb, rs, prs, r;
-
- rb = va & ~(PPC_BITMASK(52, 63));
- rb |= ap << PPC_BITLSHIFT(58);
- rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
- prs = 1; /* process scoped */
- r = 1; /* radix format */
-
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
-
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap, unsigned long ric)
{
@@ -264,22 +232,6 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
}
}
-static inline void fixup_tlbie_va_range_lpid(unsigned long va,
- unsigned long pid,
- unsigned long lpid,
- unsigned long ap)
-{
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
- asm volatile("ptesync" : : : "memory");
- __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
- }
-
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
- asm volatile("ptesync" : : : "memory");
- __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
- }
-}
-
static inline void fixup_tlbie_pid(unsigned long pid)
{
/*
@@ -299,26 +251,6 @@ static inline void fixup_tlbie_pid(unsigned long pid)
}
}
-static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
-{
- /*
- * We can use any address for the invalidation, pick one which is
- * probably unused as an optimisation.
- */
- unsigned long va = ((1UL << 52) - 1);
-
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
- asm volatile("ptesync" : : : "memory");
- __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
- }
-
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
- asm volatile("ptesync" : : : "memory");
- __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
- RIC_FLUSH_TLB);
- }
-}
-
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap)
{
@@ -416,31 +348,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
- unsigned long ric)
-{
- asm volatile("ptesync" : : : "memory");
-
- /*
- * Workaround the fact that the "ric" argument to __tlbie_pid
- * must be a compile-time contraint to match the "i" constraint
- * in the asm statement.
- */
- switch (ric) {
- case RIC_FLUSH_TLB:
- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
- fixup_tlbie_pid_lpid(pid, lpid);
- break;
- case RIC_FLUSH_PWC:
- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
- break;
- case RIC_FLUSH_ALL:
- default:
- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
- fixup_tlbie_pid_lpid(pid, lpid);
- }
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
struct tlbiel_pid {
unsigned long pid;
unsigned long ric;
@@ -566,20 +473,6 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
fixup_tlbie_va_range(addr - page_size, pid, ap);
}
-static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
- unsigned long pid, unsigned long lpid,
- unsigned long page_size,
- unsigned long psize)
-{
- unsigned long addr;
- unsigned long ap = mmu_get_ap(psize);
-
- for (addr = start; addr < end; addr += page_size)
- __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
-
- fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
-}
-
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
@@ -660,18 +553,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
- unsigned long pid, unsigned long lpid,
- unsigned long page_size,
- unsigned long psize, bool also_pwc)
-{
- asm volatile("ptesync" : : : "memory");
- if (also_pwc)
- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
- __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
-
static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
@@ -820,7 +701,7 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
* that's what the caller expects.
*/
if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
- atomic_dec(&mm->context.active_cpus);
+ dec_mm_active_cpus(mm);
cpumask_clear_cpu(cpu, mm_cpumask(mm));
always_flush = true;
}
@@ -1316,7 +1197,35 @@ void radix__tlb_flush(struct mmu_gather *tlb)
* See the comment for radix in arch_exit_mmap().
*/
if (tlb->fullmm) {
- __flush_all_mm(mm, true);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+ /*
+ * Shootdown based lazy tlb mm refcounting means we
+ * have to IPI everyone in the mm_cpumask anyway soon
+ * when the mm goes away, so might as well do it as
+ * part of the final flush now.
+ *
+ * If lazy shootdown was improved to reduce IPIs (e.g.,
+ * by batching), then it may end up being better to use
+ * tlbies here instead.
+ */
+ preempt_disable();
+
+ smp_mb(); /* see radix__flush_tlb_mm */
+ exit_flush_lazy_tlbs(mm);
+ _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL);
+
+ /*
+ * It should not be possible to have coprocessors still
+ * attached here.
+ */
+ if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0))
+ __flush_all_mm(mm, true);
+
+ preempt_enable();
+ } else {
+ __flush_all_mm(mm, true);
+ }
+
} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
if (!tlb->freed_tables)
radix__flush_tlb_mm(mm);
@@ -1497,6 +1406,127 @@ void radix__flush_tlb_all(void)
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+ unsigned long lpid,
+ unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = va & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
+ unsigned long va = ((1UL << 52) - 1);
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+ RIC_FLUSH_TLB);
+ }
+}
+
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+ unsigned long ric)
+{
+ asm volatile("ptesync" : : : "memory");
+
+ /*
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ }
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+ unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync" : : : "memory");
+ if (also_pwc)
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
/*
* Performs process-scoped invalidations for a given LPID
* as part of H_RPT_INVALIDATE hcall.
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 6956f637a38c..f2708c8629a5 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -13,6 +13,7 @@
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
+#include <asm/lppaca.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index d4cc3749e621..d8adc452f431 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -126,6 +126,8 @@ void __init MMU_init(void)
setup_kup();
+ update_mmu_feature_fixups(MMU_FTR_KUAP);
+
/* Shortly after that, the entire linear mapping will be available */
memblock_set_current_limit(lowmem_end_addr);
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index e0208cb12058..d96bbc001e73 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -40,6 +40,7 @@
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <linux/memremap.h>
+#include <linux/memory.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -493,6 +494,130 @@ static int __init dt_scan_mmu_pid_width(unsigned long node,
return 1;
}
+/*
+ * Outside hotplug the kernel uses this value to map the kernel direct map
+ * with radix. To be compatible with older kernels, let's keep this value
+ * as 16M which is also SECTION_SIZE with SPARSEMEM. We can ideally map
+ * things with 1GB size in the case where we don't support hotplug.
+ */
+#ifndef CONFIG_MEMORY_HOTPLUG
+#define DEFAULT_MEMORY_BLOCK_SIZE SZ_16M
+#else
+#define DEFAULT_MEMORY_BLOCK_SIZE MIN_MEMORY_BLOCK_SIZE
+#endif
+
+static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size)
+{
+ unsigned long min_memory_block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+
+ for (; *block_size > min_memory_block_size; *block_size >>= 2) {
+ if ((mem_size & *block_size) == 0)
+ break;
+ }
+}
+
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+ depth, void *data)
+{
+ const char *type;
+ unsigned long *block_size = (unsigned long *)data;
+ const __be32 *reg, *endp;
+ int l;
+
+ if (depth != 1)
+ return 0;
+ /*
+ * If we have dynamic-reconfiguration-memory node, use the
+ * lmb value.
+ */
+ if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+
+ const __be32 *prop;
+
+ prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+
+ if (!prop || l < dt_root_size_cells * sizeof(__be32))
+ /*
+ * Nothing in the device tree
+ */
+ *block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+ else
+ *block_size = of_read_number(prop, dt_root_size_cells);
+ /*
+ * We have found the final value. Don't probe further.
+ */
+ return 1;
+ }
+ /*
+ * Find all the device tree nodes of memory type and make sure
+ * the area can be mapped using the memory block size value
+ * we end up using. We start with 1G value and keep reducing
+ * it such that we can map the entire area using memory_block_size.
+ * This will be used on powernv and older pseries that don't
+ * have ibm,lmb-size node.
+ * For ex: with P5 we can end up with
+ * memory@0 -> 128MB
+ * memory@128M -> 64M
+ * This will end up using 64MB memory block size value.
+ */
+ type = of_get_flat_dt_prop(node, "device_type", NULL);
+ if (type == NULL || strcmp(type, "memory") != 0)
+ return 0;
+
+ reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+ if (!reg)
+ reg = of_get_flat_dt_prop(node, "reg", &l);
+ if (!reg)
+ return 0;
+
+ endp = reg + (l / sizeof(__be32));
+ while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+ const char *compatible;
+ u64 size;
+
+ dt_mem_next_cell(dt_root_addr_cells, &reg);
+ size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+ if (size) {
+ update_memory_block_size(block_size, size);
+ continue;
+ }
+ /*
+ * ibm,coherent-device-memory with linux,usable-memory = 0
+ * Force 256MiB block size. Work around for GPUs on P9 PowerNV
+ * linux,usable-memory == 0 implies driver managed memory and
+ * we can't use large memory block size due to hotplug/unplug
+ * limitations.
+ */
+ compatible = of_get_flat_dt_prop(node, "compatible", NULL);
+ if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
+ if (*block_size > SZ_256M)
+ *block_size = SZ_256M;
+ /*
+ * We keep 256M as the upper limit with GPU present.
+ */
+ return 0;
+ }
+ }
+ /* continue looking for other memory device types */
+ return 0;
+}
+
+/*
+ * start with 1G memory block size. Early init will
+ * fix this with correct value.
+ */
+unsigned long memory_block_size __ro_after_init = 1UL << 30;
+static void __init early_init_memory_block_size(void)
+{
+ /*
+ * We need to do memory_block_size probe early so that
+ * radix__early_init_mmu() can use this as limit for
+ * mapping page size.
+ */
+ of_scan_flat_dt(probe_memory_block_size, &memory_block_size);
+}
+
void __init mmu_early_init_devtree(void)
{
bool hvmode = !!(mfmsr() & MSR_HV);
@@ -526,6 +651,8 @@ void __init mmu_early_init_devtree(void)
if (!hvmode)
early_check_vec5();
+ early_init_memory_block_size();
+
if (early_radix_enabled()) {
radix__early_init_devtree();
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 1fb9c99f8679..b24c19078eb1 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -43,11 +43,13 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
+ int cpu = smp_processor_id();
bool new_on_cpu = false;
/* Mark this context has been used on the new CPU */
- if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+ VM_WARN_ON_ONCE(next == &init_mm);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
inc_mm_active_cpus(next);
/*
@@ -100,6 +102,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* sub architectures. Out of line for now
*/
switch_mmu_context(prev, next, tsk);
+
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(prev)));
}
#ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c6dccb4f06dc..7f9ff0640124 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -110,6 +110,7 @@ extern void MMU_init_hw(void);
void MMU_init_hw_patch(void);
unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
#endif
+void mmu_init_secondary(int cpu);
#ifdef CONFIG_PPC_E500
extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx,
diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c
index 552becf90e97..e1f7de2e54ec 100644
--- a/arch/powerpc/mm/nohash/kup.c
+++ b/arch/powerpc/mm/nohash/kup.c
@@ -5,7 +5,6 @@
#include <linux/export.h>
#include <linux/init.h>
-#include <linux/jump_label.h>
#include <linux/printk.h>
#include <linux/smp.h>
@@ -13,21 +12,18 @@
#include <asm/smp.h>
#ifdef CONFIG_PPC_KUAP
-struct static_key_false disable_kuap_key;
-EXPORT_SYMBOL(disable_kuap_key);
-
void setup_kuap(bool disabled)
{
if (disabled) {
if (IS_ENABLED(CONFIG_40x))
disable_kuep = true;
if (smp_processor_id() == boot_cpuid)
- static_branch_enable(&disable_kuap_key);
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
return;
}
pr_info("Activating Kernel Userspace Access Protection\n");
- __prevent_user_access(KUAP_READ_WRITE);
+ prevent_user_access(KUAP_READ_WRITE);
}
#endif
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index a903b308acc5..5ffa0af4328a 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -318,17 +318,6 @@ EXPORT_SYMBOL(flush_tlb_page);
#endif /* CONFIG_SMP */
-#ifdef CONFIG_PPC_47x
-void __init early_init_mmu_47x(void)
-{
-#ifdef CONFIG_SMP
- unsigned long root = of_get_flat_dt_root();
- if (of_get_flat_dt_prop(root, "cooperative-partition", NULL))
- mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
-#endif /* CONFIG_SMP */
-}
-#endif /* CONFIG_PPC_47x */
-
/*
* Flush kernel TLB entries in the given range
*/
@@ -746,8 +735,10 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
#else /* ! CONFIG_PPC64 */
void __init early_init_mmu(void)
{
-#ifdef CONFIG_PPC_47x
- early_init_mmu_47x();
-#endif
+ unsigned long root = of_get_flat_dt_root();
+
+ if (IS_ENABLED(CONFIG_PPC_47x) && IS_ENABLED(CONFIG_SMP) &&
+ of_get_flat_dt_prop(root, "cooperative-partition", NULL))
+ mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9f73d089eac1..f6c4ace3b221 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -34,6 +34,7 @@
#include <asm/hvcall.h>
#include <asm/setup.h>
#include <asm/vdso.h>
+#include <asm/vphn.h>
#include <asm/drmem.h>
static int numa_enabled = 1;