diff options
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- | arch/powerpc/platforms/83xx/usb.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/8xx/pic.c | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/Kconfig.cputype | 45 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spu_base.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/embedded6xx/holly.c | 19 | ||||
-rw-r--r-- | arch/powerpc/platforms/powermac/Makefile | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/idle.c | 902 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-call.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-imc.c | 5 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal.c | 23 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-ioda.c | 35 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.h | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/setup.c | 5 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/subcore.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/hotplug-memory.c | 17 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/iommu.c | 13 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/pmem.c | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 135 |
19 files changed, 970 insertions, 262 deletions
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c index 5c31d8292d3b..e7c2c3fb011a 100644 --- a/arch/powerpc/platforms/83xx/usb.c +++ b/arch/powerpc/platforms/83xx/usb.c @@ -221,8 +221,10 @@ int mpc837x_usb_cfg(void) int ret = 0; np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); - if (!np || !of_device_is_available(np)) + if (!np || !of_device_is_available(np)) { + of_node_put(np); return -ENODEV; + } prop = of_get_property(np, "phy_type", NULL); if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) { diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index 8d5a25d43ef3..e9617d35fd1f 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,10 +153,9 @@ int mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; - goto out; } - return 0; + ret = 0; out: of_node_put(np); return ret; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 50cd09b4e05d..d0e172d47574 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -25,6 +25,8 @@ config PPC_BOOK3S_32 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" select PPC_FPU select PPC_HAVE_PMU_SUPPORT + select PPC_HAVE_KUEP + select PPC_HAVE_KUAP config PPC_85xx bool "Freescale 85xx" @@ -34,6 +36,9 @@ config PPC_8xx bool "Freescale 8xx" select FSL_SOC select SYS_SUPPORTS_HUGETLBFS + select PPC_HAVE_KUEP + select PPC_HAVE_KUAP + select PPC_MM_SLICES if HUGETLB_PAGE config 40x bool "AMCC 40x" @@ -75,6 +80,7 @@ config PPC_BOOK3S_64 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK + select PPC_MM_SLICES config PPC_BOOK3E_64 bool "Embedded processors" @@ -326,6 +332,8 @@ config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 && HUGETLB_PAGE select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA + select PPC_HAVE_KUEP + select PPC_HAVE_KUAP default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this @@ -345,6 +353,37 @@ config PPC_RADIX_MMU_DEFAULT If you're unsure, say Y. +config PPC_HAVE_KUEP + bool + +config PPC_KUEP + bool "Kernel Userspace Execution Prevention" + depends on PPC_HAVE_KUEP + default y + help + Enable support for Kernel Userspace Execution Prevention (KUEP) + + If you're unsure, say Y. + +config PPC_HAVE_KUAP + bool + +config PPC_KUAP + bool "Kernel Userspace Access Protection" + depends on PPC_HAVE_KUAP + default y + help + Enable support for Kernel Userspace Access Protection (KUAP) + + If you're unsure, say Y. + +config PPC_KUAP_DEBUG + bool "Extra debugging for Kernel Userspace Access Protection" + depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32) + help + Add extra debugging for Kernel Userspace Access Protection (KUAP) + If you're unsure, say N. + config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION @@ -354,14 +393,16 @@ config PPC_MMU_NOHASH def_bool y depends on !PPC_BOOK3S +config PPC_MMU_NOHASH_32 + def_bool y + depends on PPC_MMU_NOHASH && PPC32 + config PPC_BOOK3E_MMU def_bool y depends on FSL_BOOKE || PPC_BOOK3E config PPC_MM_SLICES bool - default y if PPC_BOOK3S_64 - default y if PPC_8xx && HUGETLB_PAGE config PPC_HAVE_PMU_SUPPORT bool diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 7f12c7b78c0f..6646f152d57b 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -194,7 +194,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) * faults need to be deferred to process context. */ if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) && - (REGION_ID(ea) != USER_REGION_ID)) { + (get_region_id(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); ret = hash_page(ea, @@ -224,7 +224,7 @@ static void __spu_kernel_slb(void *addr, struct copro_slb *slb) unsigned long ea = (unsigned long)addr; u64 llp; - if (REGION_ID(ea) == KERNEL_REGION_ID) + if (get_region_id(ea) == LINEAR_MAP_REGION_ID) llp = mmu_psize_defs[mmu_linear_psize].sllp; else llp = mmu_psize_defs[mmu_virtual_psize].sllp; diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 0409714e8070..829bf3697dc9 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -44,7 +44,8 @@ #define HOLLY_PCI_CFG_PHYS 0x7c000000 -int holly_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn) +static int holly_exclude_device(struct pci_controller *hose, u_char bus, + u_char devfn) { if (bus == 0 && PCI_SLOT(devfn) == 0) return PCIBIOS_DEVICE_NOT_FOUND; @@ -187,13 +188,13 @@ static void __init holly_init_IRQ(void) tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0); } -void holly_show_cpuinfo(struct seq_file *m) +static void holly_show_cpuinfo(struct seq_file *m) { seq_printf(m, "vendor\t\t: IBM\n"); seq_printf(m, "machine\t\t: PPC750 GX/CL\n"); } -void __noreturn holly_restart(char *cmd) +static void __noreturn holly_restart(char *cmd) { __be32 __iomem *ocn_bar1 = NULL; unsigned long bar; @@ -233,18 +234,6 @@ void __noreturn holly_restart(char *cmd) for (;;) ; } -void holly_power_off(void) -{ - local_irq_disable(); - /* No way to shut power off with software */ - for (;;) ; -} - -void holly_halt(void) -{ - holly_power_off(); -} - /* * Called very early, device-tree isn't unflattened */ diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index 20ebf35d7913..f4247ade71ca 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -2,6 +2,12 @@ CFLAGS_bootx_init.o += -fPIC CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector) +KASAN_SANITIZE_bootx_init.o := n + +ifdef CONFIG_KASAN +CFLAGS_bootx_init.o += -DDISABLE_BRANCH_PROFILING +endif + ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE) diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index e52f9b06dd9c..c9133f7908ca 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -16,6 +16,7 @@ #include <linux/device.h> #include <linux/cpu.h> +#include <asm/asm-prototypes.h> #include <asm/firmware.h> #include <asm/machdep.h> #include <asm/opal.h> @@ -48,10 +49,10 @@ static u64 pnv_default_stop_mask; static bool default_stop_found; /* - * First deep stop state. Used to figure out when to save/restore - * hypervisor context. + * First stop state levels when SPR and TB loss can occur. */ -u64 pnv_first_deep_stop_state = MAX_STOP_STATE; +static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; +static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1; /* * psscr value and mask of the deepest stop idle state. @@ -62,6 +63,8 @@ static u64 pnv_deepest_stop_psscr_mask; static u64 pnv_deepest_stop_flag; static bool deepest_stop_found; +static unsigned long power7_offline_type; + static int pnv_save_sprs_for_deep_states(void) { int cpu; @@ -72,12 +75,12 @@ static int pnv_save_sprs_for_deep_states(void) * all cpus at boot. Get these reg values of current cpu and use the * same across all cpus. */ - uint64_t lpcr_val = mfspr(SPRN_LPCR); - uint64_t hid0_val = mfspr(SPRN_HID0); - uint64_t hid1_val = mfspr(SPRN_HID1); - uint64_t hid4_val = mfspr(SPRN_HID4); - uint64_t hid5_val = mfspr(SPRN_HID5); - uint64_t hmeer_val = mfspr(SPRN_HMEER); + uint64_t lpcr_val = mfspr(SPRN_LPCR); + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); + uint64_t hmeer_val = mfspr(SPRN_HMEER); uint64_t msr_val = MSR_IDLE; uint64_t psscr_val = pnv_deepest_stop_psscr_val; @@ -137,89 +140,6 @@ static int pnv_save_sprs_for_deep_states(void) return 0; } -static void pnv_alloc_idle_core_states(void) -{ - int i, j; - int nr_cores = cpu_nr_cores(); - u32 *core_idle_state; - - /* - * core_idle_state - The lower 8 bits track the idle state of - * each thread of the core. - * - * The most significant bit is the lock bit. - * - * Initially all the bits corresponding to threads_per_core - * are set. They are cleared when the thread enters deep idle - * state like sleep and winkle/stop. - * - * Initially the lock bit is cleared. The lock bit has 2 - * purposes: - * a. While the first thread in the core waking up from - * idle is restoring core state, it prevents other - * threads in the core from switching to process - * context. - * b. While the last thread in the core is saving the - * core state, it prevents a different thread from - * waking up. - */ - for (i = 0; i < nr_cores; i++) { - int first_cpu = i * threads_per_core; - int node = cpu_to_node(first_cpu); - size_t paca_ptr_array_size; - - core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); - *core_idle_state = (1 << threads_per_core) - 1; - paca_ptr_array_size = (threads_per_core * - sizeof(struct paca_struct *)); - - for (j = 0; j < threads_per_core; j++) { - int cpu = first_cpu + j; - - paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state; - paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING; - paca_ptrs[cpu]->thread_mask = 1 << j; - } - } - - update_subcore_sibling_mask(); - - if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { - int rc = pnv_save_sprs_for_deep_states(); - - if (likely(!rc)) - return; - - /* - * The stop-api is unable to restore hypervisor - * resources on wakeup from platform idle states which - * lose full context. So disable such states. - */ - supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; - pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); - pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); - - if (cpu_has_feature(CPU_FTR_ARCH_300) && - (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { - /* - * Use the default stop state for CPU-Hotplug - * if available. - */ - if (default_stop_found) { - pnv_deepest_stop_psscr_val = - pnv_default_stop_val; - pnv_deepest_stop_psscr_mask = - pnv_default_stop_mask; - pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", - pnv_deepest_stop_psscr_val); - } else { /* Fallback to snooze loop for CPU-Hotplug */ - deepest_stop_found = false; - pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); - } - } - } -} - u32 pnv_get_supported_cpuidle_states(void) { return supported_cpuidle_states; @@ -238,6 +158,9 @@ static void pnv_fastsleep_workaround_apply(void *info) *err = 1; } +static bool power7_fastsleep_workaround_entry = true; +static bool power7_fastsleep_workaround_exit = true; + /* * Used to store fastsleep workaround state * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) @@ -269,21 +192,15 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, * fastsleep_workaround_applyonce = 1 implies * fastsleep workaround needs to be left in 'applied' state on all * the cores. Do this by- - * 1. Patching out the call to 'undo' workaround in fastsleep exit path - * 2. Sending ipi to all the cores which have at least one online thread - * 3. Patching out the call to 'apply' workaround in fastsleep entry - * path + * 1. Disable the 'undo' workaround in fastsleep exit path + * 2. Sendi IPIs to all the cores which have at least one online thread + * 3. Disable the 'apply' workaround in fastsleep entry path + * * There is no need to send ipi to cores which have all threads * offlined, as last thread of the core entering fastsleep or deeper * state would have applied workaround. */ - err = patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_exit, - PPC_INST_NOP); - if (err) { - pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit"); - goto fail; - } + power7_fastsleep_workaround_exit = false; get_online_cpus(); primary_thread_mask = cpu_online_cores_map(); @@ -296,13 +213,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, goto fail; } - err = patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_entry, - PPC_INST_NOP); - if (err) { - pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry"); - goto fail; - } + power7_fastsleep_workaround_entry = false; fastsleep_workaround_applyonce = 1; @@ -315,27 +226,346 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, show_fastsleep_workaround_applyonce, store_fastsleep_workaround_applyonce); -static unsigned long __power7_idle_type(unsigned long type) +static inline void atomic_start_thread_idle(void) { + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + int thread_nr = cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + clear_bit(thread_nr, state); +} + +static inline void atomic_stop_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + int thread_nr = cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + set_bit(thread_nr, state); +} + +static inline void atomic_lock_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))) + barrier(); +} + +static inline void atomic_unlock_and_stop_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long thread = 1UL << cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + u64 s = READ_ONCE(*state); + u64 new, tmp; + + BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); + BUG_ON(s & thread); + +again: + new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT; + tmp = cmpxchg(state, s, new); + if (unlikely(tmp != s)) { + s = tmp; + goto again; + } +} + +static inline void atomic_unlock_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state)); + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state); +} + +/* P7 and P8 */ +struct p7_sprs { + /* per core */ + u64 tscr; + u64 worc; + + /* per subcore */ + u64 sdr1; + u64 rpr; + + /* per thread */ + u64 lpcr; + u64 hfscr; + u64 fscr; + u64 purr; + u64 spurr; + u64 dscr; + u64 wort; + + /* per thread SPRs that get lost in shallow states */ + u64 amr; + u64 iamr; + u64 amor; + u64 uamor; +}; + +static unsigned long power7_idle_insn(unsigned long type) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long thread = 1UL << cpu_thread_in_core(cpu); + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; unsigned long srr1; + bool full_winkle; + struct p7_sprs sprs = {}; /* avoid false use-uninitialised */ + bool sprs_saved = false; + int rc; - if (!prep_irq_for_idle_irqsoff()) - return 0; + if (unlikely(type != PNV_THREAD_NAP)) { + atomic_lock_thread_idle(); + + BUG_ON(!(*state & thread)); + *state &= ~thread; + + if (power7_fastsleep_workaround_entry) { + if ((*state & core_thread_mask) == 0) { + rc = opal_config_cpu_idle_state( + OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_APPLY); + BUG_ON(rc); + } + } + + if (type == PNV_THREAD_WINKLE) { + sprs.tscr = mfspr(SPRN_TSCR); + sprs.worc = mfspr(SPRN_WORC); + + sprs.sdr1 = mfspr(SPRN_SDR1); + sprs.rpr = mfspr(SPRN_RPR); + + sprs.lpcr = mfspr(SPRN_LPCR); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + sprs.hfscr = mfspr(SPRN_HFSCR); + sprs.fscr = mfspr(SPRN_FSCR); + } + sprs.purr = mfspr(SPRN_PURR); + sprs.spurr = mfspr(SPRN_SPURR); + sprs.dscr = mfspr(SPRN_DSCR); + sprs.wort = mfspr(SPRN_WORT); + + sprs_saved = true; + + /* + * Increment winkle counter and set all winkle bits if + * all threads are winkling. This allows wakeup side to + * distinguish between fast sleep and winkle state + * loss. Fast sleep still has to resync the timebase so + * this may not be a really big win. + */ + *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) + >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT + == threads_per_core) + *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS; + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + } + + atomic_unlock_thread_idle(); + } + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + sprs.amr = mfspr(SPRN_AMR); + sprs.iamr = mfspr(SPRN_IAMR); + sprs.amor = mfspr(SPRN_AMOR); + sprs.uamor = mfspr(SPRN_UAMOR); + } + + local_paca->thread_idle_state = type; + srr1 = isa206_idle_insn_mayloss(type); /* go idle */ + local_paca->thread_idle_state = PNV_THREAD_RUNNING; + + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { + /* + * We don't need an isync after the mtsprs here because + * the upcoming mtmsrd is execution synchronizing. + */ + mtspr(SPRN_AMR, sprs.amr); + mtspr(SPRN_IAMR, sprs.iamr); + mtspr(SPRN_AMOR, sprs.amor); + mtspr(SPRN_UAMOR, sprs.uamor); + } + } + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) { + if (unlikely(type != PNV_THREAD_NAP)) { + atomic_lock_thread_idle(); + if (type == PNV_THREAD_WINKLE) { + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); + } + atomic_unlock_and_stop_thread_idle(); + } + return srr1; + } + + /* HV state loss */ + BUG_ON(type == PNV_THREAD_NAP); + + atomic_lock_thread_idle(); + + full_winkle = false; + if (type == PNV_THREAD_WINKLE) { + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) { + *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); + full_winkle = true; + BUG_ON(!sprs_saved); + } + } + + WARN_ON(*state & thread); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* Per-core SPRs */ + if (full_winkle) { + mtspr(SPRN_TSCR, sprs.tscr); + mtspr(SPRN_WORC, sprs.worc); + } + + if (power7_fastsleep_workaround_exit) { + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_UNDO); + BUG_ON(rc); + } + + /* TB */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + +core_woken: + if (!full_winkle) + goto subcore_woken; + + if ((*state & local_paca->subcore_sibling_mask) != 0) + goto subcore_woken; + + /* Per-subcore SPRs */ + mtspr(SPRN_SDR1, sprs.sdr1); + mtspr(SPRN_RPR, sprs.rpr); + +subcore_woken: + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + atomic_unlock_and_stop_thread_idle(); + + /* Fast sleep does not lose SPRs */ + if (!full_winkle) + return srr1; + + /* Per-thread SPRs */ + mtspr(SPRN_LPCR, sprs.lpcr); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_HFSCR, sprs.hfscr); + mtspr(SPRN_FSCR, sprs.fscr); + } + mtspr(SPRN_PURR, sprs.purr); + mtspr(SPRN_SPURR, sprs.spurr); + mtspr(SPRN_DSCR, sprs.dscr); + mtspr(SPRN_WORT, sprs.wort); + + mtspr(SPRN_SPRG3, local_paca->sprg_vdso); + + /* + * The SLB has to be restored here, but it sometimes still + * contains entries, so the __ variant must be used to prevent + * multi hits. + */ + __slb_restore_bolted_realmode(); + + return srr1; +} + +extern unsigned long idle_kvm_start_guest(unsigned long srr1); + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long power7_offline(void) +{ + unsigned long srr1; + + mtmsr(MSR_IDLE); + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle. */ + /******************************************************/ + /* N O T E W E L L ! ! ! N O T E W E L L */ + /* The following store to HSTATE_HWTHREAD_STATE(r13) */ + /* MUST occur in real mode, i.e. with the MMU off, */ + /* and the MMU must stay off until we clear this flag */ + /* and test HSTATE_HWTHREAD_REQ(r13) in */ + /* pnv_powersave_wakeup in this file. */ + /* The reason is that another thread can switch the */ + /* MMU to a guest context whenever this flag is set */ + /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ + /* that would potentially cause this thread to start */ + /* executing instructions from guest memory in */ + /* hypervisor mode, leading to a host crash or data */ + /* corruption, or worse. */ + /******************************************************/ + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; +#endif __ppc64_runlatch_off(); - srr1 = power7_idle_insn(type); + srr1 = power7_idle_insn(power7_offline_type); __ppc64_runlatch_on(); - fini_irq_for_idle_irqsoff(); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; + /* Order setting hwthread_state vs. testing hwthread_req */ + smp_mb(); + if (local_paca->kvm_hstate.hwthread_req) + srr1 = idle_kvm_start_guest(srr1); +#endif + + mtmsr(MSR_KERNEL); return srr1; } +#endif void power7_idle_type(unsigned long type) { unsigned long srr1; - srr1 = __power7_idle_type(type); + if (!prep_irq_for_idle_irqsoff()) + return; + + mtmsr(MSR_IDLE); + __ppc64_runlatch_off(); + srr1 = power7_idle_insn(type); + __ppc64_runlatch_on(); + mtmsr(MSR_KERNEL); + + fini_irq_for_idle_irqsoff(); irq_set_pending_from_srr1(srr1); } @@ -347,33 +577,292 @@ void power7_idle(void) power7_idle_type(PNV_THREAD_NAP); } -static unsigned long __power9_idle_type(unsigned long stop_psscr_val, - unsigned long stop_psscr_mask) +struct p9_sprs { + /* per core */ + u64 ptcr; + u64 rpr; + u64 tscr; + u64 ldbar; + + /* per thread */ + u64 lpcr; + u64 hfscr; + u64 fscr; + u64 pid; + u64 purr; + u64 spurr; + u64 dscr; + u64 wort; + + u64 mmcra; + u32 mmcr0; + u32 mmcr1; + u64 mmcr2; + + /* per thread SPRs that get lost in shallow states */ + u64 amr; + u64 iamr; + u64 amor; + u64 uamor; +}; + +static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) { - unsigned long psscr; + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; unsigned long srr1; + unsigned long pls; + unsigned long mmcr0 = 0; + struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ + bool sprs_saved = false; - if (!prep_irq_for_idle_irqsoff()) - return 0; + if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { + /* EC=ESL=0 case */ + + BUG_ON(!mmu_on); + + /* + * Wake synchronously. SRESET via xscom may still cause + * a 0x100 powersave wakeup with SRR1 reason! + */ + srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ + if (likely(!srr1)) + return 0; + + /* + * Registers not saved, can't recover! + * This would be a hardware bug + */ + BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); + + goto out; + } + + /* EC=ESL=1 case */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) { + local_paca->requested_psscr = psscr; + /* order setting requested_psscr vs testing dont_stop */ + smp_mb(); + if (atomic_read(&local_paca->dont_stop)) { + local_paca->requested_psscr = 0; + return 0; + } + } +#endif + + if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { + /* + * POWER9 DD2 can incorrectly set PMAO when waking up + * after a state-loss idle. Saving and restoring MMCR0 + * over idle is a workaround. + */ + mmcr0 = mfspr(SPRN_MMCR0); + } + if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) { + sprs.lpcr = mfspr(SPRN_LPCR); + sprs.hfscr = mfspr(SPRN_HFSCR); + sprs.fscr = mfspr(SPRN_FSCR); + sprs.pid = mfspr(SPRN_PID); + sprs.purr = mfspr(SPRN_PURR); + sprs.spurr = mfspr(SPRN_SPURR); + sprs.dscr = mfspr(SPRN_DSCR); + sprs.wort = mfspr(SPRN_WORT); + + sprs.mmcra = mfspr(SPRN_MMCRA); + sprs.mmcr0 = mfspr(SPRN_MMCR0); + sprs.mmcr1 = mfspr(SPRN_MMCR1); + sprs.mmcr2 = mfspr(SPRN_MMCR2); + + sprs.ptcr = mfspr(SPRN_PTCR); + sprs.rpr = mfspr(SPRN_RPR); + sprs.tscr = mfspr(SPRN_TSCR); + sprs.ldbar = mfspr(SPRN_LDBAR); + + sprs_saved = true; + + atomic_start_thread_idle(); + } + + sprs.amr = mfspr(SPRN_AMR); + sprs.iamr = mfspr(SPRN_IAMR); + sprs.amor = mfspr(SPRN_AMOR); + sprs.uamor = mfspr(SPRN_UAMOR); + + srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + local_paca->requested_psscr = 0; +#endif psscr = mfspr(SPRN_PSSCR); - psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { + unsigned long mmcra; + + /* + * We don't need an isync after the mtsprs here because the + * upcoming mtmsrd is execution synchronizing. + */ + mtspr(SPRN_AMR, sprs.amr); + mtspr(SPRN_IAMR, sprs.iamr); + mtspr(SPRN_AMOR, sprs.amor); + mtspr(SPRN_UAMOR, sprs.uamor); + + /* + * Workaround for POWER9 DD2.0, if we lost resources, the ERAT + * might have been corrupted and needs flushing. We also need + * to reload MMCR0 (see mmcr0 comment above). + */ + if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { + asm volatile(PPC_INVALIDATE_ERAT); + mtspr(SPRN_MMCR0, mmcr0); + } + + /* + * DD2.2 and earlier need to set then clear bit 60 in MMCRA + * to ensure the PMU starts running. + */ + mmcra = mfspr(SPRN_MMCRA); + mmcra |= PPC_BIT(60); + mtspr(SPRN_MMCRA, mmcra); + mmcra &= ~PPC_BIT(60); + mtspr(SPRN_MMCRA, mmcra); + } + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + /* + * On POWER9, SRR1 bits do not match exactly as expected. + * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so + * just always test PSSCR for SPR/TB state loss. + */ + pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; + if (likely(pls < pnv_first_spr_loss_level)) { + if (sprs_saved) + atomic_stop_thread_idle(); + goto out; + } + + /* HV state loss */ + BUG_ON(!sprs_saved); + + atomic_lock_thread_idle(); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* Per-core SPRs */ + mtspr(SPRN_PTCR, sprs.ptcr); + mtspr(SPRN_RPR, sprs.rpr); + mtspr(SPRN_TSCR, sprs.tscr); + mtspr(SPRN_LDBAR, sprs.ldbar); + + if (pls >= pnv_first_tb_loss_level) { + /* TB loss */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + } + + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + +core_woken: + atomic_unlock_and_stop_thread_idle(); + + /* Per-thread SPRs */ + mtspr(SPRN_LPCR, sprs.lpcr); + mtspr(SPRN_HFSCR, sprs.hfscr); + mtspr(SPRN_FSCR, sprs.fscr); + mtspr(SPRN_PID, sprs.pid); + mtspr(SPRN_PURR, sprs.purr); + mtspr(SPRN_SPURR, sprs.spurr); + mtspr(SPRN_DSCR, sprs.dscr); + mtspr(SPRN_WORT, sprs.wort); + + mtspr(SPRN_MMCRA, sprs.mmcra); + mtspr(SPRN_MMCR0, sprs.mmcr0); + mtspr(SPRN_MMCR1, sprs.mmcr1); + mtspr(SPRN_MMCR2, sprs.mmcr2); + + mtspr(SPRN_SPRG3, local_paca->sprg_vdso); + + if (!radix_enabled()) + __slb_restore_bolted_realmode(); + +out: + if (mmu_on) + mtmsr(MSR_KERNEL); + + return srr1; +} + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long power9_offline_stop(unsigned long psscr) +{ + unsigned long srr1; + +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE __ppc64_runlatch_off(); - srr1 = power9_idle_stop(psscr); + srr1 = power9_idle_stop(psscr, true); __ppc64_runlatch_on(); +#else + /* + * Tell KVM we're entering idle. + * This does not have to be done in real mode because the P9 MMU + * is independent per-thread. Some steppings share radix/hash mode + * between threads, but in that case KVM has a barrier sync in real + * mode before and after switching between radix and hash. + * + * kvm_start_guest must still be called in real mode though, hence + * the false argument. + */ + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; - fini_irq_for_idle_irqsoff(); + __ppc64_runlatch_off(); + srr1 = power9_idle_stop(psscr, false); + __ppc64_runlatch_on(); + + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; + /* Order setting hwthread_state vs. testing hwthread_req */ + smp_mb(); + if (local_paca->kvm_hstate.hwthread_req) + srr1 = idle_kvm_start_guest(srr1); + mtmsr(MSR_KERNEL); +#endif return srr1; } +#endif void power9_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask) { + unsigned long psscr; unsigned long srr1; - srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask); + if (!prep_irq_for_idle_irqsoff()) + return; + + psscr = mfspr(SPRN_PSSCR); + psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; + + __ppc64_runlatch_off(); + srr1 = power9_idle_stop(psscr, true); + __ppc64_runlatch_on(); + + fini_irq_for_idle_irqsoff(); + irq_set_pending_from_srr1(srr1); } @@ -409,7 +898,7 @@ void pnv_power9_force_smt4_catch(void) atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); } /* order setting dont_stop vs testing requested_psscr */ - mb(); + smp_mb(); for (thr = 0; thr < threads_per_core; ++thr) { if (!paca_ptrs[cpu0+thr]->requested_psscr) ++awake_threads; @@ -481,7 +970,6 @@ void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) unsigned long pnv_cpu_offline(unsigned int cpu) { unsigned long srr1; - u32 idle_states = pnv_get_supported_cpuidle_states(); __ppc64_runlatch_off(); @@ -492,15 +980,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu) psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | pnv_deepest_stop_psscr_val; srr1 = power9_offline_stop(psscr); - - } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) && - (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) { - srr1 = power7_idle_insn(PNV_THREAD_WINKLE); - } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || - (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - srr1 = power7_idle_insn(PNV_THREAD_SLEEP); - } else if (idle_states & OPAL_PM_NAP_ENABLED) { - srr1 = power7_idle_insn(PNV_THREAD_NAP); + } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { + srr1 = power7_offline(); } else { /* This is the fallback method. We emulate snooze */ while (!generic_check_cpu_restart(cpu)) { @@ -596,33 +1077,44 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) * @dt_idle_states: Number of idle state entries * Returns 0 on success */ -static int __init pnv_power9_idle_init(void) +static void __init pnv_power9_idle_init(void) { u64 max_residency_ns = 0; int i; /* - * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask}, - * and the pnv_default_stop_{val,mask}. - * - * pnv_first_deep_stop_state should be set to the first stop - * level to cause hypervisor state loss. - * * pnv_deepest_stop_{val,mask} should be set to values corresponding to * the deepest stop state. * * pnv_default_stop_{val,mask} should be set to values corresponding to - * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state. + * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. */ - pnv_first_deep_stop_state = MAX_STOP_STATE; + pnv_first_tb_loss_level = MAX_STOP_STATE + 1; + pnv_first_spr_loss_level = MAX_STOP_STATE + 1; for (i = 0; i < nr_pnv_idle_states; i++) { int err; struct pnv_idle_states_t *state = &pnv_idle_states[i]; u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && + (pnv_first_tb_loss_level > psscr_rl)) + pnv_first_tb_loss_level = psscr_rl; + if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && - pnv_first_deep_stop_state > psscr_rl) - pnv_first_deep_stop_state = psscr_rl; + (pnv_first_spr_loss_level > psscr_rl)) + pnv_first_spr_loss_level = psscr_rl; + + /* + * The idle code does not deal with TB loss occurring + * in a shallower state than SPR loss, so force it to + * behave like SPRs are lost if TB is lost. POWER9 would + * never encouter this, but a POWER8 core would if it + * implemented the stop instruction. So this is for forward + * compatibility. + */ + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && + (pnv_first_spr_loss_level > psscr_rl)) + pnv_first_spr_loss_level = psscr_rl; err = validate_psscr_val_mask(&state->psscr_val, &state->psscr_mask, @@ -647,6 +1139,7 @@ static int __init pnv_power9_idle_init(void) pnv_default_stop_val = state->psscr_val; pnv_default_stop_mask = state->psscr_mask; default_stop_found = true; + WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); } } @@ -666,10 +1159,40 @@ static int __init pnv_power9_idle_init(void) pnv_deepest_stop_psscr_mask); } - pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n", - pnv_first_deep_stop_state); + pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%lld\n", + pnv_first_spr_loss_level); - return 0; + pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%lld\n", + pnv_first_tb_loss_level); +} + +static void __init pnv_disable_deep_states(void) +{ + /* + * The stop-api is unable to restore hypervisor + * resources on wakeup from platform idle states which + * lose full context. So disable such states. + */ + supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; + pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); + pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); + + if (cpu_has_feature(CPU_FTR_ARCH_300) && + (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { + /* + * Use the default stop state for CPU-Hotplug + * if available. + */ + if (default_stop_found) { + pnv_deepest_stop_psscr_val = pnv_default_stop_val; + pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; + pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", + pnv_deepest_stop_psscr_val); + } else { /* Fallback to snooze loop for CPU-Hotplug */ + deepest_stop_found = false; + pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); + } + } } /* @@ -684,10 +1207,8 @@ static void __init pnv_probe_idle_states(void) return; } - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (pnv_power9_idle_init()) - return; - } + if (cpu_has_feature(CPU_FTR_ARCH_300)) + pnv_power9_idle_init(); for (i = 0; i < nr_pnv_idle_states; i++) supported_cpuidle_states |= pnv_idle_states[i].flags; @@ -807,11 +1328,33 @@ out: static int __init pnv_init_idle_states(void) { + int cpu; int rc = 0; - supported_cpuidle_states = 0; + + /* Set up PACA fields */ + for_each_present_cpu(cpu) { + struct paca_struct *p = paca_ptrs[cpu]; + + p->idle_state = 0; + if (cpu == cpu_first_thread_sibling(cpu)) + p->idle_state = (1 << threads_per_core) - 1; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* P7/P8 nap */ + p->thread_idle_state = PNV_THREAD_RUNNING; + } else { + /* P9 stop */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + p->requested_psscr = 0; + atomic_set(&p->dont_stop, 0); +#endif + } + } /* In case we error out nr_pnv_idle_states will be zero */ nr_pnv_idle_states = 0; + supported_cpuidle_states = 0; + if (cpuidle_disable != IDLE_NO_OVERRIDE) goto out; rc = pnv_parse_cpuidle_dt(); @@ -819,27 +1362,40 @@ static int __init pnv_init_idle_states(void) return rc; pnv_probe_idle_states(); - if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_entry, - PPC_INST_NOP); - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_exit, - PPC_INST_NOP); - } else { - /* - * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that - * workaround is needed to use fastsleep. Provide sysfs - * control to choose how this workaround has to be applied. - */ - device_create_file(cpu_subsys.dev_root, + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + power7_fastsleep_workaround_entry = false; + power7_fastsleep_workaround_exit = false; + } else { + /* + * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that + * workaround is needed to use fastsleep. Provide sysfs + * control to choose how this workaround has to be + * applied. + */ + device_create_file(cpu_subsys.dev_root, &dev_attr_fastsleep_workaround_applyonce); - } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { + ppc_md.power_save = power7_idle; + power7_offline_type = PNV_THREAD_NAP; + } - pnv_alloc_idle_core_states(); + if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && + (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) + power7_offline_type = PNV_THREAD_WINKLE; + else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || + (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) + power7_offline_type = PNV_THREAD_SLEEP; + } - if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) - ppc_md.power_save = power7_idle; + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { + if (pnv_save_sprs_for_deep_states()) + pnv_disable_deep_states(); + } out: return 0; diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index daad8c45c8e7..36c8fa3647a2 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -121,6 +121,8 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, #define OPAL_CALL(name, opcode) \ int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \ + int64_t a4, int64_t a5, int64_t a6, int64_t a7); \ +int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \ int64_t a4, int64_t a5, int64_t a6, int64_t a7) \ { \ return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \ @@ -218,6 +220,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2); OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); @@ -260,6 +263,9 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO); OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO); OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC); OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); +OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE); +OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE); +OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE); OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 58a07948c76e..3e497b91d210 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -127,7 +127,7 @@ static int imc_get_mem_addr_nest(struct device_node *node, nr_chips)) goto error; - pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info), + pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info), GFP_KERNEL); if (!pmu_ptr->mem_info) goto error; @@ -284,6 +284,9 @@ static int opal_imc_counters_probe(struct platform_device *pdev) case IMC_TYPE_THREAD: domain = IMC_DOMAIN_THREAD; break; + case IMC_TYPE_TRACE: + domain = IMC_DOMAIN_TRACE; + break; default: pr_warn("IMC Unknown Device type \n"); domain = -1; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2b0eca104f86..f2b063b027f0 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -505,7 +505,7 @@ static int opal_recover_mce(struct pt_regs *regs, recovered = 0; } - if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) { + if (!recovered && evt->sync_error) { /* * Try to kill processes if we get a synchronous machine check * (e.g., one caused by execution of this instruction). This @@ -614,6 +614,27 @@ int opal_hmi_exception_early(struct pt_regs *regs) return 0; } +int opal_hmi_exception_early2(struct pt_regs *regs) +{ + s64 rc; + __be64 out_flags; + + /* + * call opal hmi handler. + * Check 64-bit flag mask to find out if an event was generated, + * and whether TB is still valid or not etc. + */ + rc = opal_handle_hmi2(&out_flags); + if (rc != OPAL_SUCCESS) + return 0; + + if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT) + local_paca->hmi_event_available = 1; + if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL) + tb_invalid = true; + return 1; +} + /* HMI exception handler called in virtual mode during check_irq_replay. */ int opal_handle_hmi_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3ead4c237ed0..126602b4e399 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -847,11 +847,11 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); if (rc) - pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc); + pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, bcomp, dcomp, fcomp, OPAL_UNMAP_PE); if (rc) - pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); + pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc); pe->pbus = NULL; pe->pdev = NULL; @@ -1174,11 +1174,12 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) pe->rid = bus->busn_res.start << 8; if (all) - pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n", - bus->busn_res.start, bus->busn_res.end, pe->pe_number); + pe_info(pe, "Secondary bus %pad..%pad associated with PE#%x\n", + &bus->busn_res.start, &bus->busn_res.end, + pe->pe_number); else - pe_info(pe, "Secondary bus %d associated with PE#%x\n", - bus->busn_res.start, pe->pe_number); + pe_info(pe, "Secondary bus %pad associated with PE#%x\n", + &bus->busn_res.start, pe->pe_number); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1448,7 +1449,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe tbl = pe->table_group.tables[0]; rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) - pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + pe_warn(pe, "OPAL error %lld release DMA window\n", rc); pnv_pci_ioda2_set_bypass(pe, false); if (pe->table_group.group) { @@ -1836,7 +1837,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, struct pnv_ioda_pe *pe; if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return -ENODEV; + return false; pe = &phb->ioda.pe_array[pdn->pe_number]; if (pe->tce_bypass_enabled) { @@ -1859,7 +1860,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, /* Configure the bypass mode */ s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe); if (rc) - return rc; + return false; /* 4GB offset bypasses 32-bit space */ pdev->dev.archdata.dma_offset = (1ULL << 32); return true; @@ -2286,8 +2287,8 @@ found: __pa(addr) + tce32_segsz * i, tce32_segsz, IOMMU_PAGE_SIZE_4K); if (rc) { - pe_err(pe, " Failed to configure 32-bit TCE table," - " err %ld\n", rc); + pe_err(pe, " Failed to configure 32-bit TCE table, err %lld\n", + rc); goto fail; } } @@ -2332,9 +2333,9 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; const __u64 win_size = tbl->it_size << tbl->it_page_shift; - pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num, - start_addr, start_addr + win_size - 1, - IOMMU_PAGE_SIZE(tbl)); + pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n", + num, start_addr, start_addr + win_size - 1, + IOMMU_PAGE_SIZE(tbl)); /* * Map TCE table through TVT. The TVE index is the PE number @@ -2348,7 +2349,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, size << 3, IOMMU_PAGE_SIZE(tbl)); if (rc) { - pe_err(pe, "Failed to configure TCE table, err %ld\n", rc); + pe_err(pe, "Failed to configure TCE table, err %lld\n", rc); return rc; } @@ -3450,7 +3451,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) #ifdef CONFIG_IOMMU_API rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) - pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + pe_warn(pe, "OPAL error %lld release DMA window\n", rc); #endif pnv_pci_ioda2_set_bypass(pe, false); @@ -3484,7 +3485,7 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, phb->ioda.reserved_pe_idx, win, 0, idx); if (rc != OPAL_SUCCESS) - pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n", + pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n", rc, win, idx); map[idx] = IODA_INVALID_PE; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 8e36da379252..be26ab3d99e0 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -2,6 +2,7 @@ #ifndef __POWERNV_PCI_H #define __POWERNV_PCI_H +#include <linux/compiler.h> /* for __printf */ #include <linux/iommu.h> #include <asm/iommu.h> #include <asm/msi_bitmap.h> @@ -204,6 +205,7 @@ extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels); extern int pnv_eeh_post_init(void); +__printf(3, 4) extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...); #define pe_err(pe, fmt, ...) \ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 14befee4b3f1..3cf40f689aac 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -401,7 +401,10 @@ static void __init pnv_setup_machdep_opal(void) /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */ ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; - ppc_md.hmi_exception_early = opal_hmi_exception_early; + if (opal_check_token(OPAL_HANDLE_HMI2)) + ppc_md.hmi_exception_early = opal_hmi_exception_early2; + else + ppc_md.hmi_exception_early = opal_hmi_exception_early; ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 45563004feda..1d7a9fd30dd1 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -183,7 +183,7 @@ static void unsplit_core(void) cpu = smp_processor_id(); if (cpu_thread_in_core(cpu) != 0) { while (mfspr(SPRN_HID0) & mask) - power7_idle_insn(PNV_THREAD_NAP); + power7_idle_type(PNV_THREAD_NAP); per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; return; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index d291b618a559..47087832f8b2 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -379,7 +379,7 @@ static int dlpar_add_lmb(struct drmem_lmb *); static int dlpar_remove_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int nid, rc; + int rc; if (!lmb_is_removable(lmb)) return -EINVAL; @@ -389,14 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) return rc; block_sz = pseries_memory_block_size(); - nid = memory_add_physaddr_to_nid(lmb->base_addr); - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->nid, lmb->base_addr, block_sz); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); + lmb_clear_nid(lmb); lmb->flags &= ~DRCONF_MEM_ASSIGNED; return 0; @@ -653,7 +653,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int nid, rc; + int rc; if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; @@ -664,13 +664,11 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) return rc; } + lmb_set_nid(lmb); block_sz = memory_block_size_bytes(); - /* Find the node id for this address */ - nid = memory_add_physaddr_to_nid(lmb->base_addr); - /* Add the memory */ - rc = __add_memory(nid, lmb->base_addr, block_sz); + rc = __add_memory(lmb->nid, lmb->base_addr, block_sz); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; @@ -678,8 +676,9 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->nid, lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); + lmb_clear_nid(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 36eb1ddbac69..03bbb299320e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -105,7 +105,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, unsigned long attrs) { u64 proto_tce; - __be64 *tcep, *tces; + __be64 *tcep; u64 rpn; proto_tce = TCE_PCI_READ; // Read allowed @@ -113,7 +113,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; - tces = tcep = ((__be64 *)tbl->it_base) + index; + tcep = ((__be64 *)tbl->it_base) + index; while (npages--) { /* can't move this out since we might cross MEMBLOCK boundary */ @@ -129,9 +129,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) { - __be64 *tcep, *tces; + __be64 *tcep; - tces = tcep = ((__be64 *)tbl->it_base) + index; + tcep = ((__be64 *)tbl->it_base) + index; while (npages--) *(tcep++) = 0; @@ -945,7 +945,7 @@ static phys_addr_t ddw_memory_hotplug_max(void) for_each_node_by_type(memory, "memory") { unsigned long start, size; - int ranges, n_mem_addr_cells, n_mem_size_cells, len; + int n_mem_addr_cells, n_mem_size_cells, len; const __be32 *memcell_buf; memcell_buf = of_get_property(memory, "reg", &len); @@ -955,9 +955,6 @@ static phys_addr_t ddw_memory_hotplug_max(void) n_mem_addr_cells = of_n_addr_cells(memory); n_mem_size_cells = of_n_size_cells(memory); - /* ranges in cell */ - ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); - start = of_read_number(memcell_buf, n_mem_addr_cells); memcell_buf += n_mem_addr_cells; size = of_read_number(memcell_buf, n_mem_size_cells); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f2a9f0adc2d3..1034ef1fe2b4 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -901,8 +901,10 @@ static int pseries_lpar_resize_hpt(unsigned long shift) break; case H_PARAMETER: + pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n"); return -EINVAL; case H_RESOURCE: + pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n"); return -EPERM; default: pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc); @@ -918,7 +920,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift) if (rc != 0) { switch (state.commit_rc) { case H_PTEG_FULL: - pr_warn("Hash collision while resizing HPT\n"); return -ENOSPC; default: diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index 27f0a915c8a9..f860a897a9e0 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -106,7 +106,7 @@ static ssize_t pmem_drc_remove_node(u32 drc_index) int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) { - u32 count, drc_index; + u32 drc_index; int rc; /* slim chance, but we might get a hotplug event while booting */ @@ -123,7 +123,6 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) return -EINVAL; } - count = hp_elog->_drc_u.drc_count; drc_index = hp_elog->_drc_u.drc_index; lock_device_hotplug(); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 452dcfd7e5dd..c97d15352f9f 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -539,44 +539,44 @@ static void pseries_print_mce_info(struct pt_regs *regs, int disposition = rtas_error_disposition(errp); static const char * const initiators[] = { - "Unknown", - "CPU", - "PCI", - "ISA", - "Memory", - "Power Mgmt", + [0] = "Unknown", + [1] = "CPU", + [2] = "PCI", + [3] = "ISA", + [4] = "Memory", + [5] = "Power Mgmt", }; static const char * const mc_err_types[] = { - "UE", - "SLB", - "ERAT", - "Unknown", - "TLB", - "D-Cache", - "Unknown", - "I-Cache", + [0] = "UE", + [1] = "SLB", + [2] = "ERAT", + [3] = "Unknown", + [4] = "TLB", + [5] = "D-Cache", + [6] = "Unknown", + [7] = "I-Cache", }; static const char * const mc_ue_types[] = { - "Indeterminate", - "Instruction fetch", - "Page table walk ifetch", - "Load/Store", - "Page table walk Load/Store", + [0] = "Indeterminate", + [1] = "Instruction fetch", + [2] = "Page table walk ifetch", + [3] = "Load/Store", + [4] = "Page table walk Load/Store", }; /* SLB sub errors valid values are 0x0, 0x1, 0x2 */ static const char * const mc_slb_types[] = { - "Parity", - "Multihit", - "Indeterminate", + [0] = "Parity", + [1] = "Multihit", + [2] = "Indeterminate", }; /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ static const char * const mc_soft_types[] = { - "Unknown", - "Parity", - "Multihit", - "Indeterminate", + [0] = "Unknown", + [1] = "Parity", + [2] = "Multihit", + [3] = "Indeterminate", }; if (!rtas_error_extended(errp)) { @@ -707,6 +707,87 @@ out: return disposition; } +#ifdef CONFIG_MEMORY_FAILURE + +static DEFINE_PER_CPU(int, rtas_ue_count); +static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]); + +#define UE_EFFECTIVE_ADDR_PROVIDED 0x40 +#define UE_LOGICAL_ADDR_PROVIDED 0x20 + + +static void pseries_hwpoison_work_fn(struct work_struct *work) +{ + unsigned long paddr; + int index; + + while (__this_cpu_read(rtas_ue_count) > 0) { + index = __this_cpu_read(rtas_ue_count) - 1; + paddr = __this_cpu_read(rtas_ue_paddr[index]); + memory_failure(paddr >> PAGE_SHIFT, 0); + __this_cpu_dec(rtas_ue_count); + } +} + +static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn); + +static void queue_ue_paddr(unsigned long paddr) +{ + int index; + + index = __this_cpu_inc_return(rtas_ue_count) - 1; + if (index >= MAX_MC_EVT) { + __this_cpu_dec(rtas_ue_count); + return; + } + this_cpu_write(rtas_ue_paddr[index], paddr); + schedule_work(&hwpoison_work); +} + +static void pseries_do_memory_failure(struct pt_regs *regs, + struct pseries_mc_errorlog *mce_log) +{ + unsigned long paddr; + + if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) { + paddr = be64_to_cpu(mce_log->logical_address); + } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) { + unsigned long pfn; + + pfn = addr_to_pfn(regs, + be64_to_cpu(mce_log->effective_address)); + if (pfn == ULONG_MAX) + return; + paddr = pfn << PAGE_SHIFT; + } else { + return; + } + queue_ue_paddr(paddr); +} + +static void pseries_process_ue(struct pt_regs *regs, + struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log; + + if (!rtas_error_extended(errp)) + return; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + return; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + + if (mce_log->error_type == MC_ERROR_TYPE_UE) + pseries_do_memory_failure(regs, mce_log); +} +#else +static inline void pseries_process_ue(struct pt_regs *regs, + struct rtas_error_log *errp) { } +#endif /*CONFIG_MEMORY_FAILURE */ + /* * Process MCE rtas errlog event. */ @@ -765,6 +846,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) recovered = 1; } + pseries_process_ue(regs, err); + /* Queue irq work to log this rtas event later. */ irq_work_queue(&mce_errlog_process_work); |