Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--  arch/powerpc/platforms/44x/machine_check.c | 1
-rw-r--r--  arch/powerpc/platforms/44x/ppc476.c | 5
-rw-r--r--  arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 4
-rw-r--r--  arch/powerpc/platforms/85xx/smp.c | 4
-rw-r--r--  arch/powerpc/platforms/Kconfig | 29
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 18
-rw-r--r--  arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 3
-rw-r--r--  arch/powerpc/platforms/embedded6xx/storcenter.c | 3
-rw-r--r--  arch/powerpc/platforms/powermac/pmac.h | 2
-rw-r--r--  arch/powerpc/platforms/powermac/setup.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/sleep.S | 15
-rw-r--r--  arch/powerpc/platforms/powermac/smp.c | 12
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c | 98
-rw-r--r--  arch/powerpc/platforms/powernv/idle.c | 302
-rw-r--r--  arch/powerpc/platforms/powernv/memtrace.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/ocxl.c | 30
-rw-r--r--  arch/powerpc/platforms/powernv/opal-core.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-elog.c | 33
-rw-r--r--  arch/powerpc/platforms/powernv/opal-msglog.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-prd.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 8
-rw-r--r--  arch/powerpc/platforms/powernv/powernv.h | 7
-rw-r--r--  arch/powerpc/platforms/powernv/rng.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 24
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c | 6
-rw-r--r--  arch/powerpc/platforms/powernv/vas-window.c | 9
-rw-r--r--  arch/powerpc/platforms/ps3/spu.c | 4
-rw-r--r--  arch/powerpc/platforms/ps3/system-bus.c | 6
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_pseries.c | 376
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c | 6
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c | 82
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall_inst.c | 23
-rw-r--r--  arch/powerpc/platforms/pseries/ibmebus.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 242
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/lparcfg.c | 35
-rw-r--r--  arch/powerpc/platforms/pseries/papr_scm.c | 10
-rw-r--r--  arch/powerpc/platforms/pseries/rng.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 6
-rw-r--r--  arch/powerpc/platforms/pseries/svm.c | 26
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c | 4
41 files changed, 827 insertions(+), 623 deletions(-)
diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c
index 90ad6ac529d2..a5c898bb9bab 100644
--- a/arch/powerpc/platforms/44x/machine_check.c
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -7,6 +7,7 @@
#include <linux/ptrace.h>
#include <asm/reg.h>
+#include <asm/cacheflush.h>
int machine_check_440A(struct pt_regs *regs)
{
diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c
index cba83eee685c..07f7e3ce67b5 100644
--- a/arch/powerpc/platforms/44x/ppc476.c
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -86,8 +86,7 @@ static void __noreturn avr_reset_system(char *cmd)
avr_halt_system(AVR_PWRCTL_RESET);
}
-static int avr_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int avr_probe(struct i2c_client *client)
{
avr_i2c_client = client;
ppc_md.restart = avr_reset_system;
@@ -104,7 +103,7 @@ static struct i2c_driver avr_driver = {
.driver = {
.name = "akebono-avr",
},
- .probe = avr_probe,
+ .probe_new = avr_probe,
.id_table = avr_id,
};
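Aside: the .probe_new conversion above drops the i2c_device_id argument from probe. A driver that still needs the matched table entry can look it up itself; a minimal sketch, assuming i2c_match_id() is exported on the target kernel and "example_ids" is a hypothetical id table:

#include <linux/i2c.h>

static int example_probe(struct i2c_client *client)
{
	/* Recover the id entry that .probe() used to receive directly. */
	const struct i2c_device_id *id = i2c_match_id(example_ids, client);

	if (id)
		dev_info(&client->dev, "matched entry: %s\n", id->name);
	return 0;
}

Neither akebono-avr nor the MCU driver below needs the id, so both simply drop the parameter.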
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 0967bdfb1691..409481016928 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -142,7 +142,7 @@ static int mcu_gpiochip_remove(struct mcu *mcu)
return 0;
}
-static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int mcu_probe(struct i2c_client *client)
{
struct mcu *mcu;
int ret;
@@ -221,7 +221,7 @@ static struct i2c_driver mcu_driver = {
.name = "mcu-mpc8349emitx",
.of_match_table = mcu_of_match_table,
},
- .probe = mcu_probe,
+ .probe_new = mcu_probe,
.remove = mcu_remove,
.id_table = mcu_ids,
};
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index fda108bae95f..c6df294054fe 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -112,7 +112,7 @@ static void mpc85xx_take_timebase(void)
local_irq_restore(flags);
}
-static void smp_85xx_mach_cpu_die(void)
+static void smp_85xx_cpu_offline_self(void)
{
unsigned int cpu = smp_processor_id();
@@ -506,7 +506,7 @@ void __init mpc85xx_smp_init(void)
if (qoriq_pm_ops) {
smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
- ppc_md.cpu_die = smp_85xx_mach_cpu_die;
+ smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
smp_85xx_ops.cpu_die = qoriq_cpu_kill;
}
#endif
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index fb7515b4fa9c..7a5e8f4541e3 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -199,21 +199,6 @@ source "drivers/cpuidle/Kconfig"
endmenu
-config PPC601_SYNC_FIX
- bool "Workarounds for PPC601 bugs"
- depends on PPC_BOOK3S_601 && PPC_PMAC
- default y
- help
- Some versions of the PPC601 (the first PowerPC chip) have bugs which
- mean that extra synchronization instructions are required near
- certain instructions, typically those that make major changes to the
- CPU state. These extra instructions reduce performance slightly.
- If you say N here, these extra instructions will not be included,
- resulting in a kernel which will run faster but may not run at all
- on some systems with the PPC601 chip.
-
- If in doubt, say Y here.
-
config TAU
bool "On-chip CPU temperature sensor support"
depends on PPC_BOOK3S_32
@@ -223,12 +208,11 @@ config TAU
temperature within 2-4 degrees Celsius. This option shows the current
on-die temperature in /proc/cpuinfo if the cpu supports it.
- Unfortunately, on some chip revisions, this sensor is very inaccurate
- and in many cases, does not work at all, so don't assume the cpu
- temp is actually what /proc/cpuinfo says it is.
+ Unfortunately, this sensor is very inaccurate when uncalibrated, so
+ don't assume the cpu temp is actually what /proc/cpuinfo says it is.
config TAU_INT
- bool "Interrupt driven TAU driver (DANGEROUS)"
+ bool "Interrupt driven TAU driver (EXPERIMENTAL)"
depends on TAU
help
The TAU supports an interrupt driven mode which causes an interrupt
@@ -236,12 +220,7 @@ config TAU_INT
to get notified the temp has exceeded a range. With this option off,
a timer is used to re-check the temperature periodically.
- However, on some cpus it appears that the TAU interrupt hardware
- is buggy and can cause a situation which would lead unexplained hard
- lockups.
-
- Unless you are extending the TAU driver, or enjoy kernel/hardware
- debugging, leave this option off.
+ If in doubt, say N here.
config TAU_AVERAGE
bool "Average high and low temp"
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 1dc9d3c81872..c194c4ae8bc7 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -20,7 +20,7 @@ choice
depends on PPC32
help
There are five families of 32 bit PowerPC chips supported.
- The most common ones are the desktop and server CPUs (601, 603,
+ The most common ones are the desktop and server CPUs (603,
604, 740, 750, 74xx) CPUs from Freescale and IBM, with their
embedded 512x/52xx/82xx/83xx/86xx counterparts.
The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500
@@ -30,7 +30,7 @@ choice
If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx.
config PPC_BOOK3S_6xx
- bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx except 601"
+ bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
select PPC_BOOK3S_32
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
@@ -38,13 +38,6 @@ config PPC_BOOK3S_6xx
select PPC_HAVE_KUAP
select HAVE_ARCH_VMAP_STACK if !ADB_PMU
-config PPC_BOOK3S_601
- bool "PowerPC 601"
- select PPC_BOOK3S_32
- select PPC_FPU
- select PPC_HAVE_KUAP
- select HAVE_ARCH_VMAP_STACK
-
config PPC_85xx
bool "Freescale 85xx"
select E500
@@ -490,13 +483,12 @@ endmenu
config VDSO32
def_bool y
- depends on PPC32 || CPU_BIG_ENDIAN
+ depends on PPC32 || COMPAT
help
This symbol controls whether we build the 32-bit VDSO. We obviously
want to do that if we're building a 32-bit kernel. If we're building
- a 64-bit kernel then we only want a 32-bit VDSO if we're building for
- big endian. That is because the only little endian configuration we
- support is ppc64le which is 64-bit only.
+ a 64-bit kernel then we only want a 32-bit VDSO if we're also enabling
+ COMPAT.
choice
prompt "Endianness selection"
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 15437abe1f6d..b95c3380d2b5 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -147,7 +147,8 @@ static void __noreturn mpc7448_hpc2_restart(char *cmd)
local_irq_disable();
/* Set exception prefix high - to the firmware */
- _nmask_and_or_msr(0, MSR_IP);
+ mtmsr(mfmsr() | MSR_IP);
+ isync();
for (;;) ; /* Spin until reset happens */
}
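Aside: the removed helper _nmask_and_or_msr(nmask, or_value) cleared the nmask bits and set the or_value bits of the MSR; with a zero mask it reduces to the read-modify-write shown above. A hedged C equivalent of the open-coded sequence (the helper name here is hypothetical):

static inline void msr_set_exception_prefix_high(void)
{
	mtmsr(mfmsr() | MSR_IP);	/* MSR[IP]=1: vector exceptions high */
	isync();			/* context-synchronise the MSR change */
}

storcenter.c below receives the identical conversion.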
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c
index ed1914dd34bb..e346ddcef45e 100644
--- a/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -101,7 +101,8 @@ static void __noreturn storcenter_restart(char *cmd)
local_irq_disable();
/* Set exception prefix high - to the firmware */
- _nmask_and_or_msr(0, MSR_IP);
+ mtmsr(mfmsr() | MSR_IP);
+ isync();
/* Wait for reset to happen */
for (;;) ;
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 16a52afdb76e..0d715db434dc 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -34,7 +34,7 @@ extern void pmac_check_ht_link(void);
extern void pmac_setup_smp(void);
extern int psurge_secondary_virq;
-extern void low_cpu_die(void) __attribute__((noreturn));
+extern void low_cpu_offline_self(void) __attribute__((noreturn));
extern int pmac_nvram_init(void);
extern void pmac_pic_init(void);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index f002b0fa69b8..2e2cc0c75d87 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -284,7 +284,7 @@ static void __init pmac_setup_arch(void)
/* 604, G3, G4 etc. */
loops_per_jiffy = *fp / HZ;
else
- /* 601, 603, etc. */
+ /* 603, etc. */
loops_per_jiffy = *fp / (2 * HZ);
of_node_put(cpu);
break;
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index f9a680fdd9c4..7e0f8ba6e54a 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -201,8 +201,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
addi r3,r3,sleep_storage@l
stw r5,0(r3)
- .globl low_cpu_die
-low_cpu_die:
+ .globl low_cpu_offline_self
+low_cpu_offline_self:
/* Flush & disable all caches */
bl flush_disable_caches
@@ -244,7 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
mtmsr r2
isync
b 1b
-_ASM_NOKPROBE_SYMBOL(low_cpu_die)
+_ASM_NOKPROBE_SYMBOL(low_cpu_offline_self)
/*
* Here is the resume code.
*/
@@ -294,14 +294,7 @@ grackle_wake_up:
* we do any r1 memory access as we are not sure they
* are in a sane state above the first 256Mb region
*/
- li r0,16 /* load up segment register values */
- mtctr r0 /* for context 0 */
- lis r3,0x2000 /* Ku = 1, VSID = 0 */
- li r4,0
-3: mtsrin r3,r4
- addi r3,r3,0x111 /* increment VSID */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
+ bl load_segment_registers
sync
isync
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index eb23264910e1..74ebe664b016 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -270,10 +270,6 @@ static void __init smp_psurge_probe(void)
int i, ncpus;
struct device_node *dn;
- /* We don't do SMP on the PPC601 -- paulus */
- if (PVR_VER(mfspr(SPRN_PVR)) == 1)
- return;
-
/*
* The powersurge cpu board can be used in the generation
* of powermacs that have a socket for an upgradeable cpu card,
@@ -920,7 +916,7 @@ static int smp_core99_cpu_disable(void)
#ifdef CONFIG_PPC32
-static void pmac_cpu_die(void)
+static void pmac_cpu_offline_self(void)
{
int cpu = smp_processor_id();
@@ -930,12 +926,12 @@ static void pmac_cpu_die(void)
generic_set_cpu_dead(cpu);
smp_wmb();
mb();
- low_cpu_die();
+ low_cpu_offline_self();
}
#else /* CONFIG_PPC32 */
-static void pmac_cpu_die(void)
+static void pmac_cpu_offline_self(void)
{
int cpu = smp_processor_id();
@@ -1020,7 +1016,7 @@ void __init pmac_setup_smp(void)
#endif /* CONFIG_PPC_PMAC32_PSURGE */
#ifdef CONFIG_HOTPLUG_CPU
- ppc_md.cpu_die = pmac_cpu_die;
+ smp_ops->cpu_offline_self = pmac_cpu_offline_self;
#endif
}
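Aside: this series splits the old ppc_md.cpu_die hook into two smp_ops callbacks: cpu_offline_self(), run by the dying CPU itself, and cpu_die(), run by a surviving CPU to reap it. A sketch of how the generic idle path might dispatch the renamed hook (an illustrative assumption, not part of this diff):

void arch_cpu_idle_dead(void)
{
	/* Runs on the CPU going offline. */
	if (smp_ops->cpu_offline_self)
		smp_ops->cpu_offline_self();	/* usually never returns */
	for (;;)
		;	/* fallback: spin until another CPU kills us */
}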
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 9af8c3b98853..89e22c460ebf 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -38,60 +38,12 @@
static int eeh_event_irq = -EINVAL;
-void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
+static void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
{
dev_dbg(&pdev->dev, "EEH: Setting up device\n");
eeh_probe_device(pdev);
}
-static int pnv_eeh_init(void)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
-
- if (!firmware_has_feature(FW_FEATURE_OPAL)) {
- pr_warn("%s: OPAL is required !\n",
- __func__);
- return -EINVAL;
- }
-
- /* Set probe mode */
- eeh_add_flag(EEH_PROBE_MODE_DEV);
-
- /*
- * P7IOC blocks PCI config access to frozen PE, but PHB3
- * doesn't do that. So we have to selectively enable I/O
- * prior to collecting error log.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
-
- if (phb->model == PNV_PHB_MODEL_P7IOC)
- eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
-
- if (phb->diag_data_size > max_diag_size)
- max_diag_size = phb->diag_data_size;
-
- /*
- * PE#0 should be regarded as valid by EEH core
- * if it's not the reserved one. Currently, we
- * have the reserved PE#255 and PE#127 for PHB3
- * and P7IOC separately. So we should regard
- * PE#0 as valid for PHB3 and P7IOC.
- */
- if (phb->ioda.reserved_pe_idx != 0)
- eeh_add_flag(EEH_VALID_PE_ZERO);
-
- break;
- }
-
- eeh_set_pe_aux_size(max_diag_size);
- ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
-
- return 0;
-}
-
static irqreturn_t pnv_eeh_event(int irq, void *data)
{
/*
@@ -135,7 +87,7 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
return -EINVAL;
/* Retrieve PE */
- pe = eeh_pe_get(hose, pe_no, 0);
+ pe = eeh_pe_get(hose, pe_no);
if (!pe)
return -ENODEV;
@@ -190,7 +142,7 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
#endif /* CONFIG_DEBUG_FS */
-void pnv_eeh_enable_phbs(void)
+static void pnv_eeh_enable_phbs(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
@@ -354,7 +306,7 @@ static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
if (parent) {
struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);
- return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0);
+ return eeh_pe_get(phb->hose, ioda_pe->pe_number);
}
return NULL;
@@ -1406,7 +1358,7 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
}
/* Find the PE according to PE# */
- dev_pe = eeh_pe_get(hose, pe_no, 0);
+ dev_pe = eeh_pe_get(hose, pe_no);
if (!dev_pe)
return -EEXIST;
@@ -1674,7 +1626,6 @@ static int pnv_eeh_restore_config(struct eeh_dev *edev)
static struct eeh_ops pnv_eeh_ops = {
.name = "powernv",
- .init = pnv_eeh_init,
.probe = pnv_eeh_probe,
.set_option = pnv_eeh_set_option,
.get_state = pnv_eeh_get_state,
@@ -1715,9 +1666,44 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps);
*/
static int __init eeh_powernv_init(void)
{
+ int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
int ret = -EINVAL;
- ret = eeh_ops_register(&pnv_eeh_ops);
+ if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+ pr_warn("%s: OPAL is required !\n", __func__);
+ return -EINVAL;
+ }
+
+ /* Set probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEV);
+
+ /*
+ * P7IOC blocks PCI config access to frozen PE, but PHB3
+ * doesn't do that. So we have to selectively enable I/O
+ * prior to collecting error log.
+ */
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ if (phb->model == PNV_PHB_MODEL_P7IOC)
+ eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+
+ if (phb->diag_data_size > max_diag_size)
+ max_diag_size = phb->diag_data_size;
+
+ break;
+ }
+
+ /*
+ * eeh_init() allocates the eeh_pe and its aux data buf so the
+ * size needs to be set before calling eeh_init().
+ */
+ eeh_set_pe_aux_size(max_diag_size);
+ ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
+
+ ret = eeh_init(&pnv_eeh_ops);
if (!ret)
pr_info("EEH: PowerNV platform initialized\n");
else
@@ -1725,4 +1711,4 @@ static int __init eeh_powernv_init(void)
return ret;
}
-machine_early_initcall(powernv, eeh_powernv_init);
+machine_arch_initcall(powernv, eeh_powernv_init);
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 345ab062b21a..1ed7c5286487 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -565,7 +565,7 @@ void power7_idle_type(unsigned long type)
irq_set_pending_from_srr1(srr1);
}
-void power7_idle(void)
+static void power7_idle(void)
{
if (!powersave_nap)
return;
@@ -659,20 +659,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
mmcr0 = mfspr(SPRN_MMCR0);
}
- if (cpu_has_feature(CPU_FTR_ARCH_31)) {
- /*
- * POWER10 uses MMCRA (BHRBRD) as BHRB disable bit.
- * If the user hasn't asked for the BHRB to be
- * written, the value of MMCRA[BHRBRD] is 1.
- * On wakeup from stop, MMCRA[BHRBD] will be 0,
- * since it is previleged resource and will be lost.
- * Thus, if we do not save and restore the MMCRA[BHRBD],
- * hardware will be needlessly writing to the BHRB
- * in problem mode.
- */
- mmcra = mfspr(SPRN_MMCRA);
- }
-
if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
sprs.lpcr = mfspr(SPRN_LPCR);
sprs.hfscr = mfspr(SPRN_HFSCR);
@@ -735,10 +721,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
mtspr(SPRN_MMCR0, mmcr0);
}
- /* Reload MMCRA to restore BHRB disable bit for POWER10 */
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- mtspr(SPRN_MMCRA, mmcra);
-
/*
* DD2.2 and earlier need to set then clear bit 60 in MMCRA
* to ensure the PMU starts running.
@@ -823,73 +805,6 @@ out:
return srr1;
}
-#ifdef CONFIG_HOTPLUG_CPU
-static unsigned long power9_offline_stop(unsigned long psscr)
-{
- unsigned long srr1;
-
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- __ppc64_runlatch_off();
- srr1 = power9_idle_stop(psscr, true);
- __ppc64_runlatch_on();
-#else
- /*
- * Tell KVM we're entering idle.
- * This does not have to be done in real mode because the P9 MMU
- * is independent per-thread. Some steppings share radix/hash mode
- * between threads, but in that case KVM has a barrier sync in real
- * mode before and after switching between radix and hash.
- *
- * kvm_start_guest must still be called in real mode though, hence
- * the false argument.
- */
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
- __ppc64_runlatch_off();
- srr1 = power9_idle_stop(psscr, false);
- __ppc64_runlatch_on();
-
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
- /* Order setting hwthread_state vs. testing hwthread_req */
- smp_mb();
- if (local_paca->kvm_hstate.hwthread_req)
- srr1 = idle_kvm_start_guest(srr1);
- mtmsr(MSR_KERNEL);
-#endif
-
- return srr1;
-}
-#endif
-
-void power9_idle_type(unsigned long stop_psscr_val,
- unsigned long stop_psscr_mask)
-{
- unsigned long psscr;
- unsigned long srr1;
-
- if (!prep_irq_for_idle_irqsoff())
- return;
-
- psscr = mfspr(SPRN_PSSCR);
- psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
-
- __ppc64_runlatch_off();
- srr1 = power9_idle_stop(psscr, true);
- __ppc64_runlatch_on();
-
- fini_irq_for_idle_irqsoff();
-
- irq_set_pending_from_srr1(srr1);
-}
-
-/*
- * Used for ppc_md.power_save which needs a function with no parameters
- */
-void power9_idle(void)
-{
- power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
-}
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* This is used in working around bugs in thread reconfiguration
@@ -962,6 +877,198 @@ void pnv_power9_force_smt4_release(void)
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+struct p10_sprs {
+ /*
+ * SPRs that get lost in shallow states:
+ *
+ * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
+ * isa300 idle routines restore CR, LR.
+ * CTR is volatile
+ * idle thread doesn't use FP or VEC
+ * kernel doesn't use TAR
+ * HSPRG1 is only live in HV interrupt entry
+ * SPRG2 is only live in KVM guests, KVM handles it.
+ */
+};
+
+static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
+ unsigned long srr1;
+ unsigned long pls;
+// struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
+ bool sprs_saved = false;
+
+ if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+ /* EC=ESL=0 case */
+
+ BUG_ON(!mmu_on);
+
+ /*
+ * Wake synchronously. SRESET via xscom may still cause
+ * a 0x100 powersave wakeup with SRR1 reason!
+ */
+ srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
+ if (likely(!srr1))
+ return 0;
+
+ /*
+ * Registers not saved, can't recover!
+ * This would be a hardware bug
+ */
+ BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+ goto out;
+ }
+
+ /* EC=ESL=1 case */
+ if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
+ /* XXX: save SPRs for deep state loss here. */
+
+ sprs_saved = true;
+
+ atomic_start_thread_idle();
+ }
+
+ srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
+
+ psscr = mfspr(SPRN_PSSCR);
+
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ /*
+ * On POWER10, SRR1 bits do not match exactly as expected.
+ * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+ * just always test PSSCR for SPR/TB state loss.
+ */
+ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+ if (likely(pls < deep_spr_loss_state)) {
+ if (sprs_saved)
+ atomic_stop_thread_idle();
+ goto out;
+ }
+
+ /* HV state loss */
+ BUG_ON(!sprs_saved);
+
+ atomic_lock_thread_idle();
+
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* XXX: restore per-core SPRs here */
+
+ if (pls >= pnv_first_tb_loss_level) {
+ /* TB loss */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+ }
+
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+
+core_woken:
+ atomic_unlock_and_stop_thread_idle();
+
+ /* XXX: restore per-thread SPRs here */
+
+ if (!radix_enabled())
+ __slb_restore_bolted_realmode();
+
+out:
+ if (mmu_on)
+ mtmsr(MSR_KERNEL);
+
+ return srr1;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned long arch300_offline_stop(unsigned long psscr)
+{
+ unsigned long srr1;
+
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ __ppc64_runlatch_off();
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ srr1 = power10_idle_stop(psscr, true);
+ else
+ srr1 = power9_idle_stop(psscr, true);
+ __ppc64_runlatch_on();
+#else
+ /*
+ * Tell KVM we're entering idle.
+ * This does not have to be done in real mode because the P9 MMU
+ * is independent per-thread. Some steppings share radix/hash mode
+ * between threads, but in that case KVM has a barrier sync in real
+ * mode before and after switching between radix and hash.
+ *
+ * kvm_start_guest must still be called in real mode though, hence
+ * the false argument.
+ */
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
+
+ __ppc64_runlatch_off();
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ srr1 = power10_idle_stop(psscr, false);
+ else
+ srr1 = power9_idle_stop(psscr, false);
+ __ppc64_runlatch_on();
+
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+ mtmsr(MSR_KERNEL);
+#endif
+
+ return srr1;
+}
+#endif
+
+void arch300_idle_type(unsigned long stop_psscr_val,
+ unsigned long stop_psscr_mask)
+{
+ unsigned long psscr;
+ unsigned long srr1;
+
+ if (!prep_irq_for_idle_irqsoff())
+ return;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+
+ __ppc64_runlatch_off();
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ srr1 = power10_idle_stop(psscr, true);
+ else
+ srr1 = power9_idle_stop(psscr, true);
+ __ppc64_runlatch_on();
+
+ fini_irq_for_idle_irqsoff();
+
+ irq_set_pending_from_srr1(srr1);
+}
+
+/*
+ * Used for ppc_md.power_save which needs a function with no parameters
+ */
+static void arch300_idle(void)
+{
+ arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
@@ -995,7 +1102,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
psscr = mfspr(SPRN_PSSCR);
psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
pnv_deepest_stop_psscr_val;
- srr1 = power9_offline_stop(psscr);
+ srr1 = arch300_offline_stop(psscr);
} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
srr1 = power7_offline();
} else {
@@ -1093,11 +1200,15 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
* @dt_idle_states: Number of idle state entries
* Returns 0 on success
*/
-static void __init pnv_power9_idle_init(void)
+static void __init pnv_arch300_idle_init(void)
{
u64 max_residency_ns = 0;
int i;
+ /* stop is not really architected, we only have p9,p10 drivers */
+ if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
+ return;
+
/*
* pnv_deepest_stop_{val,mask} should be set to values corresponding to
* the deepest stop state.
@@ -1112,6 +1223,11 @@ static void __init pnv_power9_idle_init(void)
struct pnv_idle_states_t *state = &pnv_idle_states[i];
u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
+ /* No deep loss driver implemented for POWER10 yet */
+ if (pvr_version_is(PVR_POWER10) &&
+ state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
+ continue;
+
if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
(pnv_first_tb_loss_level > psscr_rl))
pnv_first_tb_loss_level = psscr_rl;
@@ -1162,7 +1278,7 @@ static void __init pnv_power9_idle_init(void)
if (unlikely(!default_stop_found)) {
pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
} else {
- ppc_md.power_save = power9_idle;
+ ppc_md.power_save = arch300_idle;
pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
pnv_default_stop_val, pnv_default_stop_mask);
}
@@ -1224,7 +1340,7 @@ static void __init pnv_probe_idle_states(void)
}
if (cpu_has_feature(CPU_FTR_ARCH_300))
- pnv_power9_idle_init();
+ pnv_arch300_idle_init();
for (i = 0; i < nr_pnv_idle_states; i++)
supported_cpuidle_states |= pnv_idle_states[i].flags;
@@ -1295,7 +1411,7 @@ static int pnv_parse_cpuidle_dt(void)
for (i = 0; i < nr_idle_states; i++)
pnv_idle_states[i].residency_ns = temp_u32[i];
- /* For power9 */
+ /* For power9 and later */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
/* Read pm_crtl_val */
if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
@@ -1358,8 +1474,8 @@ static int __init pnv_init_idle_states(void)
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
/* P7/P8 nap */
p->thread_idle_state = PNV_THREAD_RUNNING;
- } else {
- /* P9 stop */
+ } else if (pvr_version_is(PVR_POWER9)) {
+ /* P9 stop workarounds */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
p->requested_psscr = 0;
atomic_set(&p->dont_stop, 0);
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 13b369d2cc45..6828108486f8 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -224,7 +224,7 @@ static int memtrace_online(void)
ent->mem = 0;
}
- if (add_memory(ent->nid, ent->start, ent->size)) {
+ if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
pr_err("Failed to add trace memory to node %d\n",
ent->nid);
ret += 1;
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index 8c65aacda9c8..ecdad219d704 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -2,7 +2,6 @@
// Copyright 2017 IBM Corp.
#include <asm/pnv-ocxl.h>
#include <asm/opal.h>
-#include <asm/xive.h>
#include <misc/ocxl-config.h>
#include "pci.h"
@@ -484,32 +483,3 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
-
-int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
-{
- __be64 flags, trigger_page;
- s64 rc;
- u32 hwirq;
-
- hwirq = xive_native_alloc_irq();
- if (!hwirq)
- return -ENOENT;
-
- rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
- NULL);
- if (rc || !trigger_page) {
- xive_native_free_irq(hwirq);
- return -ENOENT;
- }
- *irq = hwirq;
- *trigger_addr = be64_to_cpu(trigger_page);
- return 0;
-
-}
-EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);
-
-void pnv_ocxl_free_xive_irq(u32 irq)
-{
- xive_native_free_irq(irq);
-}
-EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
index 6dba3b62269f..23571f0b555a 100644
--- a/arch/powerpc/platforms/powernv/opal-core.c
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -510,7 +510,7 @@ static void __init opalcore_config_init(void)
idx = be32_to_cpu(opalc_metadata->region_cnt);
if (idx > MAX_PT_LOAD_CNT) {
pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)",
- MAX_PT_LOAD_CNT, idx);
+ idx, MAX_PT_LOAD_CNT);
idx = MAX_PT_LOAD_CNT;
}
for (i = 0; i < idx; i++) {
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 62ef7ad995da..5e33b1fc67c2 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -179,14 +179,14 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj,
return count;
}
-static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type)
+static void create_elog_obj(uint64_t id, size_t size, uint64_t type)
{
struct elog_obj *elog;
int rc;
elog = kzalloc(sizeof(*elog), GFP_KERNEL);
if (!elog)
- return NULL;
+ return;
elog->kobj.kset = elog_kset;
@@ -219,18 +219,37 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type)
rc = kobject_add(&elog->kobj, NULL, "0x%llx", id);
if (rc) {
kobject_put(&elog->kobj);
- return NULL;
+ return;
}
+ /*
+ * As soon as the sysfs file for this elog is created/activated there is
+ * a chance the opal_errd daemon (or any userspace) might read and
+ * acknowledge the elog before kobject_uevent() is called. If that
+ * happens then there is a potential race between
+ * elog_ack_store->kobject_put() and kobject_uevent() which leads to a
+ * use-after-free of a kernfs object resulting in a kernel crash.
+ *
+ * To avoid that, we need to take a reference on behalf of the bin file,
+ * so that our reference remains valid while we call kobject_uevent().
+ * We then drop our reference before exiting the function, leaving the
+ * bin file to drop the last reference (if it hasn't already).
+ */
+
+ /* Take a reference for the bin file */
+ kobject_get(&elog->kobj);
rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr);
- if (rc) {
+ if (rc == 0) {
+ kobject_uevent(&elog->kobj, KOBJ_ADD);
+ } else {
+ /* Drop the reference taken for the bin file */
kobject_put(&elog->kobj);
- return NULL;
}
- kobject_uevent(&elog->kobj, KOBJ_ADD);
+ /* Drop our reference */
+ kobject_put(&elog->kobj);
- return elog;
+ return;
}
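Aside: the race described in the comment above generalises to any kobject whose sysfs file can drop the final reference. The distilled pattern, with hypothetical names:

	/* Pin the kobject so the uevent cannot race with a final put. */
	kobject_get(&obj->kobj);
	if (sysfs_create_bin_file(&obj->kobj, &obj->raw_attr) == 0)
		kobject_uevent(&obj->kobj, KOBJ_ADD);	/* safe: ref held */
	else
		kobject_put(&obj->kobj);	/* drop the bin file's ref */
	kobject_put(&obj->kobj);		/* drop our own reference */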
static irqreturn_t elog_event(int irq, void *data)
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index d26da19a611f..d3b6e135c18b 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -12,6 +12,8 @@
#include <linux/types.h>
#include <asm/barrier.h>
+#include "powernv.h"
+
/* OPAL in-memory console. Defined in OPAL source at core/console.c */
struct memcons {
__be64 magic;
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 45f4223a790f..deddaebf8c14 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -24,7 +24,7 @@
#include <linux/uaccess.h>
-/**
+/*
* The msg member must be at the end of the struct, as it's followed by the
* message data.
*/
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 023a4f987bb2..2b4ceb5e6ce4 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -894,7 +894,6 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
- struct pci_dev *parent;
uint8_t bcomp, dcomp, fcomp;
long rc, rid_end, rid;
@@ -904,7 +903,6 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
- parent = pe->pbus->self;
if (pe->flags & PNV_IODA_PE_BUS_ALL)
count = resource_size(&pe->pbus->busn_res);
else
@@ -925,12 +923,6 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
}
rid_end = pe->rid + (count << 8);
} else {
-#ifdef CONFIG_PCI_IOV
- if (pe->flags & PNV_IODA_PE_VF)
- parent = pe->parent_dev;
- else
-#endif /* CONFIG_PCI_IOV */
- parent = pe->pdev->bus->self;
bcomp = OpalPciBusAll;
dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 1aa51c4fa904..11df4e16a1cc 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -2,6 +2,13 @@
#ifndef _POWERNV_H
#define _POWERNV_H
+/*
+ * There's various hacks scattered throughout the generic powerpc arch code
+ * that needs to call into powernv platform stuff. The prototypes for those
+ * functions are in asm/powernv.h
+ */
+#include <asm/powernv.h>
+
#ifdef CONFIG_SMP
extern void pnv_smp_init(void);
#else
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 8035caf6e297..72c25295c1c2 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -65,7 +65,7 @@ int powernv_get_random_real_mode(unsigned long *v)
return 1;
}
-int powernv_get_random_darn(unsigned long *v)
+static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 7fcb88623081..9acaa0f131b9 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -130,6 +130,28 @@ static void pnv_setup_rfi_flush(void)
setup_count_cache_flush();
}
+static void __init pnv_check_guarded_cores(void)
+{
+ struct device_node *dn;
+ int bad_count = 0;
+
+ for_each_node_by_type(dn, "cpu") {
+ if (of_property_match_string(dn, "status", "bad") >= 0)
+ bad_count++;
+ };
+
+ if (bad_count) {
+ printk(" _ _______________\n");
+ pr_cont(" | | / \\\n");
+ pr_cont(" | | | WARNING! |\n");
+ pr_cont(" | | | |\n");
+ pr_cont(" | | | It looks like |\n");
+ pr_cont(" |_| | you have %*d |\n", 3, bad_count);
+ pr_cont(" _ | guarded cores |\n");
+ pr_cont(" (_) \\_______________/\n");
+ }
+}
+
static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -150,6 +172,8 @@ static void __init pnv_setup_arch(void)
/* Enable NAP mode */
powersave_nap = 1;
+ pnv_check_guarded_cores();
+
/* XXX PMCS */
}
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index b2ba3e95bda7..54c4ba45c7ce 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -43,7 +43,7 @@
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
-#define DBG(fmt...)
+#define DBG(fmt...) do { } while (0)
#endif
static void pnv_smp_setup_cpu(int cpu)
@@ -158,7 +158,7 @@ static void pnv_flush_interrupts(void)
}
}
-static void pnv_smp_cpu_kill_self(void)
+static void pnv_cpu_offline_self(void)
{
unsigned long srr1, unexpected_mask, wmask;
unsigned int cpu;
@@ -417,6 +417,7 @@ static struct smp_ops_t pnv_smp_ops = {
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = pnv_smp_cpu_disable,
.cpu_die = generic_cpu_die,
+ .cpu_offline_self = pnv_cpu_offline_self,
#endif /* CONFIG_HOTPLUG_CPU */
};
@@ -430,7 +431,6 @@ void __init pnv_smp_init(void)
smp_ops = &pnv_smp_ops;
#ifdef CONFIG_HOTPLUG_CPU
- ppc_md.cpu_die = pnv_smp_cpu_kill_self;
#ifdef CONFIG_KEXEC_CORE
crash_wake_offline = 1;
#endif
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 6434f9cb5aed..5f5fe63a3d1c 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -186,7 +186,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window)
* OS/User Window Context (UWC) MMIO Base Address Region for the given window.
* Map these bus addresses and save the mapped kernel addresses in @window.
*/
-int map_winctx_mmio_bars(struct vas_window *window)
+static int map_winctx_mmio_bars(struct vas_window *window)
{
int len;
u64 start;
@@ -214,7 +214,7 @@ int map_winctx_mmio_bars(struct vas_window *window)
* registers are not sequential. And, we can only write to offsets
* with valid registers.
*/
-void reset_window_regs(struct vas_window *window)
+static void reset_window_regs(struct vas_window *window)
{
write_hvwc_reg(window, VREG(LPID), 0ULL);
write_hvwc_reg(window, VREG(PID), 0ULL);
@@ -357,7 +357,8 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin,
* as a one-time task? That could work for NX but what about other
* receivers? Let the receivers tell us the rx-fifo buffers for now.
*/
-int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
+static void init_winctx_regs(struct vas_window *window,
+ struct vas_winctx *winctx)
{
u64 val;
int fifo_size;
@@ -499,8 +500,6 @@ int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
write_hvwc_reg(window, VREG(WINCTL), val);
-
- return 0;
}
static void vas_release_window_id(struct ida *ida, int winid)
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 1193c294b8d0..0c252478e556 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -448,7 +448,7 @@ static void ps3_disable_spu(struct spu_context *ctx)
ctx->ops->runcntl_stop(ctx);
}
-const struct spu_management_ops spu_management_ps3_ops = {
+static const struct spu_management_ops spu_management_ps3_ops = {
.enumerate_spus = ps3_enumerate_spus,
.create_spu = ps3_create_spu,
.destroy_spu = ps3_destroy_spu,
@@ -589,7 +589,7 @@ static u64 resource_allocation_enable_get(struct spu *spu)
return 0; /* No support. */
}
-const struct spu_priv1_ops spu_priv1_ps3_ops = {
+static const struct spu_priv1_ops spu_priv1_ps3_ops = {
.int_mask_and = int_mask_and,
.int_mask_or = int_mask_or,
.int_mask_set = int_mask_set,
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 3542b7bd6a46..c62aaa29a9d5 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -9,7 +9,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/export.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/err.h>
#include <linux/slab.h>
@@ -696,6 +696,8 @@ static const struct dma_map_ops ps3_sb_dma_ops = {
.unmap_page = ps3_unmap_page,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
};
static const struct dma_map_ops ps3_ioc0_dma_ops = {
@@ -708,6 +710,8 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = {
.unmap_page = ps3_unmap_page,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
};
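Aside: the new .alloc_pages/.free_pages hooks back the dma_alloc_pages()/dma_alloc_noncoherent() interfaces that arrived with <linux/dma-map-ops.h>. A hedged usage sketch, where dev is an assumed struct device * for some PS3 system-bus device:

	dma_addr_t dma_handle;
	void *buf = dma_alloc_noncoherent(dev, PAGE_SIZE, &dma_handle,
					  DMA_BIDIRECTIONAL, GFP_KERNEL);
	if (buf) {
		/* ... fill buf, hand dma_handle to the device ... */
		dma_free_noncoherent(dev, PAGE_SIZE, buf, dma_handle,
				     DMA_BIDIRECTIONAL);
	}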
/**
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index cb2d9a970b7b..cf024fa37bda 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -33,8 +33,6 @@
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
-static int pseries_eeh_get_pe_addr(struct pci_dn *pdn);
-
/* RTAS tokens */
static int ibm_set_eeh_option;
static int ibm_set_slot_reset;
@@ -86,42 +84,43 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
/**
- * pseries_eeh_get_config_addr - Retrieve config address
+ * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device
+ * @pdn: pci_dn of the input device
+ *
+ * The EEH RTAS calls use a tuple consisting of: (buid_hi, buid_lo,
+ * pe_config_addr) as a handle to a given PE. This function finds the
+ * pe_config_addr based on the device's config addr.
*
- * Retrieve the assocated config address. Actually, there're 2 RTAS
- * function calls dedicated for the purpose. We need implement
- * it through the new function and then the old one. Besides,
- * you should make sure the config address is figured out from
- * FDT node before calling the function.
+ * Keep in mind that the pe_config_addr *might* be numerically identical to the
+ * device's config addr, but the two are conceptually distinct.
*
- * It's notable that zero'ed return value means invalid PE config
- * address.
+ * Returns the pe_config_addr, or a negative error code.
*/
-static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr)
+static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
{
- int ret = 0;
- int rets[3];
+ int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+ struct pci_controller *phb = pdn->phb;
+ int ret, rets[3];
if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
/*
- * First of all, we need to make sure there has one PE
- * associated with the device. Otherwise, PE address is
- * meaningless.
+ * First of all, use function 1 to determine if this device is
+ * part of a PE or not. ret[0] being zero indicates it's not.
*/
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
config_addr, BUID_HI(phb->buid),
BUID_LO(phb->buid), 1);
if (ret || (rets[0] == 0))
- return 0;
+ return -ENOENT;
- /* Retrieve the associated PE config address */
+ /* Retrieve the associated PE config address with function 0 */
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
config_addr, BUID_HI(phb->buid),
BUID_LO(phb->buid), 0);
if (ret) {
pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
__func__, phb->global_number, config_addr);
- return 0;
+ return -ENXIO;
}
return rets[0];
@@ -134,13 +133,20 @@ static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr)
if (ret) {
pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
__func__, phb->global_number, config_addr);
- return 0;
+ return -ENXIO;
}
return rets[0];
}
- return ret;
+ /*
+ * PAPR does describe a process for finding the pe_config_addr that was
+ * used before the ibm,get-config-addr-info calls were added. However,
+ * I haven't found *any* systems that don't have that RTAS call
+ * implemented. If you happen to find one that needs the old DT based
+ * process, patches are welcome!
+ */
+ return -ENOENT;
}
/**
@@ -161,8 +167,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option)
BUID_LO(phb->buid), option);
/* If fundamental-reset not supported, try hot-reset */
- if (option == EEH_RESET_FUNDAMENTAL &&
- ret == -8) {
+ if (option == EEH_RESET_FUNDAMENTAL && ret == -8) {
option = EEH_RESET_HOT;
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
config_addr, BUID_HI(phb->buid),
@@ -170,8 +175,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option)
}
/* We need reset hold or settlement delay */
- if (option == EEH_RESET_FUNDAMENTAL ||
- option == EEH_RESET_HOT)
+ if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT)
msleep(EEH_PE_RST_HOLD_TIME);
else
msleep(EEH_PE_RST_SETTLE_TIME);
@@ -239,88 +243,6 @@ static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(slot_errbuf_lock);
static int eeh_error_buf_size;
-/**
- * pseries_eeh_init - EEH platform dependent initialization
- *
- * EEH platform dependent initialization on pseries.
- */
-static int pseries_eeh_init(void)
-{
- struct pci_controller *phb;
- struct pci_dn *pdn;
- int addr, config_addr;
-
- /* figure out EEH RTAS function call tokens */
- ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
- ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
- ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
- ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
- ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
- ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
- ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
- ibm_configure_pe = rtas_token("ibm,configure-pe");
-
- /*
- * ibm,configure-pe and ibm,configure-bridge have the same semantics,
- * however ibm,configure-pe can be faster. If we can't find
- * ibm,configure-pe then fall back to using ibm,configure-bridge.
- */
- if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE)
- ibm_configure_pe = rtas_token("ibm,configure-bridge");
-
- /*
- * Necessary sanity check. We needn't check "get-config-addr-info"
- * and its variant since the old firmware probably support address
- * of domain/bus/slot/function for EEH RTAS operations.
- */
- if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE ||
- ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
- (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
- ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
- ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE ||
- ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
- pr_info("EEH functionality not supported\n");
- return -EINVAL;
- }
-
- /* Initialize error log lock and size */
- spin_lock_init(&slot_errbuf_lock);
- eeh_error_buf_size = rtas_token("rtas-error-log-max");
- if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
- pr_info("%s: unknown EEH error log size\n",
- __func__);
- eeh_error_buf_size = 1024;
- } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
- pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
- __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
- eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
- }
-
- /* Set EEH probe mode */
- eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
-
- /* Set EEH machine dependent code */
- ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
-
- if (is_kdump_kernel() || reset_devices) {
- pr_info("Issue PHB reset ...\n");
- list_for_each_entry(phb, &hose_list, list_node) {
- pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
- addr = (pdn->busno << 16) | (pdn->devfn << 8);
- config_addr = pseries_eeh_get_config_addr(phb, addr);
- /* invalid PE config addr */
- if (config_addr == 0)
- continue;
-
- pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
- pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
- pseries_eeh_phb_configure_bridge(phb, config_addr);
- }
- }
-
- return 0;
-}
-
static int pseries_eeh_cap_start(struct pci_dn *pdn)
{
u32 status;
@@ -439,10 +361,9 @@ static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
*/
void pseries_eeh_init_edev(struct pci_dn *pdn)
{
+ struct eeh_pe pe, *parent;
struct eeh_dev *edev;
- struct eeh_pe pe;
u32 pcie_flags;
- int enable = 0;
int ret;
if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)))
@@ -499,51 +420,38 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
}
}
- /* Initialize the fake PE */
+ /* first up, find the pe_config_addr for the PE containing the device */
+ ret = pseries_eeh_get_pe_config_addr(pdn);
+ if (ret < 0) {
+ eeh_edev_dbg(edev, "Unable to find pe_config_addr\n");
+ goto err;
+ }
+
+ /* Try enable EEH on the fake PE */
memset(&pe, 0, sizeof(struct eeh_pe));
pe.phb = pdn->phb;
- pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ pe.addr = ret;
- /* Enable EEH on the device */
eeh_edev_dbg(edev, "Enabling EEH on device\n");
ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
if (ret) {
eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
- } else {
- struct eeh_pe *parent;
+ goto err;
+ }
- /* Retrieve PE address */
- edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn);
- pe.addr = edev->pe_config_addr;
+ edev->pe_config_addr = pe.addr;
- /* Some older systems (Power4) allow the ibm,set-eeh-option
- * call to succeed even on nodes where EEH is not supported.
- * Verify support explicitly.
- */
- ret = eeh_ops->get_state(&pe, NULL);
- if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
- enable = 1;
+ eeh_add_flag(EEH_ENABLED);
- /*
- * This device doesn't support EEH, but it may have an
- * EEH parent. In this case any error on the device will
- * freeze the PE of it's upstream bridge, so added it to
- * the upstream PE.
- */
- parent = pseries_eeh_pe_get_parent(edev);
- if (parent && !enable)
- edev->pe_config_addr = parent->addr;
+ parent = pseries_eeh_pe_get_parent(edev);
+ eeh_pe_tree_insert(edev, parent);
+ eeh_save_bars(edev);
+ eeh_edev_dbg(edev, "EEH enabled for device");
- if (enable || parent) {
- eeh_add_flag(EEH_ENABLED);
- eeh_pe_tree_insert(edev, parent);
- }
- eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
- (enable ? "enabled" : "unsupported"), ret);
- }
+ return;
- /* Save memory bars */
- eeh_save_bars(edev);
+err:
+ eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", ret);
}
static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev)
@@ -600,7 +508,6 @@ EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive);
static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
{
int ret = 0;
- int config_addr;
/*
* When we're enabling or disabling EEH functioality on
@@ -613,85 +520,23 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
case EEH_OPT_ENABLE:
case EEH_OPT_THAW_MMIO:
case EEH_OPT_THAW_DMA:
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
break;
case EEH_OPT_FREEZE_PE:
/* Not support */
return 0;
default:
- pr_err("%s: Invalid option %d\n",
- __func__, option);
+ pr_err("%s: Invalid option %d\n", __func__, option);
return -EINVAL;
}
ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
- config_addr, BUID_HI(pe->phb->buid),
+ pe->addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid), option);
return ret;
}
/**
- * pseries_eeh_get_pe_addr - Retrieve PE address
- * @pe: EEH PE
- *
- * Retrieve the assocated PE address. Actually, there're 2 RTAS
- * function calls dedicated for the purpose. We need implement
- * it through the new function and then the old one. Besides,
- * you should make sure the config address is figured out from
- * FDT node before calling the function.
- *
- * It's notable that zero'ed return value means invalid PE config
- * address.
- */
-static int pseries_eeh_get_pe_addr(struct pci_dn *pdn)
-{
- int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
- unsigned long buid = pdn->phb->buid;
- int ret = 0;
- int rets[3];
-
- if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
- /*
- * First of all, we need to make sure there has one PE
- * associated with the device. Otherwise, PE address is
- * meaningless.
- */
- ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- config_addr, BUID_HI(buid), BUID_LO(buid), 1);
- if (ret || (rets[0] == 0))
- return 0;
-
- /* Retrieve the associated PE config address */
- ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- config_addr, BUID_HI(buid), BUID_LO(buid), 0);
- if (ret) {
- pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
- __func__, pdn->phb->global_number, config_addr);
- return 0;
- }
-
- return rets[0];
- }
-
- if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
- ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
- config_addr, BUID_HI(buid), BUID_LO(buid), 0);
- if (ret) {
- pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
- __func__, pdn->phb->global_number, config_addr);
- return 0;
- }
-
- return rets[0];
- }
-
- return ret;
-}
-
-/**
* pseries_eeh_get_state - Retrieve PE state
* @pe: EEH PE
* @delay: suggested time to wait if state is unavailable
@@ -706,25 +551,19 @@ static int pseries_eeh_get_pe_addr(struct pci_dn *pdn)
*/
static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
{
- int config_addr;
int ret;
int rets[4];
int result;
- /* Figure out PE config address if possible */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
- config_addr, BUID_HI(pe->phb->buid),
+ pe->addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid));
} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
/* Fake PE unavailable info */
rets[2] = 0;
ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
- config_addr, BUID_HI(pe->phb->buid),
+ pe->addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid));
} else {
return EEH_STATE_NOT_SUPPORT;
@@ -778,14 +617,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
*/
static int pseries_eeh_reset(struct eeh_pe *pe, int option)
{
- int config_addr;
-
- /* Figure out PE address */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
- return pseries_eeh_phb_reset(pe->phb, config_addr, option);
+ return pseries_eeh_phb_reset(pe->phb, pe->addr, option);
}
/**
@@ -801,19 +633,13 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option)
*/
static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
{
- int config_addr;
unsigned long flags;
int ret;
spin_lock_irqsave(&slot_errbuf_lock, flags);
memset(slot_errbuf, 0, eeh_error_buf_size);
- /* Figure out the PE address */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
- ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
+ ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, pe->addr,
BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid),
virt_to_phys(drv_log), len,
virt_to_phys(slot_errbuf), eeh_error_buf_size,
@@ -832,14 +658,7 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
*/
static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
{
- int config_addr;
-
- /* Figure out the PE address */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
- return pseries_eeh_phb_configure_bridge(pe->phb, config_addr);
+ return pseries_eeh_phb_configure_bridge(pe->phb, pe->addr);
}
/**
@@ -954,8 +773,7 @@ static int pseries_notify_resume(struct eeh_dev *edev)
if (!edev)
return -EEXIST;
- if (rtas_token("ibm,open-sriov-allow-unfreeze")
- == RTAS_UNKNOWN_SERVICE)
+ if (rtas_token("ibm,open-sriov-allow-unfreeze") == RTAS_UNKNOWN_SERVICE)
return -EINVAL;
if (edev->pdev->is_physfn || edev->pdev->is_virtfn)
@@ -967,7 +785,6 @@ static int pseries_notify_resume(struct eeh_dev *edev)
static struct eeh_ops pseries_eeh_ops = {
.name = "pseries",
- .init = pseries_eeh_init,
.probe = pseries_eeh_probe,
.set_option = pseries_eeh_set_option,
.get_state = pseries_eeh_get_state,
@@ -992,15 +809,84 @@ static struct eeh_ops pseries_eeh_ops = {
*/
static int __init eeh_pseries_init(void)
{
- int ret;
+ struct pci_controller *phb;
+ struct pci_dn *pdn;
+ int ret, config_addr;
- ret = eeh_ops_register(&pseries_eeh_ops);
+ /* figure out EEH RTAS function call tokens */
+ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
+ ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+ ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
+ ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
+ ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
+ ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
+ ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
+ ibm_configure_pe = rtas_token("ibm,configure-pe");
+
+ /*
+	 * ibm,configure-pe and ibm,configure-bridge have the same semantics,
+	 * but ibm,configure-pe can be faster. If we can't find ibm,configure-pe,
+	 * fall back to using ibm,configure-bridge.
+ */
+ if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE)
+ ibm_configure_pe = rtas_token("ibm,configure-bridge");
+
+ /*
+ * Necessary sanity check. We needn't check "get-config-addr-info"
+	 * and its variant, since old firmware probably supports addresses in
+	 * domain/bus/slot/function form for EEH RTAS operations.
+ */
+ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE ||
+ ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
+ (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
+ ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
+ ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE ||
+ ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
+ pr_info("EEH functionality not supported\n");
+ return -EINVAL;
+ }
+
+ /* Initialize error log lock and size */
+ spin_lock_init(&slot_errbuf_lock);
+ eeh_error_buf_size = rtas_token("rtas-error-log-max");
+ if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
+ pr_info("%s: unknown EEH error log size\n",
+ __func__);
+ eeh_error_buf_size = 1024;
+ } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
+ pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
+ __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
+ eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
+ }
+
+ /* Set EEH probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
+
+ /* Set EEH machine dependent code */
+ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
+
+ if (is_kdump_kernel() || reset_devices) {
+ pr_info("Issue PHB reset ...\n");
+ list_for_each_entry(phb, &hose_list, list_node) {
+ pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
+ config_addr = pseries_eeh_get_pe_config_addr(pdn);
+
+ /* invalid PE config addr */
+ if (config_addr < 0)
+ continue;
+
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
+ pseries_eeh_phb_configure_bridge(phb, config_addr);
+ }
+ }
+
+ ret = eeh_init(&pseries_eeh_ops);
if (!ret)
pr_info("EEH: pSeries platform initialized\n");
else
pr_info("EEH: pSeries platform initialization failure (%d)\n",
ret);
-
return ret;
}
-machine_early_initcall(pseries, eeh_pseries_init);
+machine_arch_initcall(pseries, eeh_pseries_init);
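The kdump/reset_devices loop above drives each PHB through pseries_eeh_phb_reset(), which is not shown in this hunk. As a hedged sketch, on the assumption that it is a thin wrapper around the ibm,set-slot-reset RTAS call (the name phb_reset_sketch is illustrative, not the kernel's):

	static int phb_reset_sketch(struct pci_controller *phb, int config_addr,
				    int option)
	{
		int ret;

		ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr,
				BUID_HI(phb->buid), BUID_LO(phb->buid), option);

		/* give the PE time to hold reset, or to settle after deassert */
		if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT)
			msleep(EEH_PE_RST_HOLD_TIME);
		else
			msleep(EEH_PE_RST_SETTLE_TIME);

		return ret;
	}

This is why the loop issues EEH_RESET_FUNDAMENTAL followed by EEH_RESET_DEACTIVATE before reconfiguring the bridge: assert the reset, release it, then restore config space.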
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7a974ed6b240..f2837e33bf5d 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -55,7 +55,7 @@ static void rtas_stop_self(void)
panic("Alas, I survived.\n");
}
-static void pseries_mach_cpu_die(void)
+static void pseries_cpu_offline_self(void)
{
unsigned int hwcpu = hard_smp_processor_id();
@@ -102,7 +102,7 @@ static int pseries_cpu_disable(void)
* to self-destroy so that the cpu-offline thread can send the CPU_DEAD
* notifications.
*
- * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
+ * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to
* self-destruct.
*/
static void pseries_cpu_die(unsigned int cpu)
@@ -901,7 +901,7 @@ static int __init pseries_cpu_hotplug_init(void)
return 0;
}
- ppc_md.cpu_die = pseries_mach_cpu_die;
+ smp_ops->cpu_offline_self = pseries_cpu_offline_self;
smp_ops->cpu_disable = pseries_cpu_disable;
smp_ops->cpu_die = pseries_cpu_die;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 5d545b78111f..7efe6ec5d14a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -30,12 +30,17 @@ unsigned long pseries_memory_block_size(void)
np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (np) {
- const __be64 *size;
+ int len;
+ int size_cells;
+ const __be32 *prop;
- size = of_get_property(np, "ibm,lmb-size", NULL);
- if (size)
- memblock_size = be64_to_cpup(size);
+ size_cells = of_n_size_cells(np);
+
+ prop = of_get_property(np, "ibm,lmb-size", &len);
+ if (prop && len >= size_cells * sizeof(__be32))
+ memblock_size = of_read_number(prop, size_cells);
of_node_put(np);
+
} else if (machine_is(pseries)) {
/* This fallback really only applies to pseries */
unsigned int memzero_size = 0;
@@ -277,7 +282,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb)
return dlpar_change_lmb_state(lmb, false);
}
-static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
+static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size)
{
unsigned long block_sz, start_pfn;
int sections_per_block;
@@ -308,10 +313,11 @@ out:
static int pseries_remove_mem_node(struct device_node *np)
{
- const __be32 *regs;
+ const __be32 *prop;
unsigned long base;
- unsigned int lmb_size;
+ unsigned long lmb_size;
int ret = -EINVAL;
+ int addr_cells, size_cells;
/*
* Check to see if we are actually removing memory
@@ -322,12 +328,19 @@ static int pseries_remove_mem_node(struct device_node *np)
/*
* Find the base address and size of the memblock
*/
- regs = of_get_property(np, "reg", NULL);
- if (!regs)
+ prop = of_get_property(np, "reg", NULL);
+ if (!prop)
return ret;
- base = be64_to_cpu(*(unsigned long *)regs);
- lmb_size = be32_to_cpu(regs[3]);
+ addr_cells = of_n_addr_cells(np);
+ size_cells = of_n_size_cells(np);
+
+ /*
+ * "reg" property represents (addr,size) tuple.
+ */
+ base = of_read_number(prop, addr_cells);
+ prop += addr_cells;
+ lmb_size = of_read_number(prop, size_cells);
pseries_remove_memblock(base, lmb_size);
return 0;
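The same cells-aware parsing reappears in pseries_add_mem_node() further down. A self-contained sketch of the idiom (hypothetical helper; error handling trimmed):

	/* Read the first (addr, size) tuple of a "reg" property. The field
	 * widths come from #address-cells/#size-cells, and of_read_number()
	 * folds that many big-endian u32 cells into a u64, so 1-cell and
	 * 2-cell layouts are handled by the same code. */
	static int read_first_reg_tuple(struct device_node *np,
					u64 *base, u64 *size)
	{
		const __be32 *prop = of_get_property(np, "reg", NULL);
		int addr_cells = of_n_addr_cells(np);
		int size_cells = of_n_size_cells(np);

		if (!prop)
			return -ENODEV;

		*base = of_read_number(prop, addr_cells);
		*size = of_read_number(prop + addr_cells, size_cells);
		return 0;
	}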
@@ -354,25 +367,32 @@ static int dlpar_add_lmb(struct drmem_lmb *);
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
+ struct memory_block *mem_block;
unsigned long block_sz;
int rc;
if (!lmb_is_removable(lmb))
return -EINVAL;
+ mem_block = lmb_to_memblock(lmb);
+ if (mem_block == NULL)
+ return -EINVAL;
+
rc = dlpar_offline_lmb(lmb);
- if (rc)
+ if (rc) {
+ put_device(&mem_block->dev);
return rc;
+ }
block_sz = pseries_memory_block_size();
- __remove_memory(lmb->nid, lmb->base_addr, block_sz);
+ __remove_memory(mem_block->nid, lmb->base_addr, block_sz);
+ put_device(&mem_block->dev);
/* Update memory regions for memory remove */
memblock_remove(lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
- lmb_clear_nid(lmb);
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
return 0;
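lmb_to_memblock() is not part of this hunk; the new put_device() calls make sense on the assumption that it resolves the LMB to its memory_block via find_memory_block(), which returns the device with its reference count raised. A sketch of that assumed shape:

	/* Sketch: every successful lookup must later be balanced with
	 * put_device(&mem_block->dev), as dlpar_remove_lmb() now does. */
	static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
	{
		unsigned long section_nr;

		section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
		return find_memory_block(__nr_to_section(section_nr));
	}

The remove path also switches from lmb->nid to mem_block->nid: once lmb_set_nid()/lmb_clear_nid() are gone, the node id tracked by the memory block is the authoritative one.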
@@ -557,7 +577,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
#else
static inline int pseries_remove_memblock(unsigned long base,
- unsigned int memblock_size)
+ unsigned long memblock_size)
{
return -EOPNOTSUPP;
}
@@ -591,7 +611,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
static int dlpar_add_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int rc;
+ int nid, rc;
if (lmb->flags & DRCONF_MEM_ASSIGNED)
return -EINVAL;
@@ -602,11 +622,15 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
return rc;
}
- lmb_set_nid(lmb);
block_sz = memory_block_size_bytes();
+ /* Find the node id for this LMB. Fake one if necessary. */
+ nid = of_drconf_to_nid_single(lmb);
+ if (nid < 0 || !node_possible(nid))
+ nid = first_online_node;
+
/* Add the memory */
- rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
+ rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_NONE);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;
@@ -614,9 +638,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
rc = dlpar_online_lmb(lmb);
if (rc) {
- __remove_memory(lmb->nid, lmb->base_addr, block_sz);
+ __remove_memory(nid, lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
- lmb_clear_nid(lmb);
} else {
lmb->flags |= DRCONF_MEM_ASSIGNED;
}
@@ -878,10 +901,11 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
static int pseries_add_mem_node(struct device_node *np)
{
- const __be32 *regs;
+ const __be32 *prop;
unsigned long base;
- unsigned int lmb_size;
+ unsigned long lmb_size;
int ret = -EINVAL;
+ int addr_cells, size_cells;
/*
* Check to see if we are actually adding memory
@@ -892,12 +916,18 @@ static int pseries_add_mem_node(struct device_node *np)
/*
* Find the base and size of the memblock
*/
- regs = of_get_property(np, "reg", NULL);
- if (!regs)
+ prop = of_get_property(np, "reg", NULL);
+ if (!prop)
return ret;
- base = be64_to_cpu(*(unsigned long *)regs);
- lmb_size = be32_to_cpu(regs[3]);
+ addr_cells = of_n_addr_cells(np);
+ size_cells = of_n_size_cells(np);
+ /*
+ * "reg" property represents (addr,size) tuple.
+ */
+ base = of_read_number(prop, addr_cells);
+ prop += addr_cells;
+ lmb_size = of_read_number(prop, size_cells);
/*
* Update memory region to represent the memory add
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index c40c62ec432e..2c59b4986ea5 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -70,31 +70,14 @@ static int hc_show(struct seq_file *m, void *p)
return 0;
}
-static const struct seq_operations hcall_inst_seq_ops = {
+static const struct seq_operations hcall_inst_sops = {
.start = hc_start,
.next = hc_next,
.stop = hc_stop,
.show = hc_show
};
-static int hcall_inst_seq_open(struct inode *inode, struct file *file)
-{
- int rc;
- struct seq_file *seq;
-
- rc = seq_open(file, &hcall_inst_seq_ops);
- seq = file->private_data;
- seq->private = file_inode(file)->i_private;
-
- return rc;
-}
-
-static const struct file_operations hcall_inst_seq_fops = {
- .open = hcall_inst_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
+DEFINE_SEQ_ATTRIBUTE(hcall_inst);
#define HCALL_ROOT_DIR "hcall_inst"
#define CPU_NAME_BUF_SIZE 32
@@ -149,7 +132,7 @@ static int __init hcall_inst_init(void)
snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
debugfs_create_file(cpu_name_buf, 0444, hcall_root,
per_cpu(hcall_stats, cpu),
- &hcall_inst_seq_fops);
+ &hcall_inst_fops);
}
return 0;
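DEFINE_SEQ_ATTRIBUTE(hcall_inst) generates what the deleted code spelled out by hand: an open routine wired to hcall_inst_sops plus an hcall_inst_fops definition. Roughly, sketching the expansion from the macro's seq_file.h definition (not literal preprocessor output):

	static int hcall_inst_open(struct inode *inode, struct file *file)
	{
		int ret = seq_open(file, &hcall_inst_sops);

		/* i_private is forwarded only when non-NULL, unlike the
		 * old hand-rolled open which copied it unconditionally */
		if (!ret && inode->i_private) {
			struct seq_file *seq_f = file->private_data;

			seq_f->private = inode->i_private;
		}
		return ret;
	}

	static const struct file_operations hcall_inst_fops = {
		.owner   = THIS_MODULE,
		.open    = hcall_inst_open,
		.read    = seq_read,
		.llseek  = seq_lseek,
		.release = seq_release,
	};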
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index a6f101c958e8..8c6e509f6967 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -40,7 +40,7 @@
#include <linux/export.h>
#include <linux/console.h>
#include <linux/kobject.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/interrupt.h>
#include <linux/of.h>
#include <linux/slab.h>
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6d47b4a3ce39..e4198700ed1a 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -39,6 +39,20 @@
#include "pseries.h"
+enum {
+ DDW_QUERY_PE_DMA_WIN = 0,
+ DDW_CREATE_PE_DMA_WIN = 1,
+ DDW_REMOVE_PE_DMA_WIN = 2,
+
+ DDW_APPLICABLE_SIZE
+};
+
+enum {
+ DDW_EXT_SIZE = 0,
+ DDW_EXT_RESET_DMA_WIN = 1,
+ DDW_EXT_QUERY_OUT_SIZE = 2
+};
+
static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
struct iommu_table_group *table_group;
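The new enums name the slots of the "ibm,ddw-applicable" property (three RTAS tokens) and of "ibm,ddw-extensions", replacing the bare 0/1/2 indices the code below used. A minimal usage sketch (hypothetical helper; pdn and liobn assumed valid):

	static void ddw_remove_by_liobn(struct device_node *pdn, u64 liobn)
	{
		u32 ddw_avail[DDW_APPLICABLE_SIZE];

		/* tokens, in order: query, create, remove */
		if (of_property_read_u32_array(pdn, "ibm,ddw-applicable",
					       &ddw_avail[0],
					       DDW_APPLICABLE_SIZE))
			return;	/* no DDW support advertised for this PE */

		rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
	}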
@@ -334,7 +348,7 @@ struct direct_window {
/* Dynamic DMA Window support */
struct ddw_query_response {
u32 windows_available;
- u32 largest_available_block;
+ u64 largest_available_block;
u32 page_size;
u32 migration_capable;
};
@@ -767,25 +781,14 @@ static int __init disable_ddw_setup(char *str)
early_param("disable_ddw", disable_ddw_setup);
-static void remove_ddw(struct device_node *np, bool remove_prop)
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
+ struct property *win)
{
struct dynamic_dma_window_prop *dwp;
- struct property *win64;
- u32 ddw_avail[3];
u64 liobn;
- int ret = 0;
-
- ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
- &ddw_avail[0], 3);
-
- win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
- if (!win64)
- return;
-
- if (ret || win64->length < sizeof(*dwp))
- goto delprop;
+ int ret;
- dwp = win64->value;
+ dwp = win->value;
liobn = (u64)be32_to_cpu(dwp->liobn);
/* clear the whole window, note the arg is in kernel pages */
@@ -798,19 +801,39 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
pr_debug("%pOF successfully cleared tces in window.\n",
np);
- ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
+ ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
if (ret)
pr_warn("%pOF: failed to remove direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
else
pr_debug("%pOF: successfully removed direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
+}
+
+static void remove_ddw(struct device_node *np, bool remove_prop)
+{
+ struct property *win;
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
+ int ret = 0;
-delprop:
- if (remove_prop)
- ret = of_remove_property(np, win64);
+ ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
+ if (ret)
+ return;
+
+ win = of_find_property(np, DIRECT64_PROPNAME, NULL);
+ if (!win)
+ return;
+
+ if (win->length >= sizeof(struct dynamic_dma_window_prop))
+ remove_dma_window(np, ddw_avail, win);
+
+ if (!remove_prop)
+ return;
+
+ ret = of_remove_property(np, win);
if (ret)
pr_warn("%pOF: failed to remove direct window property: %d\n",
np, ret);
@@ -869,14 +892,62 @@ static int find_existing_ddw_windows(void)
}
machine_arch_initcall(pseries, find_existing_ddw_windows);
+/**
+ * ddw_read_ext - Get the value of a DDW extension
+ * @np:		device node from which the extension value is to be read.
+ * @extnum:	index number of the extension.
+ * @value:	pointer to return value, modified when extension is available.
+ *
+ * Checks if "ibm,ddw-extensions" exists for this node, and gets the value
+ * at index 'extnum'.
+ * It can also be used just to check whether the extension exists, by
+ * passing value == NULL.
+ *
+ * Returns:
+ *	0 if the extension was successfully read,
+ *	-EINVAL if "ibm,ddw-extensions" does not exist,
+ *	-ENODATA if "ibm,ddw-extensions" does not have a value, and
+ *	-EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
+ */
+static inline int ddw_read_ext(const struct device_node *np, int extnum,
+ u32 *value)
+{
+ static const char propname[] = "ibm,ddw-extensions";
+ u32 count;
+ int ret;
+
+ ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
+ if (ret)
+ return ret;
+
+ if (count < extnum)
+ return -EOVERFLOW;
+
+ if (!value)
+ value = &count;
+
+ return of_property_read_u32_index(np, propname, extnum, value);
+}
+
static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
- struct ddw_query_response *query)
+ struct ddw_query_response *query,
+ struct device_node *parent)
{
struct device_node *dn;
struct pci_dn *pdn;
- u32 cfg_addr;
+ u32 cfg_addr, ext_query, query_out[5];
u64 buid;
- int ret;
+ int ret, out_sz;
+
+ /*
+	 * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 determines how
+	 * many output parameters ibm,query-pe-dma-windows has: either 5 or 6.
+ */
+ ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
+ if (!ret && ext_query == 1)
+ out_sz = 6;
+ else
+ out_sz = 5;
/*
* Get the config address and phb buid of the PE window.
@@ -889,11 +960,28 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
buid = pdn->phb->buid;
cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
- ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
- cfg_addr, BUID_HI(buid), BUID_LO(buid));
- dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
- " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
- BUID_LO(buid), ret);
+ ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
+ cfg_addr, BUID_HI(buid), BUID_LO(buid));
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
+ ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), ret);
+
+ switch (out_sz) {
+ case 5:
+ query->windows_available = query_out[0];
+ query->largest_available_block = query_out[1];
+ query->page_size = query_out[2];
+ query->migration_capable = query_out[3];
+ break;
+ case 6:
+ query->windows_available = query_out[0];
+ query->largest_available_block = ((u64)query_out[1] << 32) |
+ query_out[2];
+ query->page_size = query_out[3];
+ query->migration_capable = query_out[4];
+ break;
+ }
+
return ret;
}
@@ -920,15 +1008,16 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
do {
/* extra outputs are LIOBN and dma-addr (hi, lo) */
- ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
- cfg_addr, BUID_HI(buid), BUID_LO(buid),
- page_shift, window_shift);
+ ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
+ (u32 *)create, cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift);
} while (rtas_busy_delay(ret));
dev_info(&dev->dev,
"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
- "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
- cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
- window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
+ "(liobn = 0x%x starting addr = %x %x)\n",
+ ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
+ create->addr_hi, create->addr_lo);
return ret;
}
@@ -978,6 +1067,38 @@ static phys_addr_t ddw_memory_hotplug_max(void)
}
/*
+ * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
+ * ibm,ddw-extensions, which carries the RTAS token for
+ * ibm,reset-pe-dma-windows.
+ * That RTAS call can be used to restore the default DMA window for the device.
+ */
+static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
+{
+ int ret;
+ u32 cfg_addr, reset_dma_win;
+ u64 buid;
+ struct device_node *dn;
+ struct pci_dn *pdn;
+
+ ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
+ if (ret)
+ return;
+
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ buid = pdn->phb->buid;
+ cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+ ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
+ BUID_LO(buid));
+ if (ret)
+ dev_info(&dev->dev,
+ "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ",
+ reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
+ ret);
+}
+
+/*
* If the PE supports dynamic dma windows, and there is space for a table
* that can map all pages in a linear offset, then setup such a table,
* and record the dma-offset in the struct device.
@@ -996,11 +1117,12 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
int page_shift;
u64 dma_addr, max_addr;
struct device_node *dn;
- u32 ddw_avail[3];
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
struct direct_window *window;
struct property *win64;
struct dynamic_dma_window_prop *ddwprop;
struct failed_ddw_pdn *fpdn;
+ bool default_win_removed = false;
mutex_lock(&direct_window_init_mutex);
@@ -1029,7 +1151,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* the property is actually in the parent, not the PE
*/
ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
- &ddw_avail[0], 3);
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
if (ret)
goto out_failed;
@@ -1040,18 +1162,42 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* of page sizes: supported and supported for migrate-dma.
*/
dn = pci_device_to_OF_node(dev);
- ret = query_ddw(dev, ddw_avail, &query);
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
if (ret != 0)
goto out_failed;
+ /*
+ * If there is no window available, remove the default DMA window,
+ * if it's present. This will make all the resources available to the
+ * new DDW window.
+	 * If anything fails after this, we need to restore the default window,
+	 * so first check that the reset extension is present as well.
+ */
if (query.windows_available == 0) {
- /*
- * no additional windows are available for this device.
- * We might be able to reallocate the existing window,
- * trading in for a larger page size.
- */
- dev_dbg(&dev->dev, "no free dynamic windows");
- goto out_failed;
+ struct property *default_win;
+ int reset_win_ext;
+
+ default_win = of_find_property(pdn, "ibm,dma-window", NULL);
+ if (!default_win)
+ goto out_failed;
+
+ reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
+ if (reset_win_ext)
+ goto out_failed;
+
+ remove_dma_window(pdn, ddw_avail, default_win);
+ default_win_removed = true;
+
+ /* Query again, to check if the window is available */
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
+ if (ret != 0)
+ goto out_failed;
+
+ if (query.windows_available == 0) {
+ /* no windows are available for this device. */
+ dev_dbg(&dev->dev, "no free dynamic windows");
+ goto out_failed;
+ }
}
if (query.page_size & 4) {
page_shift = 24; /* 16MB */
@@ -1068,7 +1214,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
/* check largest block * page size > max memory hotplug addr */
max_addr = ddw_memory_hotplug_max();
if (query.largest_available_block < (max_addr >> page_shift)) {
- dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
+ dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
"%llu-sized pages\n", max_addr, query.largest_available_block,
1ULL << page_shift);
goto out_failed;
@@ -1142,6 +1288,8 @@ out_free_prop:
kfree(win64);
out_failed:
+ if (default_win_removed)
+ reset_dma_window(dev, pdn);
fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
if (!fpdn)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index baf24eacd268..764170fdb0f7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1724,6 +1724,7 @@ void __init hpte_init_pseries(void)
pseries_lpar_register_process_table(0, 0, 0);
}
+#ifdef CONFIG_PPC_RADIX_MMU
void radix_init_pseries(void)
{
pr_info("Using radix MMU under hypervisor\n");
@@ -1731,6 +1732,7 @@ void radix_init_pseries(void)
pseries_lpar_register_process_table(__pa(process_tb),
0, PRTB_SIZE_SHIFT - 12);
}
+#endif
#ifdef CONFIG_PPC_SMLPAR
#define CMO_FREE_HINT_DEFAULT 1
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index b8d28ab88178..e278390ab28d 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -136,6 +136,39 @@ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
return rc;
}
+static void show_gpci_data(struct seq_file *m)
+{
+ struct hv_gpci_request_buffer *buf;
+ unsigned int affinity_score;
+ long ret;
+
+ buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+ if (buf == NULL)
+ return;
+
+ /*
+ * Show the local LPAR's affinity score.
+ *
+ * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall.
+ * The score is at byte 0xB in the output buffer.
+ */
+ memset(&buf->params, 0, sizeof(buf->params));
+ buf->params.counter_request = cpu_to_be32(0xB1);
+ buf->params.starting_index = cpu_to_be32(-1); /* local LPAR */
+ buf->params.counter_info_version_in = 0x5; /* v5+ for score */
+ ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(buf),
+ sizeof(*buf));
+ if (ret != H_SUCCESS) {
+ pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n",
+ ret, be32_to_cpu(buf->params.detail_rc));
+ goto out;
+ }
+ affinity_score = buf->bytes[0xB];
+ seq_printf(m, "partition_affinity_score=%u\n", affinity_score);
+out:
+ kfree(buf);
+}
+
static unsigned h_pic(unsigned long *pool_idle_time,
unsigned long *num_procs)
{
@@ -487,6 +520,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
partition_active_processors * 100);
}
+ show_gpci_data(m);
+
seq_printf(m, "partition_active_processors=%d\n",
partition_active_processors);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a88a707a608a..835163f54244 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -785,7 +785,8 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
static ssize_t perf_stats_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- int index, rc;
+ int index;
+ ssize_t rc;
struct seq_buf s;
struct papr_scm_perf_stat *stat;
struct papr_scm_perf_stats *stats;
@@ -820,9 +821,9 @@ static ssize_t perf_stats_show(struct device *dev,
free_stats:
kfree(stats);
- return rc ? rc : seq_buf_used(&s);
+ return rc ? rc : (ssize_t)seq_buf_used(&s);
}
-DEVICE_ATTR_ADMIN_RO(perf_stats);
+static DEVICE_ATTR_ADMIN_RO(perf_stats);
static ssize_t flags_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -897,6 +898,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
p->bus_desc.of_node = p->pdev->dev.of_node;
p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
+ /* Set the dimm command family mask to accept PDSMs */
+ set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
+
if (!p->bus_desc.provider_name)
return -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c
index bbb97169bf63..6268545947b8 100644
--- a/arch/powerpc/platforms/pseries/rng.c
+++ b/arch/powerpc/platforms/pseries/rng.c
@@ -36,6 +36,7 @@ static __init int rng_init(void)
ppc_md.get_random_seed = pseries_get_random_long;
+ of_node_put(dn);
return 0;
}
machine_subsys_initcall(pseries, rng_init);
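The one-liner fixes a device-node reference leak: the of_find_*() helpers return the node with its refcount raised, and every path that stops using the node must drop it. The general shape of the fixed function, sketched (the "ibm,random" lookup illustrates the pattern and is not a quote of the full function body):

	static __init int rng_init_sketch(void)
	{
		struct device_node *dn;

		dn = of_find_compatible_node(NULL, NULL, "ibm,random");
		if (!dn)
			return -ENODEV;

		/* ... install ppc_md.get_random_seed ... */

		of_node_put(dn);	/* balance the reference taken above */
		return 0;
	}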
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 2f4ee0a90284..633c45ec406d 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -519,9 +519,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+ if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST)
+ security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST);
+
if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+ if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK)
+ security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
/*
* The features below are enabled by default, so we instead look to see
* if firmware has *disabled* them, and clear them if so.
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index e6d7a344d9f2..7b739cc7a8a9 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -7,6 +7,7 @@
*/
#include <linux/mm.h>
+#include <linux/memblock.h>
#include <asm/machdep.h>
#include <asm/svm.h>
#include <asm/swiotlb.h>
@@ -35,6 +36,31 @@ static int __init init_svm(void)
}
machine_early_initcall(pseries, init_svm);
+/*
+ * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it
+ * can allocate the buffer anywhere in memory. Since the hypervisor has no
+ * addressing limitation, we don't need to allocate it in low memory.
+ */
+void __init svm_swiotlb_init(void)
+{
+ unsigned char *vstart;
+ unsigned long bytes, io_tlb_nslabs;
+
+ io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT);
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+ vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
+ if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
+ return;
+
+ if (io_tlb_start)
+ memblock_free_early(io_tlb_start,
+ PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+ panic("SVM: Cannot allocate SWIOTLB buffer");
+}
+
int set_memory_encrypted(unsigned long addr, int numpages)
{
if (!PAGE_ALIGNED(addr))
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 0487b26f6f1a..b2797cfe4e2b 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -20,7 +20,7 @@
#include <linux/console.h>
#include <linux/export.h>
#include <linux/mm.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/kobject.h>
#include <asm/iommu.h>
@@ -608,6 +608,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
.get_required_mask = dma_iommu_get_required_mask,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
};
/**