summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-03 21:22:50 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-03 21:22:50 +0300
commit7cca308cfdc0725363ac5943dca9dcd49cc1d2d5 (patch)
tree39173c9b21c4a6e1f68e3b672afbe008b27198f9 /arch/powerpc/platforms
parent11d5576880aed34b8aa4e8049afdab92793b071f (diff)
parenta3314262eede9c909a0c797f16f25f941d12c78d (diff)
downloadlinux-7cca308cfdc0725363ac5943dca9dcd49cc1d2d5.tar.xz
Merge tag 'powerpc-5.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: - Convert pseries & powernv to use MSI IRQ domains. - Rework the pseries CPU numbering so that CPUs that are removed, and later re-added, are given a CPU number on the same node as previously, when possible. - Add support for a new more flexible device-tree format for specifying NUMA distances. - Convert powerpc to GENERIC_PTDUMP. - Retire sbc8548 and sbc8641d board support. - Various other small features and fixes. Thanks to Alexey Kardashevskiy, Aneesh Kumar K.V, Anton Blanchard, Cédric Le Goater, Christophe Leroy, Emmanuel Gil Peyrot, Fabiano Rosas, Fangrui Song, Finn Thain, Gautham R. Shenoy, Hari Bathini, Joel Stanley, Jordan Niethe, Kajol Jain, Laurent Dufour, Leonardo Bras, Lukas Bulwahn, Marc Zyngier, Masahiro Yamada, Michal Suchanek, Nathan Chancellor, Nicholas Piggin, Parth Shah, Paul Gortmaker, Pratik R. Sampat, Randy Dunlap, Sebastian Andrzej Siewior, Srikar Dronamraju, Wan Jiabing, Xiongwei Song, and Zheng Yongjun. * tag 'powerpc-5.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (154 commits) powerpc/bug: Cast to unsigned long before passing to inline asm powerpc/ptdump: Fix generic ptdump for 64-bit KVM: PPC: Fix clearing never mapped TCEs in realmode powerpc/pseries/iommu: Rename "direct window" to "dma window" powerpc/pseries/iommu: Make use of DDW for indirect mapping powerpc/pseries/iommu: Find existing DDW with given property name powerpc/pseries/iommu: Update remove_dma_window() to accept property name powerpc/pseries/iommu: Reorganize iommu_table_setparms*() with new helper powerpc/pseries/iommu: Add ddw_property_create() and refactor enable_ddw() powerpc/pseries/iommu: Allow DDW windows starting at 0x00 powerpc/pseries/iommu: Add ddw_list_new_entry() helper powerpc/pseries/iommu: Add iommu_pseries_alloc_table() helper powerpc/kernel/iommu: Add new iommu_table_in_use() helper powerpc/pseries/iommu: Replace hard-coded page shift powerpc/numa: Update cpu_cpu_map on CPU online/offline powerpc/numa: Print debug statements only when required powerpc/numa: convert printk to pr_xxx powerpc/numa: Drop dbg in favour of pr_debug powerpc/smp: Enable CACHE domain for shared processor powerpc/smp: Update cpu_core_map on all PowerPc systems ...
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--arch/powerpc/platforms/44x/machine_check.c4
-rw-r--r--arch/powerpc/platforms/4xx/machine_check.c2
-rw-r--r--arch/powerpc/platforms/85xx/Kconfig6
-rw-r--r--arch/powerpc/platforms/85xx/Makefile1
-rw-r--r--arch/powerpc/platforms/85xx/sbc8548.c134
-rw-r--r--arch/powerpc/platforms/86xx/Kconfig8
-rw-r--r--arch/powerpc/platforms/86xx/Makefile1
-rw-r--r--arch/powerpc/platforms/86xx/sbc8641d.c87
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c4
-rw-r--r--arch/powerpc/platforms/embedded6xx/holly.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c2
-rw-r--r--arch/powerpc/platforms/pasemi/idle.c2
-rw-r--r--arch/powerpc/platforms/powernv/idle.c6
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c3
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c12
-rw-r--r--arch/powerpc/platforms/powernv/opal-lpc.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c260
-rw-r--r--arch/powerpc/platforms/powernv/pci.c67
-rw-r--r--arch/powerpc/platforms/powernv/pci.h6
-rw-r--r--arch/powerpc/platforms/ps3/htab.c3
-rw-r--r--arch/powerpc/platforms/ps3/mm.c8
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c4
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c3
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c173
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c6
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c514
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c18
-rw-r--r--arch/powerpc/platforms/pseries/msi.c296
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c4
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h2
-rw-r--r--arch/powerpc/platforms/pseries/ras.c2
-rw-r--r--arch/powerpc/platforms/pseries/setup.c2
-rw-r--r--arch/powerpc/platforms/pseries/vas.c2
35 files changed, 981 insertions, 673 deletions
diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c
index a5c898bb9bab..5d19daacd78a 100644
--- a/arch/powerpc/platforms/44x/machine_check.c
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -11,7 +11,7 @@
int machine_check_440A(struct pt_regs *regs)
{
- unsigned long reason = regs->dsisr;
+ unsigned long reason = regs->esr;
printk("Machine check in kernel mode.\n");
if (reason & ESR_IMCP){
@@ -48,7 +48,7 @@ int machine_check_440A(struct pt_regs *regs)
#ifdef CONFIG_PPC_47x
int machine_check_47x(struct pt_regs *regs)
{
- unsigned long reason = regs->dsisr;
+ unsigned long reason = regs->esr;
u32 mcsr;
printk(KERN_ERR "Machine check in kernel mode.\n");
diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c
index a71c29892a91..a905da1d6f41 100644
--- a/arch/powerpc/platforms/4xx/machine_check.c
+++ b/arch/powerpc/platforms/4xx/machine_check.c
@@ -10,7 +10,7 @@
int machine_check_4xx(struct pt_regs *regs)
{
- unsigned long reason = regs->dsisr;
+ unsigned long reason = regs->esr;
if (reason & ESR_IMCP) {
printk("Instruction");
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index b77cbb0a50e1..4142ebf01382 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -208,12 +208,6 @@ config TQM8560
select TQM85xx
select CPM2
-config SBC8548
- bool "Wind River SBC8548"
- select DEFAULT_UIMAGE
- help
- This option enables support for the Wind River SBC8548 board
-
config PPA8548
bool "Prodrive PPA8548"
help
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index d1dd0dca5ebf..60e4e97a929d 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -26,7 +26,6 @@ obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o
obj-$(CONFIG_FB_FSL_DIU) += t1042rdb_diu.o
obj-$(CONFIG_STX_GP3) += stx_gp3.o
obj-$(CONFIG_TQM85xx) += tqm85xx.o
-obj-$(CONFIG_SBC8548) += sbc8548.o
obj-$(CONFIG_PPA8548) += ppa8548.o
obj-$(CONFIG_SOCRATES) += socrates.o socrates_fpga_pic.o
obj-$(CONFIG_KSI8560) += ksi8560.o
diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c
deleted file mode 100644
index e4acf5ce6b07..000000000000
--- a/arch/powerpc/platforms/85xx/sbc8548.c
+++ /dev/null
@@ -1,134 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Wind River SBC8548 setup and early boot code.
- *
- * Copyright 2007 Wind River Systems Inc.
- *
- * By Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the MPC8548CDS support - Copyright 2005 Freescale Inc.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/major.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/initrd.h>
-#include <linux/interrupt.h>
-#include <linux/fsl_devices.h>
-#include <linux/of_platform.h>
-#include <linux/pgtable.h>
-
-#include <asm/page.h>
-#include <linux/atomic.h>
-#include <asm/time.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/pci-bridge.h>
-#include <asm/irq.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-
-#include "mpc85xx.h"
-
-static int sbc_rev;
-
-static void __init sbc8548_pic_init(void)
-{
- struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
- 0, 256, " OpenPIC ");
- BUG_ON(mpic == NULL);
- mpic_init(mpic);
-}
-
-/* Extract the HW Rev from the EPLD on the board */
-static int __init sbc8548_hw_rev(void)
-{
- struct device_node *np;
- struct resource res;
- unsigned int *rev;
- int board_rev = 0;
-
- np = of_find_compatible_node(NULL, NULL, "hw-rev");
- if (np == NULL) {
- printk("No HW-REV found in DTB.\n");
- return -ENODEV;
- }
-
- of_address_to_resource(np, 0, &res);
- of_node_put(np);
-
- rev = ioremap(res.start,sizeof(unsigned int));
- board_rev = (*rev) >> 28;
- iounmap(rev);
-
- return board_rev;
-}
-
-/*
- * Setup the architecture
- */
-static void __init sbc8548_setup_arch(void)
-{
- if (ppc_md.progress)
- ppc_md.progress("sbc8548_setup_arch()", 0);
-
- fsl_pci_assign_primary();
-
- sbc_rev = sbc8548_hw_rev();
-}
-
-static void sbc8548_show_cpuinfo(struct seq_file *m)
-{
- uint pvid, svid, phid1;
-
- pvid = mfspr(SPRN_PVR);
- svid = mfspr(SPRN_SVR);
-
- seq_printf(m, "Vendor\t\t: Wind River\n");
- seq_printf(m, "Machine\t\t: SBC8548 v%d\n", sbc_rev);
- seq_printf(m, "PVR\t\t: 0x%x\n", pvid);
- seq_printf(m, "SVR\t\t: 0x%x\n", svid);
-
- /* Display cpu Pll setting */
- phid1 = mfspr(SPRN_HID1);
- seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
-}
-
-machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init sbc8548_probe(void)
-{
- return of_machine_is_compatible("SBC8548");
-}
-
-define_machine(sbc8548) {
- .name = "SBC8548",
- .probe = sbc8548_probe,
- .setup_arch = sbc8548_setup_arch,
- .init_IRQ = sbc8548_pic_init,
- .show_cpuinfo = sbc8548_show_cpuinfo,
- .get_irq = mpic_get_irq,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
- .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
-#endif
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
index 07a9d60c618a..be867abebc83 100644
--- a/arch/powerpc/platforms/86xx/Kconfig
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -20,12 +20,6 @@ config MPC8641_HPCN
help
This option enables support for the MPC8641 HPCN board.
-config SBC8641D
- bool "Wind River SBC8641D"
- select DEFAULT_UIMAGE
- help
- This option enables support for the WRS SBC8641D board.
-
config MPC8610_HPCD
bool "Freescale MPC8610 HPCD"
select DEFAULT_UIMAGE
@@ -74,7 +68,7 @@ config MPC8641
select FSL_PCI if PCI
select PPC_UDBG_16550
select MPIC
- default y if MPC8641_HPCN || SBC8641D || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \
+ default y if MPC8641_HPCN || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \
|| MVME7100
config MPC8610
diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile
index 2c04449be107..5bbe1475bf26 100644
--- a/arch/powerpc/platforms/86xx/Makefile
+++ b/arch/powerpc/platforms/86xx/Makefile
@@ -6,7 +6,6 @@
obj-y := pic.o common.o
obj-$(CONFIG_SMP) += mpc86xx_smp.o
obj-$(CONFIG_MPC8641_HPCN) += mpc86xx_hpcn.o
-obj-$(CONFIG_SBC8641D) += sbc8641d.o
obj-$(CONFIG_MPC8610_HPCD) += mpc8610_hpcd.o
obj-$(CONFIG_GEF_SBC610) += gef_sbc610.o
obj-$(CONFIG_GEF_SBC310) += gef_sbc310.o
diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c
deleted file mode 100644
index dc23dd383d6e..000000000000
--- a/arch/powerpc/platforms/86xx/sbc8641d.c
+++ /dev/null
@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SBC8641D board specific routines
- *
- * Copyright 2008 Wind River Systems Inc.
- *
- * By Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the 8641 HPCN support by Freescale Semiconductor Inc.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/of_platform.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <asm/prom.h>
-#include <mm/mmu_decl.h>
-#include <asm/udbg.h>
-
-#include <asm/mpic.h>
-
-#include <sysdev/fsl_pci.h>
-#include <sysdev/fsl_soc.h>
-
-#include "mpc86xx.h"
-
-static void __init
-sbc8641_setup_arch(void)
-{
- if (ppc_md.progress)
- ppc_md.progress("sbc8641_setup_arch()", 0);
-
- printk("SBC8641 board from Wind River\n");
-
-#ifdef CONFIG_SMP
- mpc86xx_smp_init();
-#endif
-
- fsl_pci_assign_primary();
-}
-
-
-static void
-sbc8641_show_cpuinfo(struct seq_file *m)
-{
- uint svid = mfspr(SPRN_SVR);
-
- seq_printf(m, "Vendor\t\t: Wind River Systems\n");
-
- seq_printf(m, "SVR\t\t: 0x%x\n", svid);
-}
-
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init sbc8641_probe(void)
-{
- if (of_machine_is_compatible("wind,sbc8641"))
- return 1; /* Looks good */
-
- return 0;
-}
-
-machine_arch_initcall(sbc8641, mpc86xx_common_publish_devices);
-
-define_machine(sbc8641) {
- .name = "SBC8641D",
- .probe = sbc8641_probe,
- .setup_arch = sbc8641_setup_arch,
- .init_IRQ = mpc86xx_init_irq,
- .show_cpuinfo = sbc8641_show_cpuinfo,
- .get_irq = mpic_get_irq,
- .time_init = mpc86xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
-#endif
-};
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index ca2555b8a0c2..82335e364c44 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -12,8 +12,8 @@
#include <linux/export.h>
#include <linux/of_platform.h>
#include <linux/slab.h>
+#include <linux/debugfs.h>
-#include <asm/debugfs.h>
#include <asm/dcr.h>
#include <asm/machdep.h>
#include <asm/prom.h>
@@ -480,6 +480,6 @@ void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic)
snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn));
- debugfs_create_file(name, 0600, powerpc_debugfs_root, msic, &fops_msic);
+ debugfs_create_file(name, 0600, arch_debugfs_dir, msic, &fops_msic);
}
#endif /* DEBUG */
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 85521b3e7098..7a85b117f7a4 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -251,7 +251,7 @@ static int ppc750_machine_check_exception(struct pt_regs *regs)
/* Are we prepared to handle this fault */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
tsi108_clear_pci_cfg_error();
- regs_set_return_msr(regs, regs->msr | MSR_RI);
+ regs_set_recoverable(regs);
regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index d8da6a483e59..9eb9abb5bce2 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -173,7 +173,7 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs)
/* Are we prepared to handle this fault */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
tsi108_clear_pci_cfg_error();
- regs_set_return_msr(regs, regs->msr | MSR_RI);
+ regs_set_recoverable(regs);
regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c
index 534b0317fc15..6087c70ed2ef 100644
--- a/arch/powerpc/platforms/pasemi/idle.c
+++ b/arch/powerpc/platforms/pasemi/idle.c
@@ -59,7 +59,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
restore_astate(hard_smp_processor_id());
/* everything handled */
- regs_set_return_msr(regs, regs->msr | MSR_RI);
+ regs_set_recoverable(regs);
return 1;
}
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 528a7e0cf83a..e3ffdc8e8567 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -199,12 +199,12 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
*/
power7_fastsleep_workaround_exit = false;
- get_online_cpus();
+ cpus_read_lock();
primary_thread_mask = cpu_online_cores_map();
on_each_cpu_mask(&primary_thread_mask,
pnv_fastsleep_workaround_apply,
&err, 1);
- put_online_cpus();
+ cpus_read_unlock();
if (err) {
pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
goto fail;
@@ -667,7 +667,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
sprs.purr = mfspr(SPRN_PURR);
sprs.spurr = mfspr(SPRN_SPURR);
sprs.dscr = mfspr(SPRN_DSCR);
- sprs.wort = mfspr(SPRN_WORT);
sprs.ciabr = mfspr(SPRN_CIABR);
sprs.mmcra = mfspr(SPRN_MMCRA);
@@ -785,7 +784,6 @@ core_woken:
mtspr(SPRN_PURR, sprs.purr);
mtspr(SPRN_SPURR, sprs.spurr);
mtspr(SPRN_DSCR, sprs.dscr);
- mtspr(SPRN_WORT, sprs.wort);
mtspr(SPRN_CIABR, sprs.ciabr);
mtspr(SPRN_MMCRA, sprs.mmcra);
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 537a4daed614..877720c64515 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -18,7 +18,6 @@
#include <linux/memory_hotplug.h>
#include <linux/numa.h>
#include <asm/machdep.h>
-#include <asm/debugfs.h>
#include <asm/cacheflush.h>
/* This enables us to keep track of the memory removed from each node. */
@@ -330,7 +329,7 @@ DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
static int memtrace_init(void)
{
memtrace_debugfs_dir = debugfs_create_dir("memtrace",
- powerpc_debugfs_root);
+ arch_debugfs_dir);
debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
NULL, &memtrace_init_fops);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 7824cc364bc4..05d3832019b9 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -13,11 +13,11 @@
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/crash_dump.h>
+#include <linux/debugfs.h>
#include <asm/opal.h>
#include <asm/io.h>
#include <asm/imc-pmu.h>
#include <asm/cputhreads.h>
-#include <asm/debugfs.h>
static struct dentry *imc_debugfs_parent;
@@ -56,7 +56,7 @@ static void export_imc_mode_and_cmd(struct device_node *node,
u32 cb_offset;
struct imc_mem_info *ptr = pmu_ptr->mem_info;
- imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
+ imc_debugfs_parent = debugfs_create_dir("imc", arch_debugfs_dir);
if (of_property_read_u32(node, "cb_offset", &cb_offset))
cb_offset = IMC_CNTL_BLK_OFFSET;
@@ -186,7 +186,7 @@ static void disable_nest_pmu_counters(void)
int nid, cpu;
const struct cpumask *l_cpumask;
- get_online_cpus();
+ cpus_read_lock();
for_each_node_with_cpus(nid) {
l_cpumask = cpumask_of_node(nid);
cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
@@ -195,7 +195,7 @@ static void disable_nest_pmu_counters(void)
opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(cpu));
}
- put_online_cpus();
+ cpus_read_unlock();
}
static void disable_core_pmu_counters(void)
@@ -203,7 +203,7 @@ static void disable_core_pmu_counters(void)
cpumask_t cores_map;
int cpu, rc;
- get_online_cpus();
+ cpus_read_lock();
/* Disable the IMC Core functions */
cores_map = cpu_online_cores_map();
for_each_cpu(cpu, &cores_map) {
@@ -213,7 +213,7 @@ static void disable_core_pmu_counters(void)
pr_err("%s: Failed to stop Core (cpu = %d)\n",
__FUNCTION__, cpu);
}
- put_online_cpus();
+ cpus_read_unlock();
}
int get_max_nest_dev(void)
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
index 608569082ba0..1e5d51db40f8 100644
--- a/arch/powerpc/platforms/powernv/opal-lpc.c
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -10,13 +10,13 @@
#include <linux/bug.h>
#include <linux/io.h>
#include <linux/slab.h>
+#include <linux/debugfs.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/opal.h>
#include <asm/prom.h>
#include <linux/uaccess.h>
-#include <asm/debugfs.h>
#include <asm/isa-bridge.h>
static int opal_lpc_chip_id = -1;
@@ -371,7 +371,7 @@ static int opal_lpc_init_debugfs(void)
if (opal_lpc_chip_id < 0)
return -ENODEV;
- root = debugfs_create_dir("lpc", powerpc_debugfs_root);
+ root = debugfs_create_dir("lpc", arch_debugfs_dir);
rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO);
rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index fd510d961b8c..6b4eed2ef4fa 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -14,11 +14,11 @@
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/debugfs.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/opal.h>
-#include <asm/debugfs.h>
#include <asm/prom.h>
static u64 opal_scom_unmangle(u64 addr)
@@ -189,7 +189,7 @@ static int scom_debug_init(void)
if (!firmware_has_feature(FW_FEATURE_OPAL))
return 0;
- root = debugfs_create_dir("scom", powerpc_debugfs_root);
+ root = debugfs_create_dir("scom", arch_debugfs_dir);
if (!root)
return -1;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 2835376e61a4..e9d18519e650 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -588,7 +588,7 @@ static int opal_recover_mce(struct pt_regs *regs,
{
int recovered = 0;
- if (!(regs->msr & MSR_RI)) {
+ if (regs_is_unrecoverable(regs)) {
/* If MSR_RI isn't set, we cannot recover */
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7de464679292..3dd35c327d1c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -20,6 +20,7 @@
#include <linux/iommu.h>
#include <linux/rculist.h>
#include <linux/sizes.h>
+#include <linux/debugfs.h>
#include <asm/sections.h>
#include <asm/io.h>
@@ -32,10 +33,10 @@
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>
-#include <asm/debugfs.h>
#include <asm/firmware.h>
#include <asm/pnv-pci.h>
#include <asm/mmzone.h>
+#include <asm/xive.h>
#include <misc/cxl-base.h>
@@ -1962,27 +1963,40 @@ void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
pe->dma_setup_done = true;
}
-int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
+/*
+ * Called from KVM in real mode to EOI passthru interrupts. The ICP
+ * EOI is handled directly in KVM in kvmppc_deliver_irq_passthru().
+ *
+ * The IRQ data is mapped in the PCI-MSI domain and the EOI OPAL call
+ * needs an HW IRQ number mapped in the XICS IRQ domain. The HW IRQ
+ * numbers of the in-the-middle MSI domain are vector numbers and it's
+ * good enough for OPAL. Use that.
+ */
+int64_t pnv_opal_pci_msi_eoi(struct irq_data *d)
{
- struct pnv_phb *phb = container_of(chip, struct pnv_phb,
- ioda.irq_chip);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d->parent_data);
+ struct pnv_phb *phb = hose->private_data;
- return opal_pci_msi_eoi(phb->opal_id, hw_irq);
+ return opal_pci_msi_eoi(phb->opal_id, d->parent_data->hwirq);
}
+/*
+ * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers
+ */
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
int64_t rc;
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
- struct irq_chip *chip = irq_data_get_irq_chip(d);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
- rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
+ rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
WARN_ON_ONCE(rc);
icp_native_eoi(d);
}
-
+/* P8/CXL only */
void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
{
struct irq_data *idata;
@@ -2004,27 +2018,32 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
}
irq_set_chip(virq, &phb->ioda.irq_chip);
+ irq_set_chip_data(virq, phb->hose);
}
+static struct irq_chip pnv_pci_msi_irq_chip;
+
/*
* Returns true iff chip is something that we could call
* pnv_opal_pci_msi_eoi for.
*/
bool is_pnv_opal_msi(struct irq_chip *chip)
{
- return chip->irq_eoi == pnv_ioda2_msi_eoi;
+ return chip == &pnv_pci_msi_irq_chip;
}
EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
-static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
- unsigned int hwirq, unsigned int virq,
- unsigned int is_64, struct msi_msg *msg)
+static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
+ unsigned int xive_num,
+ unsigned int is_64, struct msi_msg *msg)
{
struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
- unsigned int xive_num = hwirq - phb->msi_base;
__be32 data;
int rc;
+ dev_dbg(&dev->dev, "%s: setup %s-bit MSI for vector #%d\n", __func__,
+ is_64 ? "64" : "32", xive_num);
+
/* No PE assigned ? bail out ... no MSI for you ! */
if (pe == NULL)
return -ENXIO;
@@ -2072,12 +2091,209 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
}
msg->data = be32_to_cpu(data);
- pnv_set_msi_irq_chip(phb, virq);
+ return 0;
+}
+
+/*
+ * The msi_free() op is called before irq_domain_free_irqs_top() when
+ * the handler data is still available. Use that to clear the XIVE
+ * controller.
+ */
+static void pnv_msi_ops_msi_free(struct irq_domain *domain,
+ struct msi_domain_info *info,
+ unsigned int irq)
+{
+ if (xive_enabled())
+ xive_irq_free_data(irq);
+}
+
+static struct msi_domain_ops pnv_pci_msi_domain_ops = {
+ .msi_free = pnv_msi_ops_msi_free,
+};
+
+static void pnv_msi_shutdown(struct irq_data *d)
+{
+ d = d->parent_data;
+ if (d->chip->irq_shutdown)
+ d->chip->irq_shutdown(d);
+}
+
+static void pnv_msi_mask(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
+
+static void pnv_msi_unmask(struct irq_data *d)
+{
+ pci_msi_unmask_irq(d);
+ irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip pnv_pci_msi_irq_chip = {
+ .name = "PNV-PCI-MSI",
+ .irq_shutdown = pnv_msi_shutdown,
+ .irq_mask = pnv_msi_mask,
+ .irq_unmask = pnv_msi_unmask,
+ .irq_eoi = irq_chip_eoi_parent,
+};
+
+static struct msi_domain_info pnv_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
+ .ops = &pnv_pci_msi_domain_ops,
+ .chip = &pnv_pci_msi_irq_chip,
+};
+
+static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg)
+{
+ struct msi_desc *entry = irq_data_get_msi_desc(d);
+ struct pci_dev *pdev = msi_desc_to_pci_dev(entry);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+ int rc;
+
+ rc = __pnv_pci_ioda_msi_setup(phb, pdev, d->hwirq,
+ entry->msi_attrib.is_64, msg);
+ if (rc)
+ dev_err(&pdev->dev, "Failed to setup %s-bit MSI #%ld : %d\n",
+ entry->msi_attrib.is_64 ? "64" : "32", d->hwirq, rc);
+}
+
+/*
+ * The IRQ data is mapped in the MSI domain in which HW IRQ numbers
+ * correspond to vector numbers.
+ */
+static void pnv_msi_eoi(struct irq_data *d)
+{
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+
+ if (phb->model == PNV_PHB_MODEL_PHB3) {
+ /*
+ * The EOI OPAL call takes an OPAL HW IRQ number but
+ * since it is translated into a vector number in
+ * OPAL, use that directly.
+ */
+ WARN_ON_ONCE(opal_pci_msi_eoi(phb->opal_id, d->hwirq));
+ }
+
+ irq_chip_eoi_parent(d);
+}
+
+static struct irq_chip pnv_msi_irq_chip = {
+ .name = "PNV-MSI",
+ .irq_shutdown = pnv_msi_shutdown,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = pnv_msi_eoi,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_compose_msi_msg = pnv_msi_compose_msg,
+};
+
+static int pnv_irq_parent_domain_alloc(struct irq_domain *domain,
+ unsigned int virq, int hwirq)
+{
+ struct irq_fwspec parent_fwspec;
+ int ret;
+
+ parent_fwspec.fwnode = domain->parent->fwnode;
+ parent_fwspec.param_count = 2;
+ parent_fwspec.param[0] = hwirq;
+ parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int pnv_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct pci_controller *hose = domain->host_data;
+ struct pnv_phb *phb = hose->private_data;
+ msi_alloc_info_t *info = arg;
+ struct pci_dev *pdev = msi_desc_to_pci_dev(info->desc);
+ int hwirq;
+ int i, ret;
+
+ hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, nr_irqs);
+ if (hwirq < 0) {
+ dev_warn(&pdev->dev, "failed to find a free MSI\n");
+ return -ENOSPC;
+ }
+
+ dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+ hose->dn, virq, hwirq, nr_irqs);
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = pnv_irq_parent_domain_alloc(domain, virq + i,
+ phb->msi_base + hwirq + i);
+ if (ret)
+ goto out;
+
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &pnv_msi_irq_chip, hose);
+ }
+
+ return 0;
+
+out:
+ irq_domain_free_irqs_parent(domain, virq, i - 1);
+ msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, nr_irqs);
+ return ret;
+}
+
+static void pnv_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+
+ pr_debug("%s bridge %pOF %d/%lx #%d\n", __func__, hose->dn,
+ virq, d->hwirq, nr_irqs);
+
+ msi_bitmap_free_hwirqs(&phb->msi_bmp, d->hwirq, nr_irqs);
+ /* XIVE domain is cleared through ->msi_free() */
+}
+
+static const struct irq_domain_ops pnv_irq_domain_ops = {
+ .alloc = pnv_irq_domain_alloc,
+ .free = pnv_irq_domain_free,
+};
+
+static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count)
+{
+ struct pnv_phb *phb = hose->private_data;
+ struct irq_domain *parent = irq_get_default_host();
+
+ hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id);
+ if (!hose->fwnode)
+ return -ENOMEM;
- pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
- " address=%x_%08x data=%x PE# %x\n",
- pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
- msg->address_hi, msg->address_lo, data, pe->pe_number);
+ hose->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+ hose->fwnode,
+ &pnv_irq_domain_ops, hose);
+ if (!hose->dev_domain) {
+ pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
+ irq_domain_free_fwnode(hose->fwnode);
+ return -ENOMEM;
+ }
+
+ hose->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(hose->dn),
+ &pnv_msi_domain_info,
+ hose->dev_domain);
+ if (!hose->msi_domain) {
+ pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
+ irq_domain_free_fwnode(hose->fwnode);
+ irq_domain_remove(hose->dev_domain);
+ return -ENOMEM;
+ }
return 0;
}
@@ -2102,10 +2318,10 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
return;
}
- phb->msi_setup = pnv_pci_ioda_msi_setup;
- phb->msi32_support = 1;
pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
count, phb->msi_base);
+
+ pnv_msi_allocate_domains(phb->hose, count);
}
static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
@@ -2259,7 +2475,7 @@ static void pnv_pci_ioda_create_dbgfs(void)
phb = hose->private_data;
sprintf(name, "PCI%04x", hose->global_number);
- phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
+ phb->dbgfs = debugfs_create_dir(name, arch_debugfs_dir);
debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
phb, &pnv_pci_diag_data_fops);
@@ -2709,8 +2925,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_ioda_dma_dev_setup,
.dma_bus_setup = pnv_pci_ioda_dma_bus_setup,
.iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
.enable_device_hook = pnv_pci_enable_device_hook,
.release_device = pnv_pci_release_device,
.window_alignment = pnv_pci_window_alignment,
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 6bb3c52633fb..9a8391b983d1 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -160,73 +160,6 @@ exit:
}
EXPORT_SYMBOL_GPL(pnv_pci_set_power_state);
-int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
- struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
- struct msi_desc *entry;
- struct msi_msg msg;
- int hwirq;
- unsigned int virq;
- int rc;
-
- if (WARN_ON(!phb) || !phb->msi_bmp.bitmap)
- return -ENODEV;
-
- if (pdev->no_64bit_msi && !phb->msi32_support)
- return -ENODEV;
-
- for_each_pci_msi_entry(entry, pdev) {
- if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
- pr_warn("%s: Supports only 64-bit MSIs\n",
- pci_name(pdev));
- return -ENXIO;
- }
- hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, 1);
- if (hwirq < 0) {
- pr_warn("%s: Failed to find a free MSI\n",
- pci_name(pdev));
- return -ENOSPC;
- }
- virq = irq_create_mapping(NULL, phb->msi_base + hwirq);
- if (!virq) {
- pr_warn("%s: Failed to map MSI to linux irq\n",
- pci_name(pdev));
- msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
- return -ENOMEM;
- }
- rc = phb->msi_setup(phb, pdev, phb->msi_base + hwirq,
- virq, entry->msi_attrib.is_64, &msg);
- if (rc) {
- pr_warn("%s: Failed to setup MSI\n", pci_name(pdev));
- irq_dispose_mapping(virq);
- msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
- return rc;
- }
- irq_set_msi_desc(virq, entry);
- pci_write_msi_msg(virq, &msg);
- }
- return 0;
-}
-
-void pnv_teardown_msi_irqs(struct pci_dev *pdev)
-{
- struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
- struct msi_desc *entry;
- irq_hw_number_t hwirq;
-
- if (WARN_ON(!phb))
- return;
-
- for_each_pci_msi_entry(entry, pdev) {
- if (!entry->irq)
- continue;
- hwirq = virq_to_hw(entry->irq);
- irq_set_msi_desc(entry->irq, NULL);
- irq_dispose_mapping(entry->irq);
- msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1);
- }
-}
-
/* Nicely print the contents of the PE State Tables (PEST). */
static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size)
{
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index c8d4f222a86f..966a9eb64339 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -123,11 +123,7 @@ struct pnv_phb {
#endif
unsigned int msi_base;
- unsigned int msi32_support;
struct msi_bitmap msi_bmp;
- int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
- unsigned int hwirq, unsigned int virq,
- unsigned int is_64, struct msi_msg *msg);
int (*init_m64)(struct pnv_phb *phb);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
@@ -289,8 +285,6 @@ extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
-extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
-extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 7ddc7ec6a7c0..ef710a715903 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -169,7 +169,8 @@ static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
spin_unlock_irqrestore(&ps3_htab_lock, flags);
}
-static void ps3_hpte_clear(void)
+/* Called during kexec sequence with MMU off */
+static notrace void ps3_hpte_clear(void)
{
unsigned long hpte_count = (1UL << ppc64_pft_size) >> 4;
u64 i;
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index a81eac35d900..9c44f335c0b9 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -195,9 +195,11 @@ fail:
/**
* ps3_mm_vas_destroy -
+ *
+ * called during kexec sequence with MMU off.
*/
-void ps3_mm_vas_destroy(void)
+notrace void ps3_mm_vas_destroy(void)
{
int result;
@@ -1243,9 +1245,11 @@ void __init ps3_mm_init(void)
/**
* ps3_mm_shutdown - final cleanup of address space
+ *
+ * called during kexec sequence with MMU off.
*/
-void ps3_mm_shutdown(void)
+notrace void ps3_mm_shutdown(void)
{
ps3_mm_region_destroy(&map.r1);
}
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 982f069e4c31..352af5b14a0f 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -11,10 +11,10 @@
#include <linux/spinlock.h>
#include <asm/smp.h>
#include <linux/uaccess.h>
+#include <linux/debugfs.h>
#include <asm/firmware.h>
#include <asm/dtl.h>
#include <asm/lppaca.h>
-#include <asm/debugfs.h>
#include <asm/plpar_wrappers.h>
#include <asm/machdep.h>
@@ -338,7 +338,7 @@ static int dtl_init(void)
/* set up common debugfs structure */
- dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root);
+ dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir);
debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 4c7b7f5a2ebc..f162156b7b68 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -119,10 +119,11 @@ struct vec5_fw_feature {
static __initdata struct vec5_fw_feature
vec5_fw_features_table[] = {
- {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY},
+ {FW_FEATURE_FORM1_AFFINITY, OV5_FORM1_AFFINITY},
{FW_FEATURE_PRRN, OV5_PRRN},
{FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2},
{FW_FEATURE_DRC_INFO, OV5_DRC_INFO},
+ {FW_FEATURE_FORM2_AFFINITY, OV5_FORM2_AFFINITY},
};
static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7e970f81d8ff..d646c22e94ab 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -39,6 +39,12 @@
/* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
+/*
+ * Record the CPU ids used on each nodes.
+ * Protected by cpu_add_remove_lock.
+ */
+static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES];
+
static void rtas_stop_self(void)
{
static struct rtas_args args;
@@ -139,72 +145,148 @@ static void pseries_cpu_die(unsigned int cpu)
paca_ptrs[cpu]->cpu_start = 0;
}
+/**
+ * find_cpu_id_range - found a linear ranger of @nthreads free CPU ids.
+ * @nthreads : the number of threads (cpu ids)
+ * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any
+ * node can be peek.
+ * @cpu_mask: the returned CPU mask.
+ *
+ * Returns 0 on success.
+ */
+static int find_cpu_id_range(unsigned int nthreads, int assigned_node,
+ cpumask_var_t *cpu_mask)
+{
+ cpumask_var_t candidate_mask;
+ unsigned int cpu, node;
+ int rc = -ENOSPC;
+
+ if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_clear(*cpu_mask);
+ for (cpu = 0; cpu < nthreads; cpu++)
+ cpumask_set_cpu(cpu, *cpu_mask);
+
+ BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
+
+ /* Get a bitmap of unoccupied slots. */
+ cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
+
+ if (assigned_node != NUMA_NO_NODE) {
+ /*
+ * Remove free ids previously assigned on the other nodes. We
+ * can walk only online nodes because once a node became online
+ * it is not turned offlined back.
+ */
+ for_each_online_node(node) {
+ if (node == assigned_node)
+ continue;
+ cpumask_andnot(candidate_mask, candidate_mask,
+ node_recorded_ids_map[node]);
+ }
+ }
+
+ if (cpumask_empty(candidate_mask))
+ goto out;
+
+ while (!cpumask_empty(*cpu_mask)) {
+ if (cpumask_subset(*cpu_mask, candidate_mask))
+ /* Found a range where we can insert the new cpu(s) */
+ break;
+ cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads);
+ }
+
+ if (!cpumask_empty(*cpu_mask))
+ rc = 0;
+
+out:
+ free_cpumask_var(candidate_mask);
+ return rc;
+}
+
/*
* Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle
- * here is that a cpu device node may represent up to two logical cpus
+ * here is that a cpu device node may represent multiple logical cpus
* in the SMT case. We must honor the assumption in other code that
* the logical ids for sibling SMT threads x and y are adjacent, such
* that x^1 == y and y^1 == x.
*/
static int pseries_add_processor(struct device_node *np)
{
- unsigned int cpu;
- cpumask_var_t candidate_mask, tmp;
- int err = -ENOSPC, len, nthreads, i;
+ int len, nthreads, node, cpu, assigned_node;
+ int rc = 0;
+ cpumask_var_t cpu_mask;
const __be32 *intserv;
intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
if (!intserv)
return 0;
- zalloc_cpumask_var(&candidate_mask, GFP_KERNEL);
- zalloc_cpumask_var(&tmp, GFP_KERNEL);
-
nthreads = len / sizeof(u32);
- for (i = 0; i < nthreads; i++)
- cpumask_set_cpu(i, tmp);
- cpu_maps_update_begin();
+ if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+ return -ENOMEM;
- BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
+ /*
+ * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA
+ * node id the added CPU belongs to.
+ */
+ node = of_node_to_nid(np);
+ if (node < 0 || !node_possible(node))
+ node = first_online_node;
- /* Get a bitmap of unoccupied slots. */
- cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
- if (cpumask_empty(candidate_mask)) {
- /* If we get here, it most likely means that NR_CPUS is
- * less than the partition's max processors setting.
+ BUG_ON(node == NUMA_NO_NODE);
+ assigned_node = node;
+
+ cpu_maps_update_begin();
+
+ rc = find_cpu_id_range(nthreads, node, &cpu_mask);
+ if (rc && nr_node_ids > 1) {
+ /*
+ * Try again, considering the free CPU ids from the other node.
*/
- printk(KERN_ERR "Cannot add cpu %pOF; this system configuration"
- " supports %d logical cpus.\n", np,
- num_possible_cpus());
- goto out_unlock;
+ node = NUMA_NO_NODE;
+ rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask);
}
- while (!cpumask_empty(tmp))
- if (cpumask_subset(tmp, candidate_mask))
- /* Found a range where we can insert the new cpu(s) */
- break;
- else
- cpumask_shift_left(tmp, tmp, nthreads);
-
- if (cpumask_empty(tmp)) {
- printk(KERN_ERR "Unable to find space in cpu_present_mask for"
- " processor %pOFn with %d thread(s)\n", np,
- nthreads);
- goto out_unlock;
+ if (rc) {
+ pr_err("Cannot add cpu %pOF; this system configuration"
+ " supports %d logical cpus.\n", np, num_possible_cpus());
+ goto out;
}
- for_each_cpu(cpu, tmp) {
+ for_each_cpu(cpu, cpu_mask) {
BUG_ON(cpu_present(cpu));
set_cpu_present(cpu, true);
set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
}
- err = 0;
-out_unlock:
+
+ /* Record the newly used CPU ids for the associate node. */
+ cpumask_or(node_recorded_ids_map[assigned_node],
+ node_recorded_ids_map[assigned_node], cpu_mask);
+
+ /*
+ * If node is set to NUMA_NO_NODE, CPU ids have be reused from
+ * another node, remove them from its mask.
+ */
+ if (node == NUMA_NO_NODE) {
+ cpu = cpumask_first(cpu_mask);
+ pr_warn("Reusing free CPU ids %d-%d from another node\n",
+ cpu, cpu + nthreads - 1);
+ for_each_online_node(node) {
+ if (node == assigned_node)
+ continue;
+ cpumask_andnot(node_recorded_ids_map[node],
+ node_recorded_ids_map[node],
+ cpu_mask);
+ }
+ }
+
+out:
cpu_maps_update_done();
- free_cpumask_var(candidate_mask);
- free_cpumask_var(tmp);
- return err;
+ free_cpumask_var(cpu_mask);
+ return rc;
}
/*
@@ -498,6 +580,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
return saved_rc;
}
+ update_numa_distance(dn);
+
rc = dlpar_online_cpu(dn);
if (rc) {
saved_rc = rc;
@@ -908,6 +992,7 @@ static struct notifier_block pseries_smp_nb = {
static int __init pseries_cpu_hotplug_init(void)
{
int qcss_tok;
+ unsigned int node;
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
ppc_md.cpu_probe = dlpar_cpu_probe;
@@ -929,8 +1014,18 @@ static int __init pseries_cpu_hotplug_init(void)
smp_ops->cpu_die = pseries_cpu_die;
/* Processors can be added/removed only on LPAR */
- if (firmware_has_feature(FW_FEATURE_LPAR))
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ for_each_node(node) {
+ alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]);
+
+ /* Record ids of CPU added at boot time */
+ cpumask_or(node_recorded_ids_map[node],
+ node_recorded_ids_map[node],
+ cpumask_of_node(node));
+ }
+
of_reconfig_notifier_register(&pseries_smp_nb);
+ }
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index d4f28ee4d5dc..44246ba59768 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
return -ENODEV;
}
+ update_numa_distance(lmb_node);
+
dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (!dr_node) {
dlpar_free_cc_nodes(lmb_node);
@@ -977,6 +979,10 @@ static int pseries_memory_notifier(struct notifier_block *nb,
case OF_RECONFIG_DETACH_NODE:
err = pseries_remove_mem_node(rd->dn);
break;
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ if (!strcmp(rd->dn->name,
+ "ibm,dynamic-reconfiguration-memory"))
+ drmem_update_lmbs(rd->prop);
}
return notifier_from_errno(err);
}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 0c55b991f665..dab5c56ffd0e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -53,28 +53,31 @@ enum {
DDW_EXT_QUERY_OUT_SIZE = 2
};
-static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+static struct iommu_table *iommu_pseries_alloc_table(int node)
{
- struct iommu_table_group *table_group;
struct iommu_table *tbl;
- table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
- node);
- if (!table_group)
- return NULL;
-
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
if (!tbl)
- goto free_group;
+ return NULL;
INIT_LIST_HEAD_RCU(&tbl->it_group_list);
kref_init(&tbl->it_kref);
+ return tbl;
+}
+
+static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+{
+ struct iommu_table_group *table_group;
- table_group->tables[0] = tbl;
+ table_group = kzalloc_node(sizeof(*table_group), GFP_KERNEL, node);
+ if (!table_group)
+ return NULL;
- return table_group;
+ table_group->tables[0] = iommu_pseries_alloc_table(node);
+ if (table_group->tables[0])
+ return table_group;
-free_group:
kfree(table_group);
return NULL;
}
@@ -107,6 +110,8 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
u64 proto_tce;
__be64 *tcep;
u64 rpn;
+ const unsigned long tceshift = tbl->it_page_shift;
+ const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl);
proto_tce = TCE_PCI_READ; // Read allowed
@@ -117,10 +122,10 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
while (npages--) {
/* can't move this out since we might cross MEMBLOCK boundary */
- rpn = __pa(uaddr) >> TCE_SHIFT;
- *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
+ rpn = __pa(uaddr) >> tceshift;
+ *tcep = cpu_to_be64(proto_tce | rpn << tceshift);
- uaddr += TCE_PAGE_SIZE;
+ uaddr += pagesize;
tcep++;
}
return 0;
@@ -146,7 +151,7 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
return be64_to_cpu(*tcep);
}
-static void tce_free_pSeriesLP(unsigned long liobn, long, long);
+static void tce_free_pSeriesLP(unsigned long liobn, long, long, long);
static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
@@ -166,12 +171,12 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
proto_tce |= TCE_PCI_WRITE;
while (npages--) {
- tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
+ tce = proto_tce | rpn << tceshift;
rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
- tce_free_pSeriesLP(liobn, tcenum_start,
+ tce_free_pSeriesLP(liobn, tcenum_start, tceshift,
(npages_start - (npages + 1)));
break;
}
@@ -205,10 +210,11 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long tcenum_start = tcenum, npages_start = npages;
int ret = 0;
unsigned long flags;
+ const unsigned long tceshift = tbl->it_page_shift;
if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
return tce_build_pSeriesLP(tbl->it_index, tcenum,
- tbl->it_page_shift, npages, uaddr,
+ tceshift, npages, uaddr,
direction, attrs);
}
@@ -225,13 +231,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
if (!tcep) {
local_irq_restore(flags);
return tce_build_pSeriesLP(tbl->it_index, tcenum,
- tbl->it_page_shift,
+ tceshift,
npages, uaddr, direction, attrs);
}
__this_cpu_write(tce_page, tcep);
}
- rpn = __pa(uaddr) >> TCE_SHIFT;
+ rpn = __pa(uaddr) >> tceshift;
proto_tce = TCE_PCI_READ;
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
@@ -245,12 +251,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
for (l = 0; l < limit; l++) {
- tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
+ tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift);
rpn++;
}
rc = plpar_tce_put_indirect((u64)tbl->it_index,
- (u64)tcenum << 12,
+ (u64)tcenum << tceshift,
(u64)__pa(tcep),
limit);
@@ -277,12 +283,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
return ret;
}
-static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
+static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+ long npages)
{
u64 rc;
while (npages--) {
- rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);
+ rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0);
if (rc && printk_ratelimit()) {
printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
@@ -301,9 +308,11 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
u64 rc;
if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
- return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
+ return tce_free_pSeriesLP(tbl->it_index, tcenum,
+ tbl->it_page_shift, npages);
- rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
+ rc = plpar_tce_stuff((u64)tbl->it_index,
+ (u64)tcenum << tbl->it_page_shift, 0, npages);
if (rc && printk_ratelimit()) {
printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
@@ -319,7 +328,8 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
u64 rc;
unsigned long tce_ret;
- rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);
+ rc = plpar_tce_get((u64)tbl->it_index,
+ (u64)tcenum << tbl->it_page_shift, &tce_ret);
if (rc && printk_ratelimit()) {
printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
@@ -339,7 +349,7 @@ struct dynamic_dma_window_prop {
__be32 window_shift; /* ilog2(tce_window_size) */
};
-struct direct_window {
+struct dma_win {
struct device_node *device;
const struct dynamic_dma_window_prop *prop;
struct list_head list;
@@ -359,12 +369,13 @@ struct ddw_create_response {
u32 addr_lo;
};
-static LIST_HEAD(direct_window_list);
+static LIST_HEAD(dma_win_list);
/* prevents races between memory on/offline and window creation */
-static DEFINE_SPINLOCK(direct_window_list_lock);
+static DEFINE_SPINLOCK(dma_win_list_lock);
/* protects initializing window twice for same device */
-static DEFINE_MUTEX(direct_window_init_mutex);
+static DEFINE_MUTEX(dma_win_init_mutex);
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
unsigned long num_pfn, const void *arg)
@@ -491,6 +502,24 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
}
+static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno,
+ unsigned long liobn, unsigned long win_addr,
+ unsigned long window_size, unsigned long page_shift,
+ void *base, struct iommu_table_ops *table_ops)
+{
+ tbl->it_busno = busno;
+ tbl->it_index = liobn;
+ tbl->it_offset = win_addr >> page_shift;
+ tbl->it_size = window_size >> page_shift;
+ tbl->it_page_shift = page_shift;
+ tbl->it_base = (unsigned long)base;
+ tbl->it_blocksize = 16;
+ tbl->it_type = TCE_PCI;
+ tbl->it_ops = table_ops;
+}
+
+struct iommu_table_ops iommu_table_pseries_ops;
+
static void iommu_table_setparms(struct pci_controller *phb,
struct device_node *dn,
struct iommu_table *tbl)
@@ -499,8 +528,13 @@ static void iommu_table_setparms(struct pci_controller *phb,
const unsigned long *basep;
const u32 *sizep;
- node = phb->dn;
+ /* Test if we are going over 2GB of DMA space */
+ if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) {
+ udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
+ panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
+ }
+ node = phb->dn;
basep = of_get_property(node, "linux,tce-base", NULL);
sizep = of_get_property(node, "linux,tce-size", NULL);
if (basep == NULL || sizep == NULL) {
@@ -509,33 +543,18 @@ static void iommu_table_setparms(struct pci_controller *phb,
return;
}
- tbl->it_base = (unsigned long)__va(*basep);
+ iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur,
+ phb->dma_window_size, IOMMU_PAGE_SHIFT_4K,
+ __va(*basep), &iommu_table_pseries_ops);
if (!is_kdump_kernel())
memset((void *)tbl->it_base, 0, *sizep);
- tbl->it_busno = phb->bus->number;
- tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
-
- /* Units of tce entries */
- tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift;
-
- /* Test if we are going over 2GB of DMA space */
- if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
- udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
- panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
- }
-
phb->dma_window_base_cur += phb->dma_window_size;
-
- /* Set the tce table size - measured in entries */
- tbl->it_size = phb->dma_window_size >> tbl->it_page_shift;
-
- tbl->it_index = 0;
- tbl->it_blocksize = 16;
- tbl->it_type = TCE_PCI;
}
+struct iommu_table_ops iommu_table_lpar_multi_ops;
+
/*
* iommu_table_setparms_lpar
*
@@ -547,17 +566,13 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb,
struct iommu_table_group *table_group,
const __be32 *dma_window)
{
- unsigned long offset, size;
+ unsigned long offset, size, liobn;
- of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);
+ of_parse_dma_window(dn, dma_window, &liobn, &offset, &size);
+
+ iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL,
+ &iommu_table_lpar_multi_ops);
- tbl->it_busno = phb->bus->number;
- tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
- tbl->it_base = 0;
- tbl->it_blocksize = 16;
- tbl->it_type = TCE_PCI;
- tbl->it_offset = offset >> tbl->it_page_shift;
- tbl->it_size = size >> tbl->it_page_shift;
table_group->tce32_start = offset;
table_group->tce32_size = size;
@@ -637,7 +652,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
tbl = pci->table_group->tables[0];
iommu_table_setparms(pci->phb, dn, tbl);
- tbl->it_ops = &iommu_table_pseries_ops;
+
if (!iommu_init_table(tbl, pci->phb->node, 0, 0))
panic("Failed to initialize iommu table");
@@ -698,7 +713,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
dn);
- /* Find nearest ibm,dma-window, walking up the device tree */
+ /*
+ * Find nearest ibm,dma-window (default DMA window), walking up the
+ * device tree
+ */
for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
if (dma_window != NULL)
@@ -720,7 +738,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
tbl = ppci->table_group->tables[0];
iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
ppci->table_group, dma_window);
- tbl->it_ops = &iommu_table_lpar_multi_ops;
+
if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
panic("Failed to initialize iommu table");
iommu_register_group(ppci->table_group,
@@ -750,7 +768,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
tbl = PCI_DN(dn)->table_group->tables[0];
iommu_table_setparms(phb, dn, tbl);
- tbl->it_ops = &iommu_table_pseries_ops;
+
if (!iommu_init_table(tbl, phb->node, 0, 0))
panic("Failed to initialize iommu table");
@@ -785,17 +803,10 @@ static int __init disable_ddw_setup(char *str)
early_param("disable_ddw", disable_ddw_setup);
-static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
- struct property *win)
+static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp)
{
- struct dynamic_dma_window_prop *dwp;
- u64 liobn;
int ret;
- dwp = win->value;
- liobn = (u64)be32_to_cpu(dwp->liobn);
-
- /* clear the whole window, note the arg is in kernel pages */
ret = tce_clearrange_multi_pSeriesLP(0,
1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
if (ret)
@@ -804,94 +815,136 @@ static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
else
pr_debug("%pOF successfully cleared tces in window.\n",
np);
+}
+
+/*
+ * Call only if DMA window is clean.
+ */
+static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn)
+{
+ int ret;
ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
if (ret)
- pr_warn("%pOF: failed to remove direct window: rtas returned "
+ pr_warn("%pOF: failed to remove DMA window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
else
- pr_debug("%pOF: successfully removed direct window: rtas returned "
+ pr_debug("%pOF: successfully removed DMA window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
}
-static void remove_ddw(struct device_node *np, bool remove_prop)
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
+ struct property *win)
+{
+ struct dynamic_dma_window_prop *dwp;
+ u64 liobn;
+
+ dwp = win->value;
+ liobn = (u64)be32_to_cpu(dwp->liobn);
+
+ clean_dma_window(np, dwp);
+ __remove_dma_window(np, ddw_avail, liobn);
+}
+
+static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name)
{
struct property *win;
u32 ddw_avail[DDW_APPLICABLE_SIZE];
int ret = 0;
+ win = of_find_property(np, win_name, NULL);
+ if (!win)
+ return -EINVAL;
+
ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
&ddw_avail[0], DDW_APPLICABLE_SIZE);
if (ret)
- return;
+ return 0;
- win = of_find_property(np, DIRECT64_PROPNAME, NULL);
- if (!win)
- return;
if (win->length >= sizeof(struct dynamic_dma_window_prop))
remove_dma_window(np, ddw_avail, win);
if (!remove_prop)
- return;
+ return 0;
ret = of_remove_property(np, win);
if (ret)
- pr_warn("%pOF: failed to remove direct window property: %d\n",
+ pr_warn("%pOF: failed to remove DMA window property: %d\n",
np, ret);
+ return 0;
}
-static u64 find_existing_ddw(struct device_node *pdn, int *window_shift)
+static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift)
{
- struct direct_window *window;
- const struct dynamic_dma_window_prop *direct64;
- u64 dma_addr = 0;
+ struct dma_win *window;
+ const struct dynamic_dma_window_prop *dma64;
+ bool found = false;
- spin_lock(&direct_window_list_lock);
+ spin_lock(&dma_win_list_lock);
/* check if we already created a window and dupe that config if so */
- list_for_each_entry(window, &direct_window_list, list) {
+ list_for_each_entry(window, &dma_win_list, list) {
if (window->device == pdn) {
- direct64 = window->prop;
- dma_addr = be64_to_cpu(direct64->dma_base);
- *window_shift = be32_to_cpu(direct64->window_shift);
+ dma64 = window->prop;
+ *dma_addr = be64_to_cpu(dma64->dma_base);
+ *window_shift = be32_to_cpu(dma64->window_shift);
+ found = true;
break;
}
}
- spin_unlock(&direct_window_list_lock);
+ spin_unlock(&dma_win_list_lock);
- return dma_addr;
+ return found;
}
-static int find_existing_ddw_windows(void)
+static struct dma_win *ddw_list_new_entry(struct device_node *pdn,
+ const struct dynamic_dma_window_prop *dma64)
{
- int len;
- struct device_node *pdn;
- struct direct_window *window;
- const struct dynamic_dma_window_prop *direct64;
+ struct dma_win *window;
- if (!firmware_has_feature(FW_FEATURE_LPAR))
- return 0;
+ window = kzalloc(sizeof(*window), GFP_KERNEL);
+ if (!window)
+ return NULL;
- for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
- direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
- if (!direct64)
- continue;
+ window->device = pdn;
+ window->prop = dma64;
- window = kzalloc(sizeof(*window), GFP_KERNEL);
- if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
- kfree(window);
- remove_ddw(pdn, true);
+ return window;
+}
+
+static void find_existing_ddw_windows_named(const char *name)
+{
+ int len;
+ struct device_node *pdn;
+ struct dma_win *window;
+ const struct dynamic_dma_window_prop *dma64;
+
+ for_each_node_with_property(pdn, name) {
+ dma64 = of_get_property(pdn, name, &len);
+ if (!dma64 || len < sizeof(*dma64)) {
+ remove_ddw(pdn, true, name);
continue;
}
- window->device = pdn;
- window->prop = direct64;
- spin_lock(&direct_window_list_lock);
- list_add(&window->list, &direct_window_list);
- spin_unlock(&direct_window_list_lock);
+ window = ddw_list_new_entry(pdn, dma64);
+ if (!window)
+ break;
+
+ spin_lock(&dma_win_list_lock);
+ list_add(&window->list, &dma_win_list);
+ spin_unlock(&dma_win_list_lock);
}
+}
+
+static int find_existing_ddw_windows(void)
+{
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ return 0;
+
+ find_existing_ddw_windows_named(DIRECT64_PROPNAME);
+ find_existing_ddw_windows_named(DMA64_PROPNAME);
return 0;
}
@@ -1130,6 +1183,35 @@ static int iommu_get_page_shift(u32 query_page_size)
return 0;
}
+static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr,
+ u32 page_shift, u32 window_shift)
+{
+ struct dynamic_dma_window_prop *ddwprop;
+ struct property *win64;
+
+ win64 = kzalloc(sizeof(*win64), GFP_KERNEL);
+ if (!win64)
+ return NULL;
+
+ win64->name = kstrdup(propname, GFP_KERNEL);
+ ddwprop = kzalloc(sizeof(*ddwprop), GFP_KERNEL);
+ win64->value = ddwprop;
+ win64->length = sizeof(*ddwprop);
+ if (!win64->name || !win64->value) {
+ kfree(win64->name);
+ kfree(win64->value);
+ kfree(win64);
+ return NULL;
+ }
+
+ ddwprop->liobn = cpu_to_be32(liobn);
+ ddwprop->dma_base = cpu_to_be64(dma_addr);
+ ddwprop->tce_shift = cpu_to_be32(page_shift);
+ ddwprop->window_shift = cpu_to_be32(window_shift);
+
+ return win64;
+}
+
/*
* If the PE supports dynamic dma windows, and there is space for a table
* that can map all pages in a linear offset, then setup such a table,
@@ -1139,34 +1221,39 @@ static int iommu_get_page_shift(u32 query_page_size)
* pdn: the parent pe node with the ibm,dma_window property
* Future: also check if we can remap the base window for our base page size
*
- * returns the dma offset for use by the direct mapped DMA code.
+ * returns true if can map all pages (direct mapping), false otherwise..
*/
-static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
+static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
{
int len = 0, ret;
int max_ram_len = order_base_2(ddw_memory_hotplug_max());
struct ddw_query_response query;
struct ddw_create_response create;
int page_shift;
- u64 dma_addr;
+ u64 win_addr;
+ const char *win_name;
struct device_node *dn;
u32 ddw_avail[DDW_APPLICABLE_SIZE];
- struct direct_window *window;
+ struct dma_win *window;
struct property *win64;
- struct dynamic_dma_window_prop *ddwprop;
+ bool ddw_enabled = false;
struct failed_ddw_pdn *fpdn;
- bool default_win_removed = false;
+ bool default_win_removed = false, direct_mapping = false;
bool pmem_present;
+ struct pci_dn *pci = PCI_DN(pdn);
+ struct iommu_table *tbl = pci->table_group->tables[0];
dn = of_find_node_by_type(NULL, "ibm,pmemory");
pmem_present = dn != NULL;
of_node_put(dn);
- mutex_lock(&direct_window_init_mutex);
+ mutex_lock(&dma_win_init_mutex);
- dma_addr = find_existing_ddw(pdn, &len);
- if (dma_addr != 0)
+ if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) {
+ direct_mapping = (len >= max_ram_len);
+ ddw_enabled = true;
goto out_unlock;
+ }
/*
* If we already went through this for a previous function of
@@ -1240,12 +1327,12 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
page_shift = iommu_get_page_shift(query.page_size);
if (!page_shift) {
- dev_dbg(&dev->dev, "no supported direct page size in mask %x",
- query.page_size);
+ dev_dbg(&dev->dev, "no supported page size in mask %x",
+ query.page_size);
goto out_failed;
}
- /* verify the window * number of ptes will map the partition */
- /* check largest block * page size > max memory hotplug addr */
+
+
/*
* The "ibm,pmemory" can appear anywhere in the address space.
* Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
@@ -1261,81 +1348,127 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
dev_info(&dev->dev, "Skipping ibm,pmemory");
}
+ /* check if the available block * number of ptes will map everything */
if (query.largest_available_block < (1ULL << (len - page_shift))) {
dev_dbg(&dev->dev,
"can't map partition max 0x%llx with %llu %llu-sized pages\n",
1ULL << len,
query.largest_available_block,
1ULL << page_shift);
- goto out_failed;
- }
- win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
- if (!win64) {
- dev_info(&dev->dev,
- "couldn't allocate property for 64bit dma window\n");
- goto out_failed;
- }
- win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
- win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
- win64->length = sizeof(*ddwprop);
- if (!win64->name || !win64->value) {
- dev_info(&dev->dev,
- "couldn't allocate property name and value\n");
- goto out_free_prop;
+
+ /* DDW + IOMMU on single window may fail if there is any allocation */
+ if (default_win_removed && iommu_table_in_use(tbl)) {
+ dev_dbg(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
+ goto out_failed;
+ }
+
+ len = order_base_2(query.largest_available_block << page_shift);
+ win_name = DMA64_PROPNAME;
+ } else {
+ direct_mapping = true;
+ win_name = DIRECT64_PROPNAME;
}
ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
if (ret != 0)
- goto out_free_prop;
-
- ddwprop->liobn = cpu_to_be32(create.liobn);
- ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) |
- create.addr_lo);
- ddwprop->tce_shift = cpu_to_be32(page_shift);
- ddwprop->window_shift = cpu_to_be32(len);
+ goto out_failed;
dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
create.liobn, dn);
- window = kzalloc(sizeof(*window), GFP_KERNEL);
- if (!window)
- goto out_clear_window;
+ win_addr = ((u64)create.addr_hi << 32) | create.addr_lo;
+ win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len);
- ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
- win64->value, tce_setrange_multi_pSeriesLP_walk);
- if (ret) {
- dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
- dn, ret);
- goto out_free_window;
+ if (!win64) {
+ dev_info(&dev->dev,
+ "couldn't allocate property, property name, or value\n");
+ goto out_remove_win;
}
ret = of_add_property(pdn, win64);
if (ret) {
- dev_err(&dev->dev, "unable to add dma window property for %pOF: %d",
- pdn, ret);
- goto out_free_window;
+ dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d",
+ pdn, ret);
+ goto out_free_prop;
}
- window->device = pdn;
- window->prop = ddwprop;
- spin_lock(&direct_window_list_lock);
- list_add(&window->list, &direct_window_list);
- spin_unlock(&direct_window_list_lock);
+ window = ddw_list_new_entry(pdn, win64->value);
+ if (!window)
+ goto out_del_prop;
+
+ if (direct_mapping) {
+ /* DDW maps the whole partition, so enable direct DMA mapping */
+ ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
+ win64->value, tce_setrange_multi_pSeriesLP_walk);
+ if (ret) {
+ dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
+ dn, ret);
+
+ /* Make sure to clean DDW if any TCE was set*/
+ clean_dma_window(pdn, win64->value);
+ goto out_del_list;
+ }
+ } else {
+ struct iommu_table *newtbl;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
+ const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
+
+ /* Look for MMIO32 */
+ if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM)
+ break;
+ }
+
+ if (i == ARRAY_SIZE(pci->phb->mem_resources))
+ goto out_del_list;
+
+ /* New table for using DDW instead of the default DMA window */
+ newtbl = iommu_pseries_alloc_table(pci->phb->node);
+ if (!newtbl) {
+ dev_dbg(&dev->dev, "couldn't create new IOMMU table\n");
+ goto out_del_list;
+ }
+
+ iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr,
+ 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops);
+ iommu_init_table(newtbl, pci->phb->node, pci->phb->mem_resources[i].start,
+ pci->phb->mem_resources[i].end);
- dma_addr = be64_to_cpu(ddwprop->dma_base);
+ pci->table_group->tables[1] = newtbl;
+
+ /* Keep default DMA window stuct if removed */
+ if (default_win_removed) {
+ tbl->it_size = 0;
+ kfree(tbl->it_map);
+ }
+
+ set_iommu_table_base(&dev->dev, newtbl);
+ }
+
+ spin_lock(&dma_win_list_lock);
+ list_add(&window->list, &dma_win_list);
+ spin_unlock(&dma_win_list_lock);
+
+ dev->dev.archdata.dma_offset = win_addr;
+ ddw_enabled = true;
goto out_unlock;
-out_free_window:
+out_del_list:
kfree(window);
-out_clear_window:
- remove_ddw(pdn, true);
+out_del_prop:
+ of_remove_property(pdn, win64);
out_free_prop:
kfree(win64->name);
kfree(win64->value);
kfree(win64);
+out_remove_win:
+ /* DDW is clean, so it's ok to call this directly. */
+ __remove_dma_window(pdn, ddw_avail, create.liobn);
+
out_failed:
if (default_win_removed)
reset_dma_window(dev, pdn);
@@ -1347,17 +1480,17 @@ out_failed:
list_add(&fpdn->list, &failed_ddw_pdn_list);
out_unlock:
- mutex_unlock(&direct_window_init_mutex);
+ mutex_unlock(&dma_win_init_mutex);
/*
* If we have persistent memory and the window size is only as big
* as RAM, then we failed to create a window to cover persistent
* memory and need to set the DMA limit.
*/
- if (pmem_present && dma_addr && (len == max_ram_len))
- dev->dev.bus_dma_limit = dma_addr + (1ULL << len);
+ if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len)
+ dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
- return dma_addr;
+ return ddw_enabled && direct_mapping;
}
static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
@@ -1399,7 +1532,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
tbl = pci->table_group->tables[0];
iommu_table_setparms_lpar(pci->phb, pdn, tbl,
pci->table_group, dma_window);
- tbl->it_ops = &iommu_table_lpar_multi_ops;
+
iommu_init_table(tbl, pci->phb->node, 0, 0);
iommu_register_group(pci->table_group,
pci_domain_nr(pci->phb->bus), 0);
@@ -1436,11 +1569,8 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
break;
}
- if (pdn && PCI_DN(pdn)) {
- pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
- if (pdev->dev.archdata.dma_offset)
- return true;
- }
+ if (pdn && PCI_DN(pdn))
+ return enable_ddw(pdev, pdn);
return false;
}
@@ -1448,29 +1578,29 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
void *data)
{
- struct direct_window *window;
+ struct dma_win *window;
struct memory_notify *arg = data;
int ret = 0;
switch (action) {
case MEM_GOING_ONLINE:
- spin_lock(&direct_window_list_lock);
- list_for_each_entry(window, &direct_window_list, list) {
+ spin_lock(&dma_win_list_lock);
+ list_for_each_entry(window, &dma_win_list, list) {
ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
arg->nr_pages, window->prop);
/* XXX log error */
}
- spin_unlock(&direct_window_list_lock);
+ spin_unlock(&dma_win_list_lock);
break;
case MEM_CANCEL_ONLINE:
case MEM_OFFLINE:
- spin_lock(&direct_window_list_lock);
- list_for_each_entry(window, &direct_window_list, list) {
+ spin_lock(&dma_win_list_lock);
+ list_for_each_entry(window, &dma_win_list, list) {
ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
arg->nr_pages, window->prop);
/* XXX log error */
}
- spin_unlock(&direct_window_list_lock);
+ spin_unlock(&dma_win_list_lock);
break;
default:
break;
@@ -1491,7 +1621,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
struct of_reconfig_data *rd = data;
struct device_node *np = rd->dn;
struct pci_dn *pci = PCI_DN(np);
- struct direct_window *window;
+ struct dma_win *window;
switch (action) {
case OF_RECONFIG_DETACH_NODE:
@@ -1502,20 +1632,22 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
* we have to remove the property when releasing
* the device node.
*/
- remove_ddw(np, false);
+ if (remove_ddw(np, false, DIRECT64_PROPNAME))
+ remove_ddw(np, false, DMA64_PROPNAME);
+
if (pci && pci->table_group)
iommu_pseries_free_group(pci->table_group,
np->full_name);
- spin_lock(&direct_window_list_lock);
- list_for_each_entry(window, &direct_window_list, list) {
+ spin_lock(&dma_win_list_lock);
+ list_for_each_entry(window, &dma_win_list, list) {
if (window->device == np) {
list_del(&window->list);
kfree(window);
break;
}
}
- spin_unlock(&direct_window_list_lock);
+ spin_unlock(&dma_win_list_lock);
break;
default:
err = NOTIFY_DONE;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index dab356e3ff87..3df6bdfea475 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -22,6 +22,8 @@
#include <linux/workqueue.h>
#include <linux/proc_fs.h>
#include <linux/pgtable.h>
+#include <linux/debugfs.h>
+
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
@@ -39,7 +41,6 @@
#include <asm/kexec.h>
#include <asm/fadump.h>
#include <asm/asm-prototypes.h>
-#include <asm/debugfs.h>
#include <asm/dtl.h>
#include "pseries.h"
@@ -261,7 +262,7 @@ static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
return -EIO;
- return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
+ return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
}
static int cpu_home_node_dispatch_distance(int disp_cpu)
@@ -281,7 +282,7 @@ static int cpu_home_node_dispatch_distance(int disp_cpu)
if (!disp_cpu_assoc || !vcpu_assoc)
return -EIO;
- return cpu_distance(disp_cpu_assoc, vcpu_assoc);
+ return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc);
}
static void update_vcpu_disp_stat(int disp_cpu)
@@ -801,7 +802,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
return -1;
}
-static void manual_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void manual_hpte_clear_all(void)
{
unsigned long size_bytes = 1UL << ppc64_pft_size;
unsigned long hpte_count = size_bytes >> 4;
@@ -834,7 +836,8 @@ static void manual_hpte_clear_all(void)
}
}
-static int hcall_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace int hcall_hpte_clear_all(void)
{
int rc;
@@ -845,7 +848,8 @@ static int hcall_hpte_clear_all(void)
return rc;
}
-static void pseries_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void pseries_hpte_clear_all(void)
{
int rc;
@@ -2016,7 +2020,7 @@ static int __init vpa_debugfs_init(void)
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return 0;
- vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
+ vpa_dir = debugfs_create_dir("vpa", arch_debugfs_dir);
/* set up the per-cpu vpa file*/
for_each_possible_cpu(i) {
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 637300330507..1b305e411862 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -13,6 +13,7 @@
#include <asm/hw_irq.h>
#include <asm/ppc-pci.h>
#include <asm/machdep.h>
+#include <asm/xive.h>
#include "pseries.h"
@@ -110,21 +111,6 @@ static int rtas_query_irq_number(struct pci_dn *pdn, int offset)
return rtas_ret[0];
}
-static void rtas_teardown_msi_irqs(struct pci_dev *pdev)
-{
- struct msi_desc *entry;
-
- for_each_pci_msi_entry(entry, pdev) {
- if (!entry->irq)
- continue;
-
- irq_set_msi_desc(entry->irq, NULL);
- irq_dispose_mapping(entry->irq);
- }
-
- rtas_disable_msi(pdev);
-}
-
static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
{
struct device_node *dn;
@@ -164,12 +150,12 @@ static int check_req_msix(struct pci_dev *pdev, int nvec)
/* Quota calculation */
-static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
+static struct device_node *__find_pe_total_msi(struct device_node *node, int *total)
{
struct device_node *dn;
const __be32 *p;
- dn = of_node_get(pci_device_to_OF_node(dev));
+ dn = of_node_get(node);
while (dn) {
p = of_get_property(dn, "ibm,pe-total-#msi", NULL);
if (p) {
@@ -185,6 +171,11 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
return NULL;
}
+static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
+{
+ return __find_pe_total_msi(pci_device_to_OF_node(dev), total);
+}
+
static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
{
struct device_node *dn;
@@ -368,12 +359,11 @@ static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev)
pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0);
}
-static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type)
+static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
+ msi_alloc_info_t *arg)
{
struct pci_dn *pdn;
- int hwirq, virq, i, quota, rc;
- struct msi_desc *entry;
- struct msi_msg msg;
+ int quota, rc;
int nvec = nvec_in;
int use_32bit_msi_hack = 0;
@@ -451,53 +441,229 @@ again:
return rc;
}
- i = 0;
- for_each_pci_msi_entry(entry, pdev) {
- hwirq = rtas_query_irq_number(pdn, i++);
- if (hwirq < 0) {
- pr_debug("rtas_msi: error (%d) getting hwirq\n", rc);
- return hwirq;
- }
+ return 0;
+}
- /*
- * Depending on the number of online CPUs in the original
- * kernel, it is likely for CPU #0 to be offline in a kdump
- * kernel. The associated IRQs in the affinity mappings
- * provided by irq_create_affinity_masks() are thus not
- * started by irq_startup(), as per-design for managed IRQs.
- * This can be a problem with multi-queue block devices driven
- * by blk-mq : such a non-started IRQ is very likely paired
- * with the single queue enforced by blk-mq during kdump (see
- * blk_mq_alloc_tag_set()). This causes the device to remain
- * silent and likely hangs the guest at some point.
- *
- * We don't really care for fine-grained affinity when doing
- * kdump actually : simply ignore the pre-computed affinity
- * masks in this case and let the default mask with all CPUs
- * be used when creating the IRQ mappings.
- */
- if (is_kdump_kernel())
- virq = irq_create_mapping(NULL, hwirq);
- else
- virq = irq_create_mapping_affinity(NULL, hwirq,
- entry->affinity);
+static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *arg)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct msi_desc *desc = first_pci_msi_entry(pdev);
+ int type = desc->msi_attrib.is_msix ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI;
- if (!virq) {
- pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
- return -ENOSPC;
- }
+ return rtas_prepare_msi_irqs(pdev, nvec, type, arg);
+}
+
+/*
+ * ->msi_free() is called before irq_domain_free_irqs_top() when the
+ * handler data is still available. Use that to clear the XIVE
+ * controller data.
+ */
+static void pseries_msi_ops_msi_free(struct irq_domain *domain,
+ struct msi_domain_info *info,
+ unsigned int irq)
+{
+ if (xive_enabled())
+ xive_irq_free_data(irq);
+}
+
+/*
+ * RTAS can not disable one MSI at a time. It's all or nothing. Do it
+ * at the end after all IRQs have been freed.
+ */
+static void pseries_msi_domain_free_irqs(struct irq_domain *domain,
+ struct device *dev)
+{
+ if (WARN_ON_ONCE(!dev_is_pci(dev)))
+ return;
+
+ __msi_domain_free_irqs(domain, dev);
+
+ rtas_disable_msi(to_pci_dev(dev));
+}
+
+static struct msi_domain_ops pseries_pci_msi_domain_ops = {
+ .msi_prepare = pseries_msi_ops_prepare,
+ .msi_free = pseries_msi_ops_msi_free,
+ .domain_free_irqs = pseries_msi_domain_free_irqs,
+};
+
+static void pseries_msi_shutdown(struct irq_data *d)
+{
+ d = d->parent_data;
+ if (d->chip->irq_shutdown)
+ d->chip->irq_shutdown(d);
+}
+
+static void pseries_msi_mask(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
+
+static void pseries_msi_unmask(struct irq_data *d)
+{
+ pci_msi_unmask_irq(d);
+ irq_chip_unmask_parent(d);
+}
- dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq);
- irq_set_msi_desc(virq, entry);
+static struct irq_chip pseries_pci_msi_irq_chip = {
+ .name = "pSeries-PCI-MSI",
+ .irq_shutdown = pseries_msi_shutdown,
+ .irq_mask = pseries_msi_mask,
+ .irq_unmask = pseries_msi_unmask,
+ .irq_eoi = irq_chip_eoi_parent,
+};
+
+static struct msi_domain_info pseries_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
+ .ops = &pseries_pci_msi_domain_ops,
+ .chip = &pseries_pci_msi_irq_chip,
+};
+
+static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ __pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+}
+
+static struct irq_chip pseries_msi_irq_chip = {
+ .name = "pSeries-MSI",
+ .irq_shutdown = pseries_msi_shutdown,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_compose_msi_msg = pseries_msi_compose_msg,
+};
+
+static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ struct irq_fwspec parent_fwspec;
+ int ret;
+
+ parent_fwspec.fwnode = domain->parent->fwnode;
+ parent_fwspec.param_count = 2;
+ parent_fwspec.param[0] = hwirq;
+ parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct pci_controller *phb = domain->host_data;
+ msi_alloc_info_t *info = arg;
+ struct msi_desc *desc = info->desc;
+ struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
+ int hwirq;
+ int i, ret;
+
+ hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_attrib.entry_nr);
+ if (hwirq < 0) {
+ dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq);
+ return hwirq;
+ }
+
+ dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+ phb->dn, virq, hwirq, nr_irqs);
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i);
+ if (ret)
+ goto out;
+
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &pseries_msi_irq_chip, domain->host_data);
+ }
+
+ return 0;
+
+out:
+ /* TODO: handle RTAS cleanup in ->msi_finish() ? */
+ irq_domain_free_irqs_parent(domain, virq, i - 1);
+ return ret;
+}
+
+static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct pci_controller *phb = irq_data_get_irq_chip_data(d);
+
+ pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs);
+
+ /* XIVE domain data is cleared through ->msi_free() */
+}
- /* Read config space back so we can restore after reset */
- __pci_read_msi_msg(entry, &msg);
- entry->msg = msg;
+static const struct irq_domain_ops pseries_irq_domain_ops = {
+ .alloc = pseries_irq_domain_alloc,
+ .free = pseries_irq_domain_free,
+};
+
+static int __pseries_msi_allocate_domains(struct pci_controller *phb,
+ unsigned int count)
+{
+ struct irq_domain *parent = irq_get_default_host();
+
+ phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI",
+ phb->global_number);
+ if (!phb->fwnode)
+ return -ENOMEM;
+
+ phb->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+ phb->fwnode,
+ &pseries_irq_domain_ops, phb);
+ if (!phb->dev_domain) {
+ pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+ phb->dn, phb->global_number);
+ irq_domain_free_fwnode(phb->fwnode);
+ return -ENOMEM;
+ }
+
+ phb->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(phb->dn),
+ &pseries_msi_domain_info,
+ phb->dev_domain);
+ if (!phb->msi_domain) {
+ pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+ phb->dn, phb->global_number);
+ irq_domain_free_fwnode(phb->fwnode);
+ irq_domain_remove(phb->dev_domain);
+ return -ENOMEM;
}
return 0;
}
+int pseries_msi_allocate_domains(struct pci_controller *phb)
+{
+ int count;
+
+ if (!__find_pe_total_msi(phb->dn, &count)) {
+ pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n",
+ phb->dn, phb->global_number);
+ return -ENOSPC;
+ }
+
+ return __pseries_msi_allocate_domains(phb, count);
+}
+
+void pseries_msi_free_domains(struct pci_controller *phb)
+{
+ if (phb->msi_domain)
+ irq_domain_remove(phb->msi_domain);
+ if (phb->dev_domain)
+ irq_domain_remove(phb->dev_domain);
+ if (phb->fwnode)
+ irq_domain_free_fwnode(phb->fwnode);
+}
+
static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
{
/* No LSI -> leave MSIs (if any) configured */
@@ -518,8 +684,6 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
static int rtas_msi_init(void)
{
- struct pci_controller *phb;
-
query_token = rtas_token("ibm,query-interrupt-source-number");
change_token = rtas_token("ibm,change-msi");
@@ -531,16 +695,6 @@ static int rtas_msi_init(void)
pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n");
- WARN_ON(pseries_pci_controller_ops.setup_msi_irqs);
- pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
- pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
-
- list_for_each_entry(phb, &hose_list, list_node) {
- WARN_ON(phb->controller_ops.setup_msi_irqs);
- phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
- phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
- }
-
WARN_ON(ppc_md.pci_irq_fixup);
ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup;
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index a8f9140a24fa..90c9d3531694 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -33,6 +33,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn)
pci_devs_phb_init_dynamic(phb);
+ pseries_msi_allocate_domains(phb);
+
/* Create EEH devices for the PHB */
eeh_phb_pe_create(phb);
@@ -74,6 +76,8 @@ int remove_phb_dynamic(struct pci_controller *phb)
}
}
+ pseries_msi_free_domains(phb);
+
/* Remove the PCI bus and unregister the bridge device from sysfs */
phb->bus = NULL;
pci_remove_bus(b);
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 1f051a786fb3..3544778e06d0 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -85,6 +85,8 @@ struct pci_host_bridge;
int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
extern struct pci_controller_ops pseries_pci_controller_ops;
+int pseries_msi_allocate_domains(struct pci_controller *phb);
+void pseries_msi_free_domains(struct pci_controller *phb);
unsigned long pseries_memory_block_size(void);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 167f2e1b8d39..56092dccfdb8 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -783,7 +783,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
{
int recovered = 0;
- if (!(regs->msr & MSR_RI)) {
+ if (regs_is_unrecoverable(regs)) {
/* If MSR_RI isn't set, we cannot recover */
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 0dfaa6ab44cc..f79126f16258 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -486,6 +486,8 @@ static void __init pSeries_discover_phbs(void)
/* create pci_dn's for DT nodes under this PHB */
pci_devs_phb_init_dynamic(phb);
+
+ pseries_msi_allocate_domains(phb);
}
of_node_put(root);
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index b5c1cf1bc64d..b043e3936d21 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -184,7 +184,7 @@ static int h_get_nx_fault(u32 winid, u64 buffer)
* Note: The hypervisor forwards an interrupt for each fault request.
* So one fault CRB to process for each H_GET_NX_FAULT hcall.
*/
-irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
{
struct pseries_vas_window *txwin = data;
struct coprocessor_request_block crb;