summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-05 22:39:30 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-05 22:39:30 +0300
commit7ae77150d94d3b535c7b85e6b3647113095e79bf (patch)
tree90fe894e7efd92898e813d88acfd4611d79be969 /arch/powerpc/platforms
parent084623e468d535d98f883cc2ccf2c4fdf2108556 (diff)
parent1395375c592770fe5158a592944aaeed67fa94ff (diff)
downloadlinux-7ae77150d94d3b535c7b85e6b3647113095e79bf.tar.xz
Merge tag 'powerpc-5.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: - Support for userspace to send requests directly to the on-chip GZIP accelerator on Power9. - Rework of our lockless page table walking (__find_linux_pte()) to make it safe against parallel page table manipulations without relying on an IPI for serialisation. - A series of fixes & enhancements to make our machine check handling more robust. - Lots of plumbing to add support for "prefixed" (64-bit) instructions on Power10. - Support for using huge pages for the linear mapping on 8xx (32-bit). - Remove obsolete Xilinx PPC405/PPC440 support, and an associated sound driver. - Removal of some obsolete 40x platforms and associated cruft. - Initial support for booting on Power10. - Lots of other small features, cleanups & fixes. Thanks to: Alexey Kardashevskiy, Alistair Popple, Andrew Donnellan, Andrey Abramov, Aneesh Kumar K.V, Balamuruhan S, Bharata B Rao, Bulent Abali, Cédric Le Goater, Chen Zhou, Christian Zigotzky, Christophe JAILLET, Christophe Leroy, Dmitry Torokhov, Emmanuel Nicolet, Erhard F., Gautham R. Shenoy, Geoff Levand, George Spelvin, Greg Kurz, Gustavo A. R. Silva, Gustavo Walbon, Haren Myneni, Hari Bathini, Joel Stanley, Jordan Niethe, Kajol Jain, Kees Cook, Leonardo Bras, Madhavan Srinivasan., Mahesh Salgaonkar, Markus Elfring, Michael Neuling, Michal Simek, Nathan Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Oliver O'Halloran, Paul Mackerras, Pingfan Liu, Qian Cai, Ram Pai, Raphael Moreira Zinsly, Ravi Bangoria, Sam Bobroff, Sandipan Das, Segher Boessenkool, Stephen Rothwell, Sukadev Bhattiprolu, Tyrel Datwyler, Wolfram Sang, Xiongfeng Wang. * tag 'powerpc-5.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (299 commits) powerpc/pseries: Make vio and ibmebus initcalls pseries specific cxl: Remove dead Kconfig options powerpc: Add POWER10 architected mode powerpc/dt_cpu_ftrs: Add MMA feature powerpc/dt_cpu_ftrs: Enable Prefixed Instructions powerpc/dt_cpu_ftrs: Advertise support for ISA v3.1 if selected powerpc: Add support for ISA v3.1 powerpc: Add new HWCAP bits powerpc/64s: Don't set FSCR bits in INIT_THREAD powerpc/64s: Save FSCR to init_task.thread.fscr after feature init powerpc/64s: Don't let DT CPU features set FSCR_DSCR powerpc/64s: Don't init FSCR_DSCR in __init_FSCR() powerpc/32s: Fix another build failure with CONFIG_PPC_KUAP_DEBUG powerpc/module_64: Use special stub for _mcount() with -mprofile-kernel powerpc/module_64: Simplify check for -mprofile-kernel ftrace relocations powerpc/module_64: Consolidate ftrace code powerpc/32: Disable KASAN with pages bigger than 16k powerpc/uaccess: Don't set KUEP by default on book3s/32 powerpc/uaccess: Don't set KUAP by default on book3s/32 powerpc/8xx: Reduce time spent in allow_user_access() and friends ...
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--arch/powerpc/platforms/40x/Kconfig76
-rw-r--r--arch/powerpc/platforms/40x/Makefile3
-rw-r--r--arch/powerpc/platforms/40x/ep405.c123
-rw-r--r--arch/powerpc/platforms/40x/virtex.c54
-rw-r--r--arch/powerpc/platforms/40x/walnut.c65
-rw-r--r--arch/powerpc/platforms/44x/Kconfig40
-rw-r--r--arch/powerpc/platforms/44x/Makefile2
-rw-r--r--arch/powerpc/platforms/44x/virtex.c60
-rw-r--r--arch/powerpc/platforms/44x/virtex_ml510.c30
-rw-r--r--arch/powerpc/platforms/4xx/pci.c4
-rw-r--r--arch/powerpc/platforms/52xx/lite5200_sleep.S2
-rw-r--r--arch/powerpc/platforms/82xx/pq2.c3
-rw-r--r--arch/powerpc/platforms/83xx/suspend-asm.S1
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_smp.c5
-rw-r--r--arch/powerpc/platforms/8xx/Kconfig50
-rw-r--r--arch/powerpc/platforms/Kconfig4
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype6
-rw-r--r--arch/powerpc/platforms/cell/iommu.c6
-rw-r--r--arch/powerpc/platforms/embedded6xx/wii.c25
-rw-r--r--arch/powerpc/platforms/powermac/bootx_init.c14
-rw-r--r--arch/powerpc/platforms/powermac/cache.S2
-rw-r--r--arch/powerpc/platforms/powermac/nvram.c2
-rw-r--r--arch/powerpc/platforms/powermac/sleep.S5
-rw-r--r--arch/powerpc/platforms/powermac/smp.c5
-rw-r--r--arch/powerpc/platforms/powernv/Makefile2
-rw-r--r--arch/powerpc/platforms/powernv/idle.c2
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c117
-rw-r--r--arch/powerpc/platforms/powernv/opal-fadump.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c4
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda-tce.c28
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c299
-rw-r--r--arch/powerpc/platforms/powernv/pci.c20
-rw-r--r--arch/powerpc/platforms/powernv/pci.h28
-rw-r--r--arch/powerpc/platforms/powernv/vas-api.c278
-rw-r--r--arch/powerpc/platforms/powernv/vas-debug.c2
-rw-r--r--arch/powerpc/platforms/powernv/vas-fault.c382
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c238
-rw-r--r--arch/powerpc/platforms/powernv/vas.c85
-rw-r--r--arch/powerpc/platforms/powernv/vas.h59
-rw-r--r--arch/powerpc/platforms/ps3/mm.c52
-rw-r--r--arch/powerpc/platforms/ps3/setup.c2
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c8
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c3
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c3
-rw-r--r--arch/powerpc/platforms/pseries/ras.c62
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.c2
-rw-r--r--arch/powerpc/platforms/pseries/setup.c22
-rw-r--r--arch/powerpc/platforms/pseries/vio.c7
48 files changed, 1423 insertions, 871 deletions
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index 6da813b65b42..e3e5217c9822 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -7,14 +7,6 @@ config ACADIA
help
This option enables support for the AMCC 405EZ Acadia evaluation board.
-config EP405
- bool "EP405/EP405PC"
- depends on 40x
- select 405GP
- select FORCE_PCI
- help
- This option enables support for the EP405/EP405PC boards.
-
config HOTFOOT
bool "Hotfoot"
depends on 40x
@@ -45,33 +37,6 @@ config MAKALU
help
This option enables support for the AMCC PPC405EX board.
-config WALNUT
- bool "Walnut"
- depends on 40x
- default y
- select 405GP
- select FORCE_PCI
- select OF_RTC
- help
- This option enables support for the IBM PPC405GP evaluation board.
-
-config XILINX_VIRTEX_GENERIC_BOARD
- bool "Generic Xilinx Virtex board"
- depends on 40x
- select XILINX_VIRTEX_II_PRO
- select XILINX_VIRTEX_4_FX
- select XILINX_INTC
- help
- This option enables generic support for Xilinx Virtex based boards.
-
- The generic virtex board support matches any device tree which
- specifies 'xilinx,virtex' in its compatible field. This includes
- the Xilinx ML3xx and ML4xx reference designs using the powerpc
- core.
-
- Most Virtex designs should use this unless it needs to do some
- special configuration at board probe time.
-
config OBS600
bool "OpenBlockS 600"
depends on 40x
@@ -86,18 +51,6 @@ config PPC40x_SIMPLE
help
This option enables the simple PowerPC 40x platform support.
-# OAK doesn't exist but wanted to keep this around for any future 403GCX boards
-config 403GCX
- bool
- #depends on OAK
- select IBM405_ERR51
-
-config 405GP
- bool
- select IBM405_ERR77
- select IBM405_ERR51
- select IBM_EMAC_ZMII if IBM_EMAC
-
config 405EX
bool
select IBM_EMAC_EMAC4 if IBM_EMAC
@@ -109,25 +62,6 @@ config 405EZ
select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC
select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC
-config XILINX_VIRTEX
- bool
- select DEFAULT_UIMAGE
-
-config XILINX_VIRTEX_II_PRO
- bool
- select XILINX_VIRTEX
- select IBM405_ERR77
- select IBM405_ERR51
-
-config XILINX_VIRTEX_4_FX
- bool
- select XILINX_VIRTEX
-
-config STB03xxx
- bool
- select IBM405_ERR77
- select IBM405_ERR51
-
config PPC4xx_GPIO
bool "PPC4xx GPIO support"
depends on 40x
@@ -135,16 +69,6 @@ config PPC4xx_GPIO
help
Enable gpiolib support for ppc40x based boards
-# 40x errata/workaround config symbols, selected by the CPU models above
-
-# All 405-based cores up until the 405GPR and 405EP have this errata.
-config IBM405_ERR77
- bool
-
-# All 40x-based cores, up until the 405GPR and 405EP have this errata.
-config IBM405_ERR51
- bool
-
config APM8018X
bool "APM8018X"
depends on 40x
diff --git a/arch/powerpc/platforms/40x/Makefile b/arch/powerpc/platforms/40x/Makefile
index 828d78340dd9..122de98527c4 100644
--- a/arch/powerpc/platforms/40x/Makefile
+++ b/arch/powerpc/platforms/40x/Makefile
@@ -1,5 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_WALNUT) += walnut.o
-obj-$(CONFIG_XILINX_VIRTEX_GENERIC_BOARD) += virtex.o
-obj-$(CONFIG_EP405) += ep405.o
obj-$(CONFIG_PPC40x_SIMPLE) += ppc40x_simple.o
diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c
deleted file mode 100644
index 1c8aec6e9bb7..000000000000
--- a/arch/powerpc/platforms/40x/ep405.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Architecture- / platform-specific boot-time initialization code for
- * IBM PowerPC 4xx based boards. Adapted from original
- * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek
- * <dan@net4x.com>.
- *
- * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
- *
- * Rewritten and ported to the merged powerpc tree:
- * Copyright 2007 IBM Corporation
- * Josh Boyer <jwboyer@linux.vnet.ibm.com>
- *
- * Adapted to EP405 by Ben. Herrenschmidt <benh@kernel.crashing.org>
- *
- * TODO: Wire up the PCI IRQ mux and the southbridge interrupts
- *
- * 2002 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <linux/init.h>
-#include <linux/of_platform.h>
-
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/time.h>
-#include <asm/uic.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc4xx.h>
-
-static struct device_node *bcsr_node;
-static void __iomem *bcsr_regs;
-
-/* BCSR registers */
-#define BCSR_ID 0
-#define BCSR_PCI_CTRL 1
-#define BCSR_FLASH_NV_POR_CTRL 2
-#define BCSR_FENET_UART_CTRL 3
-#define BCSR_PCI_IRQ 4
-#define BCSR_XIRQ_SELECT 5
-#define BCSR_XIRQ_ROUTING 6
-#define BCSR_XIRQ_STATUS 7
-#define BCSR_XIRQ_STATUS2 8
-#define BCSR_SW_STAT_LED_CTRL 9
-#define BCSR_GPIO_IRQ_PAR_CTRL 10
-/* there's more, can't be bothered typing them tho */
-
-
-static const struct of_device_id ep405_of_bus[] __initconst = {
- { .compatible = "ibm,plb3", },
- { .compatible = "ibm,opb", },
- { .compatible = "ibm,ebc", },
- {},
-};
-
-static int __init ep405_device_probe(void)
-{
- of_platform_bus_probe(NULL, ep405_of_bus, NULL);
-
- return 0;
-}
-machine_device_initcall(ep405, ep405_device_probe);
-
-static void __init ep405_init_bcsr(void)
-{
- const u8 *irq_routing;
- int i;
-
- /* Find the bloody thing & map it */
- bcsr_node = of_find_compatible_node(NULL, NULL, "ep405-bcsr");
- if (bcsr_node == NULL) {
- printk(KERN_ERR "EP405 BCSR not found !\n");
- return;
- }
- bcsr_regs = of_iomap(bcsr_node, 0);
- if (bcsr_regs == NULL) {
- printk(KERN_ERR "EP405 BCSR failed to map !\n");
- return;
- }
-
- /* Get the irq-routing property and apply the routing to the CPLD */
- irq_routing = of_get_property(bcsr_node, "irq-routing", NULL);
- if (irq_routing == NULL)
- return;
- for (i = 0; i < 16; i++) {
- u8 irq = irq_routing[i];
- out_8(bcsr_regs + BCSR_XIRQ_SELECT, i);
- out_8(bcsr_regs + BCSR_XIRQ_ROUTING, irq);
- }
- in_8(bcsr_regs + BCSR_XIRQ_SELECT);
- mb();
- out_8(bcsr_regs + BCSR_GPIO_IRQ_PAR_CTRL, 0xfe);
-}
-
-static void __init ep405_setup_arch(void)
-{
- /* Find & init the BCSR CPLD */
- ep405_init_bcsr();
-
- pci_set_flags(PCI_REASSIGN_ALL_RSRC);
-}
-
-static int __init ep405_probe(void)
-{
- if (!of_machine_is_compatible("ep405"))
- return 0;
-
- return 1;
-}
-
-define_machine(ep405) {
- .name = "EP405",
- .probe = ep405_probe,
- .setup_arch = ep405_setup_arch,
- .progress = udbg_progress,
- .init_IRQ = uic_init_tree,
- .get_irq = uic_get_irq,
- .restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
-};
diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c
deleted file mode 100644
index e3d5e095846b..000000000000
--- a/arch/powerpc/platforms/40x/virtex.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Xilinx Virtex (IIpro & 4FX) based board support
- *
- * Copyright 2007 Secret Lab Technologies Ltd.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <linux/init.h>
-#include <linux/of_platform.h>
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-#include <asm/xilinx_intc.h>
-#include <asm/xilinx_pci.h>
-#include <asm/ppc4xx.h>
-
-static const struct of_device_id xilinx_of_bus_ids[] __initconst = {
- { .compatible = "xlnx,plb-v46-1.00.a", },
- { .compatible = "xlnx,plb-v34-1.01.a", },
- { .compatible = "xlnx,plb-v34-1.02.a", },
- { .compatible = "xlnx,opb-v20-1.10.c", },
- { .compatible = "xlnx,dcr-v29-1.00.a", },
- { .compatible = "xlnx,compound", },
- {}
-};
-
-static int __init virtex_device_probe(void)
-{
- of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL);
-
- return 0;
-}
-machine_device_initcall(virtex, virtex_device_probe);
-
-static int __init virtex_probe(void)
-{
- if (!of_machine_is_compatible("xlnx,virtex"))
- return 0;
-
- return 1;
-}
-
-define_machine(virtex) {
- .name = "Xilinx Virtex",
- .probe = virtex_probe,
- .setup_arch = xilinx_pci_init,
- .init_IRQ = xilinx_intc_init_tree,
- .get_irq = xintc_get_irq,
- .restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
-};
diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c
deleted file mode 100644
index e5797815e2f1..000000000000
--- a/arch/powerpc/platforms/40x/walnut.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Architecture- / platform-specific boot-time initialization code for
- * IBM PowerPC 4xx based boards. Adapted from original
- * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek
- * <dan@net4x.com>.
- *
- * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
- *
- * Rewritten and ported to the merged powerpc tree:
- * Copyright 2007 IBM Corporation
- * Josh Boyer <jwboyer@linux.vnet.ibm.com>
- *
- * 2002 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <linux/init.h>
-#include <linux/of_platform.h>
-#include <linux/rtc.h>
-
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/time.h>
-#include <asm/uic.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc4xx.h>
-
-static const struct of_device_id walnut_of_bus[] __initconst = {
- { .compatible = "ibm,plb3", },
- { .compatible = "ibm,opb", },
- { .compatible = "ibm,ebc", },
- {},
-};
-
-static int __init walnut_device_probe(void)
-{
- of_platform_bus_probe(NULL, walnut_of_bus, NULL);
- of_instantiate_rtc();
-
- return 0;
-}
-machine_device_initcall(walnut, walnut_device_probe);
-
-static int __init walnut_probe(void)
-{
- if (!of_machine_is_compatible("ibm,walnut"))
- return 0;
-
- pci_set_flags(PCI_REASSIGN_ALL_RSRC);
-
- return 1;
-}
-
-define_machine(walnut) {
- .name = "Walnut",
- .probe = walnut_probe,
- .progress = udbg_progress,
- .init_IRQ = uic_init_tree,
- .get_irq = uic_get_irq,
- .restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
-};
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 25ebe634a661..78ac6d67a935 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -167,8 +167,7 @@ config YOSEMITE
config ISS4xx
bool "ISS 4xx Simulator"
- depends on (44x || 40x)
- select 405GP if 40x
+ depends on 44x
select 440GP if 44x && !PPC_47x
select PPC_FPU
select OF_RTC
@@ -232,33 +231,6 @@ config ICON
help
This option enables support for the AMCC PPC440SPe evaluation board.
-config XILINX_VIRTEX440_GENERIC_BOARD
- bool "Generic Xilinx Virtex 5 FXT board support"
- depends on 44x
- select XILINX_VIRTEX_5_FXT
- select XILINX_INTC
- help
- This option enables generic support for Xilinx Virtex based boards
- that use a 440 based processor in the Virtex 5 FXT FPGA architecture.
-
- The generic virtex board support matches any device tree which
- specifies 'xlnx,virtex440' in its compatible field. This includes
- the Xilinx ML5xx reference designs using the powerpc core.
-
- Most Virtex 5 designs should use this unless it needs to do some
- special configuration at board probe time.
-
-config XILINX_ML510
- bool "Xilinx ML510 extra support"
- depends on XILINX_VIRTEX440_GENERIC_BOARD
- select HAVE_PCI
- select XILINX_PCI if PCI
- select PPC_INDIRECT_PCI if PCI
- select PPC_I8259 if PCI
- help
- This option enables extra support for features on the Xilinx ML510
- board. The ML510 has a PCI bus with ALI south bridge.
-
config PPC44x_SIMPLE
bool "Simple PowerPC 44x board support"
depends on 44x
@@ -354,13 +326,3 @@ config 476FPE_ERR46
config IBM440EP_ERR42
bool
-# Xilinx specific config options.
-config XILINX_VIRTEX
- bool
- select DEFAULT_UIMAGE
-
-# Xilinx Virtex 5 FXT FPGA architecture, selected by a Xilinx board above
-config XILINX_VIRTEX_5_FXT
- bool
- select XILINX_VIRTEX
-
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index 1b78c6af821a..5ba031f57652 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -7,8 +7,6 @@ obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o
obj-$(CONFIG_EBONY) += ebony.o
obj-$(CONFIG_SAM440EP) += sam440ep.o
obj-$(CONFIG_WARP) += warp.o
-obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o
-obj-$(CONFIG_XILINX_ML510) += virtex_ml510.o
obj-$(CONFIG_ISS4xx) += iss4xx.o
obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
obj-$(CONFIG_CURRITUCK) += ppc476.o
diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c
deleted file mode 100644
index 3eb13ed926ee..000000000000
--- a/arch/powerpc/platforms/44x/virtex.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Xilinx Virtex 5FXT based board support, derived from
- * the Xilinx Virtex (IIpro & 4FX) based board support
- *
- * Copyright 2007 Secret Lab Technologies Ltd.
- * Copyright 2008 Xilinx, Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <linux/init.h>
-#include <linux/of_platform.h>
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-#include <asm/xilinx_intc.h>
-#include <asm/xilinx_pci.h>
-#include <asm/reg.h>
-#include <asm/ppc4xx.h>
-#include "44x.h"
-
-static const struct of_device_id xilinx_of_bus_ids[] __initconst = {
- { .compatible = "simple-bus", },
- { .compatible = "xlnx,plb-v46-1.00.a", },
- { .compatible = "xlnx,plb-v46-1.02.a", },
- { .compatible = "xlnx,plb-v34-1.01.a", },
- { .compatible = "xlnx,plb-v34-1.02.a", },
- { .compatible = "xlnx,opb-v20-1.10.c", },
- { .compatible = "xlnx,dcr-v29-1.00.a", },
- { .compatible = "xlnx,compound", },
- {}
-};
-
-static int __init virtex_device_probe(void)
-{
- of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL);
-
- return 0;
-}
-machine_device_initcall(virtex, virtex_device_probe);
-
-static int __init virtex_probe(void)
-{
- if (!of_machine_is_compatible("xlnx,virtex440"))
- return 0;
-
- return 1;
-}
-
-define_machine(virtex) {
- .name = "Xilinx Virtex440",
- .probe = virtex_probe,
- .setup_arch = xilinx_pci_init,
- .init_IRQ = xilinx_intc_init_tree,
- .get_irq = xintc_get_irq,
- .calibrate_decr = generic_calibrate_decr,
- .restart = ppc4xx_reset_system,
-};
diff --git a/arch/powerpc/platforms/44x/virtex_ml510.c b/arch/powerpc/platforms/44x/virtex_ml510.c
deleted file mode 100644
index 349f218b335c..000000000000
--- a/arch/powerpc/platforms/44x/virtex_ml510.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/i8259.h>
-#include <linux/pci.h>
-#include "44x.h"
-
-/**
- * ml510_ail_quirk
- */
-static void ml510_ali_quirk(struct pci_dev *dev)
-{
- /* Enable the IDE controller */
- pci_write_config_byte(dev, 0x58, 0x4c);
- /* Assign irq 14 to the primary ide channel */
- pci_write_config_byte(dev, 0x44, 0x0d);
- /* Assign irq 15 to the secondary ide channel */
- pci_write_config_byte(dev, 0x75, 0x0f);
- /* Set the ide controller in native mode */
- pci_write_config_byte(dev, 0x09, 0xff);
-
- /* INTB = disabled, INTA = disabled */
- pci_write_config_byte(dev, 0x48, 0x00);
- /* INTD = disabled, INTC = disabled */
- pci_write_config_byte(dev, 0x4a, 0x00);
- /* Audio = INT7, Modem = disabled. */
- pci_write_config_byte(dev, 0x4b, 0x60);
- /* USB = INT7 */
- pci_write_config_byte(dev, 0x74, 0x06);
-}
-DECLARE_PCI_FIXUP_EARLY(0x10b9, 0x1533, ml510_ali_quirk);
-
diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c
index e6e2adcc7b64..c13d64c3b019 100644
--- a/arch/powerpc/platforms/4xx/pci.c
+++ b/arch/powerpc/platforms/4xx/pci.c
@@ -1242,7 +1242,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port)
if (mbase == NULL) {
printk(KERN_ERR "%pOF: Can't map internal config space !",
port->node);
- goto done;
+ return;
}
while (attempt && (0 == (in_le32(mbase + PECFG_460SX_DLLSTA)
@@ -1252,9 +1252,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port)
}
if (attempt)
port->link = 1;
-done:
iounmap(mbase);
-
}
static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = {
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
index 3a9969c429b3..70083649c9ea 100644
--- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -248,6 +248,7 @@ mmu_on:
blr
+_ASM_NOKPROBE_SYMBOL(lite5200_wakeup)
/* ---------------------------------------------------------------------- */
@@ -391,6 +392,7 @@ restore_regs:
LOAD_SPRN(TBWU, 0x5b);
blr
+_ASM_NOKPROBE_SYMBOL(restore_regs)
diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c
index 1cdd5ed9d896..3b5cb39a564c 100644
--- a/arch/powerpc/platforms/82xx/pq2.c
+++ b/arch/powerpc/platforms/82xx/pq2.c
@@ -10,6 +10,8 @@
* Copyright (c) 2006 MontaVista Software, Inc.
*/
+#include <linux/kprobes.h>
+
#include <asm/cpm2.h>
#include <asm/io.h>
#include <asm/pci-bridge.h>
@@ -29,6 +31,7 @@ void __noreturn pq2_restart(char *cmd)
panic("Restart failed\n");
}
+NOKPROBE_SYMBOL(pq2_restart)
#ifdef CONFIG_PCI
static int pq2_pci_exclude_device(struct pci_controller *hose,
diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S
index 3acd7470dc5e..bc6bd4d0ae96 100644
--- a/arch/powerpc/platforms/83xx/suspend-asm.S
+++ b/arch/powerpc/platforms/83xx/suspend-asm.S
@@ -548,3 +548,4 @@ mpc83xx_deep_resume:
mtdec r0
rfi
+_ASM_NOKPROBE_SYMBOL(mpc83xx_deep_resume)
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index 5b91ea5694e3..dba3aa73c062 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -17,6 +17,7 @@
#include <asm/pci-bridge.h>
#include <asm/mpic.h>
#include <asm/cacheflush.h>
+#include <asm/inst.h>
#include <sysdev/fsl_soc.h>
@@ -72,7 +73,7 @@ smp_86xx_kick_cpu(int nr)
/* Setup fake reset vector to call __secondary_start_mpc86xx. */
target = (unsigned long) __secondary_start_mpc86xx;
- patch_branch(vector, target, BRANCH_SET_LINK);
+ patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK);
/* Kick that CPU */
smp_86xx_release_core(nr);
@@ -82,7 +83,7 @@ smp_86xx_kick_cpu(int nr)
mdelay(1);
/* Restore the exception vector */
- patch_instruction(vector, save_vector);
+ patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector));
local_irq_restore(flags);
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index e0fe670f06f6..abb2b45b2789 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -98,15 +98,6 @@ menu "MPC8xx CPM Options"
# 8xx specific questions.
comment "Generic MPC8xx Options"
-config 8xx_COPYBACK
- bool "Copy-Back Data Cache (else Writethrough)"
- help
- Saying Y here will cause the cache on an MPC8xx processor to be used
- in Copy-Back mode. If you say N here, it is used in Writethrough
- mode.
-
- If in doubt, say Y here.
-
config 8xx_GPIO
bool "GPIO API Support"
select GPIOLIB
@@ -171,4 +162,45 @@ config UCODE_PATCH
default y
depends on !NO_UCODE_PATCH
+menu "8xx advanced setup"
+ depends on PPC_8xx
+
+config PIN_TLB
+ bool "Pinned Kernel TLBs"
+ depends on ADVANCED_OPTIONS
+ help
+ On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each
+ table 4 TLBs can be pinned.
+
+ It reduces the amount of usable TLBs to 28 (ie by 12%). That's the
+ reason why we make it selectable.
+
+ This option does nothing, it just activate the selection of what
+ to pin.
+
+config PIN_TLB_DATA
+ bool "Pinned TLB for DATA"
+ depends on PIN_TLB
+ default y
+ help
+ This pins the first 32 Mbytes of memory with 8M pages.
+
+config PIN_TLB_IMMR
+ bool "Pinned TLB for IMMR"
+ depends on PIN_TLB
+ default y
+ help
+ This pins the IMMR area with a 512kbytes page. In case
+ CONFIG_PIN_TLB_DATA is also selected, it will reduce
+ CONFIG_PIN_TLB_DATA to 24 Mbytes.
+
+config PIN_TLB_TEXT
+ bool "Pinned TLB for TEXT"
+ depends on PIN_TLB
+ default y
+ help
+ This pins kernel text with 8M pages.
+
+endmenu
+
endmenu
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 1f8025383caa..5e6479d409a0 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -317,8 +317,4 @@ config MCU_MPC8349EMITX
also register MCU GPIOs with the generic GPIO API, so you'll able
to use MCU pins as GPIOs.
-config XILINX_PCI
- bool "Xilinx PCI host bridge support"
- depends on PCI && XILINX_VIRTEX
-
endmenu
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 27a81c291be8..d349603fb889 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -55,8 +55,8 @@ config PPC_8xx
select SYS_SUPPORTS_HUGETLBFS
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
- select PPC_MM_SLICES if HUGETLB_PAGE
select HAVE_ARCH_VMAP_STACK
+ select HUGETLBFS
config 40x
bool "AMCC 40x"
@@ -377,7 +377,7 @@ config PPC_HAVE_KUEP
config PPC_KUEP
bool "Kernel Userspace Execution Prevention"
depends on PPC_HAVE_KUEP
- default y
+ default y if !PPC_BOOK3S_32
help
Enable support for Kernel Userspace Execution Prevention (KUEP)
@@ -389,7 +389,7 @@ config PPC_HAVE_KUAP
config PPC_KUAP
bool "Kernel Userspace Access Protection"
depends on PPC_HAVE_KUAP
- default y
+ default y if !PPC_BOOK3S_32
help
Enable support for Kernel Userspace Access Protection (KUAP)
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index ca9ffc1c8685..2124831cf57c 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -943,7 +943,7 @@ static int __init cell_iommu_fixed_mapping_init(void)
fbase = max(fbase, dbase + dsize);
}
- fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT);
+ fbase = ALIGN(fbase, 1 << IO_SEGMENT_SHIFT);
fsize = memblock_phys_mem_size();
if ((fbase + fsize) <= 0x800000000ul)
@@ -963,8 +963,8 @@ static int __init cell_iommu_fixed_mapping_init(void)
hend = hbase + htab_size_bytes;
/* The window must start and end on a segment boundary */
- if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) ||
- (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) {
+ if ((hbase != ALIGN(hbase, 1 << IO_SEGMENT_SHIFT)) ||
+ (hend != ALIGN(hend, 1 << IO_SEGMENT_SHIFT))) {
pr_debug("iommu: hash window not segment aligned\n");
return -1;
}
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 67e48b0a164e..a802ef957d63 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -172,19 +172,6 @@ static void wii_shutdown(void)
flipper_quiesce();
}
-define_machine(wii) {
- .name = "wii",
- .probe = wii_probe,
- .setup_arch = wii_setup_arch,
- .restart = wii_restart,
- .halt = wii_halt,
- .init_IRQ = wii_pic_probe,
- .get_irq = flipper_pic_get_irq,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
- .machine_shutdown = wii_shutdown,
-};
-
static const struct of_device_id wii_of_bus[] = {
{ .compatible = "nintendo,hollywood", },
{ },
@@ -200,3 +187,15 @@ static int __init wii_device_probe(void)
}
device_initcall(wii_device_probe);
+define_machine(wii) {
+ .name = "wii",
+ .probe = wii_probe,
+ .setup_arch = wii_setup_arch,
+ .restart = wii_restart,
+ .halt = wii_halt,
+ .init_IRQ = wii_pic_probe,
+ .get_irq = flipper_pic_get_irq,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = udbg_progress,
+ .machine_shutdown = wii_shutdown,
+};
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index af309ee99114..9d4ecd292255 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -108,7 +108,7 @@ static void * __init bootx_early_getprop(unsigned long base,
#define dt_push_token(token, mem) \
do { \
- *(mem) = _ALIGN_UP(*(mem),4); \
+ *(mem) = ALIGN(*(mem),4); \
*((u32 *)*(mem)) = token; \
*(mem) += 4; \
} while(0)
@@ -150,7 +150,7 @@ static void __init bootx_dt_add_prop(char *name, void *data, int size,
/* push property content */
if (size && data) {
memcpy((void *)*mem_end, data, size);
- *mem_end = _ALIGN_UP(*mem_end + size, 4);
+ *mem_end = ALIGN(*mem_end + size, 4);
}
}
@@ -303,7 +303,7 @@ static void __init bootx_scan_dt_build_struct(unsigned long base,
*lp++ = *p;
}
*lp = 0;
- *mem_end = _ALIGN_UP((unsigned long)lp + 1, 4);
+ *mem_end = ALIGN((unsigned long)lp + 1, 4);
/* get and store all properties */
while (*ppp) {
@@ -356,11 +356,11 @@ static unsigned long __init bootx_flatten_dt(unsigned long start)
/* Start using memory after the big blob passed by BootX, get
* some space for the header
*/
- mem_start = mem_end = _ALIGN_UP(((unsigned long)bi) + start, 4);
+ mem_start = mem_end = ALIGN(((unsigned long)bi) + start, 4);
DBG("Boot params header at: %x\n", mem_start);
hdr = (struct boot_param_header *)mem_start;
mem_end += sizeof(struct boot_param_header);
- rsvmap = (u64 *)(_ALIGN_UP(mem_end, 8));
+ rsvmap = (u64 *)(ALIGN(mem_end, 8));
hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - mem_start;
mem_end = ((unsigned long)rsvmap) + 8 * sizeof(u64);
@@ -386,7 +386,7 @@ static unsigned long __init bootx_flatten_dt(unsigned long start)
hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase;
/* Build structure */
- mem_end = _ALIGN(mem_end, 16);
+ mem_end = ALIGN(mem_end, 16);
DBG("Building device tree structure at: %x\n", mem_end);
hdr->off_dt_struct = mem_end - mem_start;
bootx_scan_dt_build_struct(base, 4, &mem_end);
@@ -404,7 +404,7 @@ static unsigned long __init bootx_flatten_dt(unsigned long start)
* also bump mem_reserve_cnt to cause further reservations to
* fail since it's too late.
*/
- mem_end = _ALIGN(mem_end, PAGE_SIZE);
+ mem_end = ALIGN(mem_end, PAGE_SIZE);
DBG("End of boot params: %x\n", mem_end);
rsvmap[0] = mem_start;
rsvmap[1] = mem_end;
diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S
index da69e0fcb4f1..ced225415486 100644
--- a/arch/powerpc/platforms/powermac/cache.S
+++ b/arch/powerpc/platforms/powermac/cache.S
@@ -184,6 +184,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
mtlr r10
blr
+_ASM_NOKPROBE_SYMBOL(flush_disable_75x)
/* This code is for 745x processors */
flush_disable_745x:
@@ -351,4 +352,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
mtmsr r11 /* restore DR and EE */
isync
blr
+_ASM_NOKPROBE_SYMBOL(flush_disable_745x)
#endif /* CONFIG_PPC_BOOK3S_32 */
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index dc7a5bae8f1c..853ccc4480e2 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -55,7 +55,7 @@ struct chrp_header {
u8 cksum;
u16 len;
char name[12];
- u8 data[0];
+ u8 data[];
};
struct core99_header {
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index bd6085b470b7..f9a680fdd9c4 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -244,7 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
mtmsr r2
isync
b 1b
-
+_ASM_NOKPROBE_SYMBOL(low_cpu_die)
/*
* Here is the resume code.
*/
@@ -282,6 +282,7 @@ _GLOBAL(core99_wake_up)
lwz r1,0(r3)
/* Pass thru to older resume code ... */
+_ASM_NOKPROBE_SYMBOL(core99_wake_up)
/*
* Here is the resume code for older machines.
* r1 has the physical address of SL_PC(sp).
@@ -429,6 +430,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
lwz r0,4(r1)
mtlr r0
blr
+_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
turn_on_mmu:
mflr r4
@@ -438,6 +440,7 @@ turn_on_mmu:
sync
isync
rfi
+_ASM_NOKPROBE_SYMBOL(turn_on_mmu)
#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index be2ab5b11e57..9969c07035b6 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -49,6 +49,7 @@
#include <asm/keylargo.h>
#include <asm/pmac_low_i2c.h>
#include <asm/pmac_pfunc.h>
+#include <asm/inst.h>
#include "pmac.h"
@@ -813,7 +814,7 @@ static int smp_core99_kick_cpu(int nr)
* b __secondary_start_pmac_0 + nr*8
*/
target = (unsigned long) __secondary_start_pmac_0 + nr * 8;
- patch_branch(vector, target, BRANCH_SET_LINK);
+ patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK);
/* Put some life in our friend */
pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0);
@@ -826,7 +827,7 @@ static int smp_core99_kick_cpu(int nr)
mdelay(1);
/* Restore our exception vector */
- patch_instruction(vector, save_vector);
+ patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector));
local_irq_restore(flags);
if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index c0f8120045c3..fe3f0fb5aeca 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
-obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o
obj-$(CONFIG_OCXL_BASE) += ocxl.o
obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 78599bca66c2..2dd467383a88 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -1270,7 +1270,7 @@ static int pnv_parse_cpuidle_dt(void)
/* Read residencies */
if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
temp_u32, nr_idle_states)) {
- pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
+ pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
rc = -EINVAL;
goto out;
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index b95b9e3c4c98..abeaa533b976 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -15,6 +15,7 @@
#include <asm/debugfs.h>
#include <asm/powernv.h>
+#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include "pci.h"
@@ -425,9 +426,10 @@ static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
++npucomp->pe_num;
}
-struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
+static struct iommu_table_group *
+ pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
{
- struct iommu_table_group *table_group;
+ struct iommu_table_group *compound_group;
struct npu_comp *npucomp;
struct pci_dev *gpdev = NULL;
struct pci_controller *hose;
@@ -446,39 +448,52 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
hose = pci_bus_to_host(npdev->bus);
if (hose->npu) {
- table_group = &hose->npu->npucomp.table_group;
-
- if (!table_group->group) {
- table_group->ops = &pnv_npu_peers_ops;
- iommu_register_group(table_group,
- hose->global_number,
- pe->pe_number);
- }
+ /* P9 case: compound group is per-NPU (all gpus, all links) */
+ npucomp = &hose->npu->npucomp;
} else {
- /* Create a group for 1 GPU and attached NPUs for POWER8 */
- pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL);
- table_group = &pe->npucomp->table_group;
- table_group->ops = &pnv_npu_peers_ops;
- iommu_register_group(table_group, hose->global_number,
- pe->pe_number);
+ /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */
+ npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL);
}
- /* Steal capabilities from a GPU PE */
- table_group->max_dynamic_windows_supported =
- pe->table_group.max_dynamic_windows_supported;
- table_group->tce32_start = pe->table_group.tce32_start;
- table_group->tce32_size = pe->table_group.tce32_size;
- table_group->max_levels = pe->table_group.max_levels;
- if (!table_group->pgsizes)
- table_group->pgsizes = pe->table_group.pgsizes;
+ compound_group = &npucomp->table_group;
+ if (!compound_group->group) {
+ compound_group->ops = &pnv_npu_peers_ops;
+ iommu_register_group(compound_group, hose->global_number,
+ pe->pe_number);
- npucomp = container_of(table_group, struct npu_comp, table_group);
+ /* Steal capabilities from a GPU PE */
+ compound_group->max_dynamic_windows_supported =
+ pe->table_group.max_dynamic_windows_supported;
+ compound_group->tce32_start = pe->table_group.tce32_start;
+ compound_group->tce32_size = pe->table_group.tce32_size;
+ compound_group->max_levels = pe->table_group.max_levels;
+ if (!compound_group->pgsizes)
+ compound_group->pgsizes = pe->table_group.pgsizes;
+ }
+
+ /*
+ * The gpu would have been added to the iommu group that's created
+ * for the PE. Pull it out now.
+ */
+ iommu_del_device(&gpdev->dev);
+
+ /*
+ * I'm not sure this is strictly required, but it's probably a good idea
+ * since the table_group for the PE is going to be attached to the
+ * compound table group. If we leave the PE's iommu group active then
+ * we might have the same table_group being modifiable via two sepeate
+ * iommu groups.
+ */
+ iommu_group_put(pe->table_group.group);
+
+ /* now put the GPU into the compound group */
pnv_comp_attach_table_group(npucomp, pe);
+ iommu_add_device(compound_group, &gpdev->dev);
- return table_group;
+ return compound_group;
}
-struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
+static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
{
struct iommu_table_group *table_group;
struct npu_comp *npucomp;
@@ -521,6 +536,54 @@ struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
return table_group;
}
+
+void pnv_pci_npu_setup_iommu_groups(void)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
+
+ /*
+ * For non-nvlink devices the IOMMU group is registered when the PE is
+ * configured and devices are added to the group when the per-device
+ * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
+ * only initialise for "normal" IODA PHBs.
+ *
+ * For NVLink devices we need to ensure the NVLinks and the GPU end up
+ * in the same IOMMU group, so that's handled here.
+ */
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ if (phb->type == PNV_PHB_IODA2)
+ list_for_each_entry(pe, &phb->ioda.pe_list, list)
+ pnv_try_setup_npu_table_group(pe);
+ }
+
+ /*
+ * Now we have all PHBs discovered, time to add NPU devices to
+ * the corresponding IOMMU groups.
+ */
+ list_for_each_entry(hose, &hose_list, list_node) {
+ unsigned long pgsizes;
+
+ phb = hose->private_data;
+
+ if (phb->type != PNV_PHB_NPU_NVLINK)
+ continue;
+
+ pgsizes = pnv_ioda_parse_tce_sizes(phb);
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ /*
+ * IODA2 bridges get this set up from
+ * pci_controller_ops::setup_bridge but NPU bridges
+ * do not have this hook defined so we do it here.
+ */
+ pe->table_group.pgsizes = pgsizes;
+ pnv_npu_compound_attach(pe);
+ }
+ }
+}
#endif /* CONFIG_IOMMU_API */
int pnv_npu2_init(struct pci_controller *hose)
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
index d361d37d975f..9a360ced663b 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.c
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -671,7 +671,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
* Firmware supports 32-bit field for size. Align it to PAGE_SIZE
* and request firmware to copy multiple kernel boot memory regions.
*/
- fadump_conf->max_copy_size = _ALIGN_DOWN(U32_MAX, PAGE_SIZE);
+ fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE);
/*
* Check if dump has been initiated on last reboot.
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 2b3dfd0b6cdd..d95954ad4c0a 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -811,6 +811,10 @@ static int opal_add_one_export(struct kobject *parent, const char *export_name,
goto out;
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ if (!attr) {
+ rc = -ENOMEM;
+ goto out;
+ }
name = kstrdup(export_name, GFP_KERNEL);
if (!name) {
rc = -ENOMEM;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index 5dc6847d5f4c..f923359d8afc 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -17,6 +17,34 @@
#include <asm/tce.h>
#include "pci.h"
+unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
+{
+ struct pci_controller *hose = phb->hose;
+ struct device_node *dn = hose->dn;
+ unsigned long mask = 0;
+ int i, rc, count;
+ u32 val;
+
+ count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
+ if (count <= 0) {
+ mask = SZ_4K | SZ_64K;
+ /* Add 16M for POWER8 by default */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ !cpu_has_feature(CPU_FTR_ARCH_300))
+ mask |= SZ_16M | SZ_256M;
+ return mask;
+ }
+
+ for (i = 0; i < count; i++) {
+ rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
+ i, &val);
+ if (rc == 0)
+ mask |= 1ULL << val;
+ }
+
+ return mask;
+}
+
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned int page_shift)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 57d3a6af1d52..73a63efcf855 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -51,6 +51,7 @@ static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
"NPU_OCAPI" };
static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+static void pnv_pci_configure_bus(struct pci_bus *bus);
void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
@@ -264,8 +265,8 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
if (!r->parent || !pnv_pci_is_m64(phb, r))
continue;
- start = _ALIGN_DOWN(r->start - base, sgsz);
- end = _ALIGN_UP(r->end - base, sgsz);
+ start = ALIGN_DOWN(r->start - base, sgsz);
+ end = ALIGN(r->end - base, sgsz);
for (segno = start / sgsz; segno < end / sgsz; segno++) {
if (pe_bitmap)
set_bit(segno, pe_bitmap);
@@ -361,7 +362,7 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
return NULL;
/* Allocate bitmap */
- size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
+ size = ALIGN(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
pe_alloc = kzalloc(size, GFP_KERNEL);
if (!pe_alloc) {
pr_warn("%s: Out of memory !\n",
@@ -660,6 +661,16 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
return state;
}
+struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn)
+{
+ int pe_number = phb->ioda.pe_rmap[bdfn];
+
+ if (pe_number == IODA_INVALID_PE)
+ return NULL;
+
+ return &phb->ioda.pe_array[pe_number];
+}
+
struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -1110,34 +1121,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
return pe;
}
-static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- struct pci_dn *pdn = pci_get_pdn(dev);
-
- if (pdn == NULL) {
- pr_warn("%s: No device node associated with device !\n",
- pci_name(dev));
- continue;
- }
-
- /*
- * In partial hotplug case, the PCI device might be still
- * associated with the PE and needn't attach it to the PE
- * again.
- */
- if (pdn->pe_number != IODA_INVALID_PE)
- continue;
-
- pe->device_count++;
- pdn->pe_number = pe->pe_number;
- if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
- pnv_ioda_setup_same_PE(dev->subordinate, pe);
- }
-}
-
/*
* There're 2 types of PCI bus sensitive PEs: One that is compromised of
* single PCI bus. Another one that contains the primary PCI bus and its
@@ -1156,15 +1139,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
* We should reuse it instead of allocating a new one.
*/
pe_num = phb->ioda.pe_rmap[bus->number << 8];
- if (pe_num != IODA_INVALID_PE) {
+ if (WARN_ON(pe_num != IODA_INVALID_PE)) {
pe = &phb->ioda.pe_array[pe_num];
- pnv_ioda_setup_same_PE(bus, pe);
return NULL;
}
/* PE number for root bus should have been reserved */
- if (pci_is_root_bus(bus) &&
- phb->ioda.root_pe_idx != IODA_INVALID_PE)
+ if (pci_is_root_bus(bus))
pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx];
/* Check if PE is determined by M64 */
@@ -1202,9 +1183,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
return NULL;
}
- /* Associate it with all child devices */
- pnv_ioda_setup_same_PE(bus, pe);
-
/* Put PE to the list */
list_add_tail(&pe->list, &phb->ioda.pe_list);
@@ -1288,7 +1266,7 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
pnv_ioda_setup_npu_PE(pdev);
}
-static void pnv_pci_ioda_setup_PEs(void)
+static void pnv_pci_ioda_setup_nvlink(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
@@ -1312,6 +1290,11 @@ static void pnv_pci_ioda_setup_PEs(void)
list_for_each_entry(pe, &phb->ioda.pe_list, list)
pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
}
+
+#ifdef CONFIG_IOMMU_API
+ /* setup iommu groups so we can do nvlink pass-thru */
+ pnv_pci_npu_setup_iommu_groups();
+#endif
}
#ifdef CONFIG_PCI_IOV
@@ -1550,11 +1533,6 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe);
-#ifdef CONFIG_IOMMU_API
-static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
- struct iommu_table_group *table_group, struct pci_bus *bus);
-
-#endif
static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
{
struct pci_bus *bus;
@@ -1619,11 +1597,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
}
pnv_pci_ioda2_setup_dma_pe(phb, pe);
-#ifdef CONFIG_IOMMU_API
- iommu_register_group(&pe->table_group,
- pe->phb->hose->global_number, pe->pe_number);
- pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
-#endif
}
}
@@ -1767,24 +1740,39 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
- /*
- * The function can be called while the PE#
- * hasn't been assigned. Do nothing for the
- * case.
- */
- if (!pdn || pdn->pe_number == IODA_INVALID_PE)
- return;
+ /* Check if the BDFN for this device is associated with a PE yet */
+ pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
+ if (!pe) {
+ /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */
+ if (WARN_ON(pdev->is_virtfn))
+ return;
+
+ pnv_pci_configure_bus(pdev->bus);
+ pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
+ pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff);
+
+
+ /*
+ * If we can't setup the IODA PE something has gone horribly
+ * wrong and we can't enable DMA for the device.
+ */
+ if (WARN_ON(!pe))
+ return;
+ } else {
+ pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
+ }
+
+ if (pdn)
+ pdn->pe_number = pe->pe_number;
+ pe->device_count++;
- pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
- /*
- * Note: iommu_add_device() will fail here as
- * for physical PE: the device is already added by now;
- * for virtual PE: sysfs entries are not ready yet and
- * tce_iommu_bus_notifier will add the device to a group later.
- */
+
+ /* PEs with a DMA weight of zero won't have a group */
+ if (pe->table_group.group)
+ iommu_add_device(&pe->table_group, &pdev->dev);
}
/*
@@ -2297,9 +2285,6 @@ found:
pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
iommu_init_table(tbl, phb->hose->node, 0, 0);
- if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
-
return;
fail:
/* XXX Failure: Try to fallback to 64-bit only ? */
@@ -2537,7 +2522,7 @@ unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
direct_table_size = 1UL << table_shift;
for ( ; levels; --levels) {
- bytes += _ALIGN_UP(tce_table_size, direct_table_size);
+ bytes += ALIGN(tce_table_size, direct_table_size);
tce_table_size /= direct_table_size;
tce_table_size <<= 3;
@@ -2596,137 +2581,8 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
.take_ownership = pnv_ioda2_take_ownership,
.release_ownership = pnv_ioda2_release_ownership,
};
-
-static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe,
- struct iommu_table_group *table_group,
- struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- iommu_add_device(table_group, &dev->dev);
-
- if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
- pnv_ioda_setup_bus_iommu_group_add_devices(pe,
- table_group, dev->subordinate);
- }
-}
-
-static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
- struct iommu_table_group *table_group, struct pci_bus *bus)
-{
-
- if (pe->flags & PNV_IODA_PE_DEV)
- iommu_add_device(table_group, &pe->pdev->dev);
-
- if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus)
- pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group,
- bus);
-}
-
-static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
-
-static void pnv_pci_ioda_setup_iommu_api(void)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
-
- /*
- * There are 4 types of PEs:
- * - PNV_IODA_PE_BUS: a downstream port with an adapter,
- * created from pnv_pci_setup_bridge();
- * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it,
- * created from pnv_pci_setup_bridge();
- * - PNV_IODA_PE_VF: a SRIOV virtual function,
- * created from pnv_pcibios_sriov_enable();
- * - PNV_IODA_PE_DEV: an NPU or OCAPI device,
- * created from pnv_pci_ioda_fixup().
- *
- * Normally a PE is represented by an IOMMU group, however for
- * devices with side channels the groups need to be more strict.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
-
- if (phb->type == PNV_PHB_NPU_NVLINK ||
- phb->type == PNV_PHB_NPU_OCAPI)
- continue;
-
- list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- struct iommu_table_group *table_group;
-
- table_group = pnv_try_setup_npu_table_group(pe);
- if (!table_group) {
- if (!pnv_pci_ioda_pe_dma_weight(pe))
- continue;
-
- table_group = &pe->table_group;
- iommu_register_group(&pe->table_group,
- pe->phb->hose->global_number,
- pe->pe_number);
- }
- pnv_ioda_setup_bus_iommu_group(pe, table_group,
- pe->pbus);
- }
- }
-
- /*
- * Now we have all PHBs discovered, time to add NPU devices to
- * the corresponding IOMMU groups.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- unsigned long pgsizes;
-
- phb = hose->private_data;
-
- if (phb->type != PNV_PHB_NPU_NVLINK)
- continue;
-
- pgsizes = pnv_ioda_parse_tce_sizes(phb);
- list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- /*
- * IODA2 bridges get this set up from
- * pci_controller_ops::setup_bridge but NPU bridges
- * do not have this hook defined so we do it here.
- */
- pe->table_group.pgsizes = pgsizes;
- pnv_npu_compound_attach(pe);
- }
- }
-}
-#else /* !CONFIG_IOMMU_API */
-static void pnv_pci_ioda_setup_iommu_api(void) { };
#endif
-static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
-{
- struct pci_controller *hose = phb->hose;
- struct device_node *dn = hose->dn;
- unsigned long mask = 0;
- int i, rc, count;
- u32 val;
-
- count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
- if (count <= 0) {
- mask = SZ_4K | SZ_64K;
- /* Add 16M for POWER8 by default */
- if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
- !cpu_has_feature(CPU_FTR_ARCH_300))
- mask |= SZ_16M | SZ_256M;
- return mask;
- }
-
- for (i = 0; i < count; i++) {
- rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
- i, &val);
- if (rc == 0)
- mask |= 1ULL << val;
- }
-
- return mask;
-}
-
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe)
{
@@ -2749,16 +2605,16 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
IOMMU_TABLE_GROUP_MAX_TABLES;
pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS;
pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb);
-#ifdef CONFIG_IOMMU_API
- pe->table_group.ops = &pnv_pci_ioda2_ops;
-#endif
rc = pnv_pci_ioda2_setup_default_config(pe);
if (rc)
return;
- if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
+#ifdef CONFIG_IOMMU_API
+ pe->table_group.ops = &pnv_pci_ioda2_ops;
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+#endif
}
int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
@@ -3220,8 +3076,7 @@ static void pnv_pci_enable_bridges(void)
static void pnv_pci_ioda_fixup(void)
{
- pnv_pci_ioda_setup_PEs();
- pnv_pci_ioda_setup_iommu_api();
+ pnv_pci_ioda_setup_nvlink();
pnv_pci_ioda_create_dbgfs();
pnv_pci_enable_bridges();
@@ -3333,28 +3188,18 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
}
}
-static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+static void pnv_pci_configure_bus(struct pci_bus *bus)
{
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
struct pci_dev *bridge = bus->self;
struct pnv_ioda_pe *pe;
- bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
-
- /* Extend bridge's windows if necessary */
- pnv_pci_fixup_bridge_resources(bus, type);
+ bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
- /* The PE for root bus should be realized before any one else */
- if (!phb->ioda.root_pe_populated) {
- pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false);
- if (pe) {
- phb->ioda.root_pe_idx = pe->pe_number;
- phb->ioda.root_pe_populated = true;
- }
- }
+ dev_info(&bus->dev, "Configuring PE for bus\n");
/* Don't assign PE to PCI bus, which doesn't have subordinate devices */
- if (list_empty(&bus->devices))
+ if (WARN_ON(list_empty(&bus->devices)))
return;
/* Reserve PEs according to used M64 resources */
@@ -3599,6 +3444,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
struct pnv_phb *phb = pe->phb;
struct pnv_ioda_pe *slave, *tmp;
+ pe_info(pe, "Releasing PE\n");
+
mutex_lock(&phb->ioda.pe_list_mutex);
list_del(&pe->list);
mutex_unlock(&phb->ioda.pe_list_mutex);
@@ -3633,11 +3480,10 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
* that it can be populated again in PCI hot add path. The PE
* shouldn't be destroyed as it's the global reserved resource.
*/
- if (phb->ioda.root_pe_populated &&
- phb->ioda.root_pe_idx == pe->pe_number)
- phb->ioda.root_pe_populated = false;
- else
- pnv_ioda_free_pe(pe);
+ if (phb->ioda.root_pe_idx == pe->pe_number)
+ return;
+
+ pnv_ioda_free_pe(pe);
}
static void pnv_pci_release_device(struct pci_dev *pdev)
@@ -3715,7 +3561,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.enable_device_hook = pnv_pci_enable_device_hook,
.release_device = pnv_pci_release_device,
.window_alignment = pnv_pci_window_alignment,
- .setup_bridge = pnv_pci_setup_bridge,
+ .setup_bridge = pnv_pci_fixup_bridge_resources,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
.shutdown = pnv_pci_ioda_shutdown,
};
@@ -3745,6 +3591,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
struct pnv_phb *phb;
unsigned long size, m64map_off, m32map_off, pemap_off;
unsigned long iomap_off = 0, dma32map_off = 0;
+ struct pnv_ioda_pe *root_pe;
struct resource r;
const __be64 *prop64;
const __be32 *prop32;
@@ -3863,7 +3710,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
PNV_IODA1_DMA32_SEGSIZE;
/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
- size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
+ size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
sizeof(unsigned long));
m64map_off = size;
size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]);
@@ -3912,7 +3759,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1;
pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
} else {
- phb->ioda.root_pe_idx = IODA_INVALID_PE;
+ /* otherwise just allocate one */
+ root_pe = pnv_ioda_alloc_pe(phb);
+ phb->ioda.root_pe_idx = root_pe->pe_number;
}
INIT_LIST_HEAD(&phb->ioda.pe_list);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 5bf818246339..091fe1cf386b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -955,28 +955,8 @@ static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
- struct pci_dev *pdev;
- struct pci_dn *pdn;
- struct pnv_ioda_pe *pe;
- struct pci_controller *hose;
- struct pnv_phb *phb;
switch (action) {
- case BUS_NOTIFY_ADD_DEVICE:
- pdev = to_pci_dev(dev);
- pdn = pci_get_pdn(pdev);
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
-
- WARN_ON_ONCE(!phb);
- if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb)
- return 0;
-
- pe = &phb->ioda.pe_array[pdn->pe_number];
- if (!pe->table_group.group)
- return 0;
- iommu_add_device(&pe->table_group, dev);
- return 0;
case BUS_NOTIFY_DEL_DEVICE:
iommu_del_device(dev);
return 0;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index d3bbdeab3a32..51c254f2f3cb 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -33,6 +33,24 @@ enum pnv_phb_model {
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
+/*
+ * A brief note on PNV_IODA_PE_BUS_ALL
+ *
+ * This is needed because of the behaviour of PCIe-to-PCI bridges. The PHB uses
+ * the Requester ID field of the PCIe request header to determine the device
+ * (and PE) that initiated a DMA. In legacy PCI individual memory read/write
+ * requests aren't tagged with the RID. To work around this the PCIe-to-PCI
+ * bridge will use (secondary_bus_no << 8) | 0x00 as the RID on the PCIe side.
+ *
+ * PCIe-to-X bridges have a similar issue even though PCI-X requests also have
+ * a RID in the transaction header. The PCIe-to-X bridge is permitted to "take
+ * ownership" of a transaction by a PCI-X device when forwarding it to the PCIe
+ * side of the bridge.
+ *
+ * To work around these problems we use the BUS_ALL flag since every subordinate
+ * bus of the bridge should go into the same PE.
+ */
+
/* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */
#define PNV_IODA_STOPPED_STATE 0x8000000000000000
@@ -118,7 +136,6 @@ struct pnv_phb {
unsigned int total_pe_num;
unsigned int reserved_pe_idx;
unsigned int root_pe_idx;
- bool root_pe_populated;
/* 32-bit MMIO window */
unsigned int m32_size;
@@ -190,6 +207,7 @@ extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
+extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
@@ -209,11 +227,7 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
/* Nvlink functions */
extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
-extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe);
-extern struct iommu_table_group *pnv_try_setup_npu_table_group(
- struct pnv_ioda_pe *pe);
-extern struct iommu_table_group *pnv_npu_compound_attach(
- struct pnv_ioda_pe *pe);
+extern void pnv_pci_npu_setup_iommu_groups(void);
/* pci-ioda-tce.c */
#define POWERNV_IOMMU_DEFAULT_LEVELS 2
@@ -244,4 +258,6 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned int page_shift);
+extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
+
#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c
new file mode 100644
index 000000000000..98ed5d8c5441
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-api.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * VAS user space API for its accelerators (Only NX-GZIP is supported now)
+ * Copyright (C) 2019 Haren Myneni, IBM Corp
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <asm/vas.h>
+#include <uapi/asm/vas-api.h>
+#include "vas.h"
+
+/*
+ * The driver creates the device node that can be used as follows:
+ * For NX-GZIP
+ *
+ * fd = open("/dev/crypto/nx-gzip", O_RDWR);
+ * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
+ * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
+ * vas_copy(&crb, 0, 1);
+ * vas_paste(paste_addr, 0, 1);
+ * close(fd) or exit process to close window.
+ *
+ * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
+ * copy/paste returns to the user space directly. So refer NX hardware
+ * documententation for exact copy/paste usage and completion / error
+ * conditions.
+ */
+
+/*
+ * Wrapper object for the nx-gzip device - there is just one instance of
+ * this node for the whole system.
+ */
+static struct coproc_dev {
+ struct cdev cdev;
+ struct device *device;
+ char *name;
+ dev_t devt;
+ struct class *class;
+ enum vas_cop_type cop_type;
+} coproc_device;
+
+struct coproc_instance {
+ struct coproc_dev *coproc;
+ struct vas_window *txwin;
+};
+
+static char *coproc_devnode(struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
+}
+
+static int coproc_open(struct inode *inode, struct file *fp)
+{
+ struct coproc_instance *cp_inst;
+
+ cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);
+ if (!cp_inst)
+ return -ENOMEM;
+
+ cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
+ cdev);
+ fp->private_data = cp_inst;
+
+ return 0;
+}
+
+static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
+{
+ void __user *uptr = (void __user *)arg;
+ struct vas_tx_win_attr txattr = {};
+ struct vas_tx_win_open_attr uattr;
+ struct coproc_instance *cp_inst;
+ struct vas_window *txwin;
+ int rc, vasid;
+
+ cp_inst = fp->private_data;
+
+ /*
+ * One window for file descriptor
+ */
+ if (cp_inst->txwin)
+ return -EEXIST;
+
+ rc = copy_from_user(&uattr, uptr, sizeof(uattr));
+ if (rc) {
+ pr_err("%s(): copy_from_user() returns %d\n", __func__, rc);
+ return -EFAULT;
+ }
+
+ if (uattr.version != 1) {
+ pr_err("Invalid version\n");
+ return -EINVAL;
+ }
+
+ vasid = uattr.vas_id;
+
+ vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type);
+
+ txattr.lpid = mfspr(SPRN_LPID);
+ txattr.pidr = mfspr(SPRN_PID);
+ txattr.user_win = true;
+ txattr.rsvd_txbuf_count = false;
+ txattr.pswid = false;
+
+ pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
+ mfspr(SPRN_PID));
+
+ txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr);
+ if (IS_ERR(txwin)) {
+ pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__,
+ PTR_ERR(txwin));
+ return PTR_ERR(txwin);
+ }
+
+ cp_inst->txwin = txwin;
+
+ return 0;
+}
+
+static int coproc_release(struct inode *inode, struct file *fp)
+{
+ struct coproc_instance *cp_inst = fp->private_data;
+
+ if (cp_inst->txwin) {
+ vas_win_close(cp_inst->txwin);
+ cp_inst->txwin = NULL;
+ }
+
+ kfree(cp_inst);
+ fp->private_data = NULL;
+
+ /*
+ * We don't know here if user has other receive windows
+ * open, so we can't really call clear_thread_tidr().
+ * So, once the process calls set_thread_tidr(), the
+ * TIDR value sticks around until process exits, resulting
+ * in an extra copy in restore_sprs().
+ */
+
+ return 0;
+}
+
+static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+ struct coproc_instance *cp_inst = fp->private_data;
+ struct vas_window *txwin;
+ unsigned long pfn;
+ u64 paste_addr;
+ pgprot_t prot;
+ int rc;
+
+ txwin = cp_inst->txwin;
+
+ if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+ pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__,
+ (vma->vm_end - vma->vm_start), PAGE_SIZE);
+ return -EINVAL;
+ }
+
+ /* Ensure instance has an open send window */
+ if (!txwin) {
+ pr_err("%s(): No send window open?\n", __func__);
+ return -EINVAL;
+ }
+
+ vas_win_paste_addr(txwin, &paste_addr, NULL);
+ pfn = paste_addr >> PAGE_SHIFT;
+
+ /* flags, page_prot from cxl_mmap(), except we want cachable */
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+
+ prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
+
+ rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, prot);
+
+ pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__,
+ paste_addr, vma->vm_start, rc);
+
+ return rc;
+}
+
+static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case VAS_TX_WIN_OPEN:
+ return coproc_ioc_tx_win_open(fp, arg);
+ default:
+ return -EINVAL;
+ }
+}
+
+static struct file_operations coproc_fops = {
+ .open = coproc_open,
+ .release = coproc_release,
+ .mmap = coproc_mmap,
+ .unlocked_ioctl = coproc_ioctl,
+};
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ */
+int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
+ const char *name)
+{
+ int rc = -EINVAL;
+ dev_t devno;
+
+ rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
+ if (rc) {
+ pr_err("Unable to allocate coproc major number: %i\n", rc);
+ return rc;
+ }
+
+ pr_devel("%s device allocated, dev [%i,%i]\n", name,
+ MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
+
+ coproc_device.class = class_create(mod, name);
+ if (IS_ERR(coproc_device.class)) {
+ rc = PTR_ERR(coproc_device.class);
+ pr_err("Unable to create %s class %d\n", name, rc);
+ goto err_class;
+ }
+ coproc_device.class->devnode = coproc_devnode;
+ coproc_device.cop_type = cop_type;
+
+ coproc_fops.owner = mod;
+ cdev_init(&coproc_device.cdev, &coproc_fops);
+
+ devno = MKDEV(MAJOR(coproc_device.devt), 0);
+ rc = cdev_add(&coproc_device.cdev, devno, 1);
+ if (rc) {
+ pr_err("cdev_add() failed %d\n", rc);
+ goto err_cdev;
+ }
+
+ coproc_device.device = device_create(coproc_device.class, NULL,
+ devno, NULL, name, MINOR(devno));
+ if (IS_ERR(coproc_device.device)) {
+ rc = PTR_ERR(coproc_device.device);
+ pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
+ goto err;
+ }
+
+ pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno),
+ MINOR(devno));
+
+ return 0;
+
+err:
+ cdev_del(&coproc_device.cdev);
+err_cdev:
+ class_destroy(coproc_device.class);
+err_class:
+ unregister_chrdev_region(coproc_device.devt, 1);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vas_register_coproc_api);
+
+void vas_unregister_coproc_api(void)
+{
+ dev_t devno;
+
+ cdev_del(&coproc_device.cdev);
+ devno = MKDEV(MAJOR(coproc_device.devt), 0);
+ device_destroy(coproc_device.class, devno);
+
+ class_destroy(coproc_device.class);
+ unregister_chrdev_region(coproc_device.devt, 1);
+}
+EXPORT_SYMBOL_GPL(vas_unregister_coproc_api);
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
index 44035a3d6414..41fa90d2f4ab 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -38,7 +38,7 @@ static int info_show(struct seq_file *s, void *private)
seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
window->tx_win ? "Send" : "Receive");
- seq_printf(s, "Pid : %d\n", window->pid);
+ seq_printf(s, "Pid : %d\n", vas_window_pid(window));
unlock:
mutex_unlock(&vas_mutex);
diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c
new file mode 100644
index 000000000000..25db70be4c9c
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VAS Fault handling.
+ * Copyright 2019, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+#include <linux/mmu_context.h>
+#include <asm/icswx.h>
+
+#include "vas.h"
+
+/*
+ * The maximum FIFO size for fault window can be 8MB
+ * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS
+ * instance will be having fault window.
+ * 8MB FIFO can be used if expects more faults for each VAS
+ * instance.
+ */
+#define VAS_FAULT_WIN_FIFO_SIZE (4 << 20)
+
+static void dump_crb(struct coprocessor_request_block *crb)
+{
+ struct data_descriptor_entry *dde;
+ struct nx_fault_stamp *nx;
+
+ dde = &crb->source;
+ pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+ be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+ dde->count, dde->index, dde->flags);
+
+ dde = &crb->target;
+ pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+ be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+ dde->count, dde->index, dde->flags);
+
+ nx = &crb->stamp.nx;
+ pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
+ be32_to_cpu(nx->pswid),
+ be64_to_cpu(crb->stamp.nx.fault_storage_addr),
+ nx->flags, nx->fault_status);
+}
+
+/*
+ * Update the CSB to indicate a translation error.
+ *
+ * User space will be polling on CSB after the request is issued.
+ * If NX can handle the request without any issues, it updates CSB.
+ * Whereas if NX encounters page fault, the kernel will handle the
+ * fault and update CSB with translation error.
+ *
+ * If we are unable to update the CSB means copy_to_user failed due to
+ * invalid csb_addr, send a signal to the process.
+ */
+static void update_csb(struct vas_window *window,
+ struct coprocessor_request_block *crb)
+{
+ struct coprocessor_status_block csb;
+ struct kernel_siginfo info;
+ struct task_struct *tsk;
+ void __user *csb_addr;
+ struct pid *pid;
+ int rc;
+
+ /*
+ * NX user space windows can not be opened for task->mm=NULL
+ * and faults will not be generated for kernel requests.
+ */
+ if (WARN_ON_ONCE(!window->mm || !window->user_win))
+ return;
+
+ csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
+
+ memset(&csb, 0, sizeof(csb));
+ csb.cc = CSB_CC_TRANSLATION;
+ csb.ce = CSB_CE_TERMINATION;
+ csb.cs = 0;
+ csb.count = 0;
+
+ /*
+ * NX operates and returns in BE format as defined CRB struct.
+ * So saves fault_storage_addr in BE as NX pastes in FIFO and
+ * expects user space to convert to CPU format.
+ */
+ csb.address = crb->stamp.nx.fault_storage_addr;
+ csb.flags = 0;
+
+ pid = window->pid;
+ tsk = get_pid_task(pid, PIDTYPE_PID);
+ /*
+ * Process closes send window after all pending NX requests are
+ * completed. In multi-thread applications, a child thread can
+ * open a window and can exit without closing it. May be some
+ * requests are pending or this window can be used by other
+ * threads later. We should handle faults if NX encounters
+ * pages faults on these requests. Update CSB with translation
+ * error and fault address. If csb_addr passed by user space is
+ * invalid, send SEGV signal to pid saved in window. If the
+ * child thread is not running, send the signal to tgid.
+ * Parent thread (tgid) will close this window upon its exit.
+ *
+ * pid and mm references are taken when window is opened by
+ * process (pid). So tgid is used only when child thread opens
+ * a window and exits without closing it.
+ */
+ if (!tsk) {
+ pid = window->tgid;
+ tsk = get_pid_task(pid, PIDTYPE_PID);
+ /*
+ * Parent thread (tgid) will be closing window when it
+ * exits. So should not get here.
+ */
+ if (WARN_ON_ONCE(!tsk))
+ return;
+ }
+
+ /* Return if the task is exiting. */
+ if (tsk->flags & PF_EXITING) {
+ put_task_struct(tsk);
+ return;
+ }
+
+ use_mm(window->mm);
+ rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+ /*
+ * User space polls on csb.flags (first byte). So add barrier
+ * then copy first byte with csb flags update.
+ */
+ if (!rc) {
+ csb.flags = CSB_V;
+ /* Make sure update to csb.flags is visible now */
+ smp_mb();
+ rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+ }
+ unuse_mm(window->mm);
+ put_task_struct(tsk);
+
+ /* Success */
+ if (!rc)
+ return;
+
+ pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
+ csb_addr, pid_vnr(pid));
+
+ clear_siginfo(&info);
+ info.si_signo = SIGSEGV;
+ info.si_errno = EFAULT;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = csb_addr;
+
+ /*
+ * process will be polling on csb.flags after request is sent to
+ * NX. So generally CSB update should not fail except when an
+ * application passes invalid csb_addr. So an error message will
+ * be displayed and leave it to user space whether to ignore or
+ * handle this signal.
+ */
+ rcu_read_lock();
+ rc = kill_pid_info(SIGSEGV, &info, pid);
+ rcu_read_unlock();
+
+ pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
+ pid_vnr(pid), rc);
+}
+
+static void dump_fifo(struct vas_instance *vinst, void *entry)
+{
+ unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size;
+ unsigned long *fifo = entry;
+ int i;
+
+ pr_err("Fault fifo size %d, Max crbs %d\n", vinst->fault_fifo_size,
+ vinst->fault_fifo_size / CRB_SIZE);
+
+ /* Dump 10 CRB entries or until end of FIFO */
+ pr_err("Fault FIFO Dump:\n");
+ for (i = 0; i < 10*(CRB_SIZE/8) && fifo < end; i += 4, fifo += 4) {
+ pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n",
+ i, fifo, *fifo, *(fifo+1), *(fifo+2), *(fifo+3));
+ }
+}
+
+/*
+ * Process valid CRBs in fault FIFO.
+ * NX process user space requests, return credit and update the status
+ * in CRB. If it encounters transalation error when accessing CRB or
+ * request buffers, raises interrupt on the CPU to handle the fault.
+ * It takes credit on fault window, updates nx_fault_stamp in CRB with
+ * the following information and pastes CRB in fault FIFO.
+ *
+ * pswid - window ID of the window on which the request is sent.
+ * fault_storage_addr - fault address
+ *
+ * It can raise a single interrupt for multiple faults. Expects OS to
+ * process all valid faults and return credit for each fault on user
+ * space and fault windows. This fault FIFO control will be done with
+ * credit mechanism. NX can continuously paste CRBs until credits are not
+ * available on fault window. Otherwise, returns with RMA_reject.
+ *
+ * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128)
+ *
+ */
+irqreturn_t vas_fault_thread_fn(int irq, void *data)
+{
+ struct vas_instance *vinst = data;
+ struct coprocessor_request_block *crb, *entry;
+ struct coprocessor_request_block buf;
+ struct vas_window *window;
+ unsigned long flags;
+ void *fifo;
+
+ crb = &buf;
+
+ /*
+ * VAS can interrupt with multiple page faults. So process all
+ * valid CRBs within fault FIFO until reaches invalid CRB.
+ * We use CCW[0] and pswid to validate validate CRBs:
+ *
+ * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0
+ * OS sets this bit to 1 after reading CRB.
+ * pswid NX assigns window ID. Set pswid to -1 after
+ * reading CRB from fault FIFO.
+ *
+ * We exit this function if no valid CRBs are available to process.
+ * So acquire fault_lock and reset fifo_in_progress to 0 before
+ * exit.
+ * In case kernel receives another interrupt with different page
+ * fault, interrupt handler returns with IRQ_HANDLED if
+ * fifo_in_progress is set. Means these new faults will be
+ * handled by the current thread. Otherwise set fifo_in_progress
+ * and return IRQ_WAKE_THREAD to wake up thread.
+ */
+ while (true) {
+ spin_lock_irqsave(&vinst->fault_lock, flags);
+ /*
+ * Advance the fault fifo pointer to next CRB.
+ * Use CRB_SIZE rather than sizeof(*crb) since the latter is
+ * aligned to CRB_ALIGN (256) but the CRB written to by VAS is
+ * only CRB_SIZE in len.
+ */
+ fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE);
+ entry = fifo;
+
+ if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY))
+ || (entry->ccw & cpu_to_be32(CCW0_INVALID))) {
+ vinst->fifo_in_progress = 0;
+ spin_unlock_irqrestore(&vinst->fault_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ spin_unlock_irqrestore(&vinst->fault_lock, flags);
+ vinst->fault_crbs++;
+ if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE))
+ vinst->fault_crbs = 0;
+
+ memcpy(crb, fifo, CRB_SIZE);
+ entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY);
+ entry->ccw |= cpu_to_be32(CCW0_INVALID);
+ /*
+ * Return credit for the fault window.
+ */
+ vas_return_credit(vinst->fault_win, false);
+
+ pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n",
+ vinst->vas_id, vinst->fault_fifo, fifo,
+ vinst->fault_crbs);
+
+ dump_crb(crb);
+ window = vas_pswid_to_window(vinst,
+ be32_to_cpu(crb->stamp.nx.pswid));
+
+ if (IS_ERR(window)) {
+ /*
+ * We got an interrupt about a specific send
+ * window but we can't find that window and we can't
+ * even clean it up (return credit on user space
+ * window).
+ * But we should not get here.
+ * TODO: Disable IRQ.
+ */
+ dump_fifo(vinst, (void *)entry);
+ pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n",
+ vinst->vas_id, vinst->fault_fifo, fifo,
+ be32_to_cpu(crb->stamp.nx.pswid),
+ vinst->fault_crbs);
+
+ WARN_ON_ONCE(1);
+ } else {
+ update_csb(window, crb);
+ /*
+ * Return credit for send window after processing
+ * fault CRB.
+ */
+ vas_return_credit(window, true);
+ }
+ }
+}
+
+irqreturn_t vas_fault_handler(int irq, void *dev_id)
+{
+ struct vas_instance *vinst = dev_id;
+ irqreturn_t ret = IRQ_WAKE_THREAD;
+ unsigned long flags;
+
+ /*
+ * NX can generate an interrupt for multiple faults. So the
+ * fault handler thread process all CRBs until finds invalid
+ * entry. In case if NX sees continuous faults, it is possible
+ * that the thread function entered with the first interrupt
+ * can execute and process all valid CRBs.
+ * So wake up thread only if the fault thread is not in progress.
+ */
+ spin_lock_irqsave(&vinst->fault_lock, flags);
+
+ if (vinst->fifo_in_progress)
+ ret = IRQ_HANDLED;
+ else
+ vinst->fifo_in_progress = 1;
+
+ spin_unlock_irqrestore(&vinst->fault_lock, flags);
+
+ return ret;
+}
+
+/*
+ * Fault window is opened per VAS instance. NX pastes fault CRB in fault
+ * FIFO upon page faults.
+ */
+int vas_setup_fault_window(struct vas_instance *vinst)
+{
+ struct vas_rx_win_attr attr;
+
+ vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE;
+ vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL);
+ if (!vinst->fault_fifo) {
+ pr_err("Unable to alloc %d bytes for fault_fifo\n",
+ vinst->fault_fifo_size);
+ return -ENOMEM;
+ }
+
+ /*
+ * Invalidate all CRB entries. NX pastes valid entry for each fault.
+ */
+ memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size);
+ vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT);
+
+ attr.rx_fifo_size = vinst->fault_fifo_size;
+ attr.rx_fifo = vinst->fault_fifo;
+
+ /*
+ * Max creds is based on number of CRBs can fit in the FIFO.
+ * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds
+ * will be 0xffff since the receive creds field is 16bits wide.
+ */
+ attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE;
+ attr.lnotify_lpid = 0;
+ attr.lnotify_pid = mfspr(SPRN_PID);
+ attr.lnotify_tid = mfspr(SPRN_PID);
+
+ vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT,
+ &attr);
+
+ if (IS_ERR(vinst->fault_win)) {
+ pr_err("VAS: Error %ld opening FaultWin\n",
+ PTR_ERR(vinst->fault_win));
+ kfree(vinst->fault_fifo);
+ return PTR_ERR(vinst->fault_win);
+ }
+
+ pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n",
+ vinst->fault_win->winid, attr.lnotify_lpid,
+ attr.lnotify_pid, attr.lnotify_tid);
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 0c0d27d17976..6434f9cb5aed 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -12,6 +12,8 @@
#include <linux/log2.h>
#include <linux/rcupdate.h>
#include <linux/cred.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
#include <asm/switch_to.h>
#include <asm/ppc-opcode.h>
#include "vas.h"
@@ -24,7 +26,7 @@
* Compute the paste address region for the window @window using the
* ->paste_base_addr and ->paste_win_id_shift we got from device tree.
*/
-static void compute_paste_address(struct vas_window *window, u64 *addr, int *len)
+void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len)
{
int winid;
u64 base, shift;
@@ -78,7 +80,7 @@ static void *map_paste_region(struct vas_window *txwin)
goto free_name;
txwin->paste_addr_name = name;
- compute_paste_address(txwin, &start, &len);
+ vas_win_paste_addr(txwin, &start, &len);
if (!request_mem_region(start, len, name)) {
pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
@@ -136,7 +138,7 @@ static void unmap_paste_region(struct vas_window *window)
u64 busaddr_start;
if (window->paste_kaddr) {
- compute_paste_address(window, &busaddr_start, &len);
+ vas_win_paste_addr(window, &busaddr_start, &len);
unmap_region(window->paste_kaddr, busaddr_start, len);
window->paste_kaddr = NULL;
kfree(window->paste_addr_name);
@@ -373,7 +375,7 @@ int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
init_xlate_regs(window, winctx->user_win);
val = 0ULL;
- val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0);
+ val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id);
write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);
/* In PowerNV, interrupts go to HV. */
@@ -748,6 +750,8 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
winctx->min_scope = VAS_SCOPE_LOCAL;
winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+ if (rxwin->vinst->virq)
+ winctx->irq_port = rxwin->vinst->irq_port;
}
static bool rx_win_args_valid(enum vas_cop_type cop,
@@ -768,7 +772,7 @@ static bool rx_win_args_valid(enum vas_cop_type cop,
if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
return false;
- if (attr->wcreds_max > VAS_RX_WCREDS_MAX)
+ if (!attr->wcreds_max)
return false;
if (attr->nx_win) {
@@ -813,7 +817,8 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop)
{
memset(rxattr, 0, sizeof(*rxattr));
- if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
+ if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI ||
+ cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) {
rxattr->pin_win = true;
rxattr->nx_win = true;
rxattr->fault_win = false;
@@ -827,9 +832,9 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop)
rxattr->fault_win = true;
rxattr->notify_disable = true;
rxattr->rx_wcred_mode = true;
- rxattr->tx_wcred_mode = true;
rxattr->rx_win_ord_mode = true;
- rxattr->tx_win_ord_mode = true;
+ rxattr->rej_no_credit = true;
+ rxattr->tc_mode = VAS_THRESH_DISABLED;
} else if (cop == VAS_COP_TYPE_FTW) {
rxattr->user_win = true;
rxattr->intr_disable = true;
@@ -873,9 +878,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
rxwin->nx_win = rxattr->nx_win;
rxwin->user_win = rxattr->user_win;
rxwin->cop = cop;
- rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
- if (rxattr->user_win)
- rxwin->pid = task_pid_vnr(current);
+ rxwin->wcreds_max = rxattr->wcreds_max;
init_winctx_for_rxwin(rxwin, rxattr, &winctx);
init_winctx_regs(rxwin, &winctx);
@@ -890,7 +893,8 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
{
memset(txattr, 0, sizeof(*txattr));
- if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
+ if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI ||
+ cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) {
txattr->rej_no_credit = false;
txattr->rx_wcred_mode = true;
txattr->tx_wcred_mode = true;
@@ -944,13 +948,22 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
winctx->lpid = txattr->lpid;
winctx->pidr = txattr->pidr;
winctx->rx_win_id = txwin->rxwin->winid;
+ /*
+ * IRQ and fault window setup is successful. Set fault window
+ * for the send window so that ready to handle faults.
+ */
+ if (txwin->vinst->virq)
+ winctx->fault_win_id = txwin->vinst->fault_win->winid;
winctx->dma_type = VAS_DMA_TYPE_INJECT;
winctx->tc_mode = txattr->tc_mode;
winctx->min_scope = VAS_SCOPE_LOCAL;
winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+ if (txwin->vinst->virq)
+ winctx->irq_port = txwin->vinst->irq_port;
- winctx->pswid = 0;
+ winctx->pswid = txattr->pswid ? txattr->pswid :
+ encode_pswid(txwin->vinst->vas_id, txwin->winid);
}
static bool tx_win_args_valid(enum vas_cop_type cop,
@@ -965,9 +978,14 @@ static bool tx_win_args_valid(enum vas_cop_type cop,
if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
return false;
- if (attr->user_win &&
- (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
- return false;
+ if (attr->user_win) {
+ if (attr->rsvd_txbuf_count)
+ return false;
+
+ if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP &&
+ cop != VAS_COP_TYPE_GZIP_HIPRI)
+ return false;
+ }
return true;
}
@@ -1016,7 +1034,6 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
txwin->tx_win = 1;
txwin->rxwin = rxwin;
txwin->nx_win = txwin->rxwin->nx_win;
- txwin->pid = attr->pid;
txwin->user_win = attr->user_win;
txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
@@ -1040,12 +1057,59 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
}
} else {
/*
- * A user mapping must ensure that context switch issues
- * CP_ABORT for this thread.
+ * Interrupt hanlder or fault window setup failed. Means
+ * NX can not generate fault for page fault. So not
+ * opening for user space tx window.
*/
- rc = set_thread_uses_vas();
- if (rc)
+ if (!vinst->virq) {
+ rc = -ENODEV;
goto free_window;
+ }
+
+ /*
+ * Window opened by a child thread may not be closed when
+ * it exits. So take reference to its pid and release it
+ * when the window is free by parent thread.
+ * Acquire a reference to the task's pid to make sure
+ * pid will not be re-used - needed only for multithread
+ * applications.
+ */
+ txwin->pid = get_task_pid(current, PIDTYPE_PID);
+ /*
+ * Acquire a reference to the task's mm.
+ */
+ txwin->mm = get_task_mm(current);
+
+ if (!txwin->mm) {
+ put_pid(txwin->pid);
+ pr_err("VAS: pid(%d): mm_struct is not found\n",
+ current->pid);
+ rc = -EPERM;
+ goto free_window;
+ }
+
+ mmgrab(txwin->mm);
+ mmput(txwin->mm);
+ mm_context_add_vas_window(txwin->mm);
+ /*
+ * Process closes window during exit. In the case of
+ * multithread application, the child thread can open
+ * window and can exit without closing it. Expects parent
+ * thread to use and close the window. So do not need
+ * to take pid reference for parent thread.
+ */
+ txwin->tgid = find_get_pid(task_tgid_vnr(current));
+ /*
+ * Even a process that has no foreign real address mapping can
+ * use an unpaired COPY instruction (to no real effect). Issue
+ * CP_ABORT to clear any pending COPY and prevent a covert
+ * channel.
+ *
+ * __switch_to() will issue CP_ABORT on future context switches
+ * if process / thread has any open VAS window (Use
+ * current->mm->context.vas_windows).
+ */
+ asm volatile(PPC_CP_ABORT);
}
set_vinst_win(vinst, txwin);
@@ -1128,6 +1192,7 @@ static void poll_window_credits(struct vas_window *window)
{
u64 val;
int creds, mode;
+ int count = 0;
val = read_hvwc_reg(window, VREG(WINCTL));
if (window->tx_win)
@@ -1146,10 +1211,27 @@ retry:
creds = GET_FIELD(VAS_LRX_WCRED, val);
}
+ /*
+ * Takes around few milliseconds to complete all pending requests
+ * and return credits.
+ * TODO: Scan fault FIFO and invalidate CRBs points to this window
+ * and issue CRB Kill to stop all pending requests. Need only
+ * if there is a bug in NX or fault handling in kernel.
+ */
if (creds < window->wcreds_max) {
val = 0;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(10));
+ count++;
+ /*
+ * Process can not close send window until all credits are
+ * returned.
+ */
+ if (!(count % 1000))
+ pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n",
+ vas_window_pid(window), window->winid,
+ creds, count);
+
goto retry;
}
}
@@ -1163,6 +1245,7 @@ static void poll_window_busy_state(struct vas_window *window)
{
int busy;
u64 val;
+ int count = 0;
retry:
val = read_hvwc_reg(window, VREG(WIN_STATUS));
@@ -1170,7 +1253,16 @@ retry:
if (busy) {
val = 0;
set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(msecs_to_jiffies(5));
+ schedule_timeout(msecs_to_jiffies(10));
+ count++;
+ /*
+ * Takes around few milliseconds to process all pending
+ * requests.
+ */
+ if (!(count % 1000))
+ pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n",
+ vas_window_pid(window), window->winid, count);
+
goto retry;
}
}
@@ -1235,22 +1327,118 @@ int vas_win_close(struct vas_window *window)
unmap_paste_region(window);
- clear_vinst_win(window);
-
poll_window_busy_state(window);
unpin_close_window(window);
poll_window_credits(window);
+ clear_vinst_win(window);
+
poll_window_castout(window);
/* if send window, drop reference to matching receive window */
- if (window->tx_win)
+ if (window->tx_win) {
+ if (window->user_win) {
+ /* Drop references to pid and mm */
+ put_pid(window->pid);
+ if (window->mm) {
+ mm_context_remove_vas_window(window->mm);
+ mmdrop(window->mm);
+ }
+ }
put_rx_win(window->rxwin);
+ }
vas_window_free(window);
return 0;
}
EXPORT_SYMBOL_GPL(vas_win_close);
+
+/*
+ * Return credit for the given window.
+ * Send windows and fault window uses credit mechanism as follows:
+ *
+ * Send windows:
+ * - The default number of credits available for each send window is
+ * 1024. It means 1024 requests can be issued asynchronously at the
+ * same time. If the credit is not available, that request will be
+ * returned with RMA_Busy.
+ * - One credit is taken when NX request is issued.
+ * - This credit is returned after NX processed that request.
+ * - If NX encounters translation error, kernel will return the
+ * credit on the specific send window after processing the fault CRB.
+ *
+ * Fault window:
+ * - The total number credits available is FIFO_SIZE/CRB_SIZE.
+ * Means 4MB/128 in the current implementation. If credit is not
+ * available, RMA_Reject is returned.
+ * - A credit is taken when NX pastes CRB in fault FIFO.
+ * - The kernel with return credit on fault window after reading entry
+ * from fault FIFO.
+ */
+void vas_return_credit(struct vas_window *window, bool tx)
+{
+ uint64_t val;
+
+ val = 0ULL;
+ if (tx) { /* send window */
+ val = SET_FIELD(VAS_TX_WCRED, val, 1);
+ write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val);
+ } else {
+ val = SET_FIELD(VAS_LRX_WCRED, val, 1);
+ write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val);
+ }
+}
+
+struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+ uint32_t pswid)
+{
+ struct vas_window *window;
+ int winid;
+
+ if (!pswid) {
+ pr_devel("%s: called for pswid 0!\n", __func__);
+ return ERR_PTR(-ESRCH);
+ }
+
+ decode_pswid(pswid, NULL, &winid);
+
+ if (winid >= VAS_WINDOWS_PER_CHIP)
+ return ERR_PTR(-ESRCH);
+
+ /*
+ * If application closes the window before the hardware
+ * returns the fault CRB, we should wait in vas_win_close()
+ * for the pending requests. so the window must be active
+ * and the process alive.
+ *
+ * If its a kernel process, we should not get any faults and
+ * should not get here.
+ */
+ window = vinst->windows[winid];
+
+ if (!window) {
+ pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n",
+ winid, pswid, vinst);
+ return NULL;
+ }
+
+ /*
+ * Do some sanity checks on the decoded window. Window should be
+ * NX GZIP user send window. FTW windows should not incur faults
+ * since their CRBs are ignored (not queued on FIFO or processed
+ * by NX).
+ */
+ if (!window->tx_win || !window->user_win || !window->nx_win ||
+ window->cop == VAS_COP_TYPE_FAULT ||
+ window->cop == VAS_COP_TYPE_FTW) {
+ pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n",
+ winid, window->tx_win, window->user_win,
+ window->nx_win, window->cop);
+ WARN_ON(1);
+ }
+
+ return window;
+}
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index ed9cc6df329a..598e4cd563fb 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -14,7 +14,10 @@
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
#include <asm/prom.h>
+#include <asm/xive.h>
#include "vas.h"
@@ -23,12 +26,37 @@ static LIST_HEAD(vas_instances);
static DEFINE_PER_CPU(int, cpu_vas_id);
+static int vas_irq_fault_window_setup(struct vas_instance *vinst)
+{
+ char devname[64];
+ int rc = 0;
+
+ snprintf(devname, sizeof(devname), "vas-%d", vinst->vas_id);
+ rc = request_threaded_irq(vinst->virq, vas_fault_handler,
+ vas_fault_thread_fn, 0, devname, vinst);
+
+ if (rc) {
+ pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n",
+ vinst->vas_id, vinst->virq, rc);
+ goto out;
+ }
+
+ rc = vas_setup_fault_window(vinst);
+ if (rc)
+ free_irq(vinst->virq, vinst);
+
+out:
+ return rc;
+}
+
static int init_vas_instance(struct platform_device *pdev)
{
- int rc, cpu, vasid;
- struct resource *res;
- struct vas_instance *vinst;
struct device_node *dn = pdev->dev.of_node;
+ struct vas_instance *vinst;
+ struct xive_irq_data *xd;
+ uint32_t chipid, hwirq;
+ struct resource *res;
+ int rc, cpu, vasid;
rc = of_property_read_u32(dn, "ibm,vas-id", &vasid);
if (rc) {
@@ -36,6 +64,12 @@ static int init_vas_instance(struct platform_device *pdev)
return -ENODEV;
}
+ rc = of_property_read_u32(dn, "ibm,chip-id", &chipid);
+ if (rc) {
+ pr_err("No ibm,chip-id property for %s?\n", pdev->name);
+ return -ENODEV;
+ }
+
if (pdev->num_resources != 4) {
pr_err("Unexpected DT configuration for [%s, %d]\n",
pdev->name, vasid);
@@ -69,9 +103,32 @@ static int init_vas_instance(struct platform_device *pdev)
vinst->paste_win_id_shift = 63 - res->end;
- pr_devel("Initialized instance [%s, %d], paste_base 0x%llx, "
- "paste_win_id_shift 0x%llx\n", pdev->name, vasid,
- vinst->paste_base_addr, vinst->paste_win_id_shift);
+ hwirq = xive_native_alloc_irq_on_chip(chipid);
+ if (!hwirq) {
+ pr_err("Inst%d: Unable to allocate global irq for chip %d\n",
+ vinst->vas_id, chipid);
+ return -ENOENT;
+ }
+
+ vinst->virq = irq_create_mapping(NULL, hwirq);
+ if (!vinst->virq) {
+ pr_err("Inst%d: Unable to map global irq %d\n",
+ vinst->vas_id, hwirq);
+ return -EINVAL;
+ }
+
+ xd = irq_get_handler_data(vinst->virq);
+ if (!xd) {
+ pr_err("Inst%d: Invalid virq %d\n",
+ vinst->vas_id, vinst->virq);
+ return -EINVAL;
+ }
+
+ vinst->irq_port = xd->trig_page;
+ pr_devel("Initialized instance [%s, %d] paste_base 0x%llx paste_win_id_shift 0x%llx IRQ %d Port 0x%llx\n",
+ pdev->name, vasid, vinst->paste_base_addr,
+ vinst->paste_win_id_shift, vinst->virq,
+ vinst->irq_port);
for_each_possible_cpu(cpu) {
if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn))
@@ -82,6 +139,22 @@ static int init_vas_instance(struct platform_device *pdev)
list_add(&vinst->node, &vas_instances);
mutex_unlock(&vas_mutex);
+ spin_lock_init(&vinst->fault_lock);
+ /*
+ * IRQ and fault handling setup is needed only for user space
+ * send windows.
+ */
+ if (vinst->virq) {
+ rc = vas_irq_fault_window_setup(vinst);
+ /*
+ * Fault window is used only for user space send windows.
+ * So if vinst->virq is NULL, tx_win_open returns -ENODEV
+ * for user space.
+ */
+ if (rc)
+ vinst->virq = 0;
+ }
+
vas_instance_init_dbgdir(vinst);
dev_set_drvdata(&pdev->dev, vinst);
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index 5574aec9ee88..70f793e8f6cc 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -101,11 +101,9 @@
/*
* Initial per-process credits.
* Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED)
- * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED)
*
* TODO: Needs tuning for per-process credits
*/
-#define VAS_RX_WCREDS_MAX ((64 << 10) - 1)
#define VAS_TX_WCREDS_MAX ((4 << 10) - 1)
#define VAS_WCREDS_DEFAULT (1 << 10)
@@ -296,6 +294,22 @@ enum vas_notify_after_count {
};
/*
+ * NX can generate an interrupt for multiple faults and expects kernel
+ * to process all of them. So read all valid CRB entries until find the
+ * invalid one. So use pswid which is pasted by NX and ccw[0] (reserved
+ * bit in BE) to check valid CRB. CCW[0] will not be touched by user
+ * space. Application gets CRB formt error if it updates this bit.
+ *
+ * Invalidate FIFO during allocation and process all entries from last
+ * successful read until finds invalid pswid and ccw[0] values.
+ * After reading each CRB entry from fault FIFO, the kernel invalidate
+ * it by updating pswid with FIFO_INVALID_ENTRY and CCW[0] with
+ * CCW0_INVALID.
+ */
+#define FIFO_INVALID_ENTRY 0xffffffff
+#define CCW0_INVALID 1
+
+/*
* One per instance of VAS. Each instance will have a separate set of
* receive windows, one per coprocessor type.
*
@@ -313,6 +327,15 @@ struct vas_instance {
u64 paste_base_addr;
u64 paste_win_id_shift;
+ u64 irq_port;
+ int virq;
+ int fault_crbs;
+ int fault_fifo_size;
+ int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */
+ spinlock_t fault_lock; /* Protects fifo_in_progress update */
+ void *fault_fifo;
+ struct vas_window *fault_win; /* Fault window */
+
struct mutex mutex;
struct vas_window *rxwin[VAS_COP_TYPE_MAX];
struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
@@ -333,7 +356,9 @@ struct vas_window {
bool user_win; /* True if user space window */
void *hvwc_map; /* HV window context */
void *uwc_map; /* OS/User window context */
- pid_t pid; /* Linux process id of owner */
+ struct pid *pid; /* Linux process id of owner */
+ struct pid *tgid; /* Thread group ID of owner */
+ struct mm_struct *mm; /* Linux process mm_struct */
int wcreds_max; /* Window credits */
char *dbgname;
@@ -406,6 +431,19 @@ extern void vas_init_dbgdir(void);
extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
extern void vas_window_init_dbgdir(struct vas_window *win);
extern void vas_window_free_dbgdir(struct vas_window *win);
+extern int vas_setup_fault_window(struct vas_instance *vinst);
+extern irqreturn_t vas_fault_thread_fn(int irq, void *data);
+extern irqreturn_t vas_fault_handler(int irq, void *dev_id);
+extern void vas_return_credit(struct vas_window *window, bool tx);
+extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+ uint32_t pswid);
+extern void vas_win_paste_addr(struct vas_window *window, u64 *addr,
+ int *len);
+
+static inline int vas_window_pid(struct vas_window *window)
+{
+ return pid_vnr(window->pid);
+}
static inline void vas_log_write(struct vas_window *win, char *name,
void *regptr, u64 val)
@@ -444,6 +482,21 @@ static inline u64 read_hvwc_reg(struct vas_window *win,
return in_be64(win->hvwc_map+reg);
}
+/*
+ * Encode/decode the Partition Send Window ID (PSWID) for a window in
+ * a way that we can uniquely identify any window in the system. i.e.
+ * we should be able to locate the 'struct vas_window' given the PSWID.
+ *
+ * Bits Usage
+ * 0:7 VAS id (8 bits)
+ * 8:15 Unused, 0 (3 bits)
+ * 16:31 Window id (16 bits)
+ */
+static inline u32 encode_pswid(int vasid, int winid)
+{
+ return ((u32)winid | (vasid << (31 - 7)));
+}
+
static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
{
if (vasid)
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 423be34f0f5f..d094321964fb 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -200,13 +200,14 @@ void ps3_mm_vas_destroy(void)
{
int result;
- DBG("%s:%d: map.vas_id = %llu\n", __func__, __LINE__, map.vas_id);
-
if (map.vas_id) {
result = lv1_select_virtual_address_space(0);
- BUG_ON(result);
- result = lv1_destruct_virtual_address_space(map.vas_id);
- BUG_ON(result);
+ result += lv1_destruct_virtual_address_space(map.vas_id);
+
+ if (result) {
+ lv1_panic(0);
+ }
+
map.vas_id = 0;
}
}
@@ -263,7 +264,7 @@ static int ps3_mm_region_create(struct mem_region *r, unsigned long size)
int result;
u64 muid;
- r->size = _ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M);
+ r->size = ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M);
DBG("%s:%d requested %lxh\n", __func__, __LINE__, size);
DBG("%s:%d actual %llxh\n", __func__, __LINE__, r->size);
@@ -304,19 +305,20 @@ static void ps3_mm_region_destroy(struct mem_region *r)
int result;
if (!r->destroy) {
- pr_info("%s:%d: Not destroying high region: %llxh %llxh\n",
- __func__, __LINE__, r->base, r->size);
return;
}
- DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base);
-
if (r->base) {
result = lv1_release_memory(r->base);
- BUG_ON(result);
+
+ if (result) {
+ lv1_panic(0);
+ }
+
r->size = r->base = r->offset = 0;
map.total = map.rm.size;
}
+
ps3_mm_set_repository_highmem(NULL);
}
@@ -394,8 +396,8 @@ static struct dma_chunk * dma_find_chunk(struct ps3_dma_region *r,
unsigned long bus_addr, unsigned long len)
{
struct dma_chunk *c;
- unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, 1 << r->page_size);
- unsigned long aligned_len = _ALIGN_UP(len+bus_addr-aligned_bus,
+ unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size);
+ unsigned long aligned_len = ALIGN(len+bus_addr-aligned_bus,
1 << r->page_size);
list_for_each_entry(c, &r->chunk_list.head, link) {
@@ -423,8 +425,8 @@ static struct dma_chunk *dma_find_chunk_lpar(struct ps3_dma_region *r,
unsigned long lpar_addr, unsigned long len)
{
struct dma_chunk *c;
- unsigned long aligned_lpar = _ALIGN_DOWN(lpar_addr, 1 << r->page_size);
- unsigned long aligned_len = _ALIGN_UP(len + lpar_addr - aligned_lpar,
+ unsigned long aligned_lpar = ALIGN_DOWN(lpar_addr, 1 << r->page_size);
+ unsigned long aligned_len = ALIGN(len + lpar_addr - aligned_lpar,
1 << r->page_size);
list_for_each_entry(c, &r->chunk_list.head, link) {
@@ -775,8 +777,8 @@ static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr,
struct dma_chunk *c;
unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
: virt_addr;
- unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size);
- unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys,
+ unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size);
+ unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys,
1 << r->page_size);
*bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr));
@@ -830,8 +832,8 @@ static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr,
struct dma_chunk *c;
unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
: virt_addr;
- unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size);
- unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys,
+ unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size);
+ unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys,
1 << r->page_size);
DBG(KERN_ERR "%s: vaddr=%#lx, len=%#lx\n", __func__,
@@ -889,9 +891,9 @@ static int dma_sb_unmap_area(struct ps3_dma_region *r, dma_addr_t bus_addr,
c = dma_find_chunk(r, bus_addr, len);
if (!c) {
- unsigned long aligned_bus = _ALIGN_DOWN(bus_addr,
+ unsigned long aligned_bus = ALIGN_DOWN(bus_addr,
1 << r->page_size);
- unsigned long aligned_len = _ALIGN_UP(len + bus_addr
+ unsigned long aligned_len = ALIGN(len + bus_addr
- aligned_bus, 1 << r->page_size);
DBG("%s:%d: not found: bus_addr %llxh\n",
__func__, __LINE__, bus_addr);
@@ -926,9 +928,9 @@ static int dma_ioc0_unmap_area(struct ps3_dma_region *r,
c = dma_find_chunk(r, bus_addr, len);
if (!c) {
- unsigned long aligned_bus = _ALIGN_DOWN(bus_addr,
+ unsigned long aligned_bus = ALIGN_DOWN(bus_addr,
1 << r->page_size);
- unsigned long aligned_len = _ALIGN_UP(len + bus_addr
+ unsigned long aligned_len = ALIGN(len + bus_addr
- aligned_bus,
1 << r->page_size);
DBG("%s:%d: not found: bus_addr %llxh\n",
@@ -974,7 +976,7 @@ static int dma_sb_region_create_linear(struct ps3_dma_region *r)
pr_info("%s:%d: forcing 16M pages for linear map\n",
__func__, __LINE__);
r->page_size = PS3_DMA_16M;
- r->len = _ALIGN_UP(r->len, 1 << r->page_size);
+ r->len = ALIGN(r->len, 1 << r->page_size);
}
}
@@ -1125,7 +1127,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev,
r->offset = lpar_addr;
if (r->offset >= map.rm.size)
r->offset -= map.r1.offset;
- r->len = len ? len : _ALIGN_UP(map.total, 1 << r->page_size);
+ r->len = len ? len : ALIGN(map.total, 1 << r->page_size);
switch (dev->dev_type) {
case PS3_DEVICE_TYPE_SB:
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index b29368931c56..e9ae5dd03593 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -138,7 +138,7 @@ static int __init early_parse_ps3fb(char *p)
if (!p)
return 1;
- ps3fb_videomemory.size = _ALIGN_UP(memparse(p, &p),
+ ps3fb_videomemory.size = ALIGN(memparse(p, &p),
ps3fb_videomemory.align);
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 845342814edc..ace117f99d94 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -664,6 +664,8 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
if (!ret)
return ret;
+ if (ret < 0)
+ break;
/*
* If RTAS returns a delay value that's above 100ms, cut it
@@ -684,7 +686,11 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
__func__, pe->phb->global_number, pe->addr, ret);
- return ret;
+ /* PAPR defines -3 as "Parameter Error" for this function: */
+ if (ret == -3)
+ return -EINVAL;
+ else
+ return -EIO;
}
/**
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index b91eb0929ed1..a6f101c958e8 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -47,6 +47,7 @@
#include <linux/stat.h>
#include <linux/of_platform.h>
#include <asm/ibmebus.h>
+#include <asm/machdep.h>
static struct device ibmebus_bus_device = { /* fake "parent" device */
.init_name = "ibmebus",
@@ -464,4 +465,4 @@ static int __init ibmebus_bus_init(void)
return 0;
}
-postcore_initcall(ibmebus_bus_init);
+machine_postcore_initcall(pseries, ibmebus_bus_init);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index b571285f6c14..10d982997736 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -371,6 +371,9 @@ void post_mobility_fixup(void)
/* Possibly switch to a new RFI flush type */
pseries_setup_rfi_flush();
+ /* Reinitialise system information for hv-24x7 */
+ read_24x7_sys_info();
+
return;
}
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 1d1da639b8b7..f3736fcd98fc 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -395,16 +395,31 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
/*
* Some versions of FWNMI place the buffer inside the 4kB page starting at
* 0x7000. Other versions place it inside the rtas buffer. We check both.
+ * Minimum size of the buffer is 16 bytes.
*/
#define VALID_FWNMI_BUFFER(A) \
- ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
- (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))
+ ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
+ (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
static inline struct rtas_error_log *fwnmi_get_errlog(void)
{
return (struct rtas_error_log *)local_paca->mce_data_buf;
}
+static __be64 *fwnmi_get_savep(struct pt_regs *regs)
+{
+ unsigned long savep_ra;
+
+ /* Mask top two bits */
+ savep_ra = regs->gpr[3] & ~(0x3UL << 62);
+ if (!VALID_FWNMI_BUFFER(savep_ra)) {
+ printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
+ return NULL;
+ }
+
+ return __va(savep_ra);
+}
+
/*
* Get the error information for errors coming through the
* FWNMI vectors. The pt_regs' r3 will be updated to reflect
@@ -422,19 +437,14 @@ static inline struct rtas_error_log *fwnmi_get_errlog(void)
*/
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
- unsigned long *savep;
struct rtas_error_log *h;
+ __be64 *savep;
- /* Mask top two bits */
- regs->gpr[3] &= ~(0x3UL << 62);
-
- if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
- printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
+ savep = fwnmi_get_savep(regs);
+ if (!savep)
return NULL;
- }
- savep = __va(regs->gpr[3]);
- regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
+ regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
h = (struct rtas_error_log *)&savep[1];
/* Use the per cpu buffer from paca to store rtas error log */
@@ -458,7 +468,15 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
*/
static void fwnmi_release_errinfo(void)
{
- int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
+ struct rtas_args rtas_args;
+ int ret;
+
+ /*
+ * On pseries, the machine check stack is limited to under 4GB, so
+ * args can be on-stack.
+ */
+ rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
+ ret = be32_to_cpu(rtas_args.rets[0]);
if (ret != 0)
printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
}
@@ -481,11 +499,21 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
#endif
if (fwnmi_active) {
- struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
- if (errhdr) {
- /* XXX Should look at FWNMI information */
- }
- fwnmi_release_errinfo();
+ __be64 *savep;
+
+ /*
+ * Firmware (PowerVM and KVM) saves r3 to a save area like
+ * machine check, which is not exactly what PAPR (2.9)
+ * suggests but there is no way to detect otherwise, so this
+ * is the interface now.
+ *
+ * System resets do not save any error log or require an
+ * "ibm,nmi-interlock" rtas call to release.
+ */
+
+ savep = fwnmi_get_savep(regs);
+ if (savep)
+ regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
}
if (smp_handle_nmi_ipi(regs))
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
index 70c3013fdd07..81343908ed33 100644
--- a/arch/powerpc/platforms/pseries/rtas-fadump.c
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -506,7 +506,7 @@ void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
fadump_conf->fadump_supported = 1;
/* Firmware supports 64-bit value for size, align it to pagesize. */
- fadump_conf->max_copy_size = _ALIGN_DOWN(U64_MAX, PAGE_SIZE);
+ fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE);
/*
* The 'ibm,kernel-dump' rtas node is present only if there is
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 0c8421dd01ab..64d18f4bf093 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -68,6 +68,7 @@
#include <asm/isa-bridge.h>
#include <asm/security_features.h>
#include <asm/asm-const.h>
+#include <asm/idle.h>
#include <asm/swiotlb.h>
#include <asm/svm.h>
@@ -83,6 +84,7 @@ unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
EXPORT_SYMBOL(CMO_PageSize);
int fwnmi_active; /* TRUE if an FWNMI handler is present */
+int ibm_nmi_interlock_token;
static void pSeries_show_cpuinfo(struct seq_file *m)
{
@@ -113,9 +115,14 @@ static void __init fwnmi_init(void)
struct slb_entry *slb_ptr;
size_t size;
#endif
+ int ibm_nmi_register_token;
- int ibm_nmi_register = rtas_token("ibm,nmi-register");
- if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
+ ibm_nmi_register_token = rtas_token("ibm,nmi-register");
+ if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE)
+ return;
+
+ ibm_nmi_interlock_token = rtas_token("ibm,nmi-interlock");
+ if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE))
return;
/* If the kernel's not linked at zero we point the firmware at low
@@ -123,8 +130,8 @@ static void __init fwnmi_init(void)
system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START;
machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;
- if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr,
- machine_check_addr))
+ if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL,
+ system_reset_addr, machine_check_addr))
fwnmi_active = 1;
/*
@@ -317,6 +324,9 @@ static int alloc_dispatch_log_kmem_cache(void)
}
machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
+DEFINE_PER_CPU(u64, idle_spurr_cycles);
+DEFINE_PER_CPU(u64, idle_entry_purr_snap);
+DEFINE_PER_CPU(u64, idle_entry_spurr_snap);
static void pseries_lpar_idle(void)
{
/*
@@ -328,7 +338,7 @@ static void pseries_lpar_idle(void)
return;
/* Indicate to hypervisor that we are idle. */
- get_lppaca()->idle = 1;
+ pseries_idle_prolog();
/*
* Yield the processor to the hypervisor. We return if
@@ -339,7 +349,7 @@ static void pseries_lpar_idle(void)
*/
cede_processor();
- get_lppaca()->idle = 0;
+ pseries_idle_epilog();
}
/*
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 37f1f25ba804..0487b26f6f1a 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -31,6 +31,7 @@
#include <asm/tce.h>
#include <asm/page.h>
#include <asm/hvcall.h>
+#include <asm/machdep.h>
static struct vio_dev vio_bus_device = { /* fake "parent" device */
.name = "vio",
@@ -1513,7 +1514,7 @@ static int __init vio_bus_init(void)
return 0;
}
-postcore_initcall(vio_bus_init);
+machine_postcore_initcall(pseries, vio_bus_init);
static int __init vio_device_init(void)
{
@@ -1522,7 +1523,7 @@ static int __init vio_device_init(void)
return 0;
}
-device_initcall(vio_device_init);
+machine_device_initcall(pseries, vio_device_init);
static ssize_t name_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -1703,4 +1704,4 @@ static int __init vio_init(void)
dma_debug_add_bus(&vio_bus_type);
return 0;
}
-fs_initcall(vio_init);
+machine_fs_initcall(pseries, vio_init);