summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv/pci-ioda.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv/pci-ioda.c')
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c789
1 files changed, 694 insertions, 95 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index de19edeaa7a7..468a0f23c7f2 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -36,41 +36,44 @@
#include <asm/tce.h>
#include <asm/xics.h>
#include <asm/debug.h>
+#include <asm/firmware.h>
+#include <asm/pnv-pci.h>
+
+#include <misc/cxl.h>
#include "powernv.h"
#include "pci.h"
-#define define_pe_printk_level(func, kern_level) \
-static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
-{ \
- struct va_format vaf; \
- va_list args; \
- char pfix[32]; \
- int r; \
- \
- va_start(args, fmt); \
- \
- vaf.fmt = fmt; \
- vaf.va = &args; \
- \
- if (pe->pdev) \
- strlcpy(pfix, dev_name(&pe->pdev->dev), \
- sizeof(pfix)); \
- else \
- sprintf(pfix, "%04x:%02x ", \
- pci_domain_nr(pe->pbus), \
- pe->pbus->number); \
- r = printk(kern_level "pci %s: [PE# %.3d] %pV", \
- pfix, pe->pe_number, &vaf); \
- \
- va_end(args); \
- \
- return r; \
-} \
-
-define_pe_printk_level(pe_err, KERN_ERR);
-define_pe_printk_level(pe_warn, KERN_WARNING);
-define_pe_printk_level(pe_info, KERN_INFO);
+static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ char pfix[32];
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ if (pe->pdev)
+ strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
+ else
+ sprintf(pfix, "%04x:%02x ",
+ pci_domain_nr(pe->pbus), pe->pbus->number);
+
+ printk("%spci %s: [PE# %.3d] %pV",
+ level, pfix, pe->pe_number, &vaf);
+
+ va_end(args);
+}
+
+#define pe_err(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
+#define pe_warn(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define pe_info(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
/*
* stdcix is only supposed to be used in hypervisor real mode as per
@@ -82,6 +85,12 @@ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
: : "r" (val), "r" (paddr) : "memory");
}
+static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+{
+ return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
+ (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+}
+
static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
unsigned long pe;
@@ -106,6 +115,380 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
clear_bit(pe, phb->ioda.pe_alloc);
}
+/* The default M64 BAR is shared by all PEs */
+static int pnv_ioda2_init_m64(struct pnv_phb *phb)
+{
+ const char *desc;
+ struct resource *r;
+ s64 rc;
+
+ /* Configure the default M64 BAR */
+ rc = opal_pci_set_phb_mem_window(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE,
+ phb->ioda.m64_bar_idx,
+ phb->ioda.m64_base,
+ 0, /* unused */
+ phb->ioda.m64_size);
+ if (rc != OPAL_SUCCESS) {
+ desc = "configuring";
+ goto fail;
+ }
+
+ /* Enable the default M64 BAR */
+ rc = opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE,
+ phb->ioda.m64_bar_idx,
+ OPAL_ENABLE_M64_SPLIT);
+ if (rc != OPAL_SUCCESS) {
+ desc = "enabling";
+ goto fail;
+ }
+
+ /* Mark the M64 BAR assigned */
+ set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc);
+
+ /*
+ * Strip off the segment used by the reserved PE, which is
+ * expected to be 0 or last one of PE capabicity.
+ */
+ r = &phb->hose->mem_resources[1];
+ if (phb->ioda.reserved_pe == 0)
+ r->start += phb->ioda.m64_segsize;
+ else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
+ r->end -= phb->ioda.m64_segsize;
+ else
+ pr_warn(" Cannot strip M64 segment for reserved PE#%d\n",
+ phb->ioda.reserved_pe);
+
+ return 0;
+
+fail:
+ pr_warn(" Failure %lld %s M64 BAR#%d\n",
+ rc, desc, phb->ioda.m64_bar_idx);
+ opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE,
+ phb->ioda.m64_bar_idx,
+ OPAL_DISABLE_M64);
+ return -EIO;
+}
+
+static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb)
+{
+ resource_size_t sgsz = phb->ioda.m64_segsize;
+ struct pci_dev *pdev;
+ struct resource *r;
+ int base, step, i;
+
+ /*
+ * Root bus always has full M64 range and root port has
+ * M64 range used in reality. So we're checking root port
+ * instead of root bus.
+ */
+ list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
+ for (i = PCI_BRIDGE_RESOURCES;
+ i <= PCI_BRIDGE_RESOURCE_END; i++) {
+ r = &pdev->resource[i];
+ if (!r->parent ||
+ !pnv_pci_is_mem_pref_64(r->flags))
+ continue;
+
+ base = (r->start - phb->ioda.m64_base) / sgsz;
+ for (step = 0; step < resource_size(r) / sgsz; step++)
+ set_bit(base + step, phb->ioda.pe_alloc);
+ }
+ }
+}
+
+static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
+ struct pci_bus *bus, int all)
+{
+ resource_size_t segsz = phb->ioda.m64_segsize;
+ struct pci_dev *pdev;
+ struct resource *r;
+ struct pnv_ioda_pe *master_pe, *pe;
+ unsigned long size, *pe_alloc;
+ bool found;
+ int start, i, j;
+
+ /* Root bus shouldn't use M64 */
+ if (pci_is_root_bus(bus))
+ return IODA_INVALID_PE;
+
+ /* We support only one M64 window on each bus */
+ found = false;
+ pci_bus_for_each_resource(bus, r, i) {
+ if (r && r->parent &&
+ pnv_pci_is_mem_pref_64(r->flags)) {
+ found = true;
+ break;
+ }
+ }
+
+ /* No M64 window found ? */
+ if (!found)
+ return IODA_INVALID_PE;
+
+ /* Allocate bitmap */
+ size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+ pe_alloc = kzalloc(size, GFP_KERNEL);
+ if (!pe_alloc) {
+ pr_warn("%s: Out of memory !\n",
+ __func__);
+ return IODA_INVALID_PE;
+ }
+
+ /*
+ * Figure out reserved PE numbers by the PE
+ * the its child PEs.
+ */
+ start = (r->start - phb->ioda.m64_base) / segsz;
+ for (i = 0; i < resource_size(r) / segsz; i++)
+ set_bit(start + i, pe_alloc);
+
+ if (all)
+ goto done;
+
+ /*
+ * If the PE doesn't cover all subordinate buses,
+ * we need subtract from reserved PEs for children.
+ */
+ list_for_each_entry(pdev, &bus->devices, bus_list) {
+ if (!pdev->subordinate)
+ continue;
+
+ pci_bus_for_each_resource(pdev->subordinate, r, i) {
+ if (!r || !r->parent ||
+ !pnv_pci_is_mem_pref_64(r->flags))
+ continue;
+
+ start = (r->start - phb->ioda.m64_base) / segsz;
+ for (j = 0; j < resource_size(r) / segsz ; j++)
+ clear_bit(start + j, pe_alloc);
+ }
+ }
+
+ /*
+ * the current bus might not own M64 window and that's all
+ * contributed by its child buses. For the case, we needn't
+ * pick M64 dependent PE#.
+ */
+ if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
+ kfree(pe_alloc);
+ return IODA_INVALID_PE;
+ }
+
+ /*
+ * Figure out the master PE and put all slave PEs to master
+ * PE's list to form compound PE.
+ */
+done:
+ master_pe = NULL;
+ i = -1;
+ while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
+ phb->ioda.total_pe) {
+ pe = &phb->ioda.pe_array[i];
+ pe->phb = phb;
+ pe->pe_number = i;
+
+ if (!master_pe) {
+ pe->flags |= PNV_IODA_PE_MASTER;
+ INIT_LIST_HEAD(&pe->slaves);
+ master_pe = pe;
+ } else {
+ pe->flags |= PNV_IODA_PE_SLAVE;
+ pe->master = master_pe;
+ list_add_tail(&pe->list, &master_pe->slaves);
+ }
+ }
+
+ kfree(pe_alloc);
+ return master_pe->pe_number;
+}
+
+static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
+{
+ struct pci_controller *hose = phb->hose;
+ struct device_node *dn = hose->dn;
+ struct resource *res;
+ const u32 *r;
+ u64 pci_addr;
+
+ if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+ pr_info(" Firmware too old to support M64 window\n");
+ return;
+ }
+
+ r = of_get_property(dn, "ibm,opal-m64-window", NULL);
+ if (!r) {
+ pr_info(" No <ibm,opal-m64-window> on %s\n",
+ dn->full_name);
+ return;
+ }
+
+ /* FIXME: Support M64 for P7IOC */
+ if (phb->type != PNV_PHB_IODA2) {
+ pr_info(" Not support M64 window\n");
+ return;
+ }
+
+ res = &hose->mem_resources[1];
+ res->start = of_translate_address(dn, r + 2);
+ res->end = res->start + of_read_number(r + 4, 2) - 1;
+ res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+ pci_addr = of_read_number(r, 2);
+ hose->mem_offset[1] = res->start - pci_addr;
+
+ phb->ioda.m64_size = resource_size(res);
+ phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
+ phb->ioda.m64_base = pci_addr;
+
+ /* Use last M64 BAR to cover M64 window */
+ phb->ioda.m64_bar_idx = 15;
+ phb->init_m64 = pnv_ioda2_init_m64;
+ phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe;
+ phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
+}
+
+static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
+{
+ struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no];
+ struct pnv_ioda_pe *slave;
+ s64 rc;
+
+ /* Fetch master PE */
+ if (pe->flags & PNV_IODA_PE_SLAVE) {
+ pe = pe->master;
+ WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+ pe_no = pe->pe_number;
+ }
+
+ /* Freeze master PE */
+ rc = opal_pci_eeh_freeze_set(phb->opal_id,
+ pe_no,
+ OPAL_EEH_ACTION_SET_FREEZE_ALL);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+ __func__, rc, phb->hose->global_number, pe_no);
+ return;
+ }
+
+ /* Freeze slave PEs */
+ if (!(pe->flags & PNV_IODA_PE_MASTER))
+ return;
+
+ list_for_each_entry(slave, &pe->slaves, list) {
+ rc = opal_pci_eeh_freeze_set(phb->opal_id,
+ slave->pe_number,
+ OPAL_EEH_ACTION_SET_FREEZE_ALL);
+ if (rc != OPAL_SUCCESS)
+ pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+ __func__, rc, phb->hose->global_number,
+ slave->pe_number);
+ }
+}
+
+static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt)
+{
+ struct pnv_ioda_pe *pe, *slave;
+ s64 rc;
+
+ /* Find master PE */
+ pe = &phb->ioda.pe_array[pe_no];
+ if (pe->flags & PNV_IODA_PE_SLAVE) {
+ pe = pe->master;
+ WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+ pe_no = pe->pe_number;
+ }
+
+ /* Clear frozen state for master PE */
+ rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+ __func__, rc, opt, phb->hose->global_number, pe_no);
+ return -EIO;
+ }
+
+ if (!(pe->flags & PNV_IODA_PE_MASTER))
+ return 0;
+
+ /* Clear frozen state for slave PEs */
+ list_for_each_entry(slave, &pe->slaves, list) {
+ rc = opal_pci_eeh_freeze_clear(phb->opal_id,
+ slave->pe_number,
+ opt);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+ __func__, rc, opt, phb->hose->global_number,
+ slave->pe_number);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
+{
+ struct pnv_ioda_pe *slave, *pe;
+ u8 fstate, state;
+ __be16 pcierr;
+ s64 rc;
+
+ /* Sanity check on PE number */
+ if (pe_no < 0 || pe_no >= phb->ioda.total_pe)
+ return OPAL_EEH_STOPPED_PERM_UNAVAIL;
+
+ /*
+ * Fetch the master PE and the PE instance might be
+ * not initialized yet.
+ */
+ pe = &phb->ioda.pe_array[pe_no];
+ if (pe->flags & PNV_IODA_PE_SLAVE) {
+ pe = pe->master;
+ WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+ pe_no = pe->pe_number;
+ }
+
+ /* Check the master PE */
+ rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+ &state, &pcierr, NULL);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld getting "
+ "PHB#%x-PE#%x state\n",
+ __func__, rc,
+ phb->hose->global_number, pe_no);
+ return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+ }
+
+ /* Check the slave PE */
+ if (!(pe->flags & PNV_IODA_PE_MASTER))
+ return state;
+
+ list_for_each_entry(slave, &pe->slaves, list) {
+ rc = opal_pci_eeh_freeze_status(phb->opal_id,
+ slave->pe_number,
+ &fstate,
+ &pcierr,
+ NULL);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld getting "
+ "PHB#%x-PE#%x state\n",
+ __func__, rc,
+ phb->hose->global_number, slave->pe_number);
+ return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+ }
+
+ /*
+ * Override the result based on the ascending
+ * priority.
+ */
+ if (fstate > state)
+ state = fstate;
+ }
+
+ return state;
+}
+
/* Currently those 2 are only used when MSIs are enabled, this will change
* but in the meantime, we need to protect them to avoid warnings
*/
@@ -363,9 +746,16 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
struct pnv_ioda_pe *pe;
- int pe_num;
+ int pe_num = IODA_INVALID_PE;
+
+ /* Check if PE is determined by M64 */
+ if (phb->pick_m64_pe)
+ pe_num = phb->pick_m64_pe(phb, bus, all);
+
+ /* The PE number isn't pinned by M64 */
+ if (pe_num == IODA_INVALID_PE)
+ pe_num = pnv_ioda_alloc_pe(phb);
- pe_num = pnv_ioda_alloc_pe(phb);
if (pe_num == IODA_INVALID_PE) {
pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
__func__, pci_domain_nr(bus), bus->number);
@@ -373,7 +763,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
}
pe = &phb->ioda.pe_array[pe_num];
- pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
+ pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
pe->pbus = bus;
pe->pdev = NULL;
pe->tce32_seg = -1;
@@ -441,8 +831,15 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus)
static void pnv_pci_ioda_setup_PEs(void)
{
struct pci_controller *hose, *tmp;
+ struct pnv_phb *phb;
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ /* M64 layout might affect PE allocation */
+ if (phb->alloc_m64_pe)
+ phb->alloc_m64_pe(phb);
+
pnv_ioda_setup_PEs(hose->bus);
}
}
@@ -462,7 +859,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+ set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
}
static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
@@ -491,17 +888,48 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
set_dma_ops(&pdev->dev, &dma_iommu_ops);
set_iommu_table_base(&pdev->dev, &pe->tce32_table);
}
+ *pdev->dev.dma_mask = dma_mask;
return 0;
}
-static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
+static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb,
+ struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ struct pnv_ioda_pe *pe;
+ u64 end, mask;
+
+ if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+ return 0;
+
+ pe = &phb->ioda.pe_array[pdn->pe_number];
+ if (!pe->tce_bypass_enabled)
+ return __dma_get_required_mask(&pdev->dev);
+
+
+ end = pe->tce_bypass_base + memblock_end_of_DRAM();
+ mask = 1ULL << (fls64(end) - 1);
+ mask += mask - 1;
+
+ return mask;
+}
+
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
+ struct pci_bus *bus,
+ bool add_to_iommu_group)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
- set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
+ if (add_to_iommu_group)
+ set_iommu_table_base_and_group(&dev->dev,
+ &pe->tce32_table);
+ else
+ set_iommu_table_base(&dev->dev, &pe->tce32_table);
+
if (dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+ pnv_ioda_setup_bus_dma(pe, dev->subordinate,
+ add_to_iommu_group);
}
}
@@ -513,15 +941,16 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
(__be64 __iomem *)pe->tce_inval_reg_phys :
(__be64 __iomem *)tbl->it_index;
unsigned long start, end, inc;
+ const unsigned shift = tbl->it_page_shift;
start = __pa(startp);
end = __pa(endp);
/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
if (tbl->it_busno) {
- start <<= 12;
- end <<= 12;
- inc = 128 << 12;
+ start <<= shift;
+ end <<= shift;
+ inc = 128ull << shift;
start |= tbl->it_busno;
end |= tbl->it_busno;
} else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
@@ -559,18 +988,19 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
__be64 __iomem *invalidate = rm ?
(__be64 __iomem *)pe->tce_inval_reg_phys :
(__be64 __iomem *)tbl->it_index;
+ const unsigned shift = tbl->it_page_shift;
/* We'll invalidate DMA address in PE scope */
- start = 0x2ul << 60;
+ start = 0x2ull << 60;
start |= (pe->pe_number & 0xFF);
end = start;
/* Figure out the start, end and step */
inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
- start |= (inc << 12);
+ start |= (inc << shift);
inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
- end |= (inc << 12);
- inc = (0x1ul << 12);
+ end |= (inc << shift);
+ inc = (0x1ull << shift);
mb();
while (start <= end) {
@@ -654,7 +1084,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
/* Setup linux iommu table */
tbl = &pe->tce32_table;
pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
- base << 28);
+ base << 28, IOMMU_PAGE_SHIFT_4K);
/* OPAL variant of P7IOC SW invalidated TCEs */
swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
@@ -677,7 +1107,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
if (pe->pdev)
set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
else
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
return;
fail:
@@ -713,11 +1143,15 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
0);
/*
- * We might want to reset the DMA ops of all devices on
- * this PE. However in theory, that shouldn't be necessary
- * as this is used for VFIO/KVM pass-through and the device
- * hasn't yet been returned to its kernel driver
+ * EEH needs the mapping between IOMMU table and group
+ * of those VFIO/KVM pass-through devices. We can postpone
+ * resetting DMA ops until the DMA mask is configured in
+ * host side.
*/
+ if (pe->pdev)
+ set_iommu_table_base(&pe->pdev->dev, tbl);
+ else
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
if (rc)
pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
@@ -784,7 +1218,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
/* Setup linux iommu table */
tbl = &pe->tce32_table;
- pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);
+ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
+ IOMMU_PAGE_SHIFT_4K);
/* OPAL variant of PHB3 invalidated TCEs */
swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
@@ -805,7 +1240,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
if (pe->pdev)
set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
else
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
/* Also create a bypass window */
pnv_pci_ioda2_setup_bypass_pe(phb, pe);
@@ -895,14 +1330,186 @@ static void pnv_ioda2_msi_eoi(struct irq_data *d)
icp_native_eoi(d);
}
+
+static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
+{
+ struct irq_data *idata;
+ struct irq_chip *ichip;
+
+ if (phb->type != PNV_PHB_IODA2)
+ return;
+
+ if (!phb->ioda.irq_chip_init) {
+ /*
+ * First time we setup an MSI IRQ, we need to setup the
+ * corresponding IRQ chip to route correctly.
+ */
+ idata = irq_get_irq_data(virq);
+ ichip = irq_data_get_irq_chip(idata);
+ phb->ioda.irq_chip_init = 1;
+ phb->ioda.irq_chip = *ichip;
+ phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
+ }
+ irq_set_chip(virq, &phb->ioda.irq_chip);
+}
+
+#ifdef CONFIG_CXL_BASE
+
+struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ return hose->dn;
+}
+EXPORT_SYMBOL(pnv_pci_to_phb_node);
+
+int pnv_phb_to_cxl(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct pnv_ioda_pe *pe;
+ int rc;
+
+ pe = pnv_ioda_get_pe(dev);
+ if (!pe)
+ return -ENODEV;
+
+ pe_info(pe, "Switching PHB to CXL\n");
+
+ rc = opal_pci_set_phb_cxl_mode(phb->opal_id, 1, pe->pe_number);
+ if (rc)
+ dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc);
+
+ return rc;
+}
+EXPORT_SYMBOL(pnv_phb_to_cxl);
+
+/* Find PHB for cxl dev and allocate MSI hwirqs?
+ * Returns the absolute hardware IRQ number
+ */
+int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num);
+
+ if (hwirq < 0) {
+ dev_warn(&dev->dev, "Failed to find a free MSI\n");
+ return -ENOSPC;
+ }
+
+ return phb->msi_base + hwirq;
+}
+EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs);
+
+void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+
+ msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num);
+}
+EXPORT_SYMBOL(pnv_cxl_release_hwirqs);
+
+void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
+ struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ int i, hwirq;
+
+ for (i = 1; i < CXL_IRQ_RANGES; i++) {
+ if (!irqs->range[i])
+ continue;
+ pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n",
+ i, irqs->offset[i],
+ irqs->range[i]);
+ hwirq = irqs->offset[i] - phb->msi_base;
+ msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq,
+ irqs->range[i]);
+ }
+}
+EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges);
+
+int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
+ struct pci_dev *dev, int num)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ int i, hwirq, try;
+
+ memset(irqs, 0, sizeof(struct cxl_irq_ranges));
+
+ /* 0 is reserved for the multiplexed PSL DSI interrupt */
+ for (i = 1; i < CXL_IRQ_RANGES && num; i++) {
+ try = num;
+ while (try) {
+ hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try);
+ if (hwirq >= 0)
+ break;
+ try /= 2;
+ }
+ if (!try)
+ goto fail;
+
+ irqs->offset[i] = phb->msi_base + hwirq;
+ irqs->range[i] = try;
+ pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n",
+ i, irqs->offset[i], irqs->range[i]);
+ num -= try;
+ }
+ if (num)
+ goto fail;
+
+ return 0;
+fail:
+ pnv_cxl_release_hwirq_ranges(irqs, dev);
+ return -ENOSPC;
+}
+EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges);
+
+int pnv_cxl_get_irq_count(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+
+ return phb->msi_bmp.irq_count;
+}
+EXPORT_SYMBOL(pnv_cxl_get_irq_count);
+
+int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
+ unsigned int virq)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ unsigned int xive_num = hwirq - phb->msi_base;
+ struct pnv_ioda_pe *pe;
+ int rc;
+
+ if (!(pe = pnv_ioda_get_pe(dev)))
+ return -ENODEV;
+
+ /* Assign XIVE to PE */
+ rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
+ if (rc) {
+ pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x "
+ "hwirq 0x%x XIVE 0x%x PE\n",
+ pci_name(dev), rc, phb->msi_base, hwirq, xive_num);
+ return -EIO;
+ }
+ set_msi_irq_chip(phb, virq);
+
+ return 0;
+}
+EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup);
+#endif
+
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg)
{
struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
struct pci_dn *pdn = pci_get_pdn(dev);
- struct irq_data *idata;
- struct irq_chip *ichip;
unsigned int xive_num = hwirq - phb->msi_base;
__be32 data;
int rc;
@@ -954,22 +1561,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
}
msg->data = be32_to_cpu(data);
- /*
- * Change the IRQ chip for the MSI interrupts on PHB3.
- * The corresponding IRQ chip should be populated for
- * the first time.
- */
- if (phb->type == PNV_PHB_IODA2) {
- if (!phb->ioda.irq_chip_init) {
- idata = irq_get_irq_data(virq);
- ichip = irq_data_get_irq_chip(idata);
- phb->ioda.irq_chip_init = 1;
- phb->ioda.irq_chip = *ichip;
- phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
- }
-
- irq_set_chip(virq, &phb->ioda.irq_chip);
- }
+ set_msi_irq_chip(phb, virq);
pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
" address=%x_%08x data=%x PE# %d\n",
@@ -1055,9 +1647,6 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
index++;
}
} else if (res->flags & IORESOURCE_MEM) {
- /* WARNING: Assumes M32 is mem region 0 in PHB. We need to
- * harden that algorithm when we start supporting M64
- */
region.start = res->start -
hose->mem_offset[0] -
phb->ioda.m32_pci_base;
@@ -1141,9 +1730,8 @@ static void pnv_pci_ioda_fixup(void)
pnv_pci_ioda_create_dbgfs();
#ifdef CONFIG_EEH
- eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
- eeh_addr_cache_build();
eeh_init();
+ eeh_addr_cache_build();
#endif
}
@@ -1178,7 +1766,10 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
bridge = bridge->bus->self;
}
- /* We need support prefetchable memory window later */
+ /* We fail back to M32 if M64 isn't supported */
+ if (phb->ioda.m64_segsize &&
+ pnv_pci_is_mem_pref_64(type))
+ return phb->ioda.m64_segsize;
if (type & IORESOURCE_MEM)
return phb->ioda.m32_segsize;
@@ -1217,12 +1808,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
- opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
+ opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
OPAL_ASSERT_RESET);
}
-void __init pnv_pci_init_ioda_phb(struct device_node *np,
- u64 hub_id, int ioda_type)
+static void __init pnv_pci_init_ioda_phb(struct device_node *np,
+ u64 hub_id, int ioda_type)
{
struct pci_controller *hose;
struct pnv_phb *phb;
@@ -1299,6 +1890,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
if (prop32)
phb->ioda.reserved_pe = be32_to_cpup(prop32);
+
+ /* Parse 64-bit MMIO range */
+ pnv_ioda_parse_m64_window(phb);
+
phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
/* FW Has already off top 64k of M32 space (MSI space) */
phb->ioda.m32_size += 0x10000;
@@ -1334,14 +1929,6 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Calculate how many 32-bit TCE segments we have */
phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
- /* Clear unusable m64 */
- hose->mem_resources[1].flags = 0;
- hose->mem_resources[1].start = 0;
- hose->mem_resources[1].end = 0;
- hose->mem_resources[2].flags = 0;
- hose->mem_resources[2].start = 0;
- hose->mem_resources[2].end = 0;
-
#if 0 /* We should really do that ... */
rc = opal_pci_set_phb_mem_window(opal->phb_id,
window_type,
@@ -1351,14 +1938,21 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
segment_size);
#endif
- pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]"
- " IO: 0x%x [segment=0x%x]\n",
- phb->ioda.total_pe,
- phb->ioda.reserved_pe,
- phb->ioda.m32_size, phb->ioda.m32_segsize,
- phb->ioda.io_size, phb->ioda.io_segsize);
+ pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
+ phb->ioda.total_pe, phb->ioda.reserved_pe,
+ phb->ioda.m32_size, phb->ioda.m32_segsize);
+ if (phb->ioda.m64_size)
+ pr_info(" M64: 0x%lx [segment=0x%lx]\n",
+ phb->ioda.m64_size, phb->ioda.m64_segsize);
+ if (phb->ioda.io_size)
+ pr_info(" IO: 0x%x [segment=0x%x]\n",
+ phb->ioda.io_size, phb->ioda.io_segsize);
+
phb->hose->ops = &pnv_pci_ops;
+ phb->get_pe_state = pnv_ioda_get_pe_state;
+ phb->freeze_pe = pnv_ioda_freeze_pe;
+ phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
#ifdef CONFIG_EEH
phb->eeh_ops = &ioda_eeh_ops;
#endif
@@ -1369,6 +1963,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
+ phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;
/* Setup shutdown function for kexec */
phb->shutdown = pnv_pci_ioda_shutdown;
@@ -1390,7 +1985,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
pci_add_flags(PCI_REASSIGN_ALL_RSRC);
/* Reset IODA tables to a clean state */
- rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
+ rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
if (rc)
pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
@@ -1404,6 +1999,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
}
+
+ /* Configure M64 window */
+ if (phb->init_m64 && phb->init_m64(phb))
+ hose->mem_resources[1].flags = 0;
}
void __init pnv_pci_init_ioda2_phb(struct device_node *np)