summaryrefslogtreecommitdiff
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/amd_iommu.c477
-rw-r--r--drivers/iommu/amd_iommu_init.c4
-rw-r--r--drivers/iommu/amd_iommu_proto.h9
-rw-r--r--drivers/iommu/amd_iommu_types.h5
-rw-r--r--drivers/iommu/dmar.c19
-rw-r--r--drivers/iommu/intel_irq_remapping.c648
-rw-r--r--drivers/iommu/irq_remapping.c253
-rw-r--r--drivers/iommu/irq_remapping.h42
8 files changed, 711 insertions, 746 deletions
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index e1c7e9e51045..fffea87a014f 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -34,6 +34,7 @@
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/dma-contiguous.h>
+#include <linux/irqdomain.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
@@ -3852,6 +3853,21 @@ union irte {
} fields;
};
+struct irq_2_irte {
+ u16 devid; /* Device ID for IRTE table */
+ u16 index; /* Index into IRTE table*/
+};
+
+struct amd_ir_data {
+ struct irq_2_irte irq_2_irte;
+ union irte irte_entry;
+ union {
+ struct msi_msg msi_entry;
+ };
+};
+
+static struct irq_chip amd_ir_chip;
+
#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
#define DTE_IRQ_TABLE_LEN (8ULL << 1)
@@ -3945,7 +3961,7 @@ out_unlock:
return table;
}
-static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count)
+static int alloc_irq_index(u16 devid, int count)
{
struct irq_remap_table *table;
unsigned long flags;
@@ -3967,18 +3983,10 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count)
c = 0;
if (c == count) {
- struct irq_2_irte *irte_info;
-
for (; c != 0; --c)
table->table[index - c + 1] = IRTE_ALLOCATED;
index -= count - 1;
-
- cfg->remapped = 1;
- irte_info = &cfg->irq_2_irte;
- irte_info->devid = devid;
- irte_info->index = index;
-
goto out;
}
}
@@ -3991,22 +3999,6 @@ out:
return index;
}
-static int get_irte(u16 devid, int index, union irte *irte)
-{
- struct irq_remap_table *table;
- unsigned long flags;
-
- table = get_irq_table(devid, false);
- if (!table)
- return -ENOMEM;
-
- spin_lock_irqsave(&table->lock, flags);
- irte->val = table->table[index];
- spin_unlock_irqrestore(&table->lock, flags);
-
- return 0;
-}
-
static int modify_irte(u16 devid, int index, union irte irte)
{
struct irq_remap_table *table;
@@ -4053,243 +4045,316 @@ static void free_irte(u16 devid, int index)
iommu_completion_wait(iommu);
}
-static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
+static int get_devid(struct irq_alloc_info *info)
{
- struct irq_remap_table *table;
- struct irq_2_irte *irte_info;
- struct irq_cfg *cfg;
- union irte irte;
- int ioapic_id;
- int index;
- int devid;
- int ret;
-
- cfg = irq_cfg(irq);
- if (!cfg)
- return -EINVAL;
-
- irte_info = &cfg->irq_2_irte;
- ioapic_id = mpc_ioapic_id(attr->ioapic);
- devid = get_ioapic_devid(ioapic_id);
-
- if (devid < 0)
- return devid;
-
- table = get_irq_table(devid, true);
- if (table == NULL)
- return -ENOMEM;
-
- index = attr->ioapic_pin;
+ int devid = -1;
- /* Setup IRQ remapping info */
- cfg->remapped = 1;
- irte_info->devid = devid;
- irte_info->index = index;
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_IOAPIC:
+ devid = get_ioapic_devid(info->ioapic_id);
+ break;
+ case X86_IRQ_ALLOC_TYPE_HPET:
+ devid = get_hpet_devid(info->hpet_id);
+ break;
+ case X86_IRQ_ALLOC_TYPE_MSI:
+ case X86_IRQ_ALLOC_TYPE_MSIX:
+ devid = get_device_id(&info->msi_dev->dev);
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
- /* Setup IRTE for IOMMU */
- irte.val = 0;
- irte.fields.vector = vector;
- irte.fields.int_type = apic->irq_delivery_mode;
- irte.fields.destination = destination;
- irte.fields.dm = apic->irq_dest_mode;
- irte.fields.valid = 1;
-
- ret = modify_irte(devid, index, irte);
- if (ret)
- return ret;
+ return devid;
+}
- /* Setup IOAPIC entry */
- memset(entry, 0, sizeof(*entry));
+static struct irq_domain *get_ir_irq_domain(struct irq_alloc_info *info)
+{
+ struct amd_iommu *iommu;
+ int devid;
- entry->vector = index;
- entry->mask = 0;
- entry->trigger = attr->trigger;
- entry->polarity = attr->polarity;
+ if (!info)
+ return NULL;
- /*
- * Mask level triggered irqs.
- */
- if (attr->trigger)
- entry->mask = 1;
+ devid = get_devid(info);
+ if (devid >= 0) {
+ iommu = amd_iommu_rlookup_table[devid];
+ if (iommu)
+ return iommu->ir_domain;
+ }
- return 0;
+ return NULL;
}
-static int set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
+static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
{
- struct irq_2_irte *irte_info;
- unsigned int dest, irq;
- struct irq_cfg *cfg;
- union irte irte;
- int err;
-
- if (!config_enabled(CONFIG_SMP))
- return -1;
-
- cfg = irqd_cfg(data);
- irq = data->irq;
- irte_info = &cfg->irq_2_irte;
+ struct amd_iommu *iommu;
+ int devid;
- if (!cpumask_intersects(mask, cpu_online_mask))
- return -EINVAL;
+ if (!info)
+ return NULL;
- if (get_irte(irte_info->devid, irte_info->index, &irte))
- return -EBUSY;
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_MSI:
+ case X86_IRQ_ALLOC_TYPE_MSIX:
+ devid = get_device_id(&info->msi_dev->dev);
+ if (devid >= 0) {
+ iommu = amd_iommu_rlookup_table[devid];
+ if (iommu)
+ return iommu->msi_domain;
+ }
+ break;
+ default:
+ break;
+ }
- if (assign_irq_vector(irq, cfg, mask))
- return -EBUSY;
+ return NULL;
+}
- err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest);
- if (err) {
- if (assign_irq_vector(irq, cfg, data->affinity))
- pr_err("AMD-Vi: Failed to recover vector for irq %d\n", irq);
- return err;
- }
+struct irq_remap_ops amd_iommu_irq_ops = {
+ .prepare = amd_iommu_prepare,
+ .enable = amd_iommu_enable,
+ .disable = amd_iommu_disable,
+ .reenable = amd_iommu_reenable,
+ .enable_faulting = amd_iommu_enable_faulting,
+ .get_ir_irq_domain = get_ir_irq_domain,
+ .get_irq_domain = get_irq_domain,
+};
- irte.fields.vector = cfg->vector;
- irte.fields.destination = dest;
+static void irq_remapping_prepare_irte(struct amd_ir_data *data,
+ struct irq_cfg *irq_cfg,
+ struct irq_alloc_info *info,
+ int devid, int index, int sub_handle)
+{
+ struct irq_2_irte *irte_info = &data->irq_2_irte;
+ struct msi_msg *msg = &data->msi_entry;
+ union irte *irte = &data->irte_entry;
+ struct IO_APIC_route_entry *entry;
- modify_irte(irte_info->devid, irte_info->index, irte);
+ data->irq_2_irte.devid = devid;
+ data->irq_2_irte.index = index + sub_handle;
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
+ /* Setup IRTE for IOMMU */
+ irte->val = 0;
+ irte->fields.vector = irq_cfg->vector;
+ irte->fields.int_type = apic->irq_delivery_mode;
+ irte->fields.destination = irq_cfg->dest_apicid;
+ irte->fields.dm = apic->irq_dest_mode;
+ irte->fields.valid = 1;
+
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_IOAPIC:
+ /* Setup IOAPIC entry */
+ entry = info->ioapic_entry;
+ info->ioapic_entry = NULL;
+ memset(entry, 0, sizeof(*entry));
+ entry->vector = index;
+ entry->mask = 0;
+ entry->trigger = info->ioapic_trigger;
+ entry->polarity = info->ioapic_polarity;
+ /* Mask level triggered irqs. */
+ if (info->ioapic_trigger)
+ entry->mask = 1;
+ break;
- cpumask_copy(data->affinity, mask);
+ case X86_IRQ_ALLOC_TYPE_HPET:
+ case X86_IRQ_ALLOC_TYPE_MSI:
+ case X86_IRQ_ALLOC_TYPE_MSIX:
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_lo = MSI_ADDR_BASE_LO;
+ msg->data = irte_info->index;
+ break;
- return 0;
+ default:
+ BUG_ON(1);
+ break;
+ }
}
-static int free_irq(int irq)
+static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
{
- struct irq_2_irte *irte_info;
+ struct irq_alloc_info *info = arg;
+ struct irq_data *irq_data;
+ struct amd_ir_data *data;
struct irq_cfg *cfg;
+ int i, ret, devid;
+ int index = -1;
- cfg = irq_cfg(irq);
- if (!cfg)
+ if (!info)
+ return -EINVAL;
+ if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI &&
+ info->type != X86_IRQ_ALLOC_TYPE_MSIX)
return -EINVAL;
- irte_info = &cfg->irq_2_irte;
-
- free_irte(irte_info->devid, irte_info->index);
+ /*
+ * With IRQ remapping enabled, don't need contiguous CPU vectors
+ * to support multiple MSI interrupts.
+ */
+ if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+ info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
- return 0;
-}
+ devid = get_devid(info);
+ if (devid < 0)
+ return -EINVAL;
-static void compose_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id)
-{
- struct irq_2_irte *irte_info;
- struct irq_cfg *cfg;
- union irte irte;
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret < 0)
+ return ret;
- cfg = irq_cfg(irq);
- if (!cfg)
- return;
+ ret = -ENOMEM;
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out_free_parent;
- irte_info = &cfg->irq_2_irte;
+ if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
+ if (get_irq_table(devid, true))
+ index = info->ioapic_pin;
+ else
+ ret = -ENOMEM;
+ } else {
+ index = alloc_irq_index(devid, nr_irqs);
+ }
+ if (index < 0) {
+ pr_warn("Failed to allocate IRTE\n");
+ kfree(data);
+ goto out_free_parent;
+ }
- irte.val = 0;
- irte.fields.vector = cfg->vector;
- irte.fields.int_type = apic->irq_delivery_mode;
- irte.fields.destination = dest;
- irte.fields.dm = apic->irq_dest_mode;
- irte.fields.valid = 1;
+ for (i = 0; i < nr_irqs; i++) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+ cfg = irqd_cfg(irq_data);
+ if (!irq_data || !cfg) {
+ ret = -EINVAL;
+ goto out_free_data;
+ }
- modify_irte(irte_info->devid, irte_info->index, irte);
+ if (i > 0) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out_free_data;
+ }
+ irq_data->hwirq = (devid << 16) + i;
+ irq_data->chip_data = data;
+ irq_data->chip = &amd_ir_chip;
+ irq_remapping_prepare_irte(data, cfg, info, devid, index, i);
+ irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
+ }
+ return 0;
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo = MSI_ADDR_BASE_LO;
- msg->data = irte_info->index;
+out_free_data:
+ for (i--; i >= 0; i--) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+ if (irq_data)
+ kfree(irq_data->chip_data);
+ }
+ for (i = 0; i < nr_irqs; i++)
+ free_irte(devid, index + i);
+out_free_parent:
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+ return ret;
}
-static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec)
+static void irq_remapping_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
{
- struct irq_cfg *cfg;
- int index;
- u16 devid;
-
- if (!pdev)
- return -EINVAL;
+ struct irq_2_irte *irte_info;
+ struct irq_data *irq_data;
+ struct amd_ir_data *data;
+ int i;
- cfg = irq_cfg(irq);
- if (!cfg)
- return -EINVAL;
+ for (i = 0; i < nr_irqs; i++) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+ if (irq_data && irq_data->chip_data) {
+ data = irq_data->chip_data;
+ irte_info = &data->irq_2_irte;
+ free_irte(irte_info->devid, irte_info->index);
+ kfree(data);
+ }
+ }
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
- devid = get_device_id(&pdev->dev);
- index = alloc_irq_index(cfg, devid, nvec);
+static void irq_remapping_activate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ struct amd_ir_data *data = irq_data->chip_data;
+ struct irq_2_irte *irte_info = &data->irq_2_irte;
- return index < 0 ? MAX_IRQS_PER_TABLE : index;
+ modify_irte(irte_info->devid, irte_info->index, data->irte_entry);
}
-static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int offset)
+static void irq_remapping_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data)
{
- struct irq_2_irte *irte_info;
- struct irq_cfg *cfg;
- u16 devid;
+ struct amd_ir_data *data = irq_data->chip_data;
+ struct irq_2_irte *irte_info = &data->irq_2_irte;
+ union irte entry;
- if (!pdev)
- return -EINVAL;
+ entry.val = 0;
+ modify_irte(irte_info->devid, irte_info->index, data->irte_entry);
+}
- cfg = irq_cfg(irq);
- if (!cfg)
- return -EINVAL;
+static struct irq_domain_ops amd_ir_domain_ops = {
+ .alloc = irq_remapping_alloc,
+ .free = irq_remapping_free,
+ .activate = irq_remapping_activate,
+ .deactivate = irq_remapping_deactivate,
+};
- if (index >= MAX_IRQS_PER_TABLE)
- return 0;
+static int amd_ir_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+{
+ struct amd_ir_data *ir_data = data->chip_data;
+ struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
+ struct irq_cfg *cfg = irqd_cfg(data);
+ struct irq_data *parent = data->parent_data;
+ int ret;
- devid = get_device_id(&pdev->dev);
- irte_info = &cfg->irq_2_irte;
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
- cfg->remapped = 1;
- irte_info->devid = devid;
- irte_info->index = index + offset;
+ /*
+ * Atomically updates the IRTE with the new destination, vector
+ * and flushes the interrupt entry cache.
+ */
+ ir_data->irte_entry.fields.vector = cfg->vector;
+ ir_data->irte_entry.fields.destination = cfg->dest_apicid;
+ modify_irte(irte_info->devid, irte_info->index, ir_data->irte_entry);
- return 0;
+ /*
+ * After this point, all the interrupts will start arriving
+ * at the new destination. So, time to cleanup the previous
+ * vector allocation.
+ */
+ send_cleanup_vector(cfg);
+
+ return IRQ_SET_MASK_OK_DONE;
}
-static int alloc_hpet_msi(unsigned int irq, unsigned int id)
+static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
{
- struct irq_2_irte *irte_info;
- struct irq_cfg *cfg;
- int index, devid;
+ struct amd_ir_data *ir_data = irq_data->chip_data;
- cfg = irq_cfg(irq);
- if (!cfg)
- return -EINVAL;
+ *msg = ir_data->msi_entry;
+}
- irte_info = &cfg->irq_2_irte;
- devid = get_hpet_devid(id);
- if (devid < 0)
- return devid;
+static struct irq_chip amd_ir_chip = {
+ .irq_ack = ir_ack_apic_edge,
+ .irq_set_affinity = amd_ir_set_affinity,
+ .irq_compose_msi_msg = ir_compose_msi_msg,
+};
- index = alloc_irq_index(cfg, devid, 1);
- if (index < 0)
- return index;
+int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
+{
+ iommu->ir_domain = irq_domain_add_tree(NULL, &amd_ir_domain_ops, iommu);
+ if (!iommu->ir_domain)
+ return -ENOMEM;
- cfg->remapped = 1;
- irte_info->devid = devid;
- irte_info->index = index;
+ iommu->ir_domain->parent = arch_get_ir_parent_domain();
+ iommu->msi_domain = arch_create_msi_irq_domain(iommu->ir_domain);
return 0;
}
-
-struct irq_remap_ops amd_iommu_irq_ops = {
- .prepare = amd_iommu_prepare,
- .enable = amd_iommu_enable,
- .disable = amd_iommu_disable,
- .reenable = amd_iommu_reenable,
- .enable_faulting = amd_iommu_enable_faulting,
- .setup_ioapic_entry = setup_ioapic_entry,
- .set_affinity = set_affinity,
- .free_irq = free_irq,
- .compose_msi_msg = compose_msi_msg,
- .msi_alloc_irq = msi_alloc_irq,
- .msi_setup_irq = msi_setup_irq,
- .alloc_hpet_msi = alloc_hpet_msi,
-};
#endif
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 450ef5001a65..c17df04d7a7f 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1124,6 +1124,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (ret)
return ret;
+ ret = amd_iommu_create_irq_domain(iommu);
+ if (ret)
+ return ret;
+
/*
* Make sure IOMMU is not considered to translate itself. The IVRS
* table tells us so, but this is a lie!
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 72b0fd455e24..0a21142d3639 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -62,6 +62,15 @@ extern u8 amd_iommu_pc_get_max_counters(u16 devid);
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write);
+#ifdef CONFIG_IRQ_REMAP
+extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
+#else
+static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
+{
+ return 0;
+}
+#endif
+
#define PPR_SUCCESS 0x0
#define PPR_INVALID 0x1
#define PPR_FAILURE 0xf
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 05030e523771..6533e874c9d7 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -398,6 +398,7 @@ struct amd_iommu_fault {
struct iommu_domain;
+struct irq_domain;
/*
* This structure contains generic data for IOMMU protection domains
@@ -579,6 +580,10 @@ struct amd_iommu {
/* The maximum PC banks and counters/bank (PCSup=1) */
u8 max_banks;
u8 max_counters;
+#ifdef CONFIG_IRQ_REMAP
+ struct irq_domain *ir_domain;
+ struct irq_domain *msi_domain;
+#endif
};
struct devid_map {
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 9847613085e1..536f2d8ea41a 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1087,8 +1087,8 @@ static void free_iommu(struct intel_iommu *iommu)
if (iommu->irq) {
free_irq(iommu->irq, iommu);
- irq_set_handler_data(iommu->irq, NULL);
dmar_free_hwirq(iommu->irq);
+ iommu->irq = 0;
}
if (iommu->qi) {
@@ -1642,23 +1642,14 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
if (iommu->irq)
return 0;
- irq = dmar_alloc_hwirq();
- if (irq <= 0) {
+ irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
+ if (irq > 0) {
+ iommu->irq = irq;
+ } else {
pr_err("IOMMU: no free vectors\n");
return -EINVAL;
}
- irq_set_handler_data(irq, iommu);
- iommu->irq = irq;
-
- ret = arch_setup_dmar_msi(irq);
- if (ret) {
- irq_set_handler_data(irq, NULL);
- iommu->irq = 0;
- dmar_free_hwirq(irq);
- return ret;
- }
-
ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
if (ret)
pr_err("IOMMU: can't request irq\n");
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 5709ae9c3e77..80f1d1486247 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -8,6 +8,7 @@
#include <linux/irq.h>
#include <linux/intel-iommu.h>
#include <linux/acpi.h>
+#include <linux/irqdomain.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
#include <asm/cpu.h>
@@ -17,6 +18,11 @@
#include "irq_remapping.h"
+enum irq_mode {
+ IRQ_REMAPPING,
+ IRQ_POSTING,
+};
+
struct ioapic_scope {
struct intel_iommu *iommu;
unsigned int id;
@@ -31,6 +37,22 @@ struct hpet_scope {
unsigned int devfn;
};
+struct irq_2_iommu {
+ struct intel_iommu *iommu;
+ u16 irte_index;
+ u16 sub_handle;
+ u8 irte_mask;
+ enum irq_mode mode;
+};
+
+struct intel_ir_data {
+ struct irq_2_iommu irq_2_iommu;
+ struct irte irte_entry;
+ union {
+ struct msi_msg msi_entry;
+ };
+};
+
#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
#define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8)
@@ -50,43 +72,14 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
* the dmar_global_lock.
*/
static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
+static struct irq_domain_ops intel_ir_domain_ops;
static int __init parse_ioapics_under_ir(void);
-static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
-{
- struct irq_cfg *cfg = irq_cfg(irq);
- return cfg ? &cfg->irq_2_iommu : NULL;
-}
-
-static int get_irte(int irq, struct irte *entry)
-{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- unsigned long flags;
- int index;
-
- if (!entry || !irq_iommu)
- return -1;
-
- raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
-
- if (unlikely(!irq_iommu->iommu)) {
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
- return -1;
- }
-
- index = irq_iommu->irte_index + irq_iommu->sub_handle;
- *entry = *(irq_iommu->iommu->ir_table->base + index);
-
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
- return 0;
-}
-
-static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
+static int alloc_irte(struct intel_iommu *iommu, int irq,
+ struct irq_2_iommu *irq_iommu, u16 count)
{
struct ir_table *table = iommu->ir_table;
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- struct irq_cfg *cfg = irq_cfg(irq);
unsigned int mask = 0;
unsigned long flags;
int index;
@@ -113,11 +106,11 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
if (index < 0) {
pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id);
} else {
- cfg->remapped = 1;
irq_iommu->iommu = iommu;
irq_iommu->irte_index = index;
irq_iommu->sub_handle = 0;
irq_iommu->irte_mask = mask;
+ irq_iommu->mode = IRQ_REMAPPING;
}
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
@@ -135,47 +128,9 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
return qi_submit_sync(&desc, iommu);
}
-static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
-{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- unsigned long flags;
- int index;
-
- if (!irq_iommu)
- return -1;
-
- raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
- *sub_handle = irq_iommu->sub_handle;
- index = irq_iommu->irte_index;
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
- return index;
-}
-
-static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
+static int modify_irte(struct irq_2_iommu *irq_iommu,
+ struct irte *irte_modified)
{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- struct irq_cfg *cfg = irq_cfg(irq);
- unsigned long flags;
-
- if (!irq_iommu)
- return -1;
-
- raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
-
- cfg->remapped = 1;
- irq_iommu->iommu = iommu;
- irq_iommu->irte_index = index;
- irq_iommu->sub_handle = subhandle;
- irq_iommu->irte_mask = 0;
-
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-
- return 0;
-}
-
-static int modify_irte(int irq, struct irte *irte_modified)
-{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
struct intel_iommu *iommu;
unsigned long flags;
struct irte *irte;
@@ -196,6 +151,9 @@ static int modify_irte(int irq, struct irte *irte_modified)
__iommu_flush_cache(iommu, irte, sizeof(*irte));
rc = qi_flush_iec(iommu, index, 0);
+
+ /* Update iommu mode according to the IRTE mode */
+ irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING;
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
@@ -242,7 +200,7 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
return 0;
iommu = irq_iommu->iommu;
- index = irq_iommu->irte_index + irq_iommu->sub_handle;
+ index = irq_iommu->irte_index;
start = iommu->ir_table->base + index;
end = start + (1 << irq_iommu->irte_mask);
@@ -257,29 +215,6 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
}
-static int free_irte(int irq)
-{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- unsigned long flags;
- int rc;
-
- if (!irq_iommu)
- return -1;
-
- raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
-
- rc = clear_entries(irq_iommu);
-
- irq_iommu->iommu = NULL;
- irq_iommu->irte_index = 0;
- irq_iommu->sub_handle = 0;
- irq_iommu->irte_mask = 0;
-
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-
- return rc;
-}
-
/*
* source validation type
*/
@@ -488,7 +423,6 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
INTR_REMAP_PAGE_ORDER);
-
if (!pages) {
pr_err("IR%d: failed to allocate pages of order %d\n",
iommu->seq_id, INTR_REMAP_PAGE_ORDER);
@@ -502,11 +436,23 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
goto out_free_pages;
}
+ iommu->ir_domain = irq_domain_add_hierarchy(arch_get_ir_parent_domain(),
+ 0, INTR_REMAP_TABLE_ENTRIES,
+ NULL, &intel_ir_domain_ops,
+ iommu);
+ if (!iommu->ir_domain) {
+ pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
+ goto out_free_bitmap;
+ }
+ iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain);
+
ir_table->base = page_address(pages);
ir_table->bitmap = bitmap;
iommu->ir_table = ir_table;
return 0;
+out_free_bitmap:
+ kfree(bitmap);
out_free_pages:
__free_pages(pages, INTR_REMAP_PAGE_ORDER);
out_free_table:
@@ -517,6 +463,14 @@ out_free_table:
static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
{
if (iommu && iommu->ir_table) {
+ if (iommu->ir_msi_domain) {
+ irq_domain_remove(iommu->ir_msi_domain);
+ iommu->ir_msi_domain = NULL;
+ }
+ if (iommu->ir_domain) {
+ irq_domain_remove(iommu->ir_domain);
+ iommu->ir_domain = NULL;
+ }
free_pages((unsigned long)iommu->ir_table->base,
INTR_REMAP_PAGE_ORDER);
kfree(iommu->ir_table->bitmap);
@@ -627,6 +581,26 @@ error:
return -ENODEV;
}
+/*
+ * Set Posted-Interrupts capability.
+ */
+static inline void set_irq_posting_cap(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+
+ if (!disable_irq_post) {
+ intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
+
+ for_each_iommu(iommu, drhd)
+ if (!cap_pi_support(iommu->cap)) {
+ intel_irq_remap_ops.capability &=
+ ~(1 << IRQ_POSTING_CAP);
+ break;
+ }
+ }
+}
+
static int __init intel_enable_irq_remapping(void)
{
struct dmar_drhd_unit *drhd;
@@ -702,12 +676,7 @@ static int __init intel_enable_irq_remapping(void)
irq_remapping_enabled = 1;
- /*
- * VT-d has a different layout for IO-APIC entries when
- * interrupt remapping is enabled. So it needs a special routine
- * to print IO-APIC entries for debugging purposes too.
- */
- x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;
+ set_irq_posting_cap();
pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
@@ -909,6 +878,12 @@ static void disable_irq_remapping(void)
iommu_disable_irq_remapping(iommu);
}
+
+ /*
+ * Clear Posted-Interrupts capability.
+ */
+ if (!disable_irq_post)
+ intel_irq_remap_ops.capability &= ~(1 << IRQ_POSTING_CAP);
}
static int reenable_irq_remapping(int eim)
@@ -936,6 +911,8 @@ static int reenable_irq_remapping(int eim)
if (!setup)
goto error;
+ set_irq_posting_cap();
+
return 0;
error:
@@ -945,8 +922,7 @@ error:
return -1;
}
-static void prepare_irte(struct irte *irte, int vector,
- unsigned int dest)
+static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
{
memset(irte, 0, sizeof(*irte));
@@ -966,76 +942,63 @@ static void prepare_irte(struct irte *irte, int vector,
irte->redir_hint = 1;
}
-static int intel_setup_ioapic_entry(int irq,
- struct IO_APIC_route_entry *route_entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
+static struct irq_domain *intel_get_ir_irq_domain(struct irq_alloc_info *info)
{
- int ioapic_id = mpc_ioapic_id(attr->ioapic);
- struct intel_iommu *iommu;
- struct IR_IO_APIC_route_entry *entry;
- struct irte irte;
- int index;
-
- down_read(&dmar_global_lock);
- iommu = map_ioapic_to_ir(ioapic_id);
- if (!iommu) {
- pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
- index = -ENODEV;
- } else {
- index = alloc_irte(iommu, irq, 1);
- if (index < 0) {
- pr_warn("Failed to allocate IRTE for ioapic %d\n",
- ioapic_id);
- index = -ENOMEM;
- }
- }
- up_read(&dmar_global_lock);
- if (index < 0)
- return index;
-
- prepare_irte(&irte, vector, destination);
+ struct intel_iommu *iommu = NULL;
- /* Set source-id of interrupt request */
- set_ioapic_sid(&irte, ioapic_id);
+ if (!info)
+ return NULL;
- modify_irte(irq, &irte);
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_IOAPIC:
+ iommu = map_ioapic_to_ir(info->ioapic_id);
+ break;
+ case X86_IRQ_ALLOC_TYPE_HPET:
+ iommu = map_hpet_to_ir(info->hpet_id);
+ break;
+ case X86_IRQ_ALLOC_TYPE_MSI:
+ case X86_IRQ_ALLOC_TYPE_MSIX:
+ iommu = map_dev_to_ir(info->msi_dev);
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
- apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
- "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
- "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
- "Avail:%X Vector:%02X Dest:%08X "
- "SID:%04X SQ:%X SVT:%X)\n",
- attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
- irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
- irte.avail, irte.vector, irte.dest_id,
- irte.sid, irte.sq, irte.svt);
+ return iommu ? iommu->ir_domain : NULL;
+}
- entry = (struct IR_IO_APIC_route_entry *)route_entry;
- memset(entry, 0, sizeof(*entry));
+static struct irq_domain *intel_get_irq_domain(struct irq_alloc_info *info)
+{
+ struct intel_iommu *iommu;
- entry->index2 = (index >> 15) & 0x1;
- entry->zero = 0;
- entry->format = 1;
- entry->index = (index & 0x7fff);
- /*
- * IO-APIC RTE will be configured with virtual vector.
- * irq handler will do the explicit EOI to the io-apic.
- */
- entry->vector = attr->ioapic_pin;
- entry->mask = 0; /* enable IRQ */
- entry->trigger = attr->trigger;
- entry->polarity = attr->polarity;
+ if (!info)
+ return NULL;
- /* Mask level triggered irqs.
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
- */
- if (attr->trigger)
- entry->mask = 1;
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_MSI:
+ case X86_IRQ_ALLOC_TYPE_MSIX:
+ iommu = map_dev_to_ir(info->msi_dev);
+ if (iommu)
+ return iommu->ir_msi_domain;
+ break;
+ default:
+ break;
+ }
- return 0;
+ return NULL;
}
+struct irq_remap_ops intel_irq_remap_ops = {
+ .prepare = intel_prepare_irq_remapping,
+ .enable = intel_enable_irq_remapping,
+ .disable = disable_irq_remapping,
+ .reenable = reenable_irq_remapping,
+ .enable_faulting = enable_drhd_fault_handling,
+ .get_ir_irq_domain = intel_get_ir_irq_domain,
+ .get_irq_domain = intel_get_irq_domain,
+};
+
/*
* Migrate the IO-APIC irq in the presence of intr-remapping.
*
@@ -1051,170 +1014,282 @@ static int intel_setup_ioapic_entry(int irq,
* is used to migrate MSI irq's in the presence of interrupt-remapping.
*/
static int
-intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
+intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
+ struct intel_ir_data *ir_data = data->chip_data;
+ struct irte *irte = &ir_data->irte_entry;
struct irq_cfg *cfg = irqd_cfg(data);
- unsigned int dest, irq = data->irq;
- struct irte irte;
- int err;
-
- if (!config_enabled(CONFIG_SMP))
- return -EINVAL;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return -EINVAL;
-
- if (get_irte(irq, &irte))
- return -EBUSY;
-
- err = assign_irq_vector(irq, cfg, mask);
- if (err)
- return err;
-
- err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest);
- if (err) {
- if (assign_irq_vector(irq, cfg, data->affinity))
- pr_err("Failed to recover vector for irq %d\n", irq);
- return err;
- }
+ struct irq_data *parent = data->parent_data;
+ int ret;
- irte.vector = cfg->vector;
- irte.dest_id = IRTE_DEST(dest);
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
/*
* Atomically updates the IRTE with the new destination, vector
* and flushes the interrupt entry cache.
*/
- modify_irte(irq, &irte);
+ irte->vector = cfg->vector;
+ irte->dest_id = IRTE_DEST(cfg->dest_apicid);
+
+ /* Update the hardware only if the interrupt is in remapped mode. */
+ if (ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
+ modify_irte(&ir_data->irq_2_iommu, irte);
/*
* After this point, all the interrupts will start arriving
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
+ send_cleanup_vector(cfg);
- cpumask_copy(data->affinity, mask);
- return 0;
+ return IRQ_SET_MASK_OK_DONE;
}
-static void intel_compose_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id)
+static void intel_ir_compose_msi_msg(struct irq_data *irq_data,
+ struct msi_msg *msg)
{
- struct irq_cfg *cfg;
- struct irte irte;
- u16 sub_handle = 0;
- int ir_index;
+ struct intel_ir_data *ir_data = irq_data->chip_data;
- cfg = irq_cfg(irq);
+ *msg = ir_data->msi_entry;
+}
- ir_index = map_irq_to_irte_handle(irq, &sub_handle);
- BUG_ON(ir_index == -1);
+static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
+{
+ struct intel_ir_data *ir_data = data->chip_data;
+ struct vcpu_data *vcpu_pi_info = info;
- prepare_irte(&irte, cfg->vector, dest);
+ /* stop posting interrupts, back to remapping mode */
+ if (!vcpu_pi_info) {
+ modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
+ } else {
+ struct irte irte_pi;
- /* Set source-id of interrupt request */
- if (pdev)
- set_msi_sid(&irte, pdev);
- else
- set_hpet_sid(&irte, hpet_id);
+ /*
+ * We are not caching the posted interrupt entry. We
+ * copy the data from the remapped entry and modify
+ * the fields which are relevant for posted mode. The
+ * cached remapped entry is used for switching back to
+ * remapped mode.
+ */
+ memset(&irte_pi, 0, sizeof(irte_pi));
+ dmar_copy_shared_irte(&irte_pi, &ir_data->irte_entry);
+
+ /* Update the posted mode fields */
+ irte_pi.p_pst = 1;
+ irte_pi.p_urgent = 0;
+ irte_pi.p_vector = vcpu_pi_info->vector;
+ irte_pi.pda_l = (vcpu_pi_info->pi_desc_addr >>
+ (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT);
+ irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) &
+ ~(-1UL << PDA_HIGH_BIT);
+
+ modify_irte(&ir_data->irq_2_iommu, &irte_pi);
+ }
- modify_irte(irq, &irte);
+ return 0;
+}
+
+static struct irq_chip intel_ir_chip = {
+ .irq_ack = ir_ack_apic_edge,
+ .irq_set_affinity = intel_ir_set_affinity,
+ .irq_compose_msi_msg = intel_ir_compose_msi_msg,
+ .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
+};
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->data = sub_handle;
- msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
- MSI_ADDR_IR_SHV |
- MSI_ADDR_IR_INDEX1(ir_index) |
- MSI_ADDR_IR_INDEX2(ir_index);
+static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
+ struct irq_cfg *irq_cfg,
+ struct irq_alloc_info *info,
+ int index, int sub_handle)
+{
+ struct IR_IO_APIC_route_entry *entry;
+ struct irte *irte = &data->irte_entry;
+ struct msi_msg *msg = &data->msi_entry;
+
+ prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid);
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_IOAPIC:
+ /* Set source-id of interrupt request */
+ set_ioapic_sid(irte, info->ioapic_id);
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n",
+ info->ioapic_id, irte->present, irte->fpd,
+ irte->dst_mode, irte->redir_hint,
+ irte->trigger_mode, irte->dlvry_mode,
+ irte->avail, irte->vector, irte->dest_id,
+ irte->sid, irte->sq, irte->svt);
+
+ entry = (struct IR_IO_APIC_route_entry *)info->ioapic_entry;
+ info->ioapic_entry = NULL;
+ memset(entry, 0, sizeof(*entry));
+ entry->index2 = (index >> 15) & 0x1;
+ entry->zero = 0;
+ entry->format = 1;
+ entry->index = (index & 0x7fff);
+ /*
+ * IO-APIC RTE will be configured with virtual vector.
+ * irq handler will do the explicit EOI to the io-apic.
+ */
+ entry->vector = info->ioapic_pin;
+ entry->mask = 0; /* enable IRQ */
+ entry->trigger = info->ioapic_trigger;
+ entry->polarity = info->ioapic_polarity;
+ if (info->ioapic_trigger)
+ entry->mask = 1; /* Mask level triggered irqs. */
+ break;
+
+ case X86_IRQ_ALLOC_TYPE_HPET:
+ case X86_IRQ_ALLOC_TYPE_MSI:
+ case X86_IRQ_ALLOC_TYPE_MSIX:
+ if (info->type == X86_IRQ_ALLOC_TYPE_HPET)
+ set_hpet_sid(irte, info->hpet_id);
+ else
+ set_msi_sid(irte, info->msi_dev);
+
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->data = sub_handle;
+ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+ MSI_ADDR_IR_SHV |
+ MSI_ADDR_IR_INDEX1(index) |
+ MSI_ADDR_IR_INDEX2(index);
+ break;
+
+ default:
+ BUG_ON(1);
+ break;
+ }
}
-/*
- * Map the PCI dev to the corresponding remapping hardware unit
- * and allocate 'nvec' consecutive interrupt-remapping table entries
- * in it.
- */
-static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec)
+static void intel_free_irq_resources(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
{
- struct intel_iommu *iommu;
- int index;
+ struct irq_data *irq_data;
+ struct intel_ir_data *data;
+ struct irq_2_iommu *irq_iommu;
+ unsigned long flags;
+ int i;
- down_read(&dmar_global_lock);
- iommu = map_dev_to_ir(dev);
- if (!iommu) {
- printk(KERN_ERR
- "Unable to map PCI %s to iommu\n", pci_name(dev));
- index = -ENOENT;
- } else {
- index = alloc_irte(iommu, irq, nvec);
- if (index < 0) {
- printk(KERN_ERR
- "Unable to allocate %d IRTE for PCI %s\n",
- nvec, pci_name(dev));
- index = -ENOSPC;
+ for (i = 0; i < nr_irqs; i++) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+ if (irq_data && irq_data->chip_data) {
+ data = irq_data->chip_data;
+ irq_iommu = &data->irq_2_iommu;
+ raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
+ clear_entries(irq_iommu);
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+ irq_domain_reset_irq_data(irq_data);
+ kfree(data);
}
}
- up_read(&dmar_global_lock);
-
- return index;
}
-static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle)
+static int intel_irq_remapping_alloc(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs,
+ void *arg)
{
- struct intel_iommu *iommu;
- int ret = -ENOENT;
+ struct intel_iommu *iommu = domain->host_data;
+ struct irq_alloc_info *info = arg;
+ struct intel_ir_data *data, *ird;
+ struct irq_data *irq_data;
+ struct irq_cfg *irq_cfg;
+ int i, ret, index;
+
+ if (!info || !iommu)
+ return -EINVAL;
+ if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI &&
+ info->type != X86_IRQ_ALLOC_TYPE_MSIX)
+ return -EINVAL;
+
+ /*
+ * With IRQ remapping enabled, don't need contiguous CPU vectors
+ * to support multiple MSI interrupts.
+ */
+ if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+ info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret < 0)
+ return ret;
+
+ ret = -ENOMEM;
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out_free_parent;
down_read(&dmar_global_lock);
- iommu = map_dev_to_ir(pdev);
- if (iommu) {
- /*
- * setup the mapping between the irq and the IRTE
- * base index, the sub_handle pointing to the
- * appropriate interrupt remap table entry.
- */
- set_irte_irq(irq, iommu, index, sub_handle);
- ret = 0;
- }
+ index = alloc_irte(iommu, virq, &data->irq_2_iommu, nr_irqs);
up_read(&dmar_global_lock);
+ if (index < 0) {
+ pr_warn("Failed to allocate IRTE\n");
+ kfree(data);
+ goto out_free_parent;
+ }
+ for (i = 0; i < nr_irqs; i++) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+ irq_cfg = irqd_cfg(irq_data);
+ if (!irq_data || !irq_cfg) {
+ ret = -EINVAL;
+ goto out_free_data;
+ }
+
+ if (i > 0) {
+ ird = kzalloc(sizeof(*ird), GFP_KERNEL);
+ if (!ird)
+ goto out_free_data;
+ /* Initialize the common data */
+ ird->irq_2_iommu = data->irq_2_iommu;
+ ird->irq_2_iommu.sub_handle = i;
+ } else {
+ ird = data;
+ }
+
+ irq_data->hwirq = (index << 16) + i;
+ irq_data->chip_data = ird;
+ irq_data->chip = &intel_ir_chip;
+ intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i);
+ irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
+ }
+ return 0;
+
+out_free_data:
+ intel_free_irq_resources(domain, virq, i);
+out_free_parent:
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
return ret;
}
-static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id)
+static void intel_irq_remapping_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
{
- int ret = -1;
- struct intel_iommu *iommu;
- int index;
+ intel_free_irq_resources(domain, virq, nr_irqs);
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
- down_read(&dmar_global_lock);
- iommu = map_hpet_to_ir(id);
- if (iommu) {
- index = alloc_irte(iommu, irq, 1);
- if (index >= 0)
- ret = 0;
- }
- up_read(&dmar_global_lock);
+static void intel_irq_remapping_activate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ struct intel_ir_data *data = irq_data->chip_data;
- return ret;
+ modify_irte(&data->irq_2_iommu, &data->irte_entry);
}
-struct irq_remap_ops intel_irq_remap_ops = {
- .prepare = intel_prepare_irq_remapping,
- .enable = intel_enable_irq_remapping,
- .disable = disable_irq_remapping,
- .reenable = reenable_irq_remapping,
- .enable_faulting = enable_drhd_fault_handling,
- .setup_ioapic_entry = intel_setup_ioapic_entry,
- .set_affinity = intel_ioapic_set_affinity,
- .free_irq = free_irte,
- .compose_msi_msg = intel_compose_msi_msg,
- .msi_alloc_irq = intel_msi_alloc_irq,
- .msi_setup_irq = intel_msi_setup_irq,
- .alloc_hpet_msi = intel_alloc_hpet_msi,
+static void intel_irq_remapping_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ struct intel_ir_data *data = irq_data->chip_data;
+ struct irte entry;
+
+ memset(&entry, 0, sizeof(entry));
+ modify_irte(&data->irq_2_iommu, &entry);
+}
+
+static struct irq_domain_ops intel_ir_domain_ops = {
+ .alloc = intel_irq_remapping_alloc,
+ .free = intel_irq_remapping_free,
+ .activate = intel_irq_remapping_activate,
+ .deactivate = intel_irq_remapping_deactivate,
};
/*
@@ -1280,6 +1355,9 @@ int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
return -EINVAL;
if (!ecap_ir_support(iommu->ecap))
return 0;
+ if (irq_remapping_cap(IRQ_POSTING_CAP) &&
+ !cap_pi_support(iommu->cap))
+ return -EBUSY;
if (insert) {
if (!iommu->ir_table)
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 390079ee1350..2d9993062ded 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -6,6 +6,7 @@
#include <linux/msi.h>
#include <linux/irq.h>
#include <linux/pci.h>
+#include <linux/irqdomain.h>
#include <asm/hw_irq.h>
#include <asm/irq_remapping.h>
@@ -21,21 +22,11 @@ int irq_remap_broken;
int disable_sourceid_checking;
int no_x2apic_optout;
+int disable_irq_post = 1;
+
static int disable_irq_remap;
static struct irq_remap_ops *remap_ops;
-static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
-static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle);
-static int set_remapped_irq_affinity(struct irq_data *data,
- const struct cpumask *mask,
- bool force);
-
-static bool irq_remapped(struct irq_cfg *cfg)
-{
- return (cfg->remapped == 1);
-}
-
static void irq_remapping_disable_io_apic(void)
{
/*
@@ -49,117 +40,9 @@ static void irq_remapping_disable_io_apic(void)
disconnect_bsp_APIC(0);
}
-static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
-{
- int ret, sub_handle, nvec_pow2, index = 0;
- unsigned int irq;
- struct msi_desc *msidesc;
-
- msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
-
- irq = irq_alloc_hwirqs(nvec, dev_to_node(&dev->dev));
- if (irq == 0)
- return -ENOSPC;
-
- nvec_pow2 = __roundup_pow_of_two(nvec);
- for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
- if (!sub_handle) {
- index = msi_alloc_remapped_irq(dev, irq, nvec_pow2);
- if (index < 0) {
- ret = index;
- goto error;
- }
- } else {
- ret = msi_setup_remapped_irq(dev, irq + sub_handle,
- index, sub_handle);
- if (ret < 0)
- goto error;
- }
- ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
- if (ret < 0)
- goto error;
- }
- return 0;
-
-error:
- irq_free_hwirqs(irq, nvec);
-
- /*
- * Restore altered MSI descriptor fields and prevent just destroyed
- * IRQs from tearing down again in default_teardown_msi_irqs()
- */
- msidesc->irq = 0;
-
- return ret;
-}
-
-static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)
-{
- int node, ret, sub_handle, index = 0;
- struct msi_desc *msidesc;
- unsigned int irq;
-
- node = dev_to_node(&dev->dev);
- sub_handle = 0;
-
- list_for_each_entry(msidesc, &dev->msi_list, list) {
-
- irq = irq_alloc_hwirq(node);
- if (irq == 0)
- return -1;
-
- if (sub_handle == 0)
- ret = index = msi_alloc_remapped_irq(dev, irq, nvec);
- else
- ret = msi_setup_remapped_irq(dev, irq, index, sub_handle);
-
- if (ret < 0)
- goto error;
-
- ret = setup_msi_irq(dev, msidesc, irq, 0);
- if (ret < 0)
- goto error;
-
- sub_handle += 1;
- irq += 1;
- }
-
- return 0;
-
-error:
- irq_free_hwirq(irq);
- return ret;
-}
-
-static int irq_remapping_setup_msi_irqs(struct pci_dev *dev,
- int nvec, int type)
-{
- if (type == PCI_CAP_ID_MSI)
- return do_setup_msi_irqs(dev, nvec);
- else
- return do_setup_msix_irqs(dev, nvec);
-}
-
-static void eoi_ioapic_pin_remapped(int apic, int pin, int vector)
-{
- /*
- * Intr-remapping uses pin number as the virtual vector
- * in the RTE. Actual vector is programmed in
- * intr-remapping table entry. Hence for the io-apic
- * EOI we use the pin number.
- */
- io_apic_eoi(apic, pin);
-}
-
static void __init irq_remapping_modify_x86_ops(void)
{
x86_io_apic_ops.disable = irq_remapping_disable_io_apic;
- x86_io_apic_ops.set_affinity = set_remapped_irq_affinity;
- x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry;
- x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped;
- x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs;
- x86_msi.setup_hpet_msi = setup_hpet_msi_remapped;
- x86_msi.compose_msi_msg = compose_remapped_msi_msg;
}
static __init int setup_nointremap(char *str)
@@ -198,6 +81,15 @@ void set_irq_remapping_broken(void)
irq_remap_broken = 1;
}
+bool irq_remapping_cap(enum irq_remap_cap cap)
+{
+ if (!remap_ops || disable_irq_post)
+ return 0;
+
+ return (remap_ops->capability & (1 << cap));
+}
+EXPORT_SYMBOL_GPL(irq_remapping_cap);
+
int __init irq_remapping_prepare(void)
{
if (disable_irq_remap)
@@ -254,113 +146,48 @@ int __init irq_remap_enable_fault_handling(void)
return remap_ops->enable_faulting();
}
-int setup_ioapic_remapped_entry(int irq,
- struct IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
-{
- if (!remap_ops->setup_ioapic_entry)
- return -ENODEV;
-
- return remap_ops->setup_ioapic_entry(irq, entry, destination,
- vector, attr);
-}
-
-static int set_remapped_irq_affinity(struct irq_data *data,
- const struct cpumask *mask, bool force)
-{
- if (!config_enabled(CONFIG_SMP) || !remap_ops->set_affinity)
- return 0;
-
- return remap_ops->set_affinity(data, mask, force);
-}
-
-void free_remapped_irq(int irq)
-{
- struct irq_cfg *cfg = irq_cfg(irq);
-
- if (irq_remapped(cfg) && remap_ops->free_irq)
- remap_ops->free_irq(irq);
-}
-
-void compose_remapped_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id)
-{
- struct irq_cfg *cfg = irq_cfg(irq);
-
- if (!irq_remapped(cfg))
- native_compose_msi_msg(pdev, irq, dest, msg, hpet_id);
- else if (remap_ops->compose_msi_msg)
- remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id);
-}
-
-static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
-{
- if (!remap_ops->msi_alloc_irq)
- return -ENODEV;
-
- return remap_ops->msi_alloc_irq(pdev, irq, nvec);
-}
-
-static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle)
-{
- if (!remap_ops->msi_setup_irq)
- return -ENODEV;
-
- return remap_ops->msi_setup_irq(pdev, irq, index, sub_handle);
-}
-
-int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
-{
- int ret;
-
- if (!remap_ops->alloc_hpet_msi)
- return -ENODEV;
-
- ret = remap_ops->alloc_hpet_msi(irq, id);
- if (ret)
- return -EINVAL;
-
- return default_setup_hpet_msi(irq, id);
-}
-
void panic_if_irq_remap(const char *msg)
{
if (irq_remapping_enabled)
panic(msg);
}
-static void ir_ack_apic_edge(struct irq_data *data)
+void ir_ack_apic_edge(struct irq_data *data)
{
ack_APIC_irq();
}
-static void ir_ack_apic_level(struct irq_data *data)
+/**
+ * irq_remapping_get_ir_irq_domain - Get the irqdomain associated with the IOMMU
+ * device serving request @info
+ * @info: interrupt allocation information, used to identify the IOMMU device
+ *
+ * It's used to get parent irqdomain for HPET and IOAPIC irqdomains.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *
+irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info)
{
- ack_APIC_irq();
- eoi_ioapic_irq(data->irq, irqd_cfg(data));
-}
+ if (!remap_ops || !remap_ops->get_ir_irq_domain)
+ return NULL;
-static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
-{
- seq_printf(p, " IR-%s", data->chip->name);
+ return remap_ops->get_ir_irq_domain(info);
}
-void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+/**
+ * irq_remapping_get_irq_domain - Get the irqdomain serving the request @info
+ * @info: interrupt allocation information, used to identify the IOMMU device
+ *
+ * There will be one PCI MSI/MSIX irqdomain associated with each interrupt
+ * remapping device, so this interface is used to retrieve the PCI MSI/MSIX
+ * irqdomain serving request @info.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *
+irq_remapping_get_irq_domain(struct irq_alloc_info *info)
{
- chip->irq_print_chip = ir_print_prefix;
- chip->irq_ack = ir_ack_apic_edge;
- chip->irq_eoi = ir_ack_apic_level;
- chip->irq_set_affinity = x86_io_apic_ops.set_affinity;
-}
+ if (!remap_ops || !remap_ops->get_irq_domain)
+ return NULL;
-bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip)
-{
- if (!irq_remapped(cfg))
- return false;
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- irq_remap_modify_chip_defaults(chip);
- return true;
+ return remap_ops->get_irq_domain(info);
}
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 7c70cc29ffe6..039c7af7b190 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -24,19 +24,22 @@
#ifdef CONFIG_IRQ_REMAP
-struct IO_APIC_route_entry;
-struct io_apic_irq_attr;
struct irq_data;
-struct cpumask;
-struct pci_dev;
struct msi_msg;
+struct irq_domain;
+struct irq_alloc_info;
extern int irq_remap_broken;
extern int disable_sourceid_checking;
extern int no_x2apic_optout;
extern int irq_remapping_enabled;
+extern int disable_irq_post;
+
struct irq_remap_ops {
+ /* The supported capabilities */
+ int capability;
+
/* Initializes hardware and makes it ready for remapping interrupts */
int (*prepare)(void);
@@ -52,40 +55,23 @@ struct irq_remap_ops {
/* Enable fault handling */
int (*enable_faulting)(void);
- /* IO-APIC setup routine */
- int (*setup_ioapic_entry)(int irq, struct IO_APIC_route_entry *,
- unsigned int, int,
- struct io_apic_irq_attr *);
-
- /* Set the CPU affinity of a remapped interrupt */
- int (*set_affinity)(struct irq_data *data, const struct cpumask *mask,
- bool force);
-
- /* Free an IRQ */
- int (*free_irq)(int);
+ /* Get the irqdomain associated the IOMMU device */
+ struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *);
- /* Create MSI msg to use for interrupt remapping */
- void (*compose_msi_msg)(struct pci_dev *,
- unsigned int, unsigned int,
- struct msi_msg *, u8);
-
- /* Allocate remapping resources for MSI */
- int (*msi_alloc_irq)(struct pci_dev *, int, int);
-
- /* Setup the remapped MSI irq */
- int (*msi_setup_irq)(struct pci_dev *, unsigned int, int, int);
-
- /* Setup interrupt remapping for an HPET MSI */
- int (*alloc_hpet_msi)(unsigned int, unsigned int);
+ /* Get the MSI irqdomain associated with the IOMMU device */
+ struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
};
extern struct irq_remap_ops intel_irq_remap_ops;
extern struct irq_remap_ops amd_iommu_irq_ops;
+extern void ir_ack_apic_edge(struct irq_data *data);
+
#else /* CONFIG_IRQ_REMAP */
#define irq_remapping_enabled 0
#define irq_remap_broken 0
+#define disable_irq_post 1
#endif /* CONFIG_IRQ_REMAP */