Diffstat (limited to 'drivers/iommu')
-rw-r--r--  drivers/iommu/Kconfig                  8
-rw-r--r--  drivers/iommu/amd/amd_iommu_types.h    1
-rw-r--r--  drivers/iommu/amd/iommu.c             44
-rw-r--r--  drivers/iommu/dma-iommu.c              3
-rw-r--r--  drivers/iommu/hyperv-iommu.c          11
-rw-r--r--  drivers/iommu/intel/dmar.c             1
-rw-r--r--  drivers/iommu/intel/iommu.c           73
-rw-r--r--  drivers/iommu/intel/iommu.h            5
-rw-r--r--  drivers/iommu/intel/irq_remapping.c   52
-rw-r--r--  drivers/iommu/intel/svm.c             19
-rw-r--r--  drivers/iommu/iommufd/pages.c         15
-rw-r--r--  drivers/iommu/of_iommu.c               1
12 files changed, 159 insertions, 74 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 319966cde5cf..79707685d54a 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -390,7 +390,7 @@ config ARM_SMMU_V3
depends on ARM64
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
- select GENERIC_MSI_IRQ_DOMAIN
+ select GENERIC_MSI_IRQ
help
Support for implementations of the ARM System MMU architecture
version 3 providing translation support to a PCIe root complex.
@@ -475,13 +475,13 @@ config QCOM_IOMMU
Support for IOMMU on certain Qualcomm SoCs.
config HYPERV_IOMMU
- bool "Hyper-V x2APIC IRQ Handling"
+ bool "Hyper-V IRQ Handling"
depends on HYPERV && X86
select IOMMU_API
default HYPERV
help
- Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux
- guests to run with x2APIC mode enabled.
+ Stub IOMMU driver to handle IRQs to support Hyper-V Linux
+ guest and root partitions.
config VIRTIO_IOMMU
tristate "Virtio IOMMU driver"
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 1d0a70c85333..3d684190b4d5 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -734,7 +734,6 @@ struct amd_iommu {
u8 max_counters;
#ifdef CONFIG_IRQ_REMAP
struct irq_domain *ir_domain;
- struct irq_domain *msi_domain;
struct amd_irte_ops *irte_ops;
#endif
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 240c535e317c..8d37d9087fab 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -812,10 +812,10 @@ static void
amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu)
{
if (!irq_remapping_enabled || !dev_is_pci(dev) ||
- pci_dev_has_special_msi_domain(to_pci_dev(dev)))
+ !pci_dev_has_default_msi_parent_domain(to_pci_dev(dev)))
return;
- dev_set_msi_domain(dev, iommu->msi_domain);
+ dev_set_msi_domain(dev, iommu->ir_domain);
}
#else /* CONFIG_IRQ_REMAP */
@@ -3288,17 +3288,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
if (!info)
return -EINVAL;
- if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI &&
- info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX)
+ if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI)
return -EINVAL;
- /*
- * With IRQ remapping enabled, don't need contiguous CPU vectors
- * to support multiple MSI interrupts.
- */
- if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI)
- info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
-
sbdf = get_devid(info);
if (sbdf < 0)
return -EINVAL;
@@ -3650,6 +3642,21 @@ static struct irq_chip amd_ir_chip = {
.irq_compose_msi_msg = ir_compose_msi_msg,
};
+static const struct msi_parent_ops amdvi_msi_parent_ops = {
+ .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED |
+ MSI_FLAG_MULTI_PCI_MSI |
+ MSI_FLAG_PCI_IMS,
+ .prefix = "IR-",
+ .init_dev_msi_info = msi_parent_init_dev_msi_info,
+};
+
+static const struct msi_parent_ops virt_amdvi_msi_parent_ops = {
+ .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED |
+ MSI_FLAG_MULTI_PCI_MSI,
+ .prefix = "vIR-",
+ .init_dev_msi_info = msi_parent_init_dev_msi_info,
+};
+
int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
{
struct fwnode_handle *fn;
@@ -3657,16 +3664,21 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index);
if (!fn)
return -ENOMEM;
- iommu->ir_domain = irq_domain_create_tree(fn, &amd_ir_domain_ops, iommu);
+ iommu->ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 0, 0,
+ fn, &amd_ir_domain_ops, iommu);
if (!iommu->ir_domain) {
irq_domain_free_fwnode(fn);
return -ENOMEM;
}
- iommu->ir_domain->parent = arch_get_ir_parent_domain();
- iommu->msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain,
- "AMD-IR-MSI",
- iommu->index);
+ irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI);
+ iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+
+ if (amd_iommu_np_cache)
+ iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops;
+ else
+ iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops;
+
return 0;
}
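This AMD change and the Intel change further down are two halves of the same conversion: the separate per-IOMMU "IR-MSI" irqdomain goes away, the remapping domain itself is flagged as an MSI parent, and the PCI/MSI core builds per-device child domains from its msi_parent_ops. A minimal sketch of the pattern, using the helpers that appear in this diff; my_ir_domain_ops, my_msi_parent_ops and my_create_ir_domain are hypothetical names, and the x86 symbols (x86_vector_domain, X86_VECTOR_MSI_FLAGS_SUPPORTED) come from arch headers as above:

#include <linux/irqdomain.h>
#include <linux/msi.h>

/* Hypothetical irq_domain_ops for the sketch; a real driver fills in the
 * alloc/free/activate callbacks (see amd_ir_domain_ops above). */
static const struct irq_domain_ops my_ir_domain_ops;

static const struct msi_parent_ops my_msi_parent_ops = {
	.supported_flags   = X86_VECTOR_MSI_FLAGS_SUPPORTED |
			     MSI_FLAG_MULTI_PCI_MSI,
	.prefix            = "IR-",
	.init_dev_msi_info = msi_parent_init_dev_msi_info,
};

static struct irq_domain *my_create_ir_domain(struct fwnode_handle *fn,
					      void *host_data)
{
	struct irq_domain *d;

	/* The remapping domain stacks on top of the x86 vector domain. */
	d = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, fn,
					&my_ir_domain_ops, host_data);
	if (!d)
		return NULL;

	/* Flag the domain as an MSI parent: the PCI/MSI core then creates
	 * per-device MSI domains on demand via msi_parent_ops, so no
	 * separate msi_domain pointer needs to be stored. */
	irq_domain_update_bus_token(d, DOMAIN_BUS_AMDVI);
	d->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
	d->msi_parent_ops = &my_msi_parent_ops;
	return d;
}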
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9297b741f5e8..f798c44e0903 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -744,9 +744,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
/* IOMMU can map any pages, so himem can also be used here */
gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
- /* It makes no sense to muck about with huge pages */
- gfp &= ~__GFP_COMP;
-
while (count) {
struct page *page = NULL;
unsigned int order_size;
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index e190bb8c225c..8302db7f783e 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -122,9 +122,12 @@ static int __init hyperv_prepare_irq_remapping(void)
const char *name;
const struct irq_domain_ops *ops;
+ /*
+ * For a Hyper-V root partition, ms_hyperv_msi_ext_dest_id()
+ * will always return false.
+ */
if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) ||
- x86_init.hyper.msi_ext_dest_id() ||
- !x2apic_supported())
+ x86_init.hyper.msi_ext_dest_id())
return -ENODEV;
if (hv_root_partition) {
@@ -170,7 +173,9 @@ static int __init hyperv_prepare_irq_remapping(void)
static int __init hyperv_enable_irq_remapping(void)
{
- return IRQ_REMAP_X2APIC_MODE;
+ if (x2apic_supported())
+ return IRQ_REMAP_X2APIC_MODE;
+ return IRQ_REMAP_XAPIC_MODE;
}
struct irq_remap_ops hyperv_irq_remap_ops = {
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 3528058d253e..b00a0ceb2d13 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void)
info = dmar_alloc_pci_notify_info(dev,
BUS_NOTIFY_ADD_DEVICE);
if (!info) {
+ pci_dev_put(dev);
return dmar_dev_scope_status;
} else {
dmar_pci_bus_add_dev(info);
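This fix, like the has_external_pci() change below, enforces the PCI device iterator's refcounting rule: for_each_pci_dev() holds a reference on the device it hands to the loop body and releases it only when advancing to the next device, so every early exit from the loop must drop the reference by hand. A minimal sketch, with match() as a placeholder predicate:

#include <linux/pci.h>

static bool example_scan(bool (*match)(struct pci_dev *))
{
	struct pci_dev *pdev = NULL;

	for_each_pci_dev(pdev) {
		if (match(pdev)) {
			/* Balance the reference the iterator took for
			 * this device before leaving the loop early. */
			pci_dev_put(pdev);
			return true;
		}
	}
	return false;
}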
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ebe44a07c4b0..bef8e8f7ca25 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1396,6 +1396,24 @@ static void domain_update_iotlb(struct dmar_domain *domain)
spin_unlock_irqrestore(&domain->lock, flags);
}
+/*
+ * The extra devTLB flush quirk impacts those QAT devices with PCI device
+ * IDs ranging from 0x4940 to 0x4943 (the 0xfffc mask below leaves the low
+ * two device-ID bits wild, matching exactly those four IDs). It is exempted
+ * from the risky_device() check because it applies only to the built-in QAT
+ * devices and doesn't grant additional privileges.
+ */
+#define BUGGY_QAT_DEVID_MASK 0x4940
+static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
+{
+ if (pdev->vendor != PCI_VENDOR_ID_INTEL)
+ return false;
+
+ if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
+ return false;
+
+ return true;
+}
+
static void iommu_enable_pci_caps(struct device_domain_info *info)
{
struct pci_dev *pdev;
@@ -1478,6 +1496,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
qdep = info->ats_qdep;
qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
qdep, addr, mask);
+ quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep);
}
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
@@ -3854,8 +3873,10 @@ static inline bool has_external_pci(void)
struct pci_dev *pdev = NULL;
for_each_pci_dev(pdev)
- if (pdev->external_facing)
+ if (pdev->external_facing) {
+ pci_dev_put(pdev);
return true;
+ }
return false;
}
@@ -4494,9 +4515,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
if (dev_is_pci(dev)) {
if (ecap_dev_iotlb_support(iommu->ecap) &&
pci_ats_supported(pdev) &&
- dmar_ats_supported(pdev, iommu))
+ dmar_ats_supported(pdev, iommu)) {
info->ats_supported = 1;
-
+ info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
+ }
if (sm_supported(iommu)) {
if (pasid_supported(iommu)) {
int features = pci_pasid_features(pdev);
@@ -4955,3 +4977,48 @@ static void __init check_tylersburg_isoch(void)
pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
vtisochctrl);
}
+
+/*
+ * Here we deal with a device TLB defect where the device may inadvertently
+ * issue the ATS invalidation completion before posted writes that were
+ * initiated with a translated address matching the invalidation range,
+ * violating the invalidation completion ordering. Therefore, any use case
+ * that cannot guarantee DMA is stopped before unmap is vulnerable to this
+ * defect. In other words, any dTLB invalidation that is not initiated under
+ * the control of the trusted/privileged host device driver must use this
+ * quirk.
+ * Device TLBs are invalidated under the following six conditions:
+ * 1. A device driver does a DMA API unmap of an IOVA.
+ * 2. A device driver unbinds a PASID from a process via sva_unbind_device().
+ * 3. A PASID is torn down after the PASID cache is flushed, e.g. on process
+ *    exit_mmap() due to a crash.
+ * 4. Under SVA usage, invoked from mmu_notifier.invalidate_range() when the
+ *    VM has to free pages that were unmapped.
+ * 5. A userspace driver unmaps a DMA buffer.
+ * 6. Cache invalidation in vSVA usage (upcoming).
+ *
+ * For #1 and #2, device drivers are responsible for stopping DMA traffic
+ * before unmap/unbind. For #3, the iommu driver is invoked via an
+ * mmu_notifier and invalidates the TLB the same way as a normal user unmap,
+ * which uses this quirk. The dTLB invalidation after a PASID cache flush
+ * does not need this quirk.
+ *
+ * As a reminder, #6 will *NEED* this quirk once nested translation is enabled.
+void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
+ unsigned long address, unsigned long mask,
+ u32 pasid, u16 qdep)
+{
+ u16 sid;
+
+ if (likely(!info->dtlb_extra_inval))
+ return;
+
+ sid = PCI_DEVID(info->bus, info->devfn);
+ if (pasid == PASID_RID2PASID) {
+ qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+ qdep, address, mask);
+ } else {
+ qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
+ pasid, qdep, address, mask);
+ }
+}
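Usage-wise, the quirk is meant to be called immediately after the regular dev-IOTLB flush, exactly as __iommu_flush_dev_iotlb() does above. A condensed sketch (example_dev_iotlb_flush() is an illustrative name; the helpers and fields are the ones from this diff):

static void example_dev_iotlb_flush(struct device_domain_info *info,
				    u64 addr, unsigned int mask)
{
	u16 sid = PCI_DEVID(info->bus, info->devfn);

	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
			   info->ats_qdep, addr, mask);
	/* A no-op unless dtlb_extra_inval was set at probe time for the
	 * affected QAT IDs (0x4940-0x4943); otherwise the flush is issued
	 * a second time to order against in-flight posted writes. */
	quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID,
				  info->ats_qdep);
}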
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 251a609fdce3..f83ad8ddcf4d 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -598,7 +598,6 @@ struct intel_iommu {
#ifdef CONFIG_IRQ_REMAP
struct ir_table *ir_table; /* Interrupt remapping info */
struct irq_domain *ir_domain;
- struct irq_domain *ir_msi_domain;
#endif
struct iommu_device iommu; /* IOMMU core code handle */
int node;
@@ -621,6 +620,7 @@ struct device_domain_info {
u8 pri_enabled:1;
u8 ats_supported:1;
u8 ats_enabled:1;
+ u8 dtlb_extra_inval:1; /* Quirk for devices that need an extra flush */
u8 ats_qdep;
struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
struct intel_iommu *iommu; /* IOMMU used by this device */
@@ -726,6 +726,9 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u32 pasid, u16 qdep, u64 addr,
unsigned int size_order);
+void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
+ unsigned long address, unsigned long mask,
+ u32 pasid, u16 qdep);
void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
u32 pasid);
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 5962bb5027d0..a723f53ba472 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -82,6 +82,7 @@ static const struct irq_domain_ops intel_ir_domain_ops;
static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
static int __init parse_ioapics_under_ir(void);
+static const struct msi_parent_ops dmar_msi_parent_ops, virt_dmar_msi_parent_ops;
static bool ir_pre_enabled(struct intel_iommu *iommu)
{
@@ -230,7 +231,7 @@ static struct irq_domain *map_dev_to_ir(struct pci_dev *dev)
{
struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(dev);
- return drhd ? drhd->iommu->ir_msi_domain : NULL;
+ return drhd ? drhd->iommu->ir_domain : NULL;
}
static int clear_entries(struct irq_2_iommu *irq_iommu)
@@ -573,10 +574,14 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
goto out_free_fwnode;
}
- iommu->ir_msi_domain =
- arch_create_remap_msi_irq_domain(iommu->ir_domain,
- "INTEL-IR-MSI",
- iommu->seq_id);
+
+ irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR);
+ iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+
+ if (cap_caching_mode(iommu->cap))
+ iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops;
+ else
+ iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops;
ir_table->base = page_address(pages);
ir_table->bitmap = bitmap;
@@ -620,9 +625,6 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
return 0;
out_free_ir_domain:
- if (iommu->ir_msi_domain)
- irq_domain_remove(iommu->ir_msi_domain);
- iommu->ir_msi_domain = NULL;
irq_domain_remove(iommu->ir_domain);
iommu->ir_domain = NULL;
out_free_fwnode:
@@ -644,13 +646,6 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
struct fwnode_handle *fn;
if (iommu && iommu->ir_table) {
- if (iommu->ir_msi_domain) {
- fn = iommu->ir_msi_domain->fwnode;
-
- irq_domain_remove(iommu->ir_msi_domain);
- irq_domain_free_fwnode(fn);
- iommu->ir_msi_domain = NULL;
- }
if (iommu->ir_domain) {
fn = iommu->ir_domain->fwnode;
@@ -1107,7 +1102,7 @@ error:
*/
void intel_irq_remap_add_device(struct dmar_pci_notify_info *info)
{
- if (!irq_remapping_enabled || pci_dev_has_special_msi_domain(info->dev))
+ if (!irq_remapping_enabled || !pci_dev_has_default_msi_parent_domain(info->dev))
return;
dev_set_msi_domain(&info->dev->dev, map_dev_to_ir(info->dev));
@@ -1334,17 +1329,9 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,
if (!info || !iommu)
return -EINVAL;
- if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI &&
- info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX)
+ if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI)
return -EINVAL;
- /*
- * With IRQ remapping enabled, don't need contiguous CPU vectors
- * to support multiple MSI interrupts.
- */
- if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI)
- info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
-
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret < 0)
return ret;
@@ -1445,6 +1432,21 @@ static const struct irq_domain_ops intel_ir_domain_ops = {
.deactivate = intel_irq_remapping_deactivate,
};
+static const struct msi_parent_ops dmar_msi_parent_ops = {
+ .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED |
+ MSI_FLAG_MULTI_PCI_MSI |
+ MSI_FLAG_PCI_IMS,
+ .prefix = "IR-",
+ .init_dev_msi_info = msi_parent_init_dev_msi_info,
+};
+
+static const struct msi_parent_ops virt_dmar_msi_parent_ops = {
+ .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED |
+ MSI_FLAG_MULTI_PCI_MSI,
+ .prefix = "vIR-",
+ .init_dev_msi_info = msi_parent_init_dev_msi_info,
+};
+
/*
* Support of Interrupt Remapping Unit Hotplug
*/
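The device-side hookup is intel_irq_remap_add_device() above: once the remapping domain is an MSI parent, pointing a device at it is a single dev_set_msi_domain() call. A minimal sketch (example_set_ir_parent() is a hypothetical name):

static void example_set_ir_parent(struct pci_dev *pdev,
				  struct irq_domain *ir_domain)
{
	/* Leave devices alone if they already carry a non-default MSI
	 * parent, e.g. devices behind a VMD bridge. */
	if (!pci_dev_has_default_msi_parent_domain(pdev))
		return;

	dev_set_msi_domain(&pdev->dev, ir_domain);
}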
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index f32de15da61a..c76b66263467 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -184,10 +184,13 @@ static void __flush_svm_range_dev(struct intel_svm *svm,
return;
qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
- if (info->ats_enabled)
+ if (info->ats_enabled) {
qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
svm->pasid, sdev->qdep, address,
order_base_2(pages));
+ quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
+ svm->pasid, sdev->qdep);
+ }
}
static void intel_flush_svm_range_dev(struct intel_svm *svm,
@@ -728,12 +731,16 @@ bad_req:
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
- if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req))
- handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ if (!pdev)
+ goto bad_req;
- trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
- req->priv_data[0], req->priv_data[1],
- iommu->prq_seq_number++);
+ if (intel_svm_prq_report(iommu, &pdev->dev, req))
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ else
+ trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
+ req->priv_data[0], req->priv_data[1],
+ iommu->prq_seq_number++);
+ pci_dev_put(pdev);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
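The refcount fix in the page-request path follows the standard pci_get_*()/pci_dev_put() pairing: the lookup takes a reference that must be dropped on every exit path, including the bad-request one. A minimal sketch, with report() passed in as a stand-in for the real reporting logic:

static void example_prq_report(u16 segment, u8 bus, u8 devfn,
			       void (*report)(struct device *))
{
	struct pci_dev *pdev;

	pdev = pci_get_domain_bus_and_slot(segment, bus, devfn);
	if (!pdev)
		return;			/* lookup failed, nothing to put */

	report(&pdev->dev);
	pci_dev_put(pdev);		/* drop the lookup's reference */
}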
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index c77177229648..1e1d3509efae 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -711,12 +711,9 @@ static void pfn_reader_user_init(struct pfn_reader_user *user,
user->upages_end = 0;
user->locked = -1;
- if (pages->writable) {
- user->gup_flags = FOLL_LONGTERM | FOLL_WRITE;
- } else {
- /* Still need to break COWs on read */
- user->gup_flags = FOLL_LONGTERM | FOLL_FORCE | FOLL_WRITE;
- }
+ user->gup_flags = FOLL_LONGTERM;
+ if (pages->writable)
+ user->gup_flags |= FOLL_WRITE;
}
static void pfn_reader_user_destroy(struct pfn_reader_user *user,
@@ -786,13 +783,9 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
mmap_read_lock(pages->source_mm);
user->locked = 1;
}
- /*
- * FIXME: last NULL can be &pfns->locked once the GUP patch
- * is merged.
- */
rc = pin_user_pages_remote(pages->source_mm, uptr, npages,
user->gup_flags, user->upages, NULL,
- NULL);
+ &user->locked);
}
if (rc <= 0) {
if (WARN_ON(!rc))
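Two independent simplifications land in pages.c: read-only pins no longer need FOLL_FORCE|FOLL_WRITE to break COW, and pin_user_pages_remote() now takes a locked pointer so GUP can report that it temporarily dropped the mmap lock. A minimal sketch under those assumptions (example_pin() is illustrative; the argument order matches the call in the diff, including the NULL vmas slot):

static long example_pin(struct mm_struct *mm, unsigned long uptr,
			unsigned long npages, bool writable,
			struct page **pages)
{
	unsigned int gup_flags = FOLL_LONGTERM;
	int locked = 1;			/* caller holds mmap_read_lock(mm) */
	long rc;

	if (writable)
		gup_flags |= FOLL_WRITE;

	rc = pin_user_pages_remote(mm, uptr, npages, gup_flags,
				   pages, NULL /* vmas */, &locked);
	if (!locked)
		mmap_read_lock(mm);	/* GUP dropped it; retake for caller */
	/* On success, release the pins later with unpin_user_pages(). */
	return rc;
}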
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 5696314ae69e..00d98f08732f 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -9,7 +9,6 @@
#include <linux/iommu.h>
#include <linux/limits.h>
#include <linux/module.h>
-#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_iommu.h>
#include <linux/of_pci.h>