Diffstat (limited to 'drivers/iommu')
-rw-r--r--   drivers/iommu/Kconfig                  19
-rw-r--r--   drivers/iommu/amd_iommu.c              12
-rw-r--r--   drivers/iommu/amd_iommu_init.c         79
-rw-r--r--   drivers/iommu/amd_iommu_types.h         7
-rw-r--r--   drivers/iommu/arm-smmu-v3.c            11
-rw-r--r--   drivers/iommu/arm-smmu.c               11
-rw-r--r--   drivers/iommu/dma-iommu.c               3
-rw-r--r--   drivers/iommu/dmar.c                   44
-rw-r--r--   drivers/iommu/intel-iommu-debugfs.c    75
-rw-r--r--   drivers/iommu/intel-iommu.c           391
-rw-r--r--   drivers/iommu/intel-pasid.c            97
-rw-r--r--   drivers/iommu/intel-pasid.h             6
-rw-r--r--   drivers/iommu/intel-svm.c             171
-rw-r--r--   drivers/iommu/iommu.c                  20
-rw-r--r--   drivers/iommu/iova.c                    2
-rw-r--r--   drivers/iommu/virtio-iommu.c           14
16 files changed, 633 insertions, 329 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d66ace717cf4..d2fade984999 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -82,7 +82,7 @@ config IOMMU_DEBUGFS
config IOMMU_DEFAULT_PASSTHROUGH
bool "IOMMU passthrough by default"
depends on IOMMU_API
- help
+ help
Enable passthrough by default, removing the need to pass in
iommu.passthrough=on or iommu=pt through command line. If this
is enabled, you can still disable with iommu.passthrough=off
@@ -91,8 +91,8 @@ config IOMMU_DEFAULT_PASSTHROUGH
If unsure, say N here.
config OF_IOMMU
- def_bool y
- depends on OF && IOMMU_API
+ def_bool y
+ depends on OF && IOMMU_API
# IOMMU-agnostic DMA-mapping layer
config IOMMU_DMA
@@ -214,6 +214,7 @@ config INTEL_IOMMU_SVM
select PCI_PASID
select PCI_PRI
select MMU_NOTIFIER
+ select IOASID
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by
@@ -248,6 +249,18 @@ config INTEL_IOMMU_FLOPPY_WA
workaround will setup a 1:1 mapping for the first
16MiB to make floppy (an ISA device) work.
+config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
+ bool "Enable Intel IOMMU scalable mode by default"
+ depends on INTEL_IOMMU
+ help
+ Selecting this option will enable scalable mode by default if the
+ hardware presents the capability. Scalable mode is defined in
+ VT-d 3.0. The scalable mode capability can be checked by reading
+ /sys/devices/virtual/iommu/dmar*/intel-iommu/ecap. If this option
+ is not selected, scalable mode support can still be enabled by
+ passing intel_iommu=sm_on to the kernel. If unsure, use the
+ default value.
+
config IRQ_REMAP
bool "Support for Interrupt Remapping"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI
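Note (not part of the patch): the help text above describes checking the scalable mode capability through sysfs. A minimal userspace sketch of that check is shown below; it assumes the SMTS (scalable mode translation support) field sits at bit 43 of the extended capability register, matching the kernel's ecap_smts() definition, and the dmar0 path is only an example of the dmar* pattern from the help text.

/* check_sm.c - hedged sketch: report whether an IOMMU advertises scalable mode */
#include <stdio.h>
#include <inttypes.h>

#define ECAP_SMTS_BIT 43	/* assumption: SMTS bit position per VT-d 3.0 */

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] :
		"/sys/devices/virtual/iommu/dmar0/intel-iommu/ecap";
	FILE *f = fopen(path, "r");
	uint64_t ecap;

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%" SCNx64, &ecap) != 1) {
		fprintf(stderr, "%s: unexpected format\n", path);
		fclose(f);
		return 1;
	}
	fclose(f);

	/* sysfs prints the raw ecap value in hex; test the SMTS bit */
	printf("%s: ecap=%#" PRIx64 ", scalable mode %ssupported\n",
	       path, ecap, (ecap >> ECAP_SMTS_BIT) & 1 ? "" : "not ");
	return 0;
}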
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index bd25674ee4db..60538e9df8a6 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2297,7 +2297,6 @@ int __init amd_iommu_init_api(void)
int __init amd_iommu_init_dma_ops(void)
{
swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
- iommu_detected = 1;
if (amd_iommu_unmap_flush)
pr_info("IO/TLB flush on unmap enabled\n");
@@ -2641,15 +2640,6 @@ static void amd_iommu_get_resv_regions(struct device *dev,
list_add_tail(&region->list, head);
}
-static void amd_iommu_put_resv_regions(struct device *dev,
- struct list_head *head)
-{
- struct iommu_resv_region *entry, *next;
-
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
-}
-
static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
struct device *dev)
{
@@ -2688,7 +2678,7 @@ const struct iommu_ops amd_iommu_ops = {
.device_group = amd_iommu_device_group,
.domain_get_attr = amd_iommu_domain_get_attr,
.get_resv_regions = amd_iommu_get_resv_regions,
- .put_resv_regions = amd_iommu_put_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 568c52317757..587ed1323c86 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -71,6 +71,8 @@
#define IVHD_FLAG_ISOC_EN_MASK 0x08
#define IVMD_FLAG_EXCL_RANGE 0x08
+#define IVMD_FLAG_IW 0x04
+#define IVMD_FLAG_IR 0x02
#define IVMD_FLAG_UNITY_MAP 0x01
#define ACPI_DEVFLAG_INITPASS 0x01
@@ -147,7 +149,7 @@ bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
-static int amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
+static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
static bool amd_iommu_detected;
static bool __initdata amd_iommu_disabled;
@@ -714,7 +716,7 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu)
writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
- iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
+ iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
iommu_feature_enable(iommu, CONTROL_PPR_EN);
}
@@ -1116,21 +1118,17 @@ static int __init add_early_maps(void)
*/
static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
{
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
-
if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
return;
- if (iommu) {
- /*
- * We only can configure exclusion ranges per IOMMU, not
- * per device. But we can enable the exclusion range per
- * device. This is done here
- */
- set_dev_entry_bit(devid, DEV_ENTRY_EX);
- iommu->exclusion_start = m->range_start;
- iommu->exclusion_length = m->range_length;
- }
+ /*
+ * Treat per-device exclusion ranges as r/w unity-mapped regions
+ * since some buggy BIOSes overwrite the exclusion range (the
+ * exclusion_start and exclusion_length members) when multiple
+ * exclusion ranges (IVMD entries) are defined in the ACPI table.
+ */
+ m->flags = (IVMD_FLAG_IW | IVMD_FLAG_IR | IVMD_FLAG_UNITY_MAP);
}
/*
@@ -1523,8 +1521,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
- if (((h->efr_attr & (0x1 << IOMMU_FEAT_XTSUP_SHIFT)) == 0))
- amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
break;
case 0x11:
case 0x40:
@@ -1534,8 +1530,15 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0))
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
- if (((h->efr_reg & (0x1 << IOMMU_EFR_XTSUP_SHIFT)) == 0))
- amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
+ /*
+ * Note: Since iommu_update_intcapxt() leverages
+ * the IOMMU MMIO access to MSI capability block registers
+ * for MSI address lo/hi/data, we need to check both
+ * EFR[XtSup] and EFR[MsiCapMmioSup] for x2APIC support.
+ */
+ if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) &&
+ (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT)))
+ amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
break;
default:
return -EINVAL;
@@ -1655,27 +1658,39 @@ static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
static void init_iommu_perf_ctr(struct amd_iommu *iommu)
{
struct pci_dev *pdev = iommu->dev;
- u64 val = 0xabcd, val2 = 0;
+ u64 val = 0xabcd, val2 = 0, save_reg = 0;
if (!iommu_feature(iommu, FEATURE_PC))
return;
amd_iommu_pc_present = true;
+ /* save the value to restore, if writable */
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false))
+ goto pc_false;
+
/* Check if the performance counters can be written to */
if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
(iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
- (val != val2)) {
- pci_err(pdev, "Unable to write to IOMMU perf counter.\n");
- amd_iommu_pc_present = false;
- return;
- }
+ (val != val2))
+ goto pc_false;
+
+ /* restore */
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true))
+ goto pc_false;
pci_info(pdev, "IOMMU performance counters supported\n");
val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
iommu->max_banks = (u8) ((val >> 12) & 0x3f);
iommu->max_counters = (u8) ((val >> 7) & 0xf);
+
+ return;
+
+pc_false:
+ pci_err(pdev, "Unable to read/write to IOMMU perf counter.\n");
+ amd_iommu_pc_present = false;
+ return;
}
static ssize_t amd_iommu_show_cap(struct device *dev,
@@ -1715,7 +1730,6 @@ static const struct attribute_group *amd_iommu_groups[] = {
static int __init iommu_init_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
- u32 range, misc, low, high;
int ret;
iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid),
@@ -1728,19 +1742,12 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
&iommu->cap);
- pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
- &range);
- pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
- &misc);
if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
amd_iommu_iotlb_sup = false;
/* read extended feature bits */
- low = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
- high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
-
- iommu->features = ((u64)high << 32) | low;
+ iommu->features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
if (iommu_feature(iommu, FEATURE_GT)) {
int glxval;
@@ -1984,8 +1991,8 @@ static int iommu_init_intcapxt(struct amd_iommu *iommu)
struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
/**
- * IntCapXT requires XTSup=1, which can be inferred
- * amd_iommu_xt_mode.
+ * IntCapXT requires XTSup=1 and MsiCapMmioSup=1,
+ * which can be inferred from amd_iommu_xt_mode.
*/
if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
return 0;
@@ -2032,7 +2039,7 @@ enable_faults:
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
if (iommu->ppr_log != NULL)
- iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
+ iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
iommu_ga_log_enable(iommu);
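Note (not part of the patch): the init_iommu_perf_ctr() change above switches the probe to a save / test-write / read-back / restore sequence so it no longer clobbers a counter that firmware may already be using. A self-contained sketch of that pattern follows, with a plain array standing in for the MMIO counter bank purely for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t fake_bank[4];	/* stands in for the MMIO counter registers */

static int reg_read(unsigned int idx, uint64_t *val)
{
	if (idx >= 4)
		return -1;
	*val = fake_bank[idx];
	return 0;
}

static int reg_write(unsigned int idx, uint64_t val)
{
	if (idx >= 4)
		return -1;
	fake_bank[idx] = val;
	return 0;
}

/* Probe writability without losing the current contents of the register. */
static bool counter_is_writable(unsigned int idx)
{
	uint64_t saved, probe = 0xabcd, readback = 0;

	if (reg_read(idx, &saved))		/* save */
		return false;
	if (reg_write(idx, probe) ||		/* test write */
	    reg_read(idx, &readback) ||		/* read back */
	    readback != probe)
		return false;
	return reg_write(idx, saved) == 0;	/* restore */
}

int main(void)
{
	printf("counter 0 writable: %s\n",
	       counter_is_writable(0) ? "yes" : "no");
	return 0;
}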
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index f52f59d5c6bd..f8d01d6b00da 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -147,8 +147,8 @@
#define CONTROL_COHERENT_EN 0x0aULL
#define CONTROL_ISOC_EN 0x0bULL
#define CONTROL_CMDBUF_EN 0x0cULL
-#define CONTROL_PPFLOG_EN 0x0dULL
-#define CONTROL_PPFINT_EN 0x0eULL
+#define CONTROL_PPRLOG_EN 0x0dULL
+#define CONTROL_PPRINT_EN 0x0eULL
#define CONTROL_PPR_EN 0x0fULL
#define CONTROL_GT_EN 0x10ULL
#define CONTROL_GA_EN 0x11ULL
@@ -377,12 +377,12 @@
#define IOMMU_CAP_EFR 27
/* IOMMU Feature Reporting Field (for IVHD type 10h */
-#define IOMMU_FEAT_XTSUP_SHIFT 0
#define IOMMU_FEAT_GASUP_SHIFT 6
/* IOMMU Extended Feature Register (EFR) */
#define IOMMU_EFR_XTSUP_SHIFT 2
#define IOMMU_EFR_GASUP_SHIFT 7
+#define IOMMU_EFR_MSICAPMMIOSUP_SHIFT 46
#define MAX_DOMAIN_ID 65536
@@ -463,7 +463,6 @@ struct amd_irte_ops;
* independent of their use.
*/
struct protection_domain {
- struct list_head list; /* for list of all protection domains */
struct list_head dev_list; /* List of all devices in this domain */
struct iommu_domain domain; /* generic domain handle used by
iommu core code */
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 5e04c1f3992a..aa3ac2a03807 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2985,15 +2985,6 @@ static void arm_smmu_get_resv_regions(struct device *dev,
iommu_dma_get_resv_regions(dev, head);
}
-static void arm_smmu_put_resv_regions(struct device *dev,
- struct list_head *head)
-{
- struct iommu_resv_region *entry, *next;
-
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
-}
-
static struct iommu_ops arm_smmu_ops = {
.capable = arm_smmu_capable,
.domain_alloc = arm_smmu_domain_alloc,
@@ -3011,7 +3002,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_set_attr = arm_smmu_domain_set_attr,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .put_resv_regions = arm_smmu_put_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
};
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index cca94c30b301..16c4b87af42b 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1615,15 +1615,6 @@ static void arm_smmu_get_resv_regions(struct device *dev,
iommu_dma_get_resv_regions(dev, head);
}
-static void arm_smmu_put_resv_regions(struct device *dev,
- struct list_head *head)
-{
- struct iommu_resv_region *entry, *next;
-
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
-}
-
static struct iommu_ops arm_smmu_ops = {
.capable = arm_smmu_capable,
.domain_alloc = arm_smmu_domain_alloc,
@@ -1641,7 +1632,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_set_attr = arm_smmu_domain_set_attr,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .put_resv_regions = arm_smmu_put_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
};
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index c363294b3bb9..a2e96a5fd9a7 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1203,7 +1203,6 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
{
struct device *dev = msi_desc_to_dev(desc);
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct iommu_dma_cookie *cookie;
struct iommu_dma_msi_page *msi_page;
static DEFINE_MUTEX(msi_prepare_lock); /* see below */
@@ -1212,8 +1211,6 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
return 0;
}
- cookie = domain->iova_cookie;
-
/*
* In fact the whole prepare operation should already be serialised by
* irq_domain_mutex further up the callchain, but that's pretty subtle
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 3acfa6a25fa2..071bb42bbbc5 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -244,7 +244,7 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
(scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
(info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
- info->dev->class >> 8 != PCI_CLASS_BRIDGE_OTHER))) {
+ info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
pr_warn("Device scope type does not match for %s\n",
pci_name(info->dev));
return -EINVAL;
@@ -1354,7 +1354,6 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
struct qi_desc desc;
if (mask) {
- WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1));
addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
} else
@@ -1371,6 +1370,47 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
qi_submit_sync(&desc, iommu);
}
+/* PASID-based IOTLB invalidation */
+void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
+ unsigned long npages, bool ih)
+{
+ struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
+
+ /*
+ * npages == -1 means a PASID-selective invalidation, otherwise,
+ * a positive value for Page-selective-within-PASID invalidation.
+ * 0 is not a valid input.
+ */
+ if (WARN_ON(!npages)) {
+ pr_err("Invalid input npages = %ld\n", npages);
+ return;
+ }
+
+ if (npages == -1) {
+ desc.qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc.qw1 = 0;
+ } else {
+ int mask = ilog2(__roundup_pow_of_two(npages));
+ unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
+
+ if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
+ addr &= ~(align - 1);
+
+ desc.qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
+ QI_EIOTLB_TYPE;
+ desc.qw1 = QI_EIOTLB_ADDR(addr) |
+ QI_EIOTLB_IH(ih) |
+ QI_EIOTLB_AM(mask);
+ }
+
+ qi_submit_sync(&desc, iommu);
+}
+
/*
* Disable Queued Invalidation interface.
*/
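Note (not part of the patch): for the page-selective case in qi_flush_piotlb() above, the address-mask encoding rounds npages up to a power of two and aligns the base address to the resulting span. The standalone sketch below reproduces that computation, reimplementing the kernel's __roundup_pow_of_two()/ilog2() with compiler builtins since the kernel helpers are not available in userspace.

#include <stdint.h>
#include <stdio.h>

#define VTD_PAGE_SHIFT 12

/* userspace stand-ins for the kernel's __roundup_pow_of_two()/ilog2() */
static unsigned long roundup_pow_of_two(unsigned long n)
{
	return n <= 1 ? 1 : 1UL << (64 - __builtin_clzl(n - 1));
}

static int ilog2_ul(unsigned long n)
{
	return 63 - __builtin_clzl(n);
}

int main(void)
{
	unsigned long npages = 9;	/* example request */
	uint64_t addr = 0x12345000;	/* example IOVA */

	int mask = ilog2_ul(roundup_pow_of_two(npages));
	uint64_t align = 1ULL << (VTD_PAGE_SHIFT + mask);

	/* npages=9 rounds up to 16 pages, so mask=4 and addr must be 64 KiB aligned */
	printf("npages=%lu -> mask=%d, invalidation covers %llu pages\n",
	       npages, mask, 1ULL << mask);
	printf("addr %#llx aligned down to %#llx (alignment %#llx)\n",
	       (unsigned long long)addr,
	       (unsigned long long)(addr & ~(align - 1)),
	       (unsigned long long)align);
	return 0;
}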
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
index 471f05d452e0..c1257bef553c 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -5,6 +5,7 @@
* Authors: Gayatri Kammela <gayatri.kammela@intel.com>
* Sohil Mehta <sohil.mehta@intel.com>
* Jacob Pan <jacob.jun.pan@linux.intel.com>
+ * Lu Baolu <baolu.lu@linux.intel.com>
*/
#include <linux/debugfs.h>
@@ -283,6 +284,77 @@ static int dmar_translation_struct_show(struct seq_file *m, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(dmar_translation_struct);
+static inline unsigned long level_to_directory_size(int level)
+{
+ return BIT_ULL(VTD_PAGE_SHIFT + VTD_STRIDE_SHIFT * (level - 1));
+}
+
+static inline void
+dump_page_info(struct seq_file *m, unsigned long iova, u64 *path)
+{
+ seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n",
+ iova >> VTD_PAGE_SHIFT, path[5], path[4],
+ path[3], path[2], path[1]);
+}
+
+static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
+ int level, unsigned long start,
+ u64 *path)
+{
+ int i;
+
+ if (level > 5 || level < 1)
+ return;
+
+ for (i = 0; i < BIT_ULL(VTD_STRIDE_SHIFT);
+ i++, pde++, start += level_to_directory_size(level)) {
+ if (!dma_pte_present(pde))
+ continue;
+
+ path[level] = pde->val;
+ if (dma_pte_superpage(pde) || level == 1)
+ dump_page_info(m, start, path);
+ else
+ pgtable_walk_level(m, phys_to_virt(dma_pte_addr(pde)),
+ level - 1, start, path);
+ path[level] = 0;
+ }
+}
+
+static int show_device_domain_translation(struct device *dev, void *data)
+{
+ struct dmar_domain *domain = find_domain(dev);
+ struct seq_file *m = data;
+ u64 path[6] = { 0 };
+
+ if (!domain)
+ return 0;
+
+ seq_printf(m, "Device %s with pasid %d @0x%llx\n",
+ dev_name(dev), domain->default_pasid,
+ (u64)virt_to_phys(domain->pgd));
+ seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
+
+ pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
+ seq_putc(m, '\n');
+
+ return 0;
+}
+
+static int domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ ret = bus_for_each_dev(&pci_bus_type, NULL, m,
+ show_device_domain_translation);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return ret;
+}
+DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
+
#ifdef CONFIG_IRQ_REMAP
static void ir_tbl_remap_entry_show(struct seq_file *m,
struct intel_iommu *iommu)
@@ -396,6 +468,9 @@ void __init intel_iommu_debugfs_init(void)
&iommu_regset_fops);
debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug,
NULL, &dmar_translation_struct_fops);
+ debugfs_create_file("domain_translation_struct", 0444,
+ intel_iommu_debug, NULL,
+ &domain_translation_struct_fops);
#ifdef CONFIG_IRQ_REMAP
debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
NULL, &ir_translation_struct_fops);
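Note (not part of the patch): level_to_directory_size() in the debugfs walker above gives the IOVA span covered by a single entry at each page-table level (4 KiB base pages, 9-bit stride per level). A tiny standalone check of those spans:

#include <stdio.h>
#include <stdint.h>

#define VTD_PAGE_SHIFT   12
#define VTD_STRIDE_SHIFT 9

static uint64_t level_to_directory_size(int level)
{
	return 1ULL << (VTD_PAGE_SHIFT + VTD_STRIDE_SHIFT * (level - 1));
}

int main(void)
{
	/* level 1 = PTE, 2 = PDE, 3 = PDPE, 4 = PML4E, 5 = PML5E */
	for (int level = 1; level <= 5; level++)
		printf("level %d entry spans %llu KiB\n", level,
		       (unsigned long long)(level_to_directory_size(level) >> 10));
	return 0;
}

Running it prints 4 KiB, 2048 KiB (2 MiB), 1 GiB, 512 GiB and 256 TiB, which is why the dump stops descending at level 1 or at a superpage entry.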
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 42966611a192..64ddccf1d5fe 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -307,6 +307,20 @@ static int hw_pass_through = 1;
*/
#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
+/*
+ * When VT-d works in the scalable mode, it allows DMA translation to
+ * happen through either first level or second level page table. This
+ * bit marks that the DMA translation for the domain goes through the
+ * first level page table, otherwise, it goes through the second level.
+ */
+#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2)
+
+/*
+ * Domain represents a virtual machine which demands iommu nested
+ * translation mode support.
+ */
+#define DOMAIN_FLAG_NESTING_MODE BIT(3)
+
#define for_each_domain_iommu(idx, domain) \
for (idx = 0; idx < g_num_of_iommus; idx++) \
if (domain->iommu_refcnt[idx])
@@ -355,9 +369,14 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
-#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
+#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
+#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
+int intel_iommu_sm = 1;
+#else
int intel_iommu_sm;
+#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
+
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
@@ -368,7 +387,6 @@ static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int intel_no_bounce;
-#define IDENTMAP_ALL 1
#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
@@ -377,7 +395,7 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
-static DEFINE_SPINLOCK(device_domain_lock);
+DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
@@ -552,6 +570,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}
+static inline bool domain_use_first_level(struct dmar_domain *domain)
+{
+ return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
+}
+
static inline int domain_pfn_supported(struct dmar_domain *domain,
unsigned long pfn)
{
@@ -661,11 +684,12 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
return ret;
}
-static int domain_update_iommu_superpage(struct intel_iommu *skip)
+static int domain_update_iommu_superpage(struct dmar_domain *domain,
+ struct intel_iommu *skip)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- int mask = 0xf;
+ int mask = 0x3;
if (!intel_iommu_superpage) {
return 0;
@@ -675,7 +699,13 @@ static int domain_update_iommu_superpage(struct intel_iommu *skip)
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (iommu != skip) {
- mask &= cap_super_page_val(iommu->cap);
+ if (domain && domain_use_first_level(domain)) {
+ if (!cap_fl1gp_support(iommu->cap))
+ mask = 0x1;
+ } else {
+ mask &= cap_super_page_val(iommu->cap);
+ }
+
if (!mask)
break;
}
@@ -690,7 +720,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain->iommu_snooping = domain_update_iommu_snooping(NULL);
- domain->iommu_superpage = domain_update_iommu_superpage(NULL);
+ domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
}
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
@@ -913,6 +943,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+ if (domain_use_first_level(domain))
+ pteval |= DMA_FL_PTE_XD;
if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
@@ -1483,6 +1515,20 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
spin_unlock_irqrestore(&device_domain_lock, flags);
}
+static void domain_flush_piotlb(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ u64 addr, unsigned long npages, bool ih)
+{
+ u16 did = domain->iommu_did[iommu->seq_id];
+
+ if (domain->default_pasid)
+ qi_flush_piotlb(iommu, did, domain->default_pasid,
+ addr, npages, ih);
+
+ if (!list_empty(&domain->devices))
+ qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
+}
+
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
struct dmar_domain *domain,
unsigned long pfn, unsigned int pages,
@@ -1496,18 +1542,23 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
if (ih)
ih = 1 << 6;
- /*
- * Fallback to domain selective flush if no PSI support or the size is
- * too big.
- * PSI requires page size to be 2 ^ x, and the base address is naturally
- * aligned to the size
- */
- if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
- iommu->flush.flush_iotlb(iommu, did, 0, 0,
- DMA_TLB_DSI_FLUSH);
- else
- iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
- DMA_TLB_PSI_FLUSH);
+
+ if (domain_use_first_level(domain)) {
+ domain_flush_piotlb(iommu, domain, addr, pages, ih);
+ } else {
+ /*
+ * Fallback to domain selective flush if no PSI support or
+ * the size is too big. PSI requires page size to be 2 ^ x,
+ * and the base address is naturally aligned to the size.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap))
+ iommu->flush.flush_iotlb(iommu, did, 0, 0,
+ DMA_TLB_DSI_FLUSH);
+ else
+ iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
+ DMA_TLB_PSI_FLUSH);
+ }
/*
* In caching mode, changes of pages from non-present to present require
@@ -1522,8 +1573,11 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu,
struct dmar_domain *domain,
unsigned long pfn, unsigned int pages)
{
- /* It's a non-present to present mapping. Only flush if caching mode */
- if (cap_caching_mode(iommu->cap))
+ /*
+ * It's a non-present to present mapping. Only flush if caching mode
+ * and second level.
+ */
+ if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
else
iommu_flush_write_buffer(iommu);
@@ -1540,7 +1594,11 @@ static void iommu_flush_iova(struct iova_domain *iovad)
struct intel_iommu *iommu = g_iommus[idx];
u16 did = domain->iommu_did[iommu->seq_id];
- iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+ if (domain_use_first_level(domain))
+ domain_flush_piotlb(iommu, domain, 0, -1, 0);
+ else
+ iommu->flush.flush_iotlb(iommu, did, 0, 0,
+ DMA_TLB_DSI_FLUSH);
if (!cap_caching_mode(iommu->cap))
iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
@@ -1709,6 +1767,33 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
#endif
}
+/*
+ * Check and return whether first level is used by default for
+ * DMA translation.
+ */
+static bool first_level_by_default(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ static int first_level_support = -1;
+
+ if (likely(first_level_support != -1))
+ return first_level_support;
+
+ first_level_support = 1;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
+ first_level_support = 0;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return first_level_support;
+}
+
static struct dmar_domain *alloc_domain(int flags)
{
struct dmar_domain *domain;
@@ -1720,6 +1805,8 @@ static struct dmar_domain *alloc_domain(int flags)
memset(domain, 0, sizeof(*domain));
domain->nid = NUMA_NO_NODE;
domain->flags = flags;
+ if (first_level_by_default())
+ domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
@@ -1849,14 +1936,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
{
int adjust_width, agaw;
unsigned long sagaw;
- int err;
+ int ret;
init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
- err = init_iova_flush_queue(&domain->iovad,
- iommu_flush_iova, iova_entry_free);
- if (err)
- return err;
+ if (!intel_iommu_strict) {
+ ret = init_iova_flush_queue(&domain->iovad,
+ iommu_flush_iova, iova_entry_free);
+ if (ret)
+ pr_info("iova flush queue initialization failed\n");
+ }
domain_reserve_special_ranges(domain);
@@ -2229,17 +2318,20 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+ u64 attr;
BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
- prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+ attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
+ if (domain_use_first_level(domain))
+ attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD;
if (!sg) {
sg_res = nr_pages;
- pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
+ pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
}
while (nr_pages > 0) {
@@ -2251,7 +2343,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
sg_res = aligned_nrpages(sg->offset, sg->length);
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
sg->dma_length = sg->length;
- pteval = (sg_phys(sg) - pgoff) | prot;
+ pteval = (sg_phys(sg) - pgoff) | attr;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
@@ -2420,7 +2512,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
spin_unlock_irqrestore(&device_domain_lock, flags);
}
-static struct dmar_domain *find_domain(struct device *dev)
+struct dmar_domain *find_domain(struct device *dev)
{
struct device_domain_info *info;
@@ -2463,6 +2555,36 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
return NULL;
}
+static int domain_setup_first_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev,
+ int pasid)
+{
+ int flags = PASID_FLAG_SUPERVISOR_MODE;
+ struct dma_pte *pgd = domain->pgd;
+ int agaw, level;
+
+ /*
+ * Skip top levels of page tables for iommu which has
+ * less agaw than default. Unnecessary for PT mode.
+ */
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd))
+ return -ENOMEM;
+ }
+
+ level = agaw_to_level(agaw);
+ if (level != 4 && level != 5)
+ return -EINVAL;
+
+ flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
+
+ return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
+ domain->iommu_did[iommu->seq_id],
+ flags);
+}
+
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
int bus, int devfn,
struct device *dev,
@@ -2562,6 +2684,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
+ else if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ PASID_RID2PASID);
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
@@ -2767,10 +2892,8 @@ static int __init si_domain_init(int hw)
}
/*
- * Normally we use DMA domains for devices which have RMRRs. But we
- * loose this requirement for graphic and usb devices. Identity map
- * the RMRRs for graphic and USB devices so that they could use the
- * si_domain.
+ * Identity map the RMRRs so that devices with RMRRs could also use
+ * the si_domain.
*/
for_each_rmrr_units(rmrr) {
for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
@@ -2778,9 +2901,6 @@ static int __init si_domain_init(int hw)
unsigned long long start = rmrr->base_address;
unsigned long long end = rmrr->end_address;
- if (device_is_rmrr_locked(dev))
- continue;
-
if (WARN_ON(end < start ||
end >> agaw_to_width(si_domain->agaw)))
continue;
@@ -2919,9 +3039,6 @@ static int device_def_domain_type(struct device *dev)
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
- if (device_is_rmrr_locked(dev))
- return IOMMU_DOMAIN_DMA;
-
/*
* Prevent any device marked as untrusted from getting
* placed into the statically identity mapping domain.
@@ -2959,13 +3076,9 @@ static int device_def_domain_type(struct device *dev)
return IOMMU_DOMAIN_DMA;
} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
return IOMMU_DOMAIN_DMA;
- } else {
- if (device_has_rmrr(dev))
- return IOMMU_DOMAIN_DMA;
}
- return (iommu_identity_mapping & IDENTMAP_ALL) ?
- IOMMU_DOMAIN_IDENTITY : 0;
+ return 0;
}
static void intel_iommu_init_qi(struct intel_iommu *iommu)
@@ -3294,10 +3407,7 @@ static int __init init_dmars(void)
if (!ecap_pass_through(iommu->ecap))
hw_pass_through = 0;
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu))
- intel_svm_init(iommu);
-#endif
+ intel_svm_check(iommu);
}
/*
@@ -3312,9 +3422,6 @@ static int __init init_dmars(void)
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
- if (iommu_default_passthrough())
- iommu_identity_mapping |= IDENTMAP_ALL;
-
#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
dmar_map_gfx = 0;
#endif
@@ -3387,8 +3494,21 @@ static unsigned long intel_alloc_iova(struct device *dev,
{
unsigned long iova_pfn;
- /* Restrict dma_mask to the width that the iommu can handle */
- dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
+ /*
+ * Restrict dma_mask to the width that the iommu can handle.
+ * First-level translation restricts the input-address to a
+ * canonical address (i.e., address bits 63:N have the same
+ * value as address bit [N-1], where N is 48-bits with 4-level
+ * paging and 57-bits with 5-level paging). Hence, skip bit
+ * [N-1].
+ */
+ if (domain_use_first_level(domain))
+ dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1),
+ dma_mask);
+ else
+ dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw),
+ dma_mask);
+
/* Ensure we reserve the whole size-aligned region */
nrpages = __roundup_pow_of_two(nrpages);
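Note (not part of the patch): the canonical-address restriction described in the comment above effectively costs one bit of IOVA space under first-level translation. A small illustration of the resulting limits, using a simplified max-address computation rather than the kernel's DOMAIN_MAX_ADDR() macro:

#include <stdio.h>
#include <stdint.h>

/* simplified: highest usable IOVA for a given address width in bits */
static uint64_t max_addr(int width)
{
	return (width >= 64) ? ~0ULL : (1ULL << width) - 1;
}

int main(void)
{
	int gaw = 48;	/* example guest address width (4-level paging) */

	printf("second-level: IOVAs up to %#llx\n",
	       (unsigned long long)max_addr(gaw));
	printf("first-level (canonical): IOVAs up to %#llx\n",
	       (unsigned long long)max_addr(gaw - 1));
	return 0;
}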
@@ -3406,7 +3526,8 @@ static unsigned long intel_alloc_iova(struct device *dev,
iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
IOVA_PFN(dma_mask), true);
if (unlikely(!iova_pfn)) {
- dev_err(dev, "Allocating %ld-page iova failed", nrpages);
+ dev_err_once(dev, "Allocating %ld-page iova failed\n",
+ nrpages);
return 0;
}
@@ -3774,8 +3895,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
return 0;
}
- trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
- sg_phys(sglist), size << VTD_PAGE_SHIFT);
+ for_each_sg(sglist, sg, nelems, i)
+ trace_map_sg(dev, i + 1, nelems, sg);
return nelems;
}
@@ -3987,6 +4108,9 @@ bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
sg_dma_len(sg) = sg->length;
}
+ for_each_sg(sglist, sg, nelems, i)
+ trace_bounce_map_sg(dev, i + 1, nelems, sg);
+
return nelems;
out_unmap:
@@ -4315,16 +4439,31 @@ static void __init init_iommu_pm_ops(void)
static inline void init_iommu_pm_ops(void) {}
#endif /* CONFIG_PM */
+static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
+{
+ if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
+ !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
+ rmrr->end_address <= rmrr->base_address ||
+ arch_rmrr_sanity_check(rmrr))
+ return -EINVAL;
+
+ return 0;
+}
+
int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
{
struct acpi_dmar_reserved_memory *rmrr;
struct dmar_rmrr_unit *rmrru;
- int ret;
rmrr = (struct acpi_dmar_reserved_memory *)header;
- ret = arch_rmrr_sanity_check(rmrr);
- if (ret)
- return ret;
+ if (rmrr_sanity_check(rmrr))
+ WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
+ "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
+ "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+ rmrr->base_address, rmrr->end_address,
+ dmi_get_system_info(DMI_BIOS_VENDOR),
+ dmi_get_system_info(DMI_BIOS_VERSION),
+ dmi_get_system_info(DMI_PRODUCT_VERSION));
rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
if (!rmrru)
@@ -4470,7 +4609,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
iommu->name);
return -ENXIO;
}
- sp = domain_update_iommu_superpage(iommu) - 1;
+ sp = domain_update_iommu_superpage(NULL, iommu) - 1;
if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
pr_warn("%s: Doesn't support large page.\n",
iommu->name);
@@ -4490,10 +4629,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
if (ret)
goto out;
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu))
- intel_svm_init(iommu);
-#endif
+ intel_svm_check(iommu);
if (dmaru->ignored) {
/*
@@ -4898,7 +5034,7 @@ static int __init platform_optin_force_iommu(void)
* map for all devices except those marked as being untrusted.
*/
if (dmar_disabled)
- iommu_identity_mapping |= IDENTMAP_ALL;
+ iommu_set_default_passthrough(false);
dmar_disabled = 0;
no_iommu = 0;
@@ -5163,7 +5299,8 @@ static void dmar_remove_one_dev_info(struct device *dev)
spin_lock_irqsave(&device_domain_lock, flags);
info = dev->archdata.iommu;
- if (info)
+ if (info && info != DEFER_DEVICE_DOMAIN_INFO
+ && info != DUMMY_DEVICE_DOMAIN_INFO)
__dmar_remove_one_dev_info(info);
spin_unlock_irqrestore(&device_domain_lock, flags);
}
@@ -5197,6 +5334,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
+ int ret;
switch (type) {
case IOMMU_DOMAIN_DMA:
@@ -5213,11 +5351,12 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
return NULL;
}
- if (type == IOMMU_DOMAIN_DMA &&
- init_iova_flush_queue(&dmar_domain->iovad,
- iommu_flush_iova, iova_entry_free)) {
- pr_warn("iova flush queue initialization failed\n");
- intel_iommu_strict = 1;
+ if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
+ ret = init_iova_flush_queue(&dmar_domain->iovad,
+ iommu_flush_iova,
+ iova_entry_free);
+ if (ret)
+ pr_info("iova flush queue initialization failed\n");
}
domain_update_iommu_cap(dmar_domain);
@@ -5283,7 +5422,7 @@ static void auxiliary_unlink_device(struct dmar_domain *domain,
domain->auxd_refcnt--;
if (!domain->auxd_refcnt && domain->default_pasid > 0)
- intel_pasid_free_id(domain->default_pasid);
+ ioasid_free(domain->default_pasid);
}
static int aux_domain_add_dev(struct dmar_domain *domain,
@@ -5301,10 +5440,11 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
if (domain->default_pasid <= 0) {
int pasid;
- pasid = intel_pasid_alloc_id(domain, PASID_MIN,
- pci_max_pasids(to_pci_dev(dev)),
- GFP_KERNEL);
- if (pasid <= 0) {
+ /* No private data needed for the default pasid */
+ pasid = ioasid_alloc(NULL, PASID_MIN,
+ pci_max_pasids(to_pci_dev(dev)) - 1,
+ NULL);
+ if (pasid == INVALID_IOASID) {
pr_err("Can't allocate default pasid\n");
return -ENODEV;
}
@@ -5322,8 +5462,12 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
goto attach_failed;
/* Setup the PASID entry for mediated devices: */
- ret = intel_pasid_setup_second_level(iommu, domain, dev,
- domain->default_pasid);
+ if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ domain->default_pasid);
+ else
+ ret = intel_pasid_setup_second_level(iommu, domain, dev,
+ domain->default_pasid);
if (ret)
goto table_failed;
spin_unlock(&iommu->lock);
@@ -5340,7 +5484,7 @@ attach_failed:
spin_unlock(&iommu->lock);
spin_unlock_irqrestore(&device_domain_lock, flags);
if (!domain->auxd_refcnt && domain->default_pasid > 0)
- intel_pasid_free_id(domain->default_pasid);
+ ioasid_free(domain->default_pasid);
return ret;
}
@@ -5594,6 +5738,24 @@ static inline bool iommu_pasid_support(void)
return ret;
}
+static inline bool nested_mode_support(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ bool ret = true;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {
+ ret = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
static bool intel_iommu_capable(enum iommu_cap cap)
{
if (cap == IOMMU_CAP_CACHE_COHERENCY)
@@ -5624,8 +5786,10 @@ static int intel_iommu_add_device(struct device *dev)
group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
+ if (IS_ERR(group)) {
+ ret = PTR_ERR(group);
+ goto unlink;
+ }
iommu_group_put(group);
@@ -5651,7 +5815,8 @@ static int intel_iommu_add_device(struct device *dev)
if (!get_private_domain_for_dev(dev)) {
dev_warn(dev,
"Failed to get a private domain.\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto unlink;
}
dev_info(dev,
@@ -5666,6 +5831,10 @@ static int intel_iommu_add_device(struct device *dev)
}
return 0;
+
+unlink:
+ iommu_device_unlink(&iommu->iommu, dev);
+ return ret;
}
static void intel_iommu_remove_device(struct device *dev)
@@ -5744,15 +5913,6 @@ static void intel_iommu_get_resv_regions(struct device *device,
list_add_tail(&reg->list, head);
}
-static void intel_iommu_put_resv_regions(struct device *dev,
- struct list_head *head)
-{
- struct iommu_resv_region *entry, *next;
-
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
-}
-
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
{
struct device_domain_info *info;
@@ -5817,6 +5977,13 @@ static void intel_iommu_apply_resv_region(struct device *dev,
WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
}
+static struct iommu_group *intel_iommu_device_group(struct device *dev)
+{
+ if (dev_is_pci(dev))
+ return pci_device_group(dev);
+ return generic_device_group(dev);
+}
+
#ifdef CONFIG_INTEL_IOMMU_SVM
struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
@@ -5972,10 +6139,42 @@ static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
}
+static int
+intel_iommu_domain_set_attr(struct iommu_domain *domain,
+ enum iommu_attr attr, void *data)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned long flags;
+ int ret = 0;
+
+ if (domain->type != IOMMU_DOMAIN_UNMANAGED)
+ return -EINVAL;
+
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ spin_lock_irqsave(&device_domain_lock, flags);
+ if (nested_mode_support() &&
+ list_empty(&dmar_domain->devices)) {
+ dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
+ dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
+ } else {
+ ret = -ENODEV;
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
.domain_free = intel_iommu_domain_free,
+ .domain_set_attr = intel_iommu_domain_set_attr,
.attach_dev = intel_iommu_attach_device,
.detach_dev = intel_iommu_detach_device,
.aux_attach_dev = intel_iommu_aux_attach_device,
@@ -5987,9 +6186,9 @@ const struct iommu_ops intel_iommu_ops = {
.add_device = intel_iommu_add_device,
.remove_device = intel_iommu_remove_device,
.get_resv_regions = intel_iommu_get_resv_regions,
- .put_resv_regions = intel_iommu_put_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.apply_resv_region = intel_iommu_apply_resv_region,
- .device_group = pci_device_group,
+ .device_group = intel_iommu_device_group,
.dev_has_feat = intel_iommu_dev_has_feat,
.dev_feat_enabled = intel_iommu_dev_feat_enabled,
.dev_enable_feat = intel_iommu_dev_enable_feat,
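Note (not part of the patch): intel_iommu_domain_set_attr() above only accepts DOMAIN_ATTR_NESTING on an unmanaged domain whose device list is still empty, so a consumer has to request nesting before the first attach. The fragment below is a hypothetical, VFIO-style kernel sketch of that ordering (not standalone-runnable; alloc_nested_domain() and its error handling are illustrative only):

#include <linux/iommu.h>
#include <linux/device.h>

/*
 * Hypothetical consumer sketch: DOMAIN_ATTR_NESTING must be requested on an
 * UNMANAGED domain *before* the first device is attached, because the handler
 * above rejects it once dmar_domain->devices is non-empty.
 */
static struct iommu_domain *alloc_nested_domain(struct device *dev)
{
	struct iommu_domain *domain;
	int attr = 1;

	domain = iommu_domain_alloc(dev->bus);
	if (!domain)
		return NULL;

	/* May fail with -ENODEV if the IOMMUs lack scalable/nested support. */
	if (iommu_domain_set_attr(domain, DOMAIN_ATTR_NESTING, &attr))
		pr_info("nested translation not available, using ordinary DMA remapping\n");

	if (iommu_attach_device(domain, dev)) {
		iommu_domain_free(domain);
		return NULL;
	}

	return domain;
}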
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index 040a445be300..22b30f10b396 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -26,42 +26,6 @@
*/
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;
-static DEFINE_IDR(pasid_idr);
-
-int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp)
-{
- int ret, min, max;
-
- min = max_t(int, start, PASID_MIN);
- max = min_t(int, end, intel_pasid_max_id);
-
- WARN_ON(in_interrupt());
- idr_preload(gfp);
- spin_lock(&pasid_lock);
- ret = idr_alloc(&pasid_idr, ptr, min, max, GFP_ATOMIC);
- spin_unlock(&pasid_lock);
- idr_preload_end();
-
- return ret;
-}
-
-void intel_pasid_free_id(int pasid)
-{
- spin_lock(&pasid_lock);
- idr_remove(&pasid_idr, pasid);
- spin_unlock(&pasid_lock);
-}
-
-void *intel_pasid_lookup_id(int pasid)
-{
- void *p;
-
- spin_lock(&pasid_lock);
- p = idr_find(&pasid_idr, pasid);
- spin_unlock(&pasid_lock);
-
- return p;
-}
/*
* Per device pasid table management:
@@ -465,6 +429,21 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
+static void pasid_flush_caches(struct intel_iommu *iommu,
+ struct pasid_entry *pte,
+ int pasid, u16 did)
+{
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
+ if (cap_caching_mode(iommu->cap)) {
+ pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+ iotlb_invalidation_with_pasid(iommu, did, pasid);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+}
+
/*
* Set up the scalable mode pasid table entry for first only
* translation type.
@@ -498,10 +477,15 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
pasid_set_sre(pte);
}
-#ifdef CONFIG_X86
- if (cpu_feature_enabled(X86_FEATURE_LA57))
- pasid_set_flpm(pte, 1);
-#endif /* CONFIG_X86 */
+ if (flags & PASID_FLAG_FL5LP) {
+ if (cap_5lp_support(iommu->cap)) {
+ pasid_set_flpm(pte, 1);
+ } else {
+ pr_err("No 5-level paging support for first-level\n");
+ pasid_clear_entry(pte);
+ return -EINVAL;
+ }
+ }
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
@@ -510,16 +494,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
/* Setup Present and PASID Granular Transfer Type: */
pasid_set_translation_type(pte, 1);
pasid_set_present(pte);
-
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(pte, sizeof(*pte));
-
- if (cap_caching_mode(iommu->cap)) {
- pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- iotlb_invalidation_with_pasid(iommu, did, pasid);
- } else {
- iommu_flush_write_buffer(iommu);
- }
+ pasid_flush_caches(iommu, pte, pasid, did);
return 0;
}
@@ -583,16 +558,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
*/
pasid_set_sre(pte);
pasid_set_present(pte);
-
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(pte, sizeof(*pte));
-
- if (cap_caching_mode(iommu->cap)) {
- pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- iotlb_invalidation_with_pasid(iommu, did, pasid);
- } else {
- iommu_flush_write_buffer(iommu);
- }
+ pasid_flush_caches(iommu, pte, pasid, did);
return 0;
}
@@ -626,16 +592,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
*/
pasid_set_sre(pte);
pasid_set_present(pte);
-
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(pte, sizeof(*pte));
-
- if (cap_caching_mode(iommu->cap)) {
- pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- iotlb_invalidation_with_pasid(iommu, did, pasid);
- } else {
- iommu_flush_write_buffer(iommu);
- }
+ pasid_flush_caches(iommu, pte, pasid, did);
return 0;
}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index fc8cd8f17de1..92de6df24ccb 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -37,6 +37,12 @@
*/
#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
+/*
+ * The PASID_FLAG_FL5LP flag indicates that 5-level paging is used for
+ * first-level translation; otherwise, 4-level paging is used.
+ */
+#define PASID_FLAG_FL5LP BIT(1)
+
struct pasid_dir_entry {
u64 val;
};
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index dca88f9fdf29..d7f2a5358900 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -17,25 +17,13 @@
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
+#include <linux/ioasid.h>
#include <asm/page.h>
#include "intel-pasid.h"
static irqreturn_t prq_event_thread(int irq, void *d);
-int intel_svm_init(struct intel_iommu *iommu)
-{
- if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
- !cap_fl1gp_support(iommu->cap))
- return -EINVAL;
-
- if (cpu_feature_enabled(X86_FEATURE_LA57) &&
- !cap_5lp_support(iommu->cap))
- return -EINVAL;
-
- return 0;
-}
-
#define PRQ_ORDER 0
int intel_svm_enable_prq(struct intel_iommu *iommu)
@@ -99,6 +87,33 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
return 0;
}
+static inline bool intel_svm_capable(struct intel_iommu *iommu)
+{
+ return iommu->flags & VTD_FLAG_SVM_CAPABLE;
+}
+
+void intel_svm_check(struct intel_iommu *iommu)
+{
+ if (!pasid_supported(iommu))
+ return;
+
+ if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
+ !cap_fl1gp_support(iommu->cap)) {
+ pr_err("%s SVM disabled, incompatible 1GB page capability\n",
+ iommu->name);
+ return;
+ }
+
+ if (cpu_feature_enabled(X86_FEATURE_LA57) &&
+ !cap_5lp_support(iommu->cap)) {
+ pr_err("%s SVM disabled, incompatible paging mode\n",
+ iommu->name);
+ return;
+ }
+
+ iommu->flags |= VTD_FLAG_SVM_CAPABLE;
+}
+
static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
unsigned long address, unsigned long pages, int ih)
{
@@ -207,6 +222,10 @@ static const struct mmu_notifier_ops intel_mmuops = {
static DEFINE_MUTEX(pasid_mutex);
static LIST_HEAD(global_svm_list);
+#define for_each_svm_dev(sdev, svm, d) \
+ list_for_each_entry((sdev), &(svm)->devs, list) \
+ if ((d) != (sdev)->dev) {} else
+
int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
{
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
@@ -220,6 +239,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
if (!iommu || dmar_disabled)
return -EINVAL;
+ if (!intel_svm_capable(iommu))
+ return -ENOTSUPP;
+
if (dev_is_pci(dev)) {
pasid_max = pci_max_pasids(to_pci_dev(dev));
if (pasid_max < 0)
@@ -252,15 +274,14 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
goto out;
}
- list_for_each_entry(sdev, &svm->devs, list) {
- if (dev == sdev->dev) {
- if (sdev->ops != ops) {
- ret = -EBUSY;
- goto out;
- }
- sdev->users++;
- goto success;
+ /* Find the matching device in svm list */
+ for_each_svm_dev(sdev, svm, dev) {
+ if (sdev->ops != ops) {
+ ret = -EBUSY;
+ goto out;
}
+ sdev->users++;
+ goto success;
}
break;
@@ -314,16 +335,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
if (pasid_max > intel_pasid_max_id)
pasid_max = intel_pasid_max_id;
- /* Do not use PASID 0 in caching mode (virtualised IOMMU) */
- ret = intel_pasid_alloc_id(svm,
- !!cap_caching_mode(iommu->cap),
- pasid_max - 1, GFP_KERNEL);
- if (ret < 0) {
+ /* Do not use PASID 0, reserved for RID to PASID */
+ svm->pasid = ioasid_alloc(NULL, PASID_MIN,
+ pasid_max - 1, svm);
+ if (svm->pasid == INVALID_IOASID) {
kfree(svm);
kfree(sdev);
+ ret = -ENOSPC;
goto out;
}
- svm->pasid = ret;
svm->notifier.ops = &intel_mmuops;
svm->mm = mm;
svm->flags = flags;
@@ -333,7 +353,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
if (mm) {
ret = mmu_notifier_register(&svm->notifier, mm);
if (ret) {
- intel_pasid_free_id(svm->pasid);
+ ioasid_free(svm->pasid);
kfree(svm);
kfree(sdev);
goto out;
@@ -344,12 +364,14 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
ret = intel_pasid_setup_first_level(iommu, dev,
mm ? mm->pgd : init_mm.pgd,
svm->pasid, FLPT_DEFAULT_DID,
- mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
+ (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
+ (cpu_feature_enabled(X86_FEATURE_LA57) ?
+ PASID_FLAG_FL5LP : 0));
spin_unlock(&iommu->lock);
if (ret) {
if (mm)
mmu_notifier_unregister(&svm->notifier, mm);
- intel_pasid_free_id(svm->pasid);
+ ioasid_free(svm->pasid);
kfree(svm);
kfree(sdev);
goto out;
@@ -365,7 +387,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
ret = intel_pasid_setup_first_level(iommu, dev,
mm ? mm->pgd : init_mm.pgd,
svm->pasid, FLPT_DEFAULT_DID,
- mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
+ (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
+ (cpu_feature_enabled(X86_FEATURE_LA57) ?
+ PASID_FLAG_FL5LP : 0));
spin_unlock(&iommu->lock);
if (ret) {
kfree(sdev);
@@ -397,44 +421,45 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
if (!iommu)
goto out;
- svm = intel_pasid_lookup_id(pasid);
+ svm = ioasid_find(NULL, pasid, NULL);
if (!svm)
goto out;
- list_for_each_entry(sdev, &svm->devs, list) {
- if (dev == sdev->dev) {
- ret = 0;
- sdev->users--;
- if (!sdev->users) {
- list_del_rcu(&sdev->list);
- /* Flush the PASID cache and IOTLB for this device.
- * Note that we do depend on the hardware *not* using
- * the PASID any more. Just as we depend on other
- * devices never using PASIDs that they have no right
- * to use. We have a *shared* PASID table, because it's
- * large and has to be physically contiguous. So it's
- * hard to be as defensive as we might like. */
- intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
- intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
- kfree_rcu(sdev, rcu);
-
- if (list_empty(&svm->devs)) {
- intel_pasid_free_id(svm->pasid);
- if (svm->mm)
- mmu_notifier_unregister(&svm->notifier, svm->mm);
-
- list_del(&svm->list);
-
- /* We mandate that no page faults may be outstanding
- * for the PASID when intel_svm_unbind_mm() is called.
- * If that is not obeyed, subtle errors will happen.
- * Let's make them less subtle... */
- memset(svm, 0x6b, sizeof(*svm));
- kfree(svm);
- }
+ if (IS_ERR(svm)) {
+ ret = PTR_ERR(svm);
+ goto out;
+ }
+
+ for_each_svm_dev(sdev, svm, dev) {
+ ret = 0;
+ sdev->users--;
+ if (!sdev->users) {
+ list_del_rcu(&sdev->list);
+ /* Flush the PASID cache and IOTLB for this device.
+ * Note that we do depend on the hardware *not* using
+ * the PASID any more. Just as we depend on other
+ * devices never using PASIDs that they have no right
+ * to use. We have a *shared* PASID table, because it's
+ * large and has to be physically contiguous. So it's
+ * hard to be as defensive as we might like. */
+ intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
+ intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
+ kfree_rcu(sdev, rcu);
+
+ if (list_empty(&svm->devs)) {
+ ioasid_free(svm->pasid);
+ if (svm->mm)
+ mmu_notifier_unregister(&svm->notifier, svm->mm);
+ list_del(&svm->list);
+ /* We mandate that no page faults may be outstanding
+ * for the PASID when intel_svm_unbind_mm() is called.
+ * If that is not obeyed, subtle errors will happen.
+ * Let's make them less subtle... */
+ memset(svm, 0x6b, sizeof(*svm));
+ kfree(svm);
}
- break;
}
+ break;
}
out:
mutex_unlock(&pasid_mutex);
@@ -454,10 +479,14 @@ int intel_svm_is_pasid_valid(struct device *dev, int pasid)
if (!iommu)
goto out;
- svm = intel_pasid_lookup_id(pasid);
+ svm = ioasid_find(NULL, pasid, NULL);
if (!svm)
goto out;
+ if (IS_ERR(svm)) {
+ ret = PTR_ERR(svm);
+ goto out;
+ }
/* init_mm is used in this case */
if (!svm->mm)
ret = 1;
@@ -564,13 +593,12 @@ static irqreturn_t prq_event_thread(int irq, void *d)
if (!svm || svm->pasid != req->pasid) {
rcu_read_lock();
- svm = intel_pasid_lookup_id(req->pasid);
+ svm = ioasid_find(NULL, req->pasid, NULL);
/* It *can't* go away, because the driver is not permitted
* to unbind the mm while any page faults are outstanding.
* So we only need RCU to protect the internal idr code. */
rcu_read_unlock();
-
- if (!svm) {
+ if (IS_ERR_OR_NULL(svm)) {
pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
iommu->name, req->pasid, ((unsigned long long *)req)[0],
((unsigned long long *)req)[1]);
@@ -654,11 +682,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
if (req->priv_data_present)
memcpy(&resp.qw2, req->priv_data,
sizeof(req->priv_data));
+ resp.qw2 = 0;
+ resp.qw3 = 0;
+ qi_submit_sync(&resp, iommu);
}
- resp.qw2 = 0;
- resp.qw3 = 0;
- qi_submit_sync(&resp, iommu);
-
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
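Note (not part of the patch): the for_each_svm_dev() macro introduced above relies on the "if (...) {} else" trick so the caller's loop body runs only for entries whose ->dev matches, while the whole construct still behaves as a single statement. A standalone illustration of the same idiom over a plain array, with no kernel list API involved:

#include <stdio.h>

struct item {
	int key;
	const char *name;
};

/*
 * Same shape as for_each_svm_dev(): iterate, but hand the body only the
 * entries whose key matches. The "if (...) {} else" keeps the construct
 * usable as a single statement, with any braces supplied by the caller.
 */
#define for_each_matching(it, arr, n, k)				\
	for ((it) = (arr); (it) < (arr) + (n); (it)++)			\
		if ((it)->key != (k)) {} else

int main(void)
{
	struct item items[] = {
		{ 1, "first" }, { 2, "second" }, { 1, "third" },
	};
	struct item *it;

	for_each_matching(it, items, 3, 1)
		printf("matched: %s\n", it->name);	/* prints "first" and "third" */

	return 0;
}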
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ffe6f685ceae..3e3528436e0b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -768,6 +768,7 @@ err_put_group:
mutex_unlock(&group->mutex);
dev->iommu_group = NULL;
kobject_put(group->devices_kobj);
+ sysfs_remove_link(group->devices_kobj, device->name);
err_free_name:
kfree(device->name);
err_remove_link:
@@ -2256,6 +2257,25 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list)
ops->put_resv_regions(dev, list);
}
+/**
+ * generic_iommu_put_resv_regions - Reserved region driver helper
+ * @dev: device for which to free reserved regions
+ * @list: reserved region list for device
+ *
+ * IOMMU drivers can use this to implement their .put_resv_regions() callback
+ * for simple reservations. Memory allocated for each reserved region will be
+ * freed. If an IOMMU driver allocates additional resources per region, it is
+ * going to have to implement a custom callback.
+ */
+void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list)
+{
+ struct iommu_resv_region *entry, *next;
+
+ list_for_each_entry_safe(entry, next, list, list)
+ kfree(entry);
+}
+EXPORT_SYMBOL(generic_iommu_put_resv_regions);
+
struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
size_t length, int prot,
enum iommu_resv_type type)
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index c7a914b9bbbc..0e6a9536eca6 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -233,7 +233,7 @@ static DEFINE_MUTEX(iova_cache_mutex);
struct iova *alloc_iova_mem(void)
{
- return kmem_cache_zalloc(iova_cache, GFP_ATOMIC);
+ return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
}
EXPORT_SYMBOL(alloc_iova_mem);
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 315c7cc4f99d..cce329d71fba 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -837,14 +837,6 @@ static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
iommu_dma_get_resv_regions(dev, head);
}
-static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
-{
- struct iommu_resv_region *entry, *next;
-
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
-}
-
static struct iommu_ops viommu_ops;
static struct virtio_driver virtio_iommu_drv;
@@ -914,7 +906,7 @@ static int viommu_add_device(struct device *dev)
err_unlink_dev:
iommu_device_unlink(&viommu->iommu, dev);
err_free_dev:
- viommu_put_resv_regions(dev, &vdev->resv_regions);
+ generic_iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
return ret;
@@ -932,7 +924,7 @@ static void viommu_remove_device(struct device *dev)
iommu_group_remove_device(dev);
iommu_device_unlink(&vdev->viommu->iommu, dev);
- viommu_put_resv_regions(dev, &vdev->resv_regions);
+ generic_iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
}
@@ -961,7 +953,7 @@ static struct iommu_ops viommu_ops = {
.remove_device = viommu_remove_device,
.device_group = viommu_device_group,
.get_resv_regions = viommu_get_resv_regions,
- .put_resv_regions = viommu_put_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.of_xlate = viommu_of_xlate,
};