Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r--  drivers/iommu/intel/Kconfig     |   4
-rw-r--r--  drivers/iommu/intel/cap_audit.c |  13
-rw-r--r--  drivers/iommu/intel/cap_audit.h |   1
-rw-r--r--  drivers/iommu/intel/dmar.c      |  10
-rw-r--r--  drivers/iommu/intel/iommu.c     | 213
-rw-r--r--  drivers/iommu/intel/svm.c       |  24
6 files changed, 172 insertions(+), 93 deletions(-)
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 0ddb77115be7..247d0f2d5fdf 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -6,6 +6,9 @@ config DMAR_TABLE
config DMAR_PERF
bool
+config DMAR_DEBUG
+ bool
+
config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && (X86 || IA64)
@@ -31,6 +34,7 @@ config INTEL_IOMMU_DEBUGFS
bool "Export Intel IOMMU internals in Debugfs"
depends on IOMMU_DEBUGFS
select DMAR_PERF
+ select DMAR_DEBUG
help
!!!WARNING!!!
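
DMAR_DEBUG has no prompt, so it is only ever set through the "select DMAR_DEBUG" added above; the new dump code in iommu.c further down is compiled under #ifdef CONFIG_DMAR_DEBUG. Callers such as dmar_fault_do_one() still have to build when the option is off, which implies a stub in a shared header. A minimal sketch of that companion pattern; where exactly the declaration lives is an assumption, not shown in this diff:

/*
 * Sketch only: with CONFIG_DMAR_DEBUG unset the call compiles away to an
 * empty inline. Header placement is assumed for illustration.
 */
#ifdef CONFIG_DMAR_DEBUG
void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
			  unsigned long long addr, u32 pasid);
#else
static inline void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
					unsigned long long addr, u32 pasid) {}
#endif
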
diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
index b12e421a2f1a..b39d223926a4 100644
--- a/drivers/iommu/intel/cap_audit.c
+++ b/drivers/iommu/intel/cap_audit.c
@@ -163,6 +163,14 @@ static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type)
check_irq_capabilities(iommu, i);
}
+ /*
+ * If the system is sane to support scalable mode, either SL or FL
+ * should be sane.
+ */
+ if (intel_cap_smts_sanity() &&
+ !intel_cap_flts_sanity() && !intel_cap_slts_sanity())
+ return -EOPNOTSUPP;
+
out:
rcu_read_unlock();
return 0;
@@ -203,3 +211,8 @@ bool intel_cap_flts_sanity(void)
{
return ecap_flts(intel_iommu_ecap_sanity);
}
+
+bool intel_cap_slts_sanity(void)
+{
+ return ecap_slts(intel_iommu_ecap_sanity);
+}
diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h
index 74cfccae0e81..d07b75938961 100644
--- a/drivers/iommu/intel/cap_audit.h
+++ b/drivers/iommu/intel/cap_audit.h
@@ -111,6 +111,7 @@ bool intel_cap_smts_sanity(void);
bool intel_cap_pasid_sanity(void);
bool intel_cap_nest_sanity(void);
bool intel_cap_flts_sanity(void);
+bool intel_cap_slts_sanity(void);
static inline bool scalable_mode_support(void)
{
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index b7708b93f3fa..915bff76fe96 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1941,12 +1941,16 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
reason = dmar_get_fault_reason(fault_reason, &fault_type);
- if (fault_type == INTR_REMAP)
+ if (fault_type == INTR_REMAP) {
pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
PCI_FUNC(source_id & 0xFF), addr >> 48,
fault_reason, reason);
- else if (pasid == INVALID_IOASID)
+
+ return 0;
+ }
+
+ if (pasid == INVALID_IOASID)
pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
type ? "DMA Read" : "DMA Write",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
@@ -1959,6 +1963,8 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
PCI_FUNC(source_id & 0xFF), addr,
fault_reason, reason);
+ dmar_fault_dump_ptes(iommu, source_id, addr, pasid);
+
return 0;
}
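
The requester ID logged above is decoded straight from the fault record's source_id: the high byte is the bus, the low byte is devfn, split with the standard macros from <linux/pci.h>. A worked example with an assumed source_id value:

#include <linux/pci.h>
#include <linux/printk.h>

/*
 * Worked example (source_id value assumed for illustration) of the decode
 * used by dmar_fault_do_one() above.
 */
static void example_decode_source_id(void)
{
	u16 source_id = 0x1a08;

	pr_info("requester [%02x:%02x.%d]\n",
		source_id >> 8,			/* bus  = 0x1a                 */
		PCI_SLOT(source_id & 0xff),	/* slot = (0x08 >> 3) & 0x1f = 1 */
		PCI_FUNC(source_id & 0xff));	/* func = 0x08 & 0x07 = 0      */
	/* prints: requester [1a:01.0] */
}
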
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d75f59ae28e6..0bde0c8b4126 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -156,6 +156,8 @@ static struct intel_iommu **g_iommus;
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
+static inline struct device_domain_info *
+dmar_search_domain_by_dev_info(int segment, int bus, int devfn);
/*
* set to 1 to panic kernel if can't successfully enable VT-d
@@ -412,6 +414,7 @@ static int __init intel_iommu_setup(char *str)
{
if (!str)
return -EINVAL;
+
while (*str) {
if (!strncmp(str, "on", 2)) {
dmar_disabled = 0;
@@ -441,13 +444,16 @@ static int __init intel_iommu_setup(char *str)
} else if (!strncmp(str, "tboot_noforce", 13)) {
pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
+ } else {
+ pr_notice("Unknown option - '%s'\n", str);
}
str += strcspn(str, ",");
while (*str == ',')
str++;
}
- return 0;
+
+ return 1;
}
__setup("intel_iommu=", intel_iommu_setup);
@@ -522,7 +528,7 @@ static inline void free_devinfo_mem(void *vaddr)
static inline int domain_type_is_si(struct dmar_domain *domain)
{
- return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
+ return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
}
static inline bool domain_use_first_level(struct dmar_domain *domain)
@@ -992,6 +998,117 @@ out:
spin_unlock_irqrestore(&iommu->lock, flags);
}
+#ifdef CONFIG_DMAR_DEBUG
+static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn)
+{
+ struct device_domain_info *info;
+ struct dma_pte *parent, *pte;
+ struct dmar_domain *domain;
+ int offset, level;
+
+ info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+ if (!info || !info->domain) {
+ pr_info("device [%02x:%02x.%d] not probed\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return;
+ }
+
+ domain = info->domain;
+ level = agaw_to_level(domain->agaw);
+ parent = domain->pgd;
+ if (!parent) {
+ pr_info("no page table setup\n");
+ return;
+ }
+
+ while (1) {
+ offset = pfn_level_offset(pfn, level);
+ pte = &parent[offset];
+ if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
+ pr_info("PTE not present at level %d\n", level);
+ break;
+ }
+
+ pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
+
+ if (level == 1)
+ break;
+
+ parent = phys_to_virt(dma_pte_addr(pte));
+ level--;
+ }
+}
+
+void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
+ unsigned long long addr, u32 pasid)
+{
+ struct pasid_dir_entry *dir, *pde;
+ struct pasid_entry *entries, *pte;
+ struct context_entry *ctx_entry;
+ struct root_entry *rt_entry;
+ u8 devfn = source_id & 0xff;
+ u8 bus = source_id >> 8;
+ int i, dir_index, index;
+
+ pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
+
+ /* root entry dump */
+ rt_entry = &iommu->root_entry[bus];
+ if (!rt_entry) {
+ pr_info("root table entry is not present\n");
+ return;
+ }
+
+ if (sm_supported(iommu))
+ pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
+ rt_entry->hi, rt_entry->lo);
+ else
+ pr_info("root entry: 0x%016llx", rt_entry->lo);
+
+ /* context entry dump */
+ ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
+ if (!ctx_entry) {
+ pr_info("context table entry is not present\n");
+ return;
+ }
+
+ pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
+ ctx_entry->hi, ctx_entry->lo);
+
+ /* legacy mode does not require PASID entries */
+ if (!sm_supported(iommu))
+ goto pgtable_walk;
+
+ /* get the pointer to pasid directory entry */
+ dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
+ if (!dir) {
+ pr_info("pasid directory entry is not present\n");
+ return;
+ }
+ /* For request-without-pasid, get the pasid from context entry */
+ if (intel_iommu_sm && pasid == INVALID_IOASID)
+ pasid = PASID_RID2PASID;
+
+ dir_index = pasid >> PASID_PDE_SHIFT;
+ pde = &dir[dir_index];
+ pr_info("pasid dir entry: 0x%016llx\n", pde->val);
+
+ /* get the pointer to the pasid table entry */
+ entries = get_pasid_table_from_pde(pde);
+ if (!entries) {
+ pr_info("pasid table entry is not present\n");
+ return;
+ }
+ index = pasid & PASID_PTE_MASK;
+ pte = &entries[index];
+ for (i = 0; i < ARRAY_SIZE(pte->val); i++)
+ pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
+
+pgtable_walk:
+ pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn);
+}
+#endif
+
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
unsigned long pfn, int *target_level)
{
@@ -1874,12 +1991,21 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
* Check and return whether first level is used by default for
* DMA translation.
*/
-static bool first_level_by_default(void)
+static bool first_level_by_default(unsigned int type)
{
- return scalable_mode_support() && intel_cap_flts_sanity();
+ /* Only SL is available in legacy mode */
+ if (!scalable_mode_support())
+ return false;
+
+ /* Only level (either FL or SL) is available, just use it */
+ if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
+ return intel_cap_flts_sanity();
+
+ /* Both levels are available, decide it based on domain type */
+ return type != IOMMU_DOMAIN_UNMANAGED;
}
-static struct dmar_domain *alloc_domain(int flags)
+static struct dmar_domain *alloc_domain(unsigned int type)
{
struct dmar_domain *domain;
@@ -1889,8 +2015,7 @@ static struct dmar_domain *alloc_domain(int flags)
memset(domain, 0, sizeof(*domain));
domain->nid = NUMA_NO_NODE;
- domain->flags = flags;
- if (first_level_by_default())
+ if (first_level_by_default(type))
domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
@@ -2354,12 +2479,17 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
return -ENOMEM;
first_pte = pte;
+ lvl_pages = lvl_to_nr_pages(largepage_lvl);
+
/* It is large page*/
if (largepage_lvl > 1) {
unsigned long end_pfn;
+ unsigned long pages_to_remove;
pteval |= DMA_PTE_LARGE_PAGE;
- end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
+ pages_to_remove = min_t(unsigned long, nr_pages,
+ nr_pte_to_next_page(pte) * lvl_pages);
+ end_pfn = iov_pfn + pages_to_remove - 1;
switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
} else {
pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
@@ -2381,10 +2511,6 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
WARN_ON(1);
}
- lvl_pages = lvl_to_nr_pages(largepage_lvl);
-
- BUG_ON(nr_pages < lvl_pages);
-
nr_pages -= lvl_pages;
iov_pfn += lvl_pages;
phys_pfn += lvl_pages;
@@ -2708,7 +2834,7 @@ static int __init si_domain_init(int hw)
struct device *dev;
int i, nid, ret;
- si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
+ si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
if (!si_domain)
return -EFAULT;
@@ -4517,7 +4643,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
case IOMMU_DOMAIN_DMA:
case IOMMU_DOMAIN_DMA_FQ:
case IOMMU_DOMAIN_UNMANAGED:
- dmar_domain = alloc_domain(0);
+ dmar_domain = alloc_domain(type);
if (!dmar_domain) {
pr_err("Can't allocate dmar_domain\n");
return NULL;
@@ -5386,62 +5512,14 @@ static int intel_iommu_disable_sva(struct device *dev)
return ret;
}
-/*
- * A PCI express designated vendor specific extended capability is defined
- * in the section 3.7 of Intel scalable I/O virtualization technical spec
- * for system software and tools to detect endpoint devices supporting the
- * Intel scalable IO virtualization without host driver dependency.
- *
- * Returns the address of the matching extended capability structure within
- * the device's PCI configuration space or 0 if the device does not support
- * it.
- */
-static int siov_find_pci_dvsec(struct pci_dev *pdev)
-{
- int pos;
- u16 vendor, id;
-
- pos = pci_find_next_ext_capability(pdev, 0, 0x23);
- while (pos) {
- pci_read_config_word(pdev, pos + 4, &vendor);
- pci_read_config_word(pdev, pos + 8, &id);
- if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
- return pos;
-
- pos = pci_find_next_ext_capability(pdev, pos, 0x23);
- }
-
- return 0;
-}
-
-static bool
-intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
+static int intel_iommu_enable_iopf(struct device *dev)
{
struct device_domain_info *info = get_domain_info(dev);
- if (feat == IOMMU_DEV_FEAT_AUX) {
- int ret;
-
- if (!dev_is_pci(dev) || dmar_disabled ||
- !scalable_mode_support() || !pasid_mode_support())
- return false;
-
- ret = pci_pasid_features(to_pci_dev(dev));
- if (ret < 0)
- return false;
-
- return !!siov_find_pci_dvsec(to_pci_dev(dev));
- }
-
- if (feat == IOMMU_DEV_FEAT_IOPF)
- return info && info->pri_supported;
-
- if (feat == IOMMU_DEV_FEAT_SVA)
- return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
- info->pasid_supported && info->pri_supported &&
- info->ats_supported;
+ if (info && info->pri_supported)
+ return 0;
- return false;
+ return -ENODEV;
}
static int
@@ -5452,7 +5530,7 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
return intel_iommu_enable_auxd(dev);
case IOMMU_DEV_FEAT_IOPF:
- return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
+ return intel_iommu_enable_iopf(dev);
case IOMMU_DEV_FEAT_SVA:
return intel_iommu_enable_sva(dev);
@@ -5578,7 +5656,6 @@ const struct iommu_ops intel_iommu_ops = {
.get_resv_regions = intel_iommu_get_resv_regions,
.put_resv_regions = generic_iommu_put_resv_regions,
.device_group = intel_iommu_device_group,
- .dev_has_feat = intel_iommu_dev_has_feat,
.dev_feat_enabled = intel_iommu_dev_feat_enabled,
.dev_enable_feat = intel_iommu_dev_enable_feat,
.dev_disable_feat = intel_iommu_dev_disable_feat,
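
With intel_iommu_dev_has_feat() removed, the capability check for I/O page faults now happens when the feature is enabled, and a consumer driver simply handles the -ENODEV from intel_iommu_enable_iopf(). A rough driver-side sketch using the generic feature API (illustrative, not part of this patch):

#include <linux/iommu.h>

/*
 * Sketch (illustrative): probing for I/O page fault plus SVA support now
 * that the PRI check lives in intel_iommu_enable_iopf(). Returns 0 on success.
 */
static int example_enable_iopf_sva(struct device *dev)
{
	int ret;

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
	if (ret)		/* -ENODEV when the device lacks PRI */
		return ret;

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
	if (ret)
		iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_IOPF);

	return ret;
}
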
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 0c228787704f..5b5d69b04fcc 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -505,21 +505,6 @@ out:
return ret;
}
-static void _load_pasid(void *unused)
-{
- update_pasid();
-}
-
-static void load_pasid(struct mm_struct *mm, u32 pasid)
-{
- mutex_lock(&mm->context.lock);
-
- /* Update PASID MSR on all CPUs running the mm's tasks. */
- on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true);
-
- mutex_unlock(&mm->context.lock);
-}
-
static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
unsigned int flags)
{
@@ -614,10 +599,6 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
if (ret)
goto free_sdev;
- /* The newly allocated pasid is loaded to the mm. */
- if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs))
- load_pasid(mm, svm->pasid);
-
list_add_rcu(&sdev->list, &svm->devs);
success:
return &sdev->sva;
@@ -670,11 +651,8 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
- if (svm->notifier.ops) {
+ if (svm->notifier.ops)
mmu_notifier_unregister(&svm->notifier, mm);
- /* Clear mm's pasid. */
- load_pasid(mm, PASID_DISABLED);
- }
pasid_private_remove(svm->pasid);
/* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called.
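
The svm.c hunks only drop the eager PASID MSR update from the bind and unbind paths; the driver-facing SVA API is untouched. A hedged sketch of that external flow as it looked around this kernel version (device handling and messages are illustrative):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/iommu.h>

/*
 * Sketch (illustrative): driver-side SVA usage that ends up in
 * intel_svm_bind_mm()/intel_svm_unbind_mm(). Only the internal load_pasid()
 * calls were removed above; this flow is unchanged.
 */
static int example_sva_bind(struct device *dev, struct mm_struct *mm)
{
	struct iommu_sva *handle;
	u32 pasid;

	handle = iommu_sva_bind_device(dev, mm, NULL);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	pasid = iommu_sva_get_pasid(handle);
	dev_info(dev, "bound PASID %u\n", pasid);

	iommu_sva_unbind_device(handle);
	return 0;
}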