Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r--  drivers/iommu/intel/iommu.c  277
1 file changed, 118 insertions(+), 159 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7c2f4bd33582..b871a6afd803 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -876,7 +876,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
return;
}
/* For request-without-pasid, get the pasid from context entry */
- if (intel_iommu_sm && pasid == INVALID_IOASID)
+ if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
pasid = PASID_RID2PASID;
dir_index = pasid >> PASID_PDE_SHIFT;
@@ -915,8 +915,6 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
int level = agaw_to_level(domain->agaw);
int offset;
- BUG_ON(!domain->pgd);
-
if (!domain_pfn_supported(domain, pfn))
/* Address beyond IOMMU's addressing capabilities. */
return NULL;
@@ -1005,9 +1003,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
unsigned int large_page;
struct dma_pte *first_pte, *pte;
- BUG_ON(!domain_pfn_supported(domain, start_pfn));
- BUG_ON(!domain_pfn_supported(domain, last_pfn));
- BUG_ON(start_pfn > last_pfn);
+ if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
+ WARN_ON(start_pfn > last_pfn))
+ return;
/* we don't need lock here; nobody else touches the iova range */
do {
@@ -1166,9 +1164,9 @@ next:
static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
unsigned long last_pfn, struct list_head *freelist)
{
- BUG_ON(!domain_pfn_supported(domain, start_pfn));
- BUG_ON(!domain_pfn_supported(domain, last_pfn));
- BUG_ON(start_pfn > last_pfn);
+ if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
+ WARN_ON(start_pfn > last_pfn))
+ return;
/* we don't need lock here; nobody else touches the iova range */
dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
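The two conversions above lean on WARN_ON() evaluating to the truth value of its argument, so one expression can both log a backtrace and gate an early return. A minimal sketch of the pattern, not the exact driver code:

    if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
        WARN_ON(start_pfn > last_pfn))
            return;         /* degrade gracefully instead of BUG() */

Checking last_pfn alone is sufficient here: once start_pfn <= last_pfn has been verified, a supported last_pfn implies start_pfn is supported too, which is why the separate start_pfn check could be dropped.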
@@ -1272,7 +1270,9 @@ static void __iommu_flush_context(struct intel_iommu *iommu,
| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
break;
default:
- BUG();
+ pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
+ iommu->name, type);
+ return;
}
val |= DMA_CCMD_ICC;
@@ -1308,7 +1308,9 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
val_iva = size_order | addr;
break;
default:
- BUG();
+ pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
+ iommu->name, type);
+ return;
}
/* Note: set drain read/write */
#if 0
@@ -1406,20 +1408,6 @@ static void iommu_enable_pci_caps(struct device_domain_info *info)
return;
pdev = to_pci_dev(info->dev);
- /* For IOMMU that supports device IOTLB throttling (DIT), we assign
- * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
- * queue depth at PF level. If DIT is not set, PFSID will be treated as
- * reserved, which should be set to 0.
- */
- if (!ecap_dit(info->iommu->ecap))
- info->pfsid = 0;
- else {
- struct pci_dev *pf_pdev;
-
- /* pdev will be returned if device is not a vf */
- pf_pdev = pci_physfn(pdev);
- info->pfsid = pci_dev_id(pf_pdev);
- }
/* The PCIe spec, in its wisdom, declares that the behaviour of
the device if you enable PASID support after ATS support is
@@ -1429,16 +1417,10 @@ static void iommu_enable_pci_caps(struct device_domain_info *info)
if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
info->pasid_enabled = 1;
- if (info->pri_supported &&
- (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
- !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
- info->pri_enabled = 1;
-
if (info->ats_supported && pci_ats_page_aligned(pdev) &&
!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
info->ats_enabled = 1;
domain_update_iotlb(info->domain);
- info->ats_qdep = pci_ats_queue_depth(pdev);
}
}
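The order that survives in iommu_enable_pci_caps() is deliberate: per the PCIe comment above, PASID is enabled before ATS because the spec leaves PASID-after-ATS behaviour undefined. A condensed sketch of the remaining sequence, for orientation only:

    /* PASID first (PASID-after-ATS is undefined per PCIe)... */
    if (info->pasid_supported &&
        !pci_enable_pasid(pdev, info->pasid_supported & ~1))
            info->pasid_enabled = 1;

    /* ...then ATS, which also refreshes the domain's IOTLB bookkeeping. */
    if (info->ats_supported && pci_ats_page_aligned(pdev) &&
        !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
            info->ats_enabled = 1;
            domain_update_iotlb(info->domain);
    }

PRI enabling leaves this path for intel_iommu_enable_iopf(), and the PFSID/ats_qdep setup moves to the probe path further down.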
@@ -1457,11 +1439,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info)
domain_update_iotlb(info->domain);
}
- if (info->pri_enabled) {
- pci_disable_pri(pdev);
- info->pri_enabled = 0;
- }
-
if (info->pasid_enabled) {
pci_disable_pasid(pdev);
info->pasid_enabled = 0;
@@ -1508,7 +1485,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
u16 did = domain_id_iommu(domain, iommu);
- BUG_ON(pages == 0);
+ if (WARN_ON(!pages))
+ return;
if (ih)
ih = 1 << 6;
@@ -1722,9 +1700,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
}
- if (vccap_pasid(iommu->vccap))
- ioasid_unregister_allocator(&iommu->pasid_allocator);
-
#endif
}
@@ -1895,7 +1870,7 @@ context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
*/
static inline void context_set_sm_dte(struct context_entry *context)
{
- context->lo |= (1 << 2);
+ context->lo |= BIT_ULL(2);
}
/*
@@ -1904,7 +1879,7 @@ static inline void context_set_sm_dte(struct context_entry *context)
*/
static inline void context_set_sm_pre(struct context_entry *context)
{
- context->lo |= (1 << 4);
+ context->lo |= BIT_ULL(4);
}
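context->lo is a u64, so the flag setters now use BIT_ULL() to keep both operands of the OR the same width; a plain (1 << n) is only int-sized. A small illustration of the idiom, using the DTE (bit 2) and PRE (bit 4) fields these helpers set:

    /* BIT_ULL(n) expands to (1ULL << n), matching the u64 field width. */
    context->lo |= BIT_ULL(2);      /* DTE: device-TLB enable */
    context->lo |= BIT_ULL(4);      /* PRE: page request enable */

For these low bit positions the resulting value is identical; the change keeps bitwise operations on 64-bit fields consistently 64 bits wide.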
/* Convert value to context PASID directory size field coding. */
@@ -1930,8 +1905,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
- BUG_ON(!domain->pgd);
-
spin_lock(&iommu->lock);
ret = -ENOMEM;
context = iommu_context_addr(iommu, bus, devfn, 1);
@@ -2183,7 +2156,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
phys_addr_t pteval;
u64 attr;
- BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
+ if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)))
+ return -EINVAL;
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
@@ -2341,8 +2315,6 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
if (level != 4 && level != 5)
return -EINVAL;
- if (pasid != PASID_RID2PASID)
- flags |= PASID_FLAG_SUPERVISOR_MODE;
if (level == 5)
flags |= PASID_FLAG_FL5LP;
@@ -2797,85 +2769,6 @@ out_unmap:
return ret;
}
-#ifdef CONFIG_INTEL_IOMMU_SVM
-static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
-{
- struct intel_iommu *iommu = data;
- ioasid_t ioasid;
-
- if (!iommu)
- return INVALID_IOASID;
- /*
- * VT-d virtual command interface always uses the full 20 bit
- * PASID range. Host can partition guest PASID range based on
- * policies but it is out of guest's control.
- */
- if (min < PASID_MIN || max > intel_pasid_max_id)
- return INVALID_IOASID;
-
- if (vcmd_alloc_pasid(iommu, &ioasid))
- return INVALID_IOASID;
-
- return ioasid;
-}
-
-static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
-{
- struct intel_iommu *iommu = data;
-
- if (!iommu)
- return;
- /*
- * Sanity check the ioasid owner is done at upper layer, e.g. VFIO
- * We can only free the PASID when all the devices are unbound.
- */
- if (ioasid_find(NULL, ioasid, NULL)) {
- pr_alert("Cannot free active IOASID %d\n", ioasid);
- return;
- }
- vcmd_free_pasid(iommu, ioasid);
-}
-
-static void register_pasid_allocator(struct intel_iommu *iommu)
-{
- /*
- * If we are running in the host, no need for custom allocator
- * in that PASIDs are allocated from the host system-wide.
- */
- if (!cap_caching_mode(iommu->cap))
- return;
-
- if (!sm_supported(iommu)) {
- pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
- return;
- }
-
- /*
- * Register a custom PASID allocator if we are running in a guest,
- * guest PASID must be obtained via virtual command interface.
- * There can be multiple vIOMMUs in each guest but only one allocator
- * is active. All vIOMMU allocators will eventually be calling the same
- * host allocator.
- */
- if (!vccap_pasid(iommu->vccap))
- return;
-
- pr_info("Register custom PASID allocator\n");
- iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
- iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
- iommu->pasid_allocator.pdata = (void *)iommu;
- if (ioasid_register_allocator(&iommu->pasid_allocator)) {
- pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
- /*
- * Disable scalable mode on this IOMMU if there
- * is no custom allocator. Mixing SM capable vIOMMU
- * and non-SM vIOMMU are not supported.
- */
- intel_iommu_sm = 0;
- }
-}
-#endif
-
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
@@ -2964,9 +2857,6 @@ static int __init init_dmars(void)
*/
for_each_active_iommu(iommu, drhd) {
iommu_flush_write_buffer(iommu);
-#ifdef CONFIG_INTEL_IOMMU_SVM
- register_pasid_allocator(iommu);
-#endif
iommu_set_root_entry(iommu);
}
@@ -3760,8 +3650,8 @@ static ssize_t version_show(struct device *dev,
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
u32 ver = readl(iommu->reg + DMAR_VER_REG);
- return sprintf(buf, "%d:%d\n",
- DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
+ return sysfs_emit(buf, "%d:%d\n",
+ DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
}
static DEVICE_ATTR_RO(version);
@@ -3769,7 +3659,7 @@ static ssize_t address_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%llx\n", iommu->reg_phys);
+ return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
}
static DEVICE_ATTR_RO(address);
@@ -3777,7 +3667,7 @@ static ssize_t cap_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%llx\n", iommu->cap);
+ return sysfs_emit(buf, "%llx\n", iommu->cap);
}
static DEVICE_ATTR_RO(cap);
@@ -3785,7 +3675,7 @@ static ssize_t ecap_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%llx\n", iommu->ecap);
+ return sysfs_emit(buf, "%llx\n", iommu->ecap);
}
static DEVICE_ATTR_RO(ecap);
@@ -3793,7 +3683,7 @@ static ssize_t domains_supported_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
+ return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap));
}
static DEVICE_ATTR_RO(domains_supported);
@@ -3801,8 +3691,9 @@ static ssize_t domains_used_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
- cap_ndoms(iommu->cap)));
+ return sysfs_emit(buf, "%d\n",
+ bitmap_weight(iommu->domain_ids,
+ cap_ndoms(iommu->cap)));
}
static DEVICE_ATTR_RO(domains_used);
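All of the show() callbacks above switch from sprintf() to sysfs_emit(), the helper intended for sysfs attributes: it bounds output to PAGE_SIZE and warns if the buffer is not the page-sized one sysfs passed in. A generic sketch with a hypothetical attribute name:

    static ssize_t example_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
    {
            struct intel_iommu *iommu = dev_to_intel_iommu(dev);

            /* Returns bytes written, never more than PAGE_SIZE. */
            return sysfs_emit(buf, "%llx\n", iommu->ecap);
    }
    static DEVICE_ATTR_RO(example);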
@@ -4340,8 +4231,9 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
- BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
- GFP_ATOMIC));
+ if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
+ &level, GFP_ATOMIC)))
+ return 0;
if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4521,6 +4413,17 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
dmar_ats_supported(pdev, iommu)) {
info->ats_supported = 1;
info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
+
+ /*
+ * For IOMMU that supports device IOTLB throttling
+ * (DIT), we assign PFSID to the invalidation desc
+ * of a VF such that IOMMU HW can gauge queue depth
+ * at PF level. If DIT is not set, PFSID will be
+ * treated as reserved, which should be set to 0.
+ */
+ if (ecap_dit(iommu->ecap))
+ info->pfsid = pci_dev_id(pci_physfn(pdev));
+ info->ats_qdep = pci_ats_queue_depth(pdev);
}
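pci_physfn() returns the owning PF for a VF (and the device itself otherwise), and pci_dev_id() folds bus and devfn into the 16-bit requester ID, so the assignment above records the PF's source-id as PFSID whenever the IOMMU advertises device-IOTLB throttling (DIT). An expanded form of the same assignment, for clarity only:

    if (ecap_dit(iommu->ecap)) {
            /* For a VF this is the parent PF; otherwise pdev itself. */
            struct pci_dev *pf_pdev = pci_physfn(pdev);

            info->pfsid = pci_dev_id(pf_pdev);  /* bus/devfn as a u16 */
    }
    info->ats_qdep = pci_ats_queue_depth(pdev);

If DIT is not supported, pfsid stays 0, which the hardware treats as reserved.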
if (sm_supported(iommu)) {
if (pasid_supported(iommu)) {
@@ -4638,7 +4541,6 @@ static int intel_iommu_enable_sva(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu;
- int ret;
if (!info || dmar_disabled)
return -EINVAL;
@@ -4650,45 +4552,102 @@ static int intel_iommu_enable_sva(struct device *dev)
if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
return -ENODEV;
- if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
+ if (!info->pasid_enabled || !info->ats_enabled)
return -EINVAL;
- ret = iopf_queue_add_device(iommu->iopf_queue, dev);
- if (ret)
- return ret;
+ /*
+ * Devices having device-specific I/O fault handling should not
+ * support PCI/PRI. The IOMMU side has no means to check the
+ * capability of device-specific IOPF. Therefore, IOMMU can only
+ * default that if the device driver enables SVA on a non-PRI
+ * device, it will handle IOPF in its own way.
+ */
+ if (!info->pri_supported)
+ return 0;
- ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
- if (ret)
- iopf_queue_remove_device(iommu->iopf_queue, dev);
+ /* Devices supporting PRI should have it enabled. */
+ if (!info->pri_enabled)
+ return -EINVAL;
- return ret;
+ return 0;
}
-static int intel_iommu_disable_sva(struct device *dev)
+static int intel_iommu_enable_iopf(struct device *dev)
{
+ struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu = info->iommu;
+ struct intel_iommu *iommu;
int ret;
- ret = iommu_unregister_device_fault_handler(dev);
+ if (!pdev || !info || !info->ats_enabled || !info->pri_supported)
+ return -ENODEV;
+
+ if (info->pri_enabled)
+ return -EBUSY;
+
+ iommu = info->iommu;
+ if (!iommu)
+ return -EINVAL;
+
+ /* PASID is required in PRG Response Message. */
+ if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
+ return -EINVAL;
+
+ ret = pci_reset_pri(pdev);
+ if (ret)
+ return ret;
+
+ ret = iopf_queue_add_device(iommu->iopf_queue, dev);
if (ret)
return ret;
- ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
+ ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+ if (ret)
+ goto iopf_remove_device;
+
+ ret = pci_enable_pri(pdev, PRQ_DEPTH);
if (ret)
- iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+ goto iopf_unregister_handler;
+ info->pri_enabled = 1;
+
+ return 0;
+
+iopf_unregister_handler:
+ iommu_unregister_device_fault_handler(dev);
+iopf_remove_device:
+ iopf_queue_remove_device(iommu->iopf_queue, dev);
return ret;
}
-static int intel_iommu_enable_iopf(struct device *dev)
+static int intel_iommu_disable_iopf(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
- if (info && info->pri_supported)
- return 0;
+ if (!info->pri_enabled)
+ return -EINVAL;
+
+ /*
+ * PCIe spec states that by clearing PRI enable bit, the Page
+ * Request Interface will not issue new page requests, but has
+ * outstanding page requests that have been transmitted or are
+ * queued for transmission. This is supposed to be called after
+ * the device driver has stopped DMA, all PASIDs have been
+ * unbound and the outstanding PRQs have been drained.
+ */
+ pci_disable_pri(to_pci_dev(dev));
+ info->pri_enabled = 0;
- return -ENODEV;
+ /*
+ * With PRI disabled and outstanding PRQs drained, unregistering
+ * fault handler and removing device from iopf queue should never
+ * fail.
+ */
+ WARN_ON(iommu_unregister_device_fault_handler(dev));
+ WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev));
+
+ return 0;
}
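With PRI handling folded into the IOPF feature, a driver is expected to enable IOMMU_DEV_FEAT_IOPF before IOMMU_DEV_FEAT_SVA and tear them down in reverse; intel_iommu_enable_sva() then only verifies that PRI is already enabled on PRI-capable devices. A driver-side sketch of that ordering (assumed example, not taken from this patch):

    static int example_enable_sva_path(struct device *dev)
    {
            int ret;

            ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
            if (ret)
                    return ret;

            ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
            if (ret)
                    iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_IOPF);

            return ret;
    }

Inside intel_iommu_enable_iopf() the resources are acquired in order (PRI reset, iopf queue addition, fault handler registration, PRI enable), and the goto labels unwind them in reverse on failure.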
static int
@@ -4711,10 +4670,10 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
- return 0;
+ return intel_iommu_disable_iopf(dev);
case IOMMU_DEV_FEAT_SVA:
- return intel_iommu_disable_sva(dev);
+ return 0;
default:
return -ENODEV;