author	Linus Torvalds <torvalds@linux-foundation.org>	2023-04-30 23:00:38 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2023-04-30 23:00:38 +0300
commit	58390c8ce1bddb6c623f62e7ed36383e7fa5c02f (patch)
tree	a2abd81e21456b1e2bcbc3bac4dd94650b7a18ec /drivers/iommu/intel
parent	7acc1372113083fa281ba426021801e2402caca1 (diff)
parent	e51b4198396cd715b140c0e8b259680429ff0cfb (diff)
download	linux-58390c8ce1bddb6c623f62e7ed36383e7fa5c02f.tar.xz
Merge tag 'iommu-updates-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu updates from Joerg Roedel:

 - Convert to platform remove callback returning void

 - Extend changing default domain to normal group

 - Intel VT-d updates:
     - Remove VT-d virtual command interface and IOASID
     - Allow the VT-d driver to support non-PRI IOPF
     - Remove PASID supervisor request support
     - Various small and misc cleanups

 - ARM SMMU updates:
     - Device-tree binding updates:
         * Allow Qualcomm GPU SMMUs to accept relevant clock properties
         * Document Qualcomm 8550 SoC as implementing an MMU-500
         * Favour new "qcom,smmu-500" binding for Adreno SMMUs
     - Fix S2CR quirk detection on non-architectural Qualcomm SMMU
       implementations
     - Acknowledge SMMUv3 PRI queue overflow when consuming events
     - Document (in a comment) why ATS is disabled for bypass streams

 - AMD IOMMU updates:
     - 5-level page-table support
     - NUMA awareness for memory allocations

 - Unisoc driver: Support for reattaching an existing domain

 - Rockchip driver: Add missing set_platform_dma_ops callback

 - Mediatek driver: Adjust the dma-ranges

 - Various other small fixes and cleanups

* tag 'iommu-updates-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (82 commits)
  iommu: Remove iommu_group_get_by_id()
  iommu: Make iommu_release_device() static
  iommu/vt-d: Remove BUG_ON in dmar_insert_dev_scope()
  iommu/vt-d: Remove a useless BUG_ON(dev->is_virtfn)
  iommu/vt-d: Remove BUG_ON in map/unmap()
  iommu/vt-d: Remove BUG_ON when domain->pgd is NULL
  iommu/vt-d: Remove BUG_ON in handling iotlb cache invalidation
  iommu/vt-d: Remove BUG_ON on checking valid pfn range
  iommu/vt-d: Make size of operands same in bitwise operations
  iommu/vt-d: Remove PASID supervisor request support
  iommu/vt-d: Use non-privileged mode for all PASIDs
  iommu/vt-d: Remove extern from function prototypes
  iommu/vt-d: Do not use GFP_ATOMIC when not needed
  iommu/vt-d: Remove unnecessary checks in iopf disabling path
  iommu/vt-d: Move PRI handling to IOPF feature path
  iommu/vt-d: Move pfsid and ats_qdep calculation to device probe path
  iommu/vt-d: Move iopf code from SVA to IOPF enabling path
  iommu/vt-d: Allow SVA with device-specific IOPF
  dmaengine: idxd: Add enable/disable device IOPF feature
  arm64: dts: mt8186: Add dma-ranges for the parent "soc" node
  ...
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r--	drivers/iommu/intel/Kconfig	1
-rw-r--r--	drivers/iommu/intel/cap_audit.c	2
-rw-r--r--	drivers/iommu/intel/dmar.c	13
-rw-r--r--	drivers/iommu/intel/iommu.c	277
-rw-r--r--	drivers/iommu/intel/iommu.h	35
-rw-r--r--	drivers/iommu/intel/irq_remapping.c	2
-rw-r--r--	drivers/iommu/intel/pasid.c	43
-rw-r--r--	drivers/iommu/intel/pasid.h	7
-rw-r--r--	drivers/iommu/intel/svm.c	3
9 files changed, 141 insertions, 242 deletions
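
A pattern that recurs throughout the VT-d portion of this series is the removal of BUG_ON() in favour of WARN_ON() plus a graceful error return, so an unexpected condition logs a backtrace and fails a single operation instead of panicking the machine. A minimal before/after sketch of the shape; map_before()/map_after() are illustrative names, not functions from the driver:

#include <linux/bug.h>
#include <linux/errno.h>

/* Before: an unexpected condition halts the whole kernel. */
static int map_before(struct dmar_domain *domain, unsigned long pfn)
{
	BUG_ON(!domain_pfn_supported(domain, pfn));
	/* ... perform the mapping ... */
	return 0;
}

/* After: WARN_ON() evaluates to the tested condition, so one
 * statement both logs a backtrace and lets the caller see a
 * normal error instead of a panic.
 */
static int map_after(struct dmar_domain *domain, unsigned long pfn)
{
	if (WARN_ON(!domain_pfn_supported(domain, pfn)))
		return -EINVAL;
	/* ... perform the mapping ... */
	return 0;
}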
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 12e1e90fdae1..2e56bd79f589 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -18,7 +18,6 @@ config INTEL_IOMMU
select NEED_DMA_MAP_STATE
select DMAR_TABLE
select SWIOTLB
- select IOASID
select PCI_ATS
select PCI_PRI
select PCI_PASID
diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
index 806986696841..9862dc20b35e 100644
--- a/drivers/iommu/intel/cap_audit.c
+++ b/drivers/iommu/intel/cap_audit.c
@@ -54,7 +54,6 @@ static inline void check_dmar_capabilities(struct intel_iommu *a,
CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK);
CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK);
CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, vcs, ECAP_VCS_MASK);
CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK);
CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK);
CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK);
@@ -101,7 +100,6 @@ static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type
CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK);
CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK);
CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, vcs, ECAP_VCS_MASK);
CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK);
CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK);
CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK);
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 23828d189c2a..a3414afe11b0 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -127,8 +127,6 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
struct pci_dev *tmp;
struct dmar_pci_notify_info *info;
- BUG_ON(dev->is_virtfn);
-
/*
* Ignore devices that have a domain number higher than what can
* be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
@@ -264,7 +262,8 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
get_device(dev));
return 1;
}
- BUG_ON(i >= devices_cnt);
+ if (WARN_ON(i >= devices_cnt))
+ return -EINVAL;
}
return 0;
@@ -993,8 +992,6 @@ static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd)
warn_invalid_dmar(phys_addr, " returns all ones");
goto unmap;
}
- if (ecap_vcs(iommu->ecap))
- iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
@@ -1690,7 +1687,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
* is present.
*/
if (ecap_smts(iommu->ecap))
- val |= (1 << 11) | 1;
+ val |= BIT_ULL(11) | BIT_ULL(0);
raw_spin_lock_irqsave(&iommu->register_lock, flags);
@@ -1961,7 +1958,7 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
return 0;
}
- if (pasid == INVALID_IOASID)
+ if (pasid == IOMMU_PASID_INVALID)
pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
type ? "DMA Read" : "DMA Write",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
@@ -2042,7 +2039,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
if (!ratelimited)
/* Using pasid -1 if pasid is not present */
dmar_fault_do_one(iommu, type, fault_reason,
- pasid_present ? pasid : INVALID_IOASID,
+ pasid_present ? pasid : IOMMU_PASID_INVALID,
source_id, guest_addr);
fault_index++;
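
The `val |= (1 << 11) | 1` to `val |= BIT_ULL(11) | BIT_ULL(0)` change in __dmar_enable_qi() above is what "Make size of operands same in bitwise operations" refers to: `val` is a 64-bit register value, while a bare `1` is a signed 32-bit int. Bit 11 happens to be safe, but the plain-int form goes wrong once a shift reaches bit 31. A small sketch of the rationale; qi_dw_fl_example() is a hypothetical helper:

#include <linux/bits.h>

static u64 qi_dw_fl_example(void)
{
	u64 val = 0;

	/* The old form, val |= (1 << 11) | 1, works for low bits, but
	 * its operands are 32-bit signed ints: the same pattern at bit
	 * 31 (1 << 31) overflows int before widening to u64 and can
	 * sign-extend to 0xffffffff80000000.
	 */

	/* BIT_ULL(n) expands to (1ULL << n), so the operand is 64-bit
	 * from the first shift and any of bits 0..63 is safe to set.
	 */
	val |= BIT_ULL(11) | BIT_ULL(0);

	return val;
}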
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7c2f4bd33582..b871a6afd803 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -876,7 +876,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
return;
}
/* For request-without-pasid, get the pasid from context entry */
- if (intel_iommu_sm && pasid == INVALID_IOASID)
+ if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
pasid = PASID_RID2PASID;
dir_index = pasid >> PASID_PDE_SHIFT;
@@ -915,8 +915,6 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
int level = agaw_to_level(domain->agaw);
int offset;
- BUG_ON(!domain->pgd);
-
if (!domain_pfn_supported(domain, pfn))
/* Address beyond IOMMU's addressing capabilities. */
return NULL;
@@ -1005,9 +1003,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
unsigned int large_page;
struct dma_pte *first_pte, *pte;
- BUG_ON(!domain_pfn_supported(domain, start_pfn));
- BUG_ON(!domain_pfn_supported(domain, last_pfn));
- BUG_ON(start_pfn > last_pfn);
+ if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
+ WARN_ON(start_pfn > last_pfn))
+ return;
/* we don't need lock here; nobody else touches the iova range */
do {
@@ -1166,9 +1164,9 @@ next:
static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
unsigned long last_pfn, struct list_head *freelist)
{
- BUG_ON(!domain_pfn_supported(domain, start_pfn));
- BUG_ON(!domain_pfn_supported(domain, last_pfn));
- BUG_ON(start_pfn > last_pfn);
+ if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
+ WARN_ON(start_pfn > last_pfn))
+ return;
/* we don't need lock here; nobody else touches the iova range */
dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
@@ -1272,7 +1270,9 @@ static void __iommu_flush_context(struct intel_iommu *iommu,
| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
break;
default:
- BUG();
+ pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
+ iommu->name, type);
+ return;
}
val |= DMA_CCMD_ICC;
@@ -1308,7 +1308,9 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
val_iva = size_order | addr;
break;
default:
- BUG();
+ pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
+ iommu->name, type);
+ return;
}
/* Note: set drain read/write */
#if 0
@@ -1406,20 +1408,6 @@ static void iommu_enable_pci_caps(struct device_domain_info *info)
return;
pdev = to_pci_dev(info->dev);
- /* For IOMMU that supports device IOTLB throttling (DIT), we assign
- * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
- * queue depth at PF level. If DIT is not set, PFSID will be treated as
- * reserved, which should be set to 0.
- */
- if (!ecap_dit(info->iommu->ecap))
- info->pfsid = 0;
- else {
- struct pci_dev *pf_pdev;
-
- /* pdev will be returned if device is not a vf */
- pf_pdev = pci_physfn(pdev);
- info->pfsid = pci_dev_id(pf_pdev);
- }
/* The PCIe spec, in its wisdom, declares that the behaviour of
the device if you enable PASID support after ATS support is
@@ -1429,16 +1417,10 @@ static void iommu_enable_pci_caps(struct device_domain_info *info)
if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
info->pasid_enabled = 1;
- if (info->pri_supported &&
- (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
- !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
- info->pri_enabled = 1;
-
if (info->ats_supported && pci_ats_page_aligned(pdev) &&
!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
info->ats_enabled = 1;
domain_update_iotlb(info->domain);
- info->ats_qdep = pci_ats_queue_depth(pdev);
}
}
@@ -1457,11 +1439,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info)
domain_update_iotlb(info->domain);
}
- if (info->pri_enabled) {
- pci_disable_pri(pdev);
- info->pri_enabled = 0;
- }
-
if (info->pasid_enabled) {
pci_disable_pasid(pdev);
info->pasid_enabled = 0;
@@ -1508,7 +1485,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
u16 did = domain_id_iommu(domain, iommu);
- BUG_ON(pages == 0);
+ if (WARN_ON(!pages))
+ return;
if (ih)
ih = 1 << 6;
@@ -1722,9 +1700,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
}
- if (vccap_pasid(iommu->vccap))
- ioasid_unregister_allocator(&iommu->pasid_allocator);
-
#endif
}
@@ -1895,7 +1870,7 @@ context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
*/
static inline void context_set_sm_dte(struct context_entry *context)
{
- context->lo |= (1 << 2);
+ context->lo |= BIT_ULL(2);
}
/*
@@ -1904,7 +1879,7 @@ static inline void context_set_sm_dte(struct context_entry *context)
*/
static inline void context_set_sm_pre(struct context_entry *context)
{
- context->lo |= (1 << 4);
+ context->lo |= BIT_ULL(4);
}
/* Convert value to context PASID directory size field coding. */
@@ -1930,8 +1905,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
- BUG_ON(!domain->pgd);
-
spin_lock(&iommu->lock);
ret = -ENOMEM;
context = iommu_context_addr(iommu, bus, devfn, 1);
@@ -2183,7 +2156,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
phys_addr_t pteval;
u64 attr;
- BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
+ if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)))
+ return -EINVAL;
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
@@ -2341,8 +2315,6 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
if (level != 4 && level != 5)
return -EINVAL;
- if (pasid != PASID_RID2PASID)
- flags |= PASID_FLAG_SUPERVISOR_MODE;
if (level == 5)
flags |= PASID_FLAG_FL5LP;
@@ -2797,85 +2769,6 @@ out_unmap:
return ret;
}
-#ifdef CONFIG_INTEL_IOMMU_SVM
-static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
-{
- struct intel_iommu *iommu = data;
- ioasid_t ioasid;
-
- if (!iommu)
- return INVALID_IOASID;
- /*
- * VT-d virtual command interface always uses the full 20 bit
- * PASID range. Host can partition guest PASID range based on
- * policies but it is out of guest's control.
- */
- if (min < PASID_MIN || max > intel_pasid_max_id)
- return INVALID_IOASID;
-
- if (vcmd_alloc_pasid(iommu, &ioasid))
- return INVALID_IOASID;
-
- return ioasid;
-}
-
-static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
-{
- struct intel_iommu *iommu = data;
-
- if (!iommu)
- return;
- /*
- * Sanity check the ioasid owner is done at upper layer, e.g. VFIO
- * We can only free the PASID when all the devices are unbound.
- */
- if (ioasid_find(NULL, ioasid, NULL)) {
- pr_alert("Cannot free active IOASID %d\n", ioasid);
- return;
- }
- vcmd_free_pasid(iommu, ioasid);
-}
-
-static void register_pasid_allocator(struct intel_iommu *iommu)
-{
- /*
- * If we are running in the host, no need for custom allocator
- * in that PASIDs are allocated from the host system-wide.
- */
- if (!cap_caching_mode(iommu->cap))
- return;
-
- if (!sm_supported(iommu)) {
- pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
- return;
- }
-
- /*
- * Register a custom PASID allocator if we are running in a guest,
- * guest PASID must be obtained via virtual command interface.
- * There can be multiple vIOMMUs in each guest but only one allocator
- * is active. All vIOMMU allocators will eventually be calling the same
- * host allocator.
- */
- if (!vccap_pasid(iommu->vccap))
- return;
-
- pr_info("Register custom PASID allocator\n");
- iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
- iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
- iommu->pasid_allocator.pdata = (void *)iommu;
- if (ioasid_register_allocator(&iommu->pasid_allocator)) {
- pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
- /*
- * Disable scalable mode on this IOMMU if there
- * is no custom allocator. Mixing SM capable vIOMMU
- * and non-SM vIOMMU are not supported.
- */
- intel_iommu_sm = 0;
- }
-}
-#endif
-
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
@@ -2964,9 +2857,6 @@ static int __init init_dmars(void)
*/
for_each_active_iommu(iommu, drhd) {
iommu_flush_write_buffer(iommu);
-#ifdef CONFIG_INTEL_IOMMU_SVM
- register_pasid_allocator(iommu);
-#endif
iommu_set_root_entry(iommu);
}
@@ -3760,8 +3650,8 @@ static ssize_t version_show(struct device *dev,
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
u32 ver = readl(iommu->reg + DMAR_VER_REG);
- return sprintf(buf, "%d:%d\n",
- DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
+ return sysfs_emit(buf, "%d:%d\n",
+ DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
}
static DEVICE_ATTR_RO(version);
@@ -3769,7 +3659,7 @@ static ssize_t address_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%llx\n", iommu->reg_phys);
+ return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
}
static DEVICE_ATTR_RO(address);
@@ -3777,7 +3667,7 @@ static ssize_t cap_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%llx\n", iommu->cap);
+ return sysfs_emit(buf, "%llx\n", iommu->cap);
}
static DEVICE_ATTR_RO(cap);
@@ -3785,7 +3675,7 @@ static ssize_t ecap_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%llx\n", iommu->ecap);
+ return sysfs_emit(buf, "%llx\n", iommu->ecap);
}
static DEVICE_ATTR_RO(ecap);
@@ -3793,7 +3683,7 @@ static ssize_t domains_supported_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
+ return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap));
}
static DEVICE_ATTR_RO(domains_supported);
@@ -3801,8 +3691,9 @@ static ssize_t domains_used_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
- cap_ndoms(iommu->cap)));
+ return sysfs_emit(buf, "%d\n",
+ bitmap_weight(iommu->domain_ids,
+ cap_ndoms(iommu->cap)));
}
static DEVICE_ATTR_RO(domains_used);
@@ -4340,8 +4231,9 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
- BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
- GFP_ATOMIC));
+ if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
+ &level, GFP_ATOMIC)))
+ return 0;
if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4521,6 +4413,17 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
dmar_ats_supported(pdev, iommu)) {
info->ats_supported = 1;
info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
+
+ /*
+ * For IOMMU that supports device IOTLB throttling
+ * (DIT), we assign PFSID to the invalidation desc
+ * of a VF such that IOMMU HW can gauge queue depth
+ * at PF level. If DIT is not set, PFSID will be
+ * treated as reserved, which should be set to 0.
+ */
+ if (ecap_dit(iommu->ecap))
+ info->pfsid = pci_dev_id(pci_physfn(pdev));
+ info->ats_qdep = pci_ats_queue_depth(pdev);
}
if (sm_supported(iommu)) {
if (pasid_supported(iommu)) {
@@ -4638,7 +4541,6 @@ static int intel_iommu_enable_sva(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu;
- int ret;
if (!info || dmar_disabled)
return -EINVAL;
@@ -4650,45 +4552,102 @@ static int intel_iommu_enable_sva(struct device *dev)
if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
return -ENODEV;
- if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
+ if (!info->pasid_enabled || !info->ats_enabled)
return -EINVAL;
- ret = iopf_queue_add_device(iommu->iopf_queue, dev);
- if (ret)
- return ret;
+ /*
+ * Devices having device-specific I/O fault handling should not
+ * support PCI/PRI. The IOMMU side has no means to check the
+ * capability of device-specific IOPF. Therefore, IOMMU can only
+ * default that if the device driver enables SVA on a non-PRI
+ * device, it will handle IOPF in its own way.
+ */
+ if (!info->pri_supported)
+ return 0;
- ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
- if (ret)
- iopf_queue_remove_device(iommu->iopf_queue, dev);
+ /* Devices supporting PRI should have it enabled. */
+ if (!info->pri_enabled)
+ return -EINVAL;
- return ret;
+ return 0;
}
-static int intel_iommu_disable_sva(struct device *dev)
+static int intel_iommu_enable_iopf(struct device *dev)
{
+ struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu = info->iommu;
+ struct intel_iommu *iommu;
int ret;
- ret = iommu_unregister_device_fault_handler(dev);
+ if (!pdev || !info || !info->ats_enabled || !info->pri_supported)
+ return -ENODEV;
+
+ if (info->pri_enabled)
+ return -EBUSY;
+
+ iommu = info->iommu;
+ if (!iommu)
+ return -EINVAL;
+
+ /* PASID is required in PRG Response Message. */
+ if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
+ return -EINVAL;
+
+ ret = pci_reset_pri(pdev);
+ if (ret)
+ return ret;
+
+ ret = iopf_queue_add_device(iommu->iopf_queue, dev);
if (ret)
return ret;
- ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
+ ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+ if (ret)
+ goto iopf_remove_device;
+
+ ret = pci_enable_pri(pdev, PRQ_DEPTH);
if (ret)
- iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+ goto iopf_unregister_handler;
+ info->pri_enabled = 1;
+
+ return 0;
+
+iopf_unregister_handler:
+ iommu_unregister_device_fault_handler(dev);
+iopf_remove_device:
+ iopf_queue_remove_device(iommu->iopf_queue, dev);
return ret;
}
-static int intel_iommu_enable_iopf(struct device *dev)
+static int intel_iommu_disable_iopf(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
- if (info && info->pri_supported)
- return 0;
+ if (!info->pri_enabled)
+ return -EINVAL;
+
+ /*
+ * PCIe spec states that by clearing PRI enable bit, the Page
+ * Request Interface will not issue new page requests, but has
+ * outstanding page requests that have been transmitted or are
+ * queued for transmission. This is supposed to be called after
+ * the device driver has stopped DMA, all PASIDs have been
+ * unbound and the outstanding PRQs have been drained.
+ */
+ pci_disable_pri(to_pci_dev(dev));
+ info->pri_enabled = 0;
- return -ENODEV;
+ /*
+ * With PRI disabled and outstanding PRQs drained, unregistering
+ * fault handler and removing device from iopf queue should never
+ * fail.
+ */
+ WARN_ON(iommu_unregister_device_fault_handler(dev));
+ WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev));
+
+ return 0;
}
static int
@@ -4711,10 +4670,10 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
- return 0;
+ return intel_iommu_disable_iopf(dev);
case IOMMU_DEV_FEAT_SVA:
- return intel_iommu_disable_sva(dev);
+ return 0;
default:
return -ENODEV;
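
With PRI handling moved out of the generic PCI-caps path and into intel_iommu_enable_iopf() above, enabling faultable DMA becomes an explicit two-step for a device driver: enable IOMMU_DEV_FEAT_IOPF (which walks pci_reset_pri(), iopf_queue_add_device(), iommu_register_device_fault_handler(), and only then pci_enable_pri(), unwinding on failure), then IOMMU_DEV_FEAT_SVA, which now merely validates PASID/ATS/PRI state. A hedged consumer-side sketch, modelled loosely on the idxd commit in this series; example_drv_enable_sva() is an illustrative name:

#include <linux/iommu.h>

static int example_drv_enable_sva(struct device *dev)
{
	int ret;

	/* For a PRI-capable device, this performs the ordered
	 * IOPF setup shown in intel_iommu_enable_iopf() above.
	 */
	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
	if (ret)
		return ret;

	/* SVA enablement now only checks prerequisites. */
	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
	if (ret)
		iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_IOPF);

	return ret;
}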
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 694ab9b7d3e9..1c5e1d88862b 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -19,7 +19,6 @@
#include <linux/iommu.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/dmar.h>
-#include <linux/ioasid.h>
#include <linux/bitfield.h>
#include <linux/xarray.h>
#include <linux/perf_event.h>
@@ -198,7 +197,6 @@
#define ecap_flts(e) (((e) >> 47) & 0x1)
#define ecap_slts(e) (((e) >> 46) & 0x1)
#define ecap_slads(e) (((e) >> 45) & 0x1)
-#define ecap_vcs(e) (((e) >> 44) & 0x1)
#define ecap_smts(e) (((e) >> 43) & 0x1)
#define ecap_dit(e) (((e) >> 41) & 0x1)
#define ecap_pds(e) (((e) >> 42) & 0x1)
@@ -678,7 +676,6 @@ struct intel_iommu {
unsigned char prq_name[16]; /* Name for PRQ interrupt */
unsigned long prq_seq_number;
struct completion prq_complete;
- struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */
#endif
struct iopf_queue *iopf_queue;
unsigned char iopfq_name[16];
@@ -798,18 +795,18 @@ static inline bool context_present(struct context_entry *context)
return (context->lo & 1);
}
-extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+struct dmar_drhd_unit *dmar_find_matched_drhd_unit(struct pci_dev *dev);
-extern int dmar_enable_qi(struct intel_iommu *iommu);
-extern void dmar_disable_qi(struct intel_iommu *iommu);
-extern int dmar_reenable_qi(struct intel_iommu *iommu);
-extern void qi_global_iec(struct intel_iommu *iommu);
+int dmar_enable_qi(struct intel_iommu *iommu);
+void dmar_disable_qi(struct intel_iommu *iommu);
+int dmar_reenable_qi(struct intel_iommu *iommu);
+void qi_global_iec(struct intel_iommu *iommu);
-extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
- u8 fm, u64 type);
-extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
- unsigned int size_order, u64 type);
-extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+void qi_flush_context(struct intel_iommu *iommu, u16 did,
+ u16 sid, u8 fm, u64 type);
+void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type);
+void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u16 qdep, u64 addr, unsigned mask);
void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
@@ -832,7 +829,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
*/
#define QI_OPT_WAIT_DRAIN BIT(0)
-extern int dmar_ir_support(void);
+int dmar_ir_support(void);
void *alloc_pgtable_page(int node, gfp_t gfp);
void free_pgtable_page(void *vaddr);
@@ -840,9 +837,9 @@ void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
#ifdef CONFIG_INTEL_IOMMU_SVM
-extern void intel_svm_check(struct intel_iommu *iommu);
-extern int intel_svm_enable_prq(struct intel_iommu *iommu);
-extern int intel_svm_finish_prq(struct intel_iommu *iommu);
+void intel_svm_check(struct intel_iommu *iommu);
+int intel_svm_enable_prq(struct intel_iommu *iommu);
+int intel_svm_finish_prq(struct intel_iommu *iommu);
int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
struct iommu_page_response *msg);
struct iommu_domain *intel_svm_domain_alloc(void);
@@ -889,8 +886,8 @@ extern const struct iommu_ops intel_iommu_ops;
#ifdef CONFIG_INTEL_IOMMU
extern int intel_iommu_sm;
-extern int iommu_calculate_agaw(struct intel_iommu *iommu);
-extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
+int iommu_calculate_agaw(struct intel_iommu *iommu);
+int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob);
static inline bool ecmd_has_pmu_essential(struct intel_iommu *iommu)
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index df9e261af0b5..a1b987335b31 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -548,7 +548,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
goto out_free_table;
}
- bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_ATOMIC);
+ bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_KERNEL);
if (bitmap == NULL) {
pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
goto out_free_pages;
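
The GFP_ATOMIC to GFP_KERNEL switch above follows "Do not use GFP_ATOMIC when not needed": intel_setup_irq_remapping() runs in process context where sleeping is allowed, so the allocator may block and reclaim memory rather than dipping into the emergency reserves GFP_ATOMIC draws from. A hypothetical helper (not from the driver) illustrating how context drives the choice:

#include <linux/slab.h>

static void *ctx_aware_alloc(size_t size, bool can_sleep)
{
	/* GFP_KERNEL may block to reclaim memory, making it the right
	 * default whenever the caller is allowed to sleep. GFP_ATOMIC
	 * never blocks, but it fails more readily and consumes reserves
	 * meant for interrupt/atomic contexts with no alternative.
	 */
	return kzalloc(size, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
}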
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 633e0a4a01e7..c5d479770e12 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -336,15 +336,6 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe)
}
/*
- * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
- * scalable mode PASID entry.
- */
-static inline void pasid_set_sre(struct pasid_entry *pe)
-{
- pasid_set_bits(&pe->val[2], 1 << 0, 1);
-}
-
-/*
* Setup the WPE(Write Protect Enable) field (Bit 132) of a
* scalable mode PASID entry.
*/
@@ -521,23 +512,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return -EINVAL;
}
- if (flags & PASID_FLAG_SUPERVISOR_MODE) {
-#ifdef CONFIG_X86
- unsigned long cr0 = read_cr0();
-
- /* CR0.WP is normally set but just to be sure */
- if (unlikely(!(cr0 & X86_CR0_WP))) {
- pr_err("No CPU write protect!\n");
- return -EINVAL;
- }
-#endif
- if (!ecap_srs(iommu->ecap)) {
- pr_err("No supervisor request support on %s\n",
- iommu->name);
- return -EINVAL;
- }
- }
-
if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
pr_err("No 5-level paging support for first-level on %s\n",
iommu->name);
@@ -560,10 +534,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
/* Setup the first level page table pointer: */
pasid_set_flptr(pte, (u64)__pa(pgd));
- if (flags & PASID_FLAG_SUPERVISOR_MODE) {
- pasid_set_sre(pte);
- pasid_set_wpe(pte);
- }
if (flags & PASID_FLAG_FL5LP)
pasid_set_flpm(pte, 1);
@@ -658,12 +628,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
- /*
- * Since it is a second level only translation setup, we should
- * set SRE bit as well (addresses are expected to be GPAs).
- */
- if (pasid != PASID_RID2PASID && ecap_srs(iommu->ecap))
- pasid_set_sre(pte);
pasid_set_present(pte);
spin_unlock(&iommu->lock);
@@ -700,13 +664,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
-
- /*
- * We should set SRE bit as well since the addresses are expected
- * to be GPAs.
- */
- if (ecap_srs(iommu->ecap))
- pasid_set_sre(pte);
pasid_set_present(pte);
spin_unlock(&iommu->lock);
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 20c54e50f533..d6b7d21244b1 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -41,13 +41,6 @@
#define FLPT_DEFAULT_DID 1
#define NUM_RESERVED_DID 2
-/*
- * The SUPERVISOR_MODE flag indicates a first level translation which
- * can be used for access to kernel addresses. It is valid only for
- * access to the kernel's static 1:1 mapping of physical memory — not
- * to vmalloc or even module mappings.
- */
-#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
#define PASID_FLAG_NESTED BIT(1)
#define PASID_FLAG_PAGE_SNOOP BIT(2)
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 7367f56c3bad..e95b339e9cdc 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -16,7 +16,6 @@
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
-#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
@@ -273,7 +272,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
return -EINVAL;
- if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
+ if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
return -EINVAL;
svm = pasid_private_find(pasid);