From 6eb4da8cf54537992fc9843be8b2af4f83f717e0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:39 -0300 Subject: iommu: Have __iommu_probe_device() check for already probed devices This is a step toward making __iommu_probe_device() self-contained. It should, under proper locking, check if the device is already associated with an iommu driver and resolve parallel probes. All but one of the callers open code this test using two different means, but they all rely on dev->iommu_group. Currently, bus_iommu_probe()/probe_iommu_group() and probe_acpi_namespace_devices() reject already probed devices with an unlocked read of dev->iommu_group. The OF and ACPI "replay" functions use device_iommu_mapped(), which is the same read without the pointless refcount. Move this test into __iommu_probe_device() and put it under the iommu_probe_device_lock. The store to dev->iommu_group is in iommu_group_add_device(), which is also called under this lock for iommu driver devices, making it properly locked. The only path that didn't have this check is the hotplug path triggered by BUS_NOTIFY_ADD_DEVICE. The only way to get dev->iommu_group assigned outside the probe path is via iommu_group_add_device(). Today the only caller is VFIO no-iommu, which never associates with an iommu driver. Thus adding this additional check is safe. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Acked-by: Rafael J. Wysocki Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5c8c5cdc36cf..d5ca2387e65c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3756,7 +3756,6 @@ static int __init probe_acpi_namespace_devices(void) for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { struct acpi_device_physical_node *pn; - struct iommu_group *group; struct acpi_device *adev; if (dev->bus != &acpi_bus_type) @@ -3766,12 +3765,6 @@ static int __init probe_acpi_namespace_devices(void) mutex_lock(&adev->physical_node_lock); list_for_each_entry(pn, &adev->physical_node_list, node) { - group = iommu_group_get(pn->dev); - if (group) { - iommu_group_put(group); - continue; - } - ret = iommu_probe_device(pn->dev); if (ret) break; -- cgit v1.2.3 From 4298780126c298f20ae4bc8676591eaf8c48767e Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 9 Aug 2023 20:47:54 +0800 Subject: iommu: Generalize PASID 0 for normal DMA w/o PASID The PCIe Process Address Space ID (PASID) is used to tag DMA traffic; it provides finer-grained isolation than the requester ID (RID). For each device/RID, PASID 0 is a special value reserved for normal DMA without PASID. This is universal across all architectures that support PASID, so it is warranted to be reserved globally and declared in the common header. Consequently, we can avoid conflicts between different PASID use cases in generic code, e.g. SVA and the DMA API with PASIDs. This paves the way for device drivers to choose a global PASID policy while continuing to do normal DMA. Note that VT-d could support a non-zero RID/NO_PASID, but this is currently not used.
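To make the reserved-value convention concrete before the diff, here is a minimal C sketch (illustrative only, not part of the patch); the two #defines mirror the include/linux/iommu.h hunk below, and the helper is a hypothetical example of how generic code can tell the cases apart:

/* Mirrors the values added to include/linux/iommu.h by this patch. */
#define IOMMU_NO_PASID		(0U)	/* Reserved for DMA w/o PASID */
#define IOMMU_PASID_INVALID	(-1U)

typedef unsigned int ioasid_t;

/* Hypothetical helper: true only for a real, allocated PASID. */
static inline bool pasid_is_real(ioasid_t pasid)
{
	return pasid != IOMMU_NO_PASID && pasid != IOMMU_PASID_INVALID;
}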
Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jean-Philippe Brucker Reviewed-by: Jason Gunthorpe Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230802212427.1497170-2-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 ++++++++-------- drivers/iommu/intel/iommu.c | 24 ++++++++++++------------ drivers/iommu/intel/pasid.c | 2 +- drivers/iommu/intel/pasid.h | 2 -- include/linux/iommu.h | 1 + 6 files changed, 23 insertions(+), 24 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index a5a63b1c947e..5e6b39881c04 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -80,7 +80,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) * be some overlap between use of both ASIDs, until we invalidate the * TLB. */ - arm_smmu_write_ctx_desc(smmu_domain, 0, cd); + arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd); /* Invalidate TLB entries previously associated with that context */ arm_smmu_tlb_inv_asid(smmu, asid); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9b0dc3505601..ee70687f060b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1059,7 +1059,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, /* * This function handles the following cases: * - * (1) Install primary CD, for normal DMA traffic (SSID = 0). + * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0). * (2) Install a secondary CD, for SID+SSID traffic. * (3) Update ASID of a CD. Atomically write the first 64 bits of the * CD, then invalidate the old entry and mappings. @@ -1607,7 +1607,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt) sid = FIELD_GET(PRIQ_0_SID, evt[0]); ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]); - ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0; + ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID; last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]); grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]); @@ -1748,7 +1748,7 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, */ *cmd = (struct arm_smmu_cmdq_ent) { .opcode = CMDQ_OP_ATC_INV, - .substream_valid = !!ssid, + .substream_valid = (ssid != IOMMU_NO_PASID), .atc.ssid = ssid, }; @@ -1795,7 +1795,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) struct arm_smmu_cmdq_ent cmd; struct arm_smmu_cmdq_batch cmds; - arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); + arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); cmds.num = 0; for (i = 0; i < master->num_streams; i++) { @@ -1875,7 +1875,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } - arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); } static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, @@ -1968,7 +1968,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. 
*/ - arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size); } void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, @@ -2142,7 +2142,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, * the master has been added to the devices list for this domain. * This isn't an issue because the STE hasn't been installed yet. */ - ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd); + ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd); if (ret) goto out_free_cd_tables; @@ -2328,7 +2328,7 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) pdev = to_pci_dev(master->dev); atomic_inc(&smmu_domain->nr_ats_masters); - arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); if (pci_enable_ats(pdev, stu)) dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5c8c5cdc36cf..ddff43def3ab 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -877,7 +877,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, } /* For request-without-pasid, get the pasid from context entry */ if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) - pasid = PASID_RID2PASID; + pasid = IOMMU_NO_PASID; dir_index = pasid >> PASID_PDE_SHIFT; pde = &dir[dir_index]; @@ -1449,7 +1449,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); - quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); + quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -1484,7 +1484,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, ih = 1 << 6; if (domain->use_first_level) { - qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, pages, ih); } else { unsigned long bitmask = aligned_pages - 1; @@ -1554,7 +1554,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) u16 did = domain_id_iommu(dmar_domain, iommu); if (dmar_domain->use_first_level) - qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); @@ -1940,7 +1940,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_pdts(pds); /* Setup the RID_PASID field: */ - context_set_sm_rid2pasid(context, PASID_RID2PASID); + context_set_sm_rid2pasid(context, IOMMU_NO_PASID); /* * Setup the Device-TLB enable bit and Page request @@ -2420,13 +2420,13 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, /* Setup the PASID entry for requests without PASID: */ if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, - dev, PASID_RID2PASID); + dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, - PASID_RID2PASID); + IOMMU_NO_PASID); else ret = intel_pasid_setup_second_level(iommu, domain, - dev, PASID_RID2PASID); + dev, IOMMU_NO_PASID); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); device_block_translation(dev); @@ -3968,7 +3968,7 @@ static void dmar_remove_one_dev_info(struct device *dev) if (!dev_is_real_dma_subdevice(info->dev)) { if 
(dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, - PASID_RID2PASID, false); + IOMMU_NO_PASID, false); iommu_disable_pci_caps(info); domain_context_clear(info); @@ -3997,7 +3997,7 @@ static void device_block_translation(struct device *dev) if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, dev, - PASID_RID2PASID, false); + IOMMU_NO_PASID, false); else domain_context_clear(info); } @@ -4331,7 +4331,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain) list_for_each_entry(info, &domain->devices, link) intel_pasid_setup_page_snoop_control(info->iommu, info->dev, - PASID_RID2PASID); + IOMMU_NO_PASID); } static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) @@ -4987,7 +4987,7 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info, return; sid = PCI_DEVID(info->bus, info->devfn); - if (pasid == PASID_RID2PASID) { + if (pasid == IOMMU_NO_PASID) { qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, address, mask); } else { diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c5d479770e12..23dca3bc319d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -438,7 +438,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, * SVA usage, device could do DMA with multiple PASIDs. It is more * efficient to flush devTLB specific to the PASID. */ - if (pasid == PASID_RID2PASID) + if (pasid == IOMMU_NO_PASID) qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); else qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index d6b7d21244b1..4e9e68c3c388 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -10,8 +10,6 @@ #ifndef __INTEL_PASID_H #define __INTEL_PASID_H -#define PASID_RID2PASID 0x0 -#define PASID_MIN 0x1 #define PASID_MAX 0x100000 #define PASID_PTE_MASK 0x3F #define PASID_PTE_PRESENT 1 diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d31642596675..2870bc29d456 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -196,6 +196,7 @@ enum iommu_dev_features { IOMMU_DEV_FEAT_IOPF, }; +#define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */ #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; -- cgit v1.2.3 From ac1a3483febd0ec8fced1ccb15da349dcc360fbb Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Aug 2023 20:47:56 +0800 Subject: iommu/vt-d: Add domain_flush_pasid_iotlb() The VT-d spec requires using a PASID-based-IOTLB invalidation descriptor to invalidate the IOTLB and the paging-structure caches for a first-stage page table. Add a generic helper to do this. RID2PASID is used if the domain has been attached to a physical device; otherwise, the real PASIDs that the domain has been attached to will be used. The 'real' PASID attachment is handled in the subsequent change.
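As a reading aid, this hedged sketch shows the shape domain_flush_pasid_iotlb() ends up with once the later patches in this series land (the dev_pasids walk arrives with the set_dev_pasid patch below); it is assembled from the diffs in this document, not an alternative implementation:

static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
				     struct dmar_domain *domain, u64 addr,
				     unsigned long npages, bool ih)
{
	u16 did = domain_id_iommu(domain, iommu);
	struct dev_pasid_info *dev_pasid;
	unsigned long flags;

	spin_lock_irqsave(&domain->lock, flags);
	/* Flush the real PASIDs the domain has been attached to, if any. */
	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
		qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih);

	/* RID2PASID applies only while a physical device is attached. */
	if (!list_empty(&domain->devices))
		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih);
	spin_unlock_irqrestore(&domain->lock, flags);
}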
Signed-off-by: Lu Baolu Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230802212427.1497170-4-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index ddff43def3ab..ecb7e44377a8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1467,6 +1467,18 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_unlock_irqrestore(&domain->lock, flags); } +static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, + struct dmar_domain *domain, u64 addr, + unsigned long npages, bool ih) +{ + u16 did = domain_id_iommu(domain, iommu); + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih); + spin_unlock_irqrestore(&domain->lock, flags); +} + static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, struct dmar_domain *domain, unsigned long pfn, unsigned int pages, @@ -1484,7 +1496,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, ih = 1 << 6; if (domain->use_first_level) { - qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, pages, ih); + domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); } else { unsigned long bitmask = aligned_pages - 1; @@ -1554,7 +1566,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) u16 did = domain_id_iommu(dmar_domain, iommu); if (dmar_domain->use_first_level) - qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, 0, -1, 0); + domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); -- cgit v1.2.3 From 154786235d0156ddce9575f13e23f3169e688435 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Aug 2023 20:47:58 +0800 Subject: iommu/vt-d: Make prq draining code generic Currently, draining page requests and responses for a pasid is part of the SVA implementation. This is because the driver only supports attaching an SVA domain to a device pasid. As we are about to support attaching other types of domains to a device pasid, make the prq draining code generic. Signed-off-by: Lu Baolu Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230802212427.1497170-6-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 30 +++++++++++++++++++----------- drivers/iommu/intel/iommu.h | 2 ++ drivers/iommu/intel/svm.c | 17 ++--------------- 3 files changed, 23 insertions(+), 26 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index ecb7e44377a8..574021c36db5 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4727,21 +4727,29 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); struct iommu_domain *domain; - /* Domain type specific cleanup: */ domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); - if (domain) { - switch (domain->type) { - case IOMMU_DOMAIN_SVA: - intel_svm_remove_dev_pasid(dev, pasid); - break; - default: - /* should never reach here */ - WARN_ON(1); - break; - } + if (WARN_ON_ONCE(!domain)) + goto out_tear_down; + + /* + * The SVA implementation needs to handle its own stuffs like the mm + * notification.
Before consolidating that code into iommu core, let + * the intel sva code handle it. + */ + if (domain->type == IOMMU_DOMAIN_SVA) { + intel_svm_remove_dev_pasid(dev, pasid); + goto out_tear_down; } + /* + * Should never reach here until we add support for attaching + * non-SVA domain to a pasid. + */ + WARN_ON(1); + +out_tear_down: intel_pasid_tear_down_entry(iommu, dev, pasid, false); + intel_drain_pasid_prq(dev, pasid); } const struct iommu_ops intel_iommu_ops = { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 1c5e1d88862b..6d94a29f5d52 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -844,6 +844,7 @@ int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); +void intel_drain_pasid_prq(struct device *dev, u32 pasid); struct intel_svm_dev { struct list_head list; @@ -862,6 +863,7 @@ struct intel_svm { }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} +static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {} static inline struct iommu_domain *intel_svm_domain_alloc(void) { return NULL; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 2a82864e9d57..9fbae9af6615 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -26,8 +26,6 @@ #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); -static void intel_svm_drain_prq(struct device *dev, u32 pasid); -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) static DEFINE_XARRAY_ALLOC(pasid_private_array); static int pasid_private_add(ioasid_t pasid, void *priv) @@ -382,17 +380,6 @@ void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid) if (sdev) { list_del_rcu(&sdev->list); - /* - * Flush the PASID cache and IOTLB for this device. - * Note that we do depend on the hardware *not* using - * the PASID any more. Just as we depend on other - * devices never using PASIDs that they have no right - * to use. We have a *shared* PASID table, because it's - * large and has to be physically contiguous. So it's - * hard to be as defensive as we might like. - */ - intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false); - intel_svm_drain_prq(dev, svm->pasid); kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { @@ -449,7 +436,7 @@ static bool is_canonical_address(u64 addr) } /** - * intel_svm_drain_prq - Drain page requests and responses for a pasid + * intel_drain_pasid_prq - Drain page requests and responses for a pasid * @dev: target device * @pasid: pasid for draining * @@ -463,7 +450,7 @@ static bool is_canonical_address(u64 addr) * described in VT-d spec CH7.10 to drain all page requests and page * responses pending in the hardware. */ -static void intel_svm_drain_prq(struct device *dev, u32 pasid) +void intel_drain_pasid_prq(struct device *dev, u32 pasid) { struct device_domain_info *info; struct dmar_domain *domain; -- cgit v1.2.3 From 37f900e7180ac67c73dcfae308625c87194025c0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Aug 2023 20:47:59 +0800 Subject: iommu/vt-d: Prepare for set_dev_pasid callback The domain_flush_pasid_iotlb() helper function is used to flush the IOTLB entries for a given PASID. Previously, this function assumed that RID2PASID was only used for the first-level DMA translation. 
However, with the introduction of the set_dev_pasid callback, this assumption is no longer valid. Add a check before using RID2PASID for PASID invalidation. The check ensures that the domain has been attached to a physical device before RID2PASID is used. Signed-off-by: Lu Baolu Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230802212427.1497170-7-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 574021c36db5..19585800dcc9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1475,7 +1475,8 @@ static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih); + if (!list_empty(&domain->devices)) + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih); spin_unlock_irqrestore(&domain->lock, flags); } -- cgit v1.2.3 From 7d0c9da6c1509664d96488042bacc02308ca33b2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Aug 2023 20:48:00 +0800 Subject: iommu/vt-d: Add set_dev_pasid callback for dma domain This allows the upper layers to attach a domain to a PASID of a device if the PASID feature is supported by the IOMMU hardware. The typical use cases are, for example, kernel DMA with PASID and hardware-assisted mediated device drivers. The attached device and pasid information is tracked in a per-domain list and is used for IOTLB and devTLB invalidation. Signed-off-by: Lu Baolu Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230802212427.1497170-8-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 104 +++++++++++++++++++++++++++++++++++++++++--- drivers/iommu/intel/iommu.h | 7 +++ 2 files changed, 106 insertions(+), 5 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 19585800dcc9..4b5ed58ee422 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1359,6 +1359,7 @@ domain_lookup_dev_info(struct dmar_domain *domain, static void domain_update_iotlb(struct dmar_domain *domain) { + struct dev_pasid_info *dev_pasid; struct device_domain_info *info; bool has_iotlb_device = false; unsigned long flags; @@ -1370,6 +1371,14 @@ static void domain_update_iotlb(struct dmar_domain *domain) break; } } + + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + info = dev_iommu_priv_get(dev_pasid->dev); + if (info->ats_enabled) { + has_iotlb_device = true; + break; + } + } domain->has_iotlb_device = has_iotlb_device; spin_unlock_irqrestore(&domain->lock, flags); } @@ -1455,6 +1464,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, static void iommu_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, unsigned mask) { + struct dev_pasid_info *dev_pasid; struct device_domain_info *info; unsigned long flags; @@ -1464,6 +1474,19 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_lock_irqsave(&domain->lock, flags); list_for_each_entry(info, &domain->devices, link) __iommu_flush_dev_iotlb(info, addr, mask); + + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + info = dev_iommu_priv_get(dev_pasid->dev); + + if (!info->ats_enabled) + continue; + +
qi_flush_dev_iotlb_pasid(info->iommu, + PCI_DEVID(info->bus, info->devfn), + info->pfsid, dev_pasid->pasid, + info->ats_qdep, addr, + mask); + } spin_unlock_irqrestore(&domain->lock, flags); } @@ -1472,9 +1495,13 @@ static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, unsigned long npages, bool ih) { u16 did = domain_id_iommu(domain, iommu); + struct dev_pasid_info *dev_pasid; unsigned long flags; spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) + qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih); + if (!list_empty(&domain->devices)) qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih); spin_unlock_irqrestore(&domain->lock, flags); @@ -1739,6 +1766,7 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->use_first_level = true; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->dev_pasids); spin_lock_init(&domain->lock); xa_init(&domain->iommu_array); @@ -4726,7 +4754,10 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct dev_pasid_info *curr, *dev_pasid = NULL; + struct dmar_domain *dmar_domain; struct iommu_domain *domain; + unsigned long flags; domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); if (WARN_ON_ONCE(!domain)) @@ -4742,17 +4773,79 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) goto out_tear_down; } - /* - * Should never reach here until we add support for attaching - * non-SVA domain to a pasid. - */ - WARN_ON(1); + dmar_domain = to_dmar_domain(domain); + spin_lock_irqsave(&dmar_domain->lock, flags); + list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { + if (curr->dev == dev && curr->pasid == pasid) { + list_del(&curr->link_domain); + dev_pasid = curr; + break; + } + } + WARN_ON_ONCE(!dev_pasid); + spin_unlock_irqrestore(&dmar_domain->lock, flags); + domain_detach_iommu(dmar_domain, iommu); + kfree(dev_pasid); out_tear_down: intel_pasid_tear_down_entry(iommu, dev, pasid, false); intel_drain_pasid_prq(dev, pasid); } +static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct intel_iommu *iommu = info->iommu; + struct dev_pasid_info *dev_pasid; + unsigned long flags; + int ret; + + if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) + return -EOPNOTSUPP; + + if (context_copied(iommu, info->bus, info->devfn)) + return -EBUSY; + + ret = prepare_domain_attach_device(domain, dev); + if (ret) + return ret; + + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); + if (!dev_pasid) + return -ENOMEM; + + ret = domain_attach_iommu(dmar_domain, iommu); + if (ret) + goto out_free; + + if (domain_type_is_si(dmar_domain)) + ret = intel_pasid_setup_pass_through(iommu, dmar_domain, + dev, pasid); + else if (dmar_domain->use_first_level) + ret = domain_setup_first_level(iommu, dmar_domain, + dev, pasid); + else + ret = intel_pasid_setup_second_level(iommu, dmar_domain, + dev, pasid); + if (ret) + goto out_detach_iommu; + + dev_pasid->dev = dev; + dev_pasid->pasid = pasid; + spin_lock_irqsave(&dmar_domain->lock, flags); + list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); + spin_unlock_irqrestore(&dmar_domain->lock, flags); + + 
return 0; +out_detach_iommu: + domain_detach_iommu(dmar_domain, iommu); +out_free: + kfree(dev_pasid); + return ret; +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -4772,6 +4865,7 @@ const struct iommu_ops intel_iommu_ops = { #endif .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = intel_iommu_attach_device, + .set_dev_pasid = intel_iommu_set_dev_pasid, .map_pages = intel_iommu_map_pages, .unmap_pages = intel_iommu_unmap_pages, .iotlb_sync_map = intel_iommu_iotlb_sync_map, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 6d94a29f5d52..c18fb699c87a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -595,6 +595,7 @@ struct dmar_domain { spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ + struct list_head dev_pasids; /* all attached pasids */ struct dma_pte *pgd; /* virtual address */ int gaw; /* max guest address width */ @@ -717,6 +718,12 @@ struct device_domain_info { struct pasid_table *pasid_table; /* pasid table */ }; +struct dev_pasid_info { + struct list_head link_domain; /* link to domain siblings */ + struct device *dev; + ioasid_t pasid; +}; + static inline void __iommu_flush_cache( struct intel_iommu *iommu, void *addr, int size) { -- cgit v1.2.3 From d3aedf94f480971f7ffe88d337ba72e6f7d32497 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Aug 2023 20:48:03 +0800 Subject: iommu/vt-d: Remove rmrr check in domain attaching device path The core code now prevents devices with RMRR regions from being assigned to user space. There is no need to check for this condition in individual drivers. Remove it to avoid duplicate code. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230724060352.113458-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 58 --------------------------------------------- 1 file changed, 58 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4b5ed58ee422..9369a8b02bd6 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2487,30 +2487,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, return 0; } -static bool device_has_rmrr(struct device *dev) -{ - struct dmar_rmrr_unit *rmrr; - struct device *tmp; - int i; - - rcu_read_lock(); - for_each_rmrr_units(rmrr) { - /* - * Return TRUE if this RMRR contains the device that - * is passed in. - */ - for_each_active_dev_scope(rmrr->devices, - rmrr->devices_cnt, i, tmp) - if (tmp == dev || - is_downstream_to_pci_bridge(dev, tmp)) { - rcu_read_unlock(); - return true; - } - } - rcu_read_unlock(); - return false; -} - /** * device_rmrr_is_relaxable - Test whether the RMRR of this device * is relaxable (ie. is allowed to be not enforced under some conditions) @@ -2540,34 +2516,6 @@ static bool device_rmrr_is_relaxable(struct device *dev) return false; } -/* - * There are a couple cases where we need to restrict the functionality of - * devices associated with RMRRs. The first is when evaluating a device for - * identity mapping because problems exist when devices are moved in and out - * of domains and their respective RMRR information is lost. This means that - * a device with associated RMRRs will never be in a "passthrough" domain. - * The second is use of the device through the IOMMU API. 
This interface * expects to have full control of the IOVA space for the device. We cannot * satisfy both the requirement that RMRR access is maintained and have an * unencumbered IOVA space. We also have no ability to quiesce the device's * use of the RMRR space or even inform the IOMMU API user of the restriction. * We therefore prevent devices associated with an RMRR from participating in * the IOMMU API, which eliminates them from device assignment. * * In both cases, devices which have relaxable RMRRs are not concerned by this * restriction. See device_rmrr_is_relaxable comment. */ -static bool device_is_rmrr_locked(struct device *dev) -{ - if (!device_has_rmrr(dev)) - return false; - - if (device_rmrr_is_relaxable(dev)) - return false; - - return true; -} - /* * Return the required default domain type for a specific device. * @@ -4180,12 +4128,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; - if (domain->type == IOMMU_DOMAIN_UNMANAGED && - device_is_rmrr_locked(dev)) { - dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n"); - return -EPERM; - } - if (info->domain) device_block_translation(dev); -- cgit v1.2.3 From fb5f50a43d9fd44fd6bc4f4dbcf9d3ec5b556558 Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Wed, 9 Aug 2023 20:48:05 +0800 Subject: iommu/vt-d: Fix to convert mm pfn to dma pfn When the VT-d page size is smaller than the mm page size, the dma pfn conversion must be handled in two cases: one for the start pfn and one for the end pfn. Currently the calculation of the end dma pfn is incorrect: the result is smaller than the real page frame number, so the iova mapping always misses some page frames. Rename mm_to_dma_pfn() to mm_to_dma_pfn_start() and add a new helper for converting the end dma pfn, named mm_to_dma_pfn_end(). Signed-off-by: Yanfei Xu Link: https://lore.kernel.org/r/20230625082046.979742-1-yanfei.xu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9369a8b02bd6..dbf9270e565e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -113,13 +113,17 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl) /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work.
*/ -static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) +static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) { return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); } +static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) +{ + return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; +} static inline unsigned long page_to_dma_pfn(struct page *pg) { - return mm_to_dma_pfn(page_to_pfn(pg)); + return mm_to_dma_pfn_start(page_to_pfn(pg)); } static inline unsigned long virt_to_dma_pfn(void *p) { @@ -2403,8 +2407,8 @@ static int __init si_domain_init(int hw) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn(start_pfn), - mm_to_dma_pfn(end_pfn)); + mm_to_dma_pfn_start(start_pfn), + mm_to_dma_pfn_end(end_pfn)); if (ret) return ret; } @@ -2425,8 +2429,8 @@ static int __init si_domain_init(int hw) continue; ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn(start >> PAGE_SHIFT), - mm_to_dma_pfn(end >> PAGE_SHIFT)); + mm_to_dma_pfn_start(start >> PAGE_SHIFT), + mm_to_dma_pfn_end(end >> PAGE_SHIFT)); if (ret) return ret; } @@ -3549,8 +3553,8 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { struct memory_notify *mhp = v; - unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); - unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + + unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn); + unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn + mhp->nr_pages - 1); switch (val) { @@ -4254,7 +4258,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, unsigned long i; nrpages = aligned_nrpages(gather->start, size); - start_pfn = mm_to_dma_pfn(iova_pfn); + start_pfn = mm_to_dma_pfn_start(iova_pfn); xa_for_each(&dmar_domain->iommu_array, i, info) iommu_flush_iotlb_psi(info->iommu, dmar_domain, -- cgit v1.2.3
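To see why the old conversion missed page frames, here is a minimal userspace sketch of the two helpers from the patch above; the shift values are assumptions for illustration (64K mm pages over 4K VT-d pages), where the kernel uses its own PAGE_SHIFT and VTD_PAGE_SHIFT:

#include <stdio.h>

#define PAGE_SHIFT	16	/* assumption: 64K mm pages */
#define VTD_PAGE_SHIFT	12	/* 4K VT-d pages */

static unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn)
{
	/* Last VT-d pfn covered by this mm pfn, not the first one. */
	return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1;
}

int main(void)
{
	/*
	 * mm pfn 2 covers VT-d pfns 32..47; ending a range with
	 * mm_to_dma_pfn_start(2) == 32, as the old code effectively did,
	 * would drop pfns 33..47 from the mapping.
	 */
	printf("start=%lu end=%lu\n",
	       mm_to_dma_pfn_start(2), mm_to_dma_pfn_end(2));
	return 0;
}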