From def054b01a867822254e1dda13d587f5c7a99e2a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 27 Feb 2024 10:14:41 +0800 Subject: iommu/vt-d: Use device rbtree in iopf reporting path The existing I/O page fault handler currently locates the PCI device by calling pci_get_domain_bus_and_slot(). This function searches the list of all PCI devices until the desired device is found. To improve lookup efficiency, replace it with device_rbtree_find() to search the device within the probed device rbtree. The I/O page fault is initiated by the device, which does not have any synchronization mechanism with the software to ensure that the device stays in the probed device tree. Theoretically, a device could be released by the IOMMU subsystem after device_rbtree_find() and before iopf_get_dev_fault_param(), which would cause a use-after-free problem. Add a mutex to synchronize the I/O page fault reporting path and the IOMMU release device path. This lock doesn't introduce any performance overhead, as the conflict between I/O page fault reporting and device releasing is very rare. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240220065939.121116-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 1 + drivers/iommu/intel/iommu.c | 3 +++ drivers/iommu/intel/iommu.h | 2 ++ drivers/iommu/intel/svm.c | 17 +++++++++-------- 4 files changed, 15 insertions(+), 8 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index f9b63c2875f7..d14797aabb7a 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1097,6 +1097,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->segment = drhd->segment; iommu->device_rbtree = RB_ROOT; spin_lock_init(&iommu->device_rbtree_lock); + mutex_init(&iommu->iopf_lock); iommu->node = NUMA_NO_NODE; ver = readl(iommu->reg + DMAR_VER_REG); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 025d7385cf58..60aa2dce32ef 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4362,8 +4362,11 @@ free: static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + mutex_lock(&iommu->iopf_lock); device_rbtree_remove(info); + mutex_unlock(&iommu->iopf_lock); dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 50d1e196db52..a0feab099f12 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -713,6 +713,8 @@ struct intel_iommu { #endif struct iopf_queue *iopf_queue; unsigned char iopfq_name[16]; + /* Synchronization between fault report and iommu device release. */ + struct mutex iopf_lock; struct q_inval *qi; /* Queued invalidation info */ u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/ diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 1dd56d4eb88c..bdf3584ca0af 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -643,7 +643,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) struct intel_iommu *iommu = d; struct page_req_dsc *req; int head, tail, handled; - struct pci_dev *pdev; + struct device *dev; u64 address; /* @@ -689,23 +689,24 @@ bad_req: if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) goto prq_advance; - pdev = pci_get_domain_bus_and_slot(iommu->segment, - PCI_BUS_NUM(req->rid), - req->rid & 0xff); /* * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (!pdev) + mutex_lock(&iommu->iopf_lock); + dev = device_rbtree_find(iommu, req->rid); + if (!dev) { + mutex_unlock(&iommu->iopf_lock); goto bad_req; + } - if (intel_svm_prq_report(iommu, &pdev->dev, req)) + if (intel_svm_prq_report(iommu, dev, req)) handle_bad_prq_event(iommu, req, QI_RESP_INVALID); else - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + trace_prq_report(iommu, dev, req->qw_0, req->qw_1, req->priv_data[0], req->priv_data[1], iommu->prq_seq_number++); - pci_dev_put(pdev); + mutex_unlock(&iommu->iopf_lock); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } -- cgit v1.2.3