From d2f85a263883b679f87ed8f911746105658e9c47 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 28 Mar 2024 05:29:58 -0700 Subject: iommu: Pass domain to remove_dev_pasid() op Existing remove_dev_pasid() callbacks of the underlying iommu drivers get the attached domain from the group->pasid_array. However, the domain stored in group->pasid_array is not always correct in all scenarios. A wrong domain may result in failure in remove_dev_pasid() callback. To avoid such problems, it is more reliable to pass the domain to the remove_dev_pasid() op. Suggested-by: Jason Gunthorpe Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240328122958.83332-3-yi.l.liu@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 50eb9aed47cc..45c75a8a0ef5 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4587,19 +4587,15 @@ static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, return 0; } -static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) +static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct dev_pasid_info *curr, *dev_pasid = NULL; struct intel_iommu *iommu = info->iommu; - struct dmar_domain *dmar_domain; - struct iommu_domain *domain; unsigned long flags; - domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); - if (WARN_ON_ONCE(!domain)) - goto out_tear_down; - /* * The SVA implementation needs to handle its own stuffs like the mm * notification. Before consolidating that code into iommu core, let @@ -4610,7 +4606,6 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) goto out_tear_down; } - dmar_domain = to_dmar_domain(domain); spin_lock_irqsave(&dmar_domain->lock, flags); list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { if (curr->dev == dev && curr->pasid == pasid) { -- cgit v1.2.3 From 06c375053cefc3a2f383d200596abe5ab3fb35f9 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 13 Apr 2024 00:25:12 +0000 Subject: iommu/vt-d: add wrapper functions for page allocations In order to improve observability and accountability of IOMMU layer, we must account the number of pages that are allocated by functions that are calling directly into buddy allocator. This is achieved by first wrapping the allocation related functions into a separate inline functions in new file: drivers/iommu/iommu-pages.h Convert all page allocation calls under iommu/intel to use these new functions. Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240413002522.1101315-2-pasha.tatashin@soleen.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 16 ++-- drivers/iommu/intel/iommu.c | 47 ++++------- drivers/iommu/intel/iommu.h | 2 - drivers/iommu/intel/irq_remapping.c | 16 ++-- drivers/iommu/intel/pasid.c | 18 ++--- drivers/iommu/intel/svm.c | 11 ++- drivers/iommu/iommu-pages.h | 154 ++++++++++++++++++++++++++++++++++++ 7 files changed, 201 insertions(+), 63 deletions(-) create mode 100644 drivers/iommu/iommu-pages.h (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 36d7427b1202..87ad996e5257 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -32,6 +32,7 @@ #include "iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #include "perf.h" #include "trace.h" #include "perfmon.h" @@ -1187,7 +1188,7 @@ static void free_iommu(struct intel_iommu *iommu) } if (iommu->qi) { - free_page((unsigned long)iommu->qi->desc); + iommu_free_page(iommu->qi->desc); kfree(iommu->qi->desc_status); kfree(iommu->qi); } @@ -1755,7 +1756,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) int dmar_enable_qi(struct intel_iommu *iommu) { struct q_inval *qi; - struct page *desc_page; + void *desc; + int order; if (!ecap_qis(iommu->ecap)) return -ENOENT; @@ -1776,19 +1778,19 @@ int dmar_enable_qi(struct intel_iommu *iommu) * Need two pages to accommodate 256 descriptors of 256 bits each * if the remapping hardware supports scalable mode translation. */ - desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, - !!ecap_smts(iommu->ecap)); - if (!desc_page) { + order = ecap_smts(iommu->ecap) ? 1 : 0; + desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order); + if (!desc) { kfree(qi); iommu->qi = NULL; return -ENOMEM; } - qi->desc = page_address(desc_page); + qi->desc = desc; qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { - free_page((unsigned long) qi->desc); + iommu_free_page(qi->desc); kfree(qi); iommu->qi = NULL; return -ENOMEM; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a7ecd90303dc..daaa7a22595e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -27,6 +27,7 @@ #include "iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #include "pasid.h" #include "cap_audit.h" #include "perfmon.h" @@ -298,22 +299,6 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -void *alloc_pgtable_page(int node, gfp_t gfp) -{ - struct page *page; - void *vaddr = NULL; - - page = alloc_pages_node(node, gfp | __GFP_ZERO, 0); - if (page) - vaddr = page_address(page); - return vaddr; -} - -void free_pgtable_page(void *vaddr) -{ - free_page((unsigned long)vaddr); -} - static int domain_type_is_si(struct dmar_domain *domain) { return domain->domain.type == IOMMU_DOMAIN_IDENTITY; @@ -545,7 +530,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, if (!alloc) return NULL; - context = alloc_pgtable_page(iommu->node, GFP_ATOMIC); + context = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); if (!context) return NULL; @@ -719,17 +704,17 @@ static void free_context_table(struct intel_iommu *iommu) for (i = 0; i < ROOT_ENTRY_NR; i++) { context = iommu_context_addr(iommu, i, 0, 0); if (context) - free_pgtable_page(context); + iommu_free_page(context); if (!sm_supported(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); if (context) - free_pgtable_page(context); + iommu_free_page(context); } - free_pgtable_page(iommu->root_entry); + iommu_free_page(iommu->root_entry); iommu->root_entry = NULL; } @@ -867,7 +852,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(domain->nid, gfp); + tmp_page = iommu_alloc_page_node(domain->nid, gfp); if (!tmp_page) return NULL; @@ -879,7 +864,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ - free_pgtable_page(tmp_page); + iommu_free_page(tmp_page); else domain_flush_cache(domain, pte, sizeof(*pte)); } @@ -992,7 +977,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level, last_pfn < level_pfn + level_size(level) - 1)) { dma_clear_pte(pte); domain_flush_cache(domain, pte, sizeof(*pte)); - free_pgtable_page(level_pte); + iommu_free_page(level_pte); } next: pfn += level_size(level); @@ -1016,7 +1001,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { - free_pgtable_page(domain->pgd); + iommu_free_page(domain->pgd); domain->pgd = NULL; } } @@ -1118,7 +1103,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - root = alloc_pgtable_page(iommu->node, GFP_ATOMIC); + root = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); if (!root) { pr_err("Allocating root entry for %s failed\n", iommu->name); @@ -1841,7 +1826,7 @@ static void domain_exit(struct dmar_domain *domain) LIST_HEAD(freelist); domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist); - put_pages_list(&freelist); + iommu_put_pages_list(&freelist); } if (WARN_ON(!list_empty(&domain->devices))) @@ -2497,7 +2482,7 @@ static int copy_context_table(struct intel_iommu *iommu, if (!old_ce) goto out; - new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL); + new_ce = iommu_alloc_page_node(iommu->node, GFP_KERNEL); if (!new_ce) goto out_unmap; @@ -3426,7 +3411,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, start_vpfn, mhp->nr_pages, list_empty(&freelist), 0); rcu_read_unlock(); - put_pages_list(&freelist); + iommu_put_pages_list(&freelist); } break; } @@ -3833,7 +3818,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->max_addr = 0; /* always allocate the top pgd */ - domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC); + domain->pgd = iommu_alloc_page_node(domain->nid, GFP_ATOMIC); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); @@ -3987,7 +3972,7 @@ int prepare_domain_attach_device(struct iommu_domain *domain, pte = dmar_domain->pgd; if (dma_pte_present(pte)) { dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte)); - free_pgtable_page(pte); + iommu_free_page(pte); } dmar_domain->agaw--; } @@ -4141,7 +4126,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, if (dmar_domain->nested_parent) parent_domain_flush(dmar_domain, start_pfn, nrpages, list_empty(&gather->freelist)); - put_pages_list(&gather->freelist); + iommu_put_pages_list(&gather->freelist); } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 404d2476a877..8d081d8c6f41 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1085,8 +1085,6 @@ void domain_update_iommu_cap(struct dmar_domain *domain); int dmar_ir_support(void); -void *alloc_pgtable_page(int node, gfp_t gfp); -void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, const struct iommu_user_data *user_data); diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 566297bc87dd..39cd9626eb8d 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -22,6 +22,7 @@ #include "iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #include "cap_audit.h" enum irq_mode { @@ -527,7 +528,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) struct ir_table *ir_table; struct fwnode_handle *fn; unsigned long *bitmap; - struct page *pages; + void *ir_table_base; if (iommu->ir_table) return 0; @@ -536,9 +537,9 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (!ir_table) return -ENOMEM; - pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, - INTR_REMAP_PAGE_ORDER); - if (!pages) { + ir_table_base = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, + INTR_REMAP_PAGE_ORDER); + if (!ir_table_base) { pr_err("IR%d: failed to allocate pages of order %d\n", iommu->seq_id, INTR_REMAP_PAGE_ORDER); goto out_free_table; @@ -573,7 +574,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) else iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops; - ir_table->base = page_address(pages); + ir_table->base = ir_table_base; ir_table->bitmap = bitmap; iommu->ir_table = ir_table; @@ -622,7 +623,7 @@ out_free_fwnode: out_free_bitmap: bitmap_free(bitmap); out_free_pages: - __free_pages(pages, INTR_REMAP_PAGE_ORDER); + iommu_free_pages(ir_table_base, INTR_REMAP_PAGE_ORDER); out_free_table: kfree(ir_table); @@ -643,8 +644,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu) irq_domain_free_fwnode(fn); iommu->ir_domain = NULL; } - free_pages((unsigned long)iommu->ir_table->base, - INTR_REMAP_PAGE_ORDER); + iommu_free_pages(iommu->ir_table->base, INTR_REMAP_PAGE_ORDER); bitmap_free(iommu->ir_table->bitmap); kfree(iommu->ir_table); iommu->ir_table = NULL; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 11f0b856d74c..abce19e2ad6f 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -20,6 +20,7 @@ #include "iommu.h" #include "pasid.h" +#include "../iommu-pages.h" /* * Intel IOMMU system wide PASID name space: @@ -38,7 +39,7 @@ int intel_pasid_alloc_table(struct device *dev) { struct device_domain_info *info; struct pasid_table *pasid_table; - struct page *pages; + struct pasid_dir_entry *dir; u32 max_pasid = 0; int order, size; @@ -59,14 +60,13 @@ int intel_pasid_alloc_table(struct device *dev) size = max_pasid >> (PASID_PDE_SHIFT - 3); order = size ? get_order(size) : 0; - pages = alloc_pages_node(info->iommu->node, - GFP_KERNEL | __GFP_ZERO, order); - if (!pages) { + dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order); + if (!dir) { kfree(pasid_table); return -ENOMEM; } - pasid_table->table = page_address(pages); + pasid_table->table = dir; pasid_table->order = order; pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); info->pasid_table = pasid_table; @@ -97,10 +97,10 @@ void intel_pasid_free_table(struct device *dev) max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT; for (i = 0; i < max_pde; i++) { table = get_pasid_table_from_pde(&dir[i]); - free_pgtable_page(table); + iommu_free_page(table); } - free_pages((unsigned long)pasid_table->table, pasid_table->order); + iommu_free_pages(pasid_table->table, pasid_table->order); kfree(pasid_table); } @@ -146,7 +146,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { - entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC); + entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC); if (!entries) return NULL; @@ -158,7 +158,7 @@ retry: */ if (cmpxchg64(&dir[dir_index].val, 0ULL, (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { - free_pgtable_page(entries); + iommu_free_page(entries); goto retry; } if (!ecap_coherent(info->iommu->ecap)) { diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ee3b469e2da1..71c1b2a0ca16 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -22,6 +22,7 @@ #include "iommu.h" #include "pasid.h" #include "perf.h" +#include "../iommu-pages.h" #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); @@ -63,16 +64,14 @@ svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) int intel_svm_enable_prq(struct intel_iommu *iommu) { struct iopf_queue *iopfq; - struct page *pages; int irq, ret; - pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); - if (!pages) { + iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER); + if (!iommu->prq) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); return -ENOMEM; } - iommu->prq = page_address(pages); irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu); if (irq <= 0) { @@ -117,7 +116,7 @@ free_hwirq: dmar_free_hwirq(irq); iommu->pr_irq = 0; free_prq: - free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu_free_pages(iommu->prq, PRQ_ORDER); iommu->prq = NULL; return ret; @@ -140,7 +139,7 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) iommu->iopf_queue = NULL; } - free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu_free_pages(iommu->prq, PRQ_ORDER); iommu->prq = NULL; return 0; diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h new file mode 100644 index 000000000000..5a222d0ad25c --- /dev/null +++ b/drivers/iommu/iommu-pages.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, Google LLC. + * Pasha Tatashin + */ + +#ifndef __IOMMU_PAGES_H +#define __IOMMU_PAGES_H + +#include +#include +#include + +/* + * All page allocations that should be reported to as "iommu-pagetables" to + * userspace must use one of the functions below. This includes allocations of + * page-tables and other per-iommu_domain configuration structures. + * + * This is necessary for the proper accounting as IOMMU state can be rather + * large, i.e. multiple gigabytes in size. + */ + +/** + * __iommu_alloc_pages - allocate a zeroed page of a given order. + * @gfp: buddy allocator flags + * @order: page order + * + * returns the head struct page of the allocated page. + */ +static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order) +{ + struct page *page; + + page = alloc_pages(gfp | __GFP_ZERO, order); + if (unlikely(!page)) + return NULL; + + return page; +} + +/** + * __iommu_free_pages - free page of a given order + * @page: head struct page of the page + * @order: page order + */ +static inline void __iommu_free_pages(struct page *page, int order) +{ + if (!page) + return; + + __free_pages(page, order); +} + +/** + * iommu_alloc_pages_node - allocate a zeroed page of a given order from + * specific NUMA node. + * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * @order: page order + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order) +{ + struct page *page = alloc_pages_node(nid, gfp | __GFP_ZERO, order); + + if (unlikely(!page)) + return NULL; + + return page_address(page); +} + +/** + * iommu_alloc_pages - allocate a zeroed page of a given order + * @gfp: buddy allocator flags + * @order: page order + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_pages(gfp_t gfp, int order) +{ + struct page *page = __iommu_alloc_pages(gfp, order); + + if (unlikely(!page)) + return NULL; + + return page_address(page); +} + +/** + * iommu_alloc_page_node - allocate a zeroed page at specific NUMA node. + * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_page_node(int nid, gfp_t gfp) +{ + return iommu_alloc_pages_node(nid, gfp, 0); +} + +/** + * iommu_alloc_page - allocate a zeroed page + * @gfp: buddy allocator flags + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_page(gfp_t gfp) +{ + return iommu_alloc_pages(gfp, 0); +} + +/** + * iommu_free_pages - free page of a given order + * @virt: virtual address of the page to be freed. + * @order: page order + */ +static inline void iommu_free_pages(void *virt, int order) +{ + if (!virt) + return; + + __iommu_free_pages(virt_to_page(virt), order); +} + +/** + * iommu_free_page - free page + * @virt: virtual address of the page to be freed. + */ +static inline void iommu_free_page(void *virt) +{ + iommu_free_pages(virt, 0); +} + +/** + * iommu_put_pages_list - free a list of pages. + * @page: the head of the lru list to be freed. + * + * There are no locking requirement for these pages, as they are going to be + * put on a free list as soon as refcount reaches 0. Pages are put on this LRU + * list once they are removed from the IOMMU page tables. However, they can + * still be access through debugfs. + */ +static inline void iommu_put_pages_list(struct list_head *page) +{ + while (!list_empty(page)) { + struct page *p = list_entry(page->prev, struct page, lru); + + list_del(&p->lru); + put_page(p); + } +} + +#endif /* __IOMMU_PAGES_H */ -- cgit v1.2.3 From a770ccd91d99f997e023b554d9e2033ce79e019e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 24 Apr 2024 15:16:27 +0800 Subject: iommu/vt-d: Remove redundant assignment to variable err Variable err is being assigned a value that is never read. It is either being re-assigned later on error exit paths, or never referenced on the non-error path. Cleans up clang scan build warning: drivers/iommu/intel/dmar.c:1070:2: warning: Value stored to 'err' is never read [deadcode.DeadStores]` Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240411090535.306326-1-colin.i.king@gmail.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 36d7427b1202..351be9455214 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1067,7 +1067,6 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) goto error_free_seq_id; } - err = -EINVAL; if (!cap_sagaw(iommu->cap) && (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) { pr_info("%s: No supported address widths. Not attempting DMA translation.\n", -- cgit v1.2.3 From 9e7ee0f045395dc8aa55fbdc164c062484f4c88d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 24 Apr 2024 15:16:28 +0800 Subject: iommu/vt-d: Use try_cmpxchg64{,_local}() in iommu.c Replace this pattern in iommu.c: cmpxchg64{,_local}(*ptr, 0, new) != 0 ... with the simpler and faster: !try_cmpxchg64{,_local}(*ptr, &tmp, new) The x86 CMPXCHG instruction returns success in the ZF flag, so this change saves a compare after the CMPXCHG. No functional change intended. Signed-off-by: Uros Bizjak Cc: David Woodhouse Cc: Lu Baolu Cc: Joerg Roedel Cc: Will Deacon Cc: Robin Murphy Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240414162454.49584-1-ubizjak@gmail.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a7ecd90303dc..4a2afe89b464 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -865,7 +865,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, break; if (!dma_pte_present(pte)) { - uint64_t pteval; + uint64_t pteval, tmp; tmp_page = alloc_pgtable_page(domain->nid, gfp); @@ -877,7 +877,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (domain->use_first_level) pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; - if (cmpxchg64(&pte->val, 0ULL, pteval)) + tmp = 0ULL; + if (!try_cmpxchg64(&pte->val, &tmp, pteval)) /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); else @@ -2128,8 +2129,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, /* We don't need lock here, nobody else * touches the iova range */ - tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); - if (tmp) { + tmp = 0ULL; + if (!try_cmpxchg64_local(&pte->val, &tmp, pteval)) { static int dumps = 5; pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", iov_pfn, tmp, (unsigned long long)pteval); -- cgit v1.2.3 From d74169ceb0d2e32438946a2f1f9fc8c803304bd6 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 24 Apr 2024 15:16:29 +0800 Subject: iommu/vt-d: Allocate DMAR fault interrupts locally The Intel IOMMU code currently tries to allocate all DMAR fault interrupt vectors on the boot cpu. On large systems with high DMAR counts this results in vector exhaustion, and most of the vectors are not initially allocated socket local. Instead, have a cpu on each node do the vector allocation for the DMARs on that node. The boot cpu still does the allocation for its node during its boot sequence. Signed-off-by: Dimitri Sivanich Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/Zfydpp2Hm+as16TY@hpe.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/init.c | 2 +- drivers/iommu/intel/dmar.c | 9 +++++++-- drivers/iommu/irq_remapping.c | 5 ++++- drivers/iommu/irq_remapping.h | 2 +- include/linux/dmar.h | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index f482aab420f7..410c360e7e24 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -33,7 +33,7 @@ int amd_iommu_prepare(void); int amd_iommu_enable(void); void amd_iommu_disable(void); int amd_iommu_reenable(int mode); -int amd_iommu_enable_faulting(void); +int amd_iommu_enable_faulting(unsigned int cpu); extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index ac6754a85f35..8085e13e0100 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3392,7 +3392,7 @@ int amd_iommu_reenable(int mode) return 0; } -int __init amd_iommu_enable_faulting(void) +int __init amd_iommu_enable_faulting(unsigned int cpu) { /* We enable MSI later when PCI is initialized */ return 0; diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 351be9455214..932e0c10c0fe 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -2121,7 +2121,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) return ret; } -int __init enable_drhd_fault_handling(void) +int enable_drhd_fault_handling(unsigned int cpu) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; @@ -2131,7 +2131,12 @@ int __init enable_drhd_fault_handling(void) */ for_each_iommu(iommu, drhd) { u32 fault_status; - int ret = dmar_set_interrupt(iommu); + int ret; + + if (iommu->irq || iommu->node != cpu_to_node(cpu)) + continue; + + ret = dmar_set_interrupt(iommu); if (ret) { pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n", diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index ee59647c2050..2f7281ccc05f 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -151,7 +151,10 @@ int __init irq_remap_enable_fault_handling(void) if (!remap_ops->enable_faulting) return -ENODEV; - return remap_ops->enable_faulting(); + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dmar:enable_fault_handling", + remap_ops->enable_faulting, NULL); + + return remap_ops->enable_faulting(smp_processor_id()); } void panic_if_irq_remap(const char *msg) diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 8c89cb947cdb..0d6f140b5e01 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -41,7 +41,7 @@ struct irq_remap_ops { int (*reenable)(int); /* Enable fault handling */ - int (*enable_faulting)(void); + int (*enable_faulting)(unsigned int); }; extern struct irq_remap_ops intel_irq_remap_ops; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e34b601b71fd..499bb2c63483 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -117,7 +117,7 @@ extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, int count); /* Intel IOMMU detection */ void detect_intel_iommu(void); -extern int enable_drhd_fault_handling(void); +extern int enable_drhd_fault_handling(unsigned int cpu); extern int dmar_device_add(acpi_handle handle); extern int dmar_device_remove(acpi_handle handle); -- cgit v1.2.3 From cc9e49d35b4de47d6b656ac144cb22b11dc65c2e Mon Sep 17 00:00:00 2001 From: Jingqi Liu Date: Wed, 24 Apr 2024 15:16:30 +0800 Subject: iommu/vt-d: Remove debugfs use of private data field Since the page fault report and response have been tracked by ftrace, the users can easily calculate the time used for a page fault handling. There's no need to expose the similar functionality in debugfs. Hence, remove the corresponding operations in debugfs. Signed-off-by: Jingqi Liu Link: https://lore.kernel.org/r/20240308103811.76744-2-Jingqi.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/debugfs.c | 7 ------- drivers/iommu/intel/perf.h | 1 - drivers/iommu/intel/svm.c | 9 --------- 3 files changed, 17 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index 86b506af7daa..affbf4a1558d 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -706,7 +706,6 @@ static ssize_t dmar_perf_latency_write(struct file *filp, dmar_latency_disable(iommu, DMAR_LATENCY_INV_IOTLB); dmar_latency_disable(iommu, DMAR_LATENCY_INV_DEVTLB); dmar_latency_disable(iommu, DMAR_LATENCY_INV_IEC); - dmar_latency_disable(iommu, DMAR_LATENCY_PRQ); } rcu_read_unlock(); break; @@ -728,12 +727,6 @@ static ssize_t dmar_perf_latency_write(struct file *filp, dmar_latency_enable(iommu, DMAR_LATENCY_INV_IEC); rcu_read_unlock(); break; - case 4: - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) - dmar_latency_enable(iommu, DMAR_LATENCY_PRQ); - rcu_read_unlock(); - break; default: return -EINVAL; } diff --git a/drivers/iommu/intel/perf.h b/drivers/iommu/intel/perf.h index fd6db8049d1a..df9a36942d64 100644 --- a/drivers/iommu/intel/perf.h +++ b/drivers/iommu/intel/perf.h @@ -11,7 +11,6 @@ enum latency_type { DMAR_LATENCY_INV_IOTLB = 0, DMAR_LATENCY_INV_DEVTLB, DMAR_LATENCY_INV_IEC, - DMAR_LATENCY_PRQ, DMAR_LATENCY_NUM }; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ee3b469e2da1..e014350db354 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -583,12 +583,6 @@ static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; event.fault.prm.private_data[0] = desc->priv_data[0]; event.fault.prm.private_data[1] = desc->priv_data[1]; - } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) { - /* - * If the private data fields are not used by hardware, use it - * to monitor the prq handle latency. - */ - event.fault.prm.private_data[0] = ktime_to_ns(ktime_get()); } iommu_report_device_fault(dev, &event); @@ -768,9 +762,6 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, if (private_present) { desc.qw2 = prm->private_data[0]; desc.qw3 = prm->private_data[1]; - } else if (prm->private_data[0]) { - dmar_latency_update(iommu, DMAR_LATENCY_PRQ, - ktime_to_ns(ktime_get()) - prm->private_data[0]); } qi_submit_sync(iommu, &desc, 1, 0); -- cgit v1.2.3 From 621b7e54f288c5e4e32d1dd81a926b8ecb547c60 Mon Sep 17 00:00:00 2001 From: Jingqi Liu Date: Wed, 24 Apr 2024 15:16:31 +0800 Subject: iommu/vt-d: Remove private data use in fault message According to Intel VT-d specification revision 4.0, "Private Data" field has been removed from Page Request/Response. Since the private data field is not used in fault message, remove the related definitions in page request descriptor and remove the related code in page request/response handler, as Intel hasn't shipped any products which support private data in the page request message. Signed-off-by: Jingqi Liu Link: https://lore.kernel.org/r/20240308103811.76744-3-Jingqi.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 1 - drivers/iommu/intel/svm.c | 75 +++++++++------------------------------------ include/linux/iommu.h | 3 +- 3 files changed, 16 insertions(+), 63 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 404d2476a877..9ee326f7bf62 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -455,7 +455,6 @@ enum { /* Page group response descriptor QW0 */ #define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) -#define QI_PGRP_PDP(p) (((u64)(p)) << 5) #define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12) #define QI_PGRP_DID(rid) (((u64)(rid)) << 16) #define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index e014350db354..e05c6c4cb8c3 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -418,8 +418,7 @@ struct page_req_dsc { struct { u64 type:8; u64 pasid_present:1; - u64 priv_data_present:1; - u64 rsvd:6; + u64 rsvd:7; u64 rid:16; u64 pasid:20; u64 exe_req:1; @@ -438,7 +437,8 @@ struct page_req_dsc { }; u64 qw_1; }; - u64 priv_data[2]; + u64 qw_2; + u64 qw_3; }; static bool is_canonical_address(u64 addr) @@ -572,18 +572,6 @@ static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; } - if (desc->priv_data_present) { - /* - * Set last page in group bit if private data is present, - * page response is required as it does for LPIG. - * iommu_report_device_fault() doesn't understand this vendor - * specific requirement thus we set last_page as a workaround. - */ - event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; - event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; - event.fault.prm.private_data[0] = desc->priv_data[0]; - event.fault.prm.private_data[1] = desc->priv_data[1]; - } iommu_report_device_fault(dev, &event); } @@ -591,39 +579,23 @@ static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, static void handle_bad_prq_event(struct intel_iommu *iommu, struct page_req_dsc *req, int result) { - struct qi_desc desc; + struct qi_desc desc = { }; pr_err("%s: Invalid page request: %08llx %08llx\n", iommu->name, ((unsigned long long *)req)[0], ((unsigned long long *)req)[1]); - /* - * Per VT-d spec. v3.0 ch7.7, system software must - * respond with page group response if private data - * is present (PDP) or last page in group (LPIG) bit - * is set. This is an additional VT-d feature beyond - * PCI ATS spec. - */ - if (!req->lpig && !req->priv_data_present) + if (!req->lpig) return; desc.qw0 = QI_PGRP_PASID(req->pasid) | QI_PGRP_DID(req->rid) | QI_PGRP_PASID_P(req->pasid_present) | - QI_PGRP_PDP(req->priv_data_present) | QI_PGRP_RESP_CODE(result) | QI_PGRP_RESP_TYPE; desc.qw1 = QI_PGRP_IDX(req->prg_index) | QI_PGRP_LPIG(req->lpig); - if (req->priv_data_present) { - desc.qw2 = req->priv_data[0]; - desc.qw3 = req->priv_data[1]; - } else { - desc.qw2 = 0; - desc.qw3 = 0; - } - qi_submit_sync(iommu, &desc, 1, 0); } @@ -691,7 +663,7 @@ bad_req: intel_svm_prq_report(iommu, dev, req); trace_prq_report(iommu, dev, req->qw_0, req->qw_1, - req->priv_data[0], req->priv_data[1], + req->qw_2, req->qw_3, iommu->prq_seq_number++); mutex_unlock(&iommu->iopf_lock); prq_advance: @@ -730,7 +702,7 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; struct iommu_fault_page_request *prm; - bool private_present; + struct qi_desc desc; bool pasid_present; bool last_page; u16 sid; @@ -738,34 +710,17 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, prm = &evt->fault.prm; sid = PCI_DEVID(bus, devfn); pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; - /* - * Per VT-d spec. v3.0 ch7.7, system software must respond - * with page group response if private data is present (PDP) - * or last page in group (LPIG) bit is set. This is an - * additional VT-d requirement beyond PCI ATS spec. - */ - if (last_page || private_present) { - struct qi_desc desc; - - desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) | - QI_PGRP_PASID_P(pasid_present) | - QI_PGRP_PDP(private_present) | - QI_PGRP_RESP_CODE(msg->code) | - QI_PGRP_RESP_TYPE; - desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page); - desc.qw2 = 0; - desc.qw3 = 0; - - if (private_present) { - desc.qw2 = prm->private_data[0]; - desc.qw3 = prm->private_data[1]; - } + desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) | + QI_PGRP_PASID_P(pasid_present) | + QI_PGRP_RESP_CODE(msg->code) | + QI_PGRP_RESP_TYPE; + desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page); + desc.qw2 = 0; + desc.qw3 = 0; - qi_submit_sync(iommu, &desc, 1, 0); - } + qi_submit_sync(iommu, &desc, 1, 0); } static void intel_svm_domain_free(struct iommu_domain *domain) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2e925b5eba53..e6549bdfaed9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -69,8 +69,7 @@ enum iommu_fault_type { struct iommu_fault_page_request { #define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) #define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) -#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) -#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3) +#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 2) u32 flags; u32 pasid; u32 grpid; -- cgit v1.2.3 From 304b3bde24b58515a75fd198beb52ca57df6275f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:32 +0800 Subject: iommu/vt-d: Remove caching mode check before device TLB flush The Caching Mode (CM) of the Intel IOMMU indicates if the hardware implementation caches not-present or erroneous translation-structure entries except for the first-stage translation. The caching mode is irrelevant to the device TLB, therefore there is no need to check it before a device TLB invalidation operation. Remove two caching mode checks before device TLB invalidation in the driver. The removal of these checks doesn't change the driver's behavior in critical map/unmap paths. Hence, there is no functionality or performance impact, especially since commit <29b32839725f> ("iommu/vt-d: Do not use flush-queue when caching-mode is on") has already disabled flush-queue for caching mode. Therefore, caching mode will never call intel_flush_iotlb_all(). Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20240415013835.9527-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4a2afe89b464..002fee5fcb80 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1502,11 +1502,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, else __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih); - /* - * In caching mode, changes of pages from non-present to present require - * flush. However, device IOTLB doesn't need to be flushed in this case. - */ - if (!cap_caching_mode(iommu->cap) || !map) + if (!map) iommu_flush_dev_iotlb(domain, addr, mask); } @@ -1580,8 +1576,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - if (!cap_caching_mode(iommu->cap)) - iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); + iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); } if (dmar_domain->nested_parent) -- cgit v1.2.3 From 3b1d9e2b2d6856eabf5faa12d20c97fef657999f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:33 +0800 Subject: iommu/vt-d: Add cache tag assignment interface Caching tag is a combination of tags used by the hardware to cache various translations. Whenever a mapping in a domain is changed, the IOMMU driver should invalidate the caches with the caching tags. The VT-d specification describes caching tags in section 6.2.1, Tagging of Cached Translations. Add interface to assign caching tags to an IOMMU domain when attached to a RID or PASID, and unassign caching tags when a domain is detached from a RID or PASID. All caching tags are listed in the per-domain tag list and are protected by a dedicated lock. In addition to the basic IOTLB and devTLB caching tag types, NESTING_IOTLB and NESTING_DEVTLB tag types are also introduced. These tags are used for caches that store translations for DMA accesses through a nested user domain. They are affected by changes to mappings in the parent domain. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Makefile | 2 +- drivers/iommu/intel/cache.c | 214 +++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel/iommu.c | 28 +++++- drivers/iommu/intel/iommu.h | 31 +++++++ drivers/iommu/intel/nested.c | 19 +++- drivers/iommu/intel/svm.c | 10 +- 6 files changed, 295 insertions(+), 9 deletions(-) create mode 100644 drivers/iommu/intel/cache.c (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index 5402b699a122..c8beb0281559 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DMAR_TABLE) += dmar.o -obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o +obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c new file mode 100644 index 000000000000..296f1645a739 --- /dev/null +++ b/drivers/iommu/intel/cache.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * cache.c - Intel VT-d cache invalidation + * + * Copyright (C) 2024 Intel Corporation + * + * Author: Lu Baolu + */ + +#define pr_fmt(fmt) "DMAR: " fmt + +#include +#include +#include +#include + +#include "iommu.h" +#include "pasid.h" + +/* Check if an existing cache tag can be reused for a new association. */ +static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, + struct intel_iommu *iommu, struct device *dev, + ioasid_t pasid, enum cache_tag_type type) +{ + if (tag->type != type) + return false; + + if (tag->domain_id != domain_id || tag->pasid != pasid) + return false; + + if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB) + return tag->iommu == iommu; + + if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB) + return tag->dev == dev; + + return false; +} + +/* Assign a cache tag with specified type to domain. */ +static int cache_tag_assign(struct dmar_domain *domain, u16 did, + struct device *dev, ioasid_t pasid, + enum cache_tag_type type) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct cache_tag *tag, *temp; + unsigned long flags; + + tag = kzalloc(sizeof(*tag), GFP_KERNEL); + if (!tag) + return -ENOMEM; + + tag->type = type; + tag->iommu = iommu; + tag->domain_id = did; + tag->pasid = pasid; + tag->users = 1; + + if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB) + tag->dev = dev; + else + tag->dev = iommu->iommu.dev; + + spin_lock_irqsave(&domain->cache_lock, flags); + list_for_each_entry(temp, &domain->cache_tags, node) { + if (cache_tage_match(temp, did, iommu, dev, pasid, type)) { + temp->users++; + spin_unlock_irqrestore(&domain->cache_lock, flags); + kfree(tag); + return 0; + } + } + list_add_tail(&tag->node, &domain->cache_tags); + spin_unlock_irqrestore(&domain->cache_lock, flags); + + return 0; +} + +/* Unassign a cache tag with specified type from domain. */ +static void cache_tag_unassign(struct dmar_domain *domain, u16 did, + struct device *dev, ioasid_t pasid, + enum cache_tag_type type) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct cache_tag *tag; + unsigned long flags; + + spin_lock_irqsave(&domain->cache_lock, flags); + list_for_each_entry(tag, &domain->cache_tags, node) { + if (cache_tage_match(tag, did, iommu, dev, pasid, type)) { + if (--tag->users == 0) { + list_del(&tag->node); + kfree(tag); + } + break; + } + } + spin_unlock_irqrestore(&domain->cache_lock, flags); +} + +static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + int ret; + + ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB); + if (ret || !info->ats_enabled) + return ret; + + ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB); + if (ret) + cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB); + + return ret; +} + +static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB); + + if (info->ats_enabled) + cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB); +} + +static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + int ret; + + ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); + if (ret || !info->ats_enabled) + return ret; + + ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB); + if (ret) + cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); + + return ret; +} + +static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); + + if (info->ats_enabled) + cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB); +} + +static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + /* + * The driver assigns different domain IDs for all domains except + * the SVA type. + */ + if (domain->domain.type == IOMMU_DOMAIN_SVA) + return FLPT_DEFAULT_DID; + + return domain_id_iommu(domain, iommu); +} + +/* + * Assign cache tags to a domain when it's associated with a device's + * PASID using a specific domain ID. + * + * On success (return value of 0), cache tags are created and added to the + * domain's cache tag list. On failure (negative return value), an error + * code is returned indicating the reason for the failure. + */ +int cache_tag_assign_domain(struct dmar_domain *domain, + struct device *dev, ioasid_t pasid) +{ + u16 did = domain_get_id_for_dev(domain, dev); + int ret; + + ret = __cache_tag_assign_domain(domain, did, dev, pasid); + if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED) + return ret; + + ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid); + if (ret) + __cache_tag_unassign_domain(domain, did, dev, pasid); + + return ret; +} + +/* + * Remove the cache tags associated with a device's PASID when the domain is + * detached from the device. + * + * The cache tags must be previously assigned to the domain by calling the + * assign interface. + */ +void cache_tag_unassign_domain(struct dmar_domain *domain, + struct device *dev, ioasid_t pasid) +{ + u16 did = domain_get_id_for_dev(domain, dev); + + __cache_tag_unassign_domain(domain, did, dev, pasid); + if (domain->domain.type == IOMMU_DOMAIN_NESTED) + __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid); +} diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 002fee5fcb80..8220bb36e420 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1746,7 +1746,9 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); INIT_LIST_HEAD(&domain->dev_pasids); + INIT_LIST_HEAD(&domain->cache_tags); spin_lock_init(&domain->lock); + spin_lock_init(&domain->cache_lock); xa_init(&domain->iommu_array); return domain; @@ -1758,6 +1760,9 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) unsigned long ndomains; int num, ret = -ENOSPC; + if (domain->domain.type == IOMMU_DOMAIN_SVA) + return 0; + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; @@ -1805,6 +1810,9 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { struct iommu_domain_info *info; + if (domain->domain.type == IOMMU_DOMAIN_SVA) + return; + spin_lock(&iommu->lock); info = xa_load(&domain->iommu_array, iommu->seq_id); if (--info->refcnt == 0) { @@ -2323,6 +2331,13 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, ret = domain_attach_iommu(domain, iommu); if (ret) return ret; + + ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); + if (ret) { + domain_detach_iommu(domain, iommu); + return ret; + } + info->domain = domain; spin_lock_irqsave(&domain->lock, flags); list_add(&info->link, &domain->devices); @@ -3811,6 +3826,7 @@ void device_block_translation(struct device *dev) list_del(&info->link); spin_unlock_irqrestore(&info->domain->lock, flags); + cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); domain_detach_iommu(info->domain, iommu); info->domain = NULL; } @@ -4598,6 +4614,7 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); if (WARN_ON_ONCE(!domain)) goto out_tear_down; + dmar_domain = to_dmar_domain(domain); /* * The SVA implementation needs to handle its own stuffs like the mm @@ -4606,10 +4623,10 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) */ if (domain->type == IOMMU_DOMAIN_SVA) { intel_svm_remove_dev_pasid(dev, pasid); + cache_tag_unassign_domain(dmar_domain, dev, pasid); goto out_tear_down; } - dmar_domain = to_dmar_domain(domain); spin_lock_irqsave(&dmar_domain->lock, flags); list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { if (curr->dev == dev && curr->pasid == pasid) { @@ -4621,6 +4638,7 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) WARN_ON_ONCE(!dev_pasid); spin_unlock_irqrestore(&dmar_domain->lock, flags); + cache_tag_unassign_domain(dmar_domain, dev, pasid); domain_detach_iommu(dmar_domain, iommu); intel_iommu_debugfs_remove_dev_pasid(dev_pasid); kfree(dev_pasid); @@ -4660,6 +4678,10 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, if (ret) goto out_free; + ret = cache_tag_assign_domain(dmar_domain, dev, pasid); + if (ret) + goto out_detach_iommu; + if (domain_type_is_si(dmar_domain)) ret = intel_pasid_setup_pass_through(iommu, dev, pasid); else if (dmar_domain->use_first_level) @@ -4669,7 +4691,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, ret = intel_pasid_setup_second_level(iommu, dmar_domain, dev, pasid); if (ret) - goto out_detach_iommu; + goto out_unassign_tag; dev_pasid->dev = dev; dev_pasid->pasid = pasid; @@ -4681,6 +4703,8 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, intel_iommu_debugfs_create_dev_pasid(dev_pasid); return 0; +out_unassign_tag: + cache_tag_unassign_domain(dmar_domain, dev, pasid); out_detach_iommu: domain_detach_iommu(dmar_domain, iommu); out_free: diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 9ee326f7bf62..b7c79cc19681 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -606,6 +606,9 @@ struct dmar_domain { struct list_head devices; /* all devices' list */ struct list_head dev_pasids; /* all attached pasids */ + spinlock_t cache_lock; /* Protect the cache tag list */ + struct list_head cache_tags; /* Cache tag list */ + int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ @@ -1091,6 +1094,34 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, const struct iommu_user_data *user_data); struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid); +enum cache_tag_type { + CACHE_TAG_IOTLB, + CACHE_TAG_DEVTLB, + CACHE_TAG_NESTING_IOTLB, + CACHE_TAG_NESTING_DEVTLB, +}; + +struct cache_tag { + struct list_head node; + enum cache_tag_type type; + struct intel_iommu *iommu; + /* + * The @dev field represents the location of the cache. For IOTLB, it + * resides on the IOMMU hardware. @dev stores the device pointer to + * the IOMMU hardware. For DevTLB, it locates in the PCIe endpoint. + * @dev stores the device pointer to that endpoint. + */ + struct device *dev; + u16 domain_id; + ioasid_t pasid; + unsigned int users; +}; + +int cache_tag_assign_domain(struct dmar_domain *domain, + struct device *dev, ioasid_t pasid); +void cache_tag_unassign_domain(struct dmar_domain *domain, + struct device *dev, ioasid_t pasid); + #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); int intel_svm_enable_prq(struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index a7d68f3d518a..13406ee742bf 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -52,13 +52,14 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, return ret; } + ret = cache_tag_assign_domain(dmar_domain, dev, IOMMU_NO_PASID); + if (ret) + goto detach_iommu; + ret = intel_pasid_setup_nested(iommu, dev, IOMMU_NO_PASID, dmar_domain); - if (ret) { - domain_detach_iommu(dmar_domain, iommu); - dev_err_ratelimited(dev, "Failed to setup pasid entry\n"); - return ret; - } + if (ret) + goto unassign_tag; info->domain = dmar_domain; spin_lock_irqsave(&dmar_domain->lock, flags); @@ -68,6 +69,12 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, domain_update_iotlb(dmar_domain); return 0; +unassign_tag: + cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID); +detach_iommu: + domain_detach_iommu(dmar_domain, iommu); + + return ret; } static void intel_nested_domain_free(struct iommu_domain *domain) @@ -206,7 +213,9 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, domain->domain.type = IOMMU_DOMAIN_NESTED; INIT_LIST_HEAD(&domain->devices); INIT_LIST_HEAD(&domain->dev_pasids); + INIT_LIST_HEAD(&domain->cache_tags); spin_lock_init(&domain->lock); + spin_lock_init(&domain->cache_lock); xa_init(&domain->iommu_array); spin_lock(&s2_domain->s1_lock); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index e05c6c4cb8c3..2e627fbd5adb 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -366,17 +366,23 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, sdev->qdep = 0; } + ret = cache_tag_assign_domain(to_dmar_domain(domain), dev, pasid); + if (ret) + goto free_sdev; + /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid, FLPT_DEFAULT_DID, sflags); if (ret) - goto free_sdev; + goto unassign_tag; list_add_rcu(&sdev->list, &svm->devs); return 0; +unassign_tag: + cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid); free_sdev: kfree(sdev); free_svm: @@ -741,6 +747,8 @@ struct iommu_domain *intel_svm_domain_alloc(void) if (!domain) return NULL; domain->domain.ops = &intel_svm_domain_ops; + INIT_LIST_HEAD(&domain->cache_tags); + spin_lock_init(&domain->cache_lock); return &domain->domain; } -- cgit v1.2.3 From c4d27ffaa8eb034ec438a9aedfe202ce81e15312 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:34 +0800 Subject: iommu/vt-d: Add cache tag invalidation helpers Add several helpers to invalidate the caches after mappings in the affected domain are changed. - cache_tag_flush_range() invalidates a range of caches after mappings within this range are changed. It uses the page-selective cache invalidation methods. - cache_tag_flush_all() invalidates all caches tagged by a domain ID. It uses the domain-selective cache invalidation methods. - cache_tag_flush_range_np() invalidates a range of caches when new mappings are created in the domain and the corresponding page table entries change from non-present to present. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cache.c | 195 ++++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel/iommu.c | 12 --- drivers/iommu/intel/iommu.h | 14 ++++ 3 files changed, 209 insertions(+), 12 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 296f1645a739..0539275a9d20 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "iommu.h" @@ -212,3 +213,197 @@ void cache_tag_unassign_domain(struct dmar_domain *domain, if (domain->domain.type == IOMMU_DOMAIN_NESTED) __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid); } + +static unsigned long calculate_psi_aligned_address(unsigned long start, + unsigned long end, + unsigned long *_pages, + unsigned long *_mask) +{ + unsigned long pages = aligned_nrpages(start, end - start + 1); + unsigned long aligned_pages = __roundup_pow_of_two(pages); + unsigned long bitmask = aligned_pages - 1; + unsigned long mask = ilog2(aligned_pages); + unsigned long pfn = IOVA_PFN(start); + + /* + * PSI masks the low order bits of the base address. If the + * address isn't aligned to the mask, then compute a mask value + * needed to ensure the target range is flushed. + */ + if (unlikely(bitmask & pfn)) { + unsigned long end_pfn = pfn + pages - 1, shared_bits; + + /* + * Since end_pfn <= pfn + bitmask, the only way bits + * higher than bitmask can differ in pfn and end_pfn is + * by carrying. This means after masking out bitmask, + * high bits starting with the first set bit in + * shared_bits are all equal in both pfn and end_pfn. + */ + shared_bits = ~(pfn ^ end_pfn) & ~bitmask; + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + } + + *_pages = aligned_pages; + *_mask = mask; + + return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); +} + +/* + * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) + * when the memory mappings in the target domain have been modified. + */ +void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, + unsigned long end, int ih) +{ + unsigned long pages, mask, addr; + struct cache_tag *tag; + unsigned long flags; + + addr = calculate_psi_aligned_address(start, end, &pages, &mask); + + spin_lock_irqsave(&domain->cache_lock, flags); + list_for_each_entry(tag, &domain->cache_tags, node) { + struct intel_iommu *iommu = tag->iommu; + struct device_domain_info *info; + u16 sid; + + switch (tag->type) { + case CACHE_TAG_IOTLB: + case CACHE_TAG_NESTING_IOTLB: + if (domain->use_first_level) { + qi_flush_piotlb(iommu, tag->domain_id, + tag->pasid, addr, pages, ih); + } else { + /* + * Fallback to domain selective flush if no + * PSI support or the size is too big. + */ + if (!cap_pgsel_inv(iommu->cap) || + mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, tag->domain_id, + 0, 0, DMA_TLB_DSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, tag->domain_id, + addr | ih, mask, + DMA_TLB_PSI_FLUSH); + } + break; + case CACHE_TAG_NESTING_DEVTLB: + /* + * Address translation cache in device side caches the + * result of nested translation. There is no easy way + * to identify the exact set of nested translations + * affected by a change in S2. So just flush the entire + * device cache. + */ + addr = 0; + mask = MAX_AGAW_PFN_WIDTH; + fallthrough; + case CACHE_TAG_DEVTLB: + info = dev_iommu_priv_get(tag->dev); + sid = PCI_DEVID(info->bus, info->devfn); + + if (tag->pasid == IOMMU_NO_PASID) + qi_flush_dev_iotlb(iommu, sid, info->pfsid, + info->ats_qdep, addr, mask); + else + qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, + tag->pasid, info->ats_qdep, + addr, mask); + + quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep); + break; + } + } + spin_unlock_irqrestore(&domain->cache_lock, flags); +} + +/* + * Invalidates all ranges of IOVA when the memory mappings in the target + * domain have been modified. + */ +void cache_tag_flush_all(struct dmar_domain *domain) +{ + struct cache_tag *tag; + unsigned long flags; + + spin_lock_irqsave(&domain->cache_lock, flags); + list_for_each_entry(tag, &domain->cache_tags, node) { + struct intel_iommu *iommu = tag->iommu; + struct device_domain_info *info; + u16 sid; + + switch (tag->type) { + case CACHE_TAG_IOTLB: + case CACHE_TAG_NESTING_IOTLB: + if (domain->use_first_level) + qi_flush_piotlb(iommu, tag->domain_id, + tag->pasid, 0, -1, 0); + else + iommu->flush.flush_iotlb(iommu, tag->domain_id, + 0, 0, DMA_TLB_DSI_FLUSH); + break; + case CACHE_TAG_DEVTLB: + case CACHE_TAG_NESTING_DEVTLB: + info = dev_iommu_priv_get(tag->dev); + sid = PCI_DEVID(info->bus, info->devfn); + + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, + 0, MAX_AGAW_PFN_WIDTH); + quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, + IOMMU_NO_PASID, info->ats_qdep); + break; + } + } + spin_unlock_irqrestore(&domain->cache_lock, flags); +} + +/* + * Invalidate a range of IOVA when new mappings are created in the target + * domain. + * + * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as + * Set, any software updates to remapping structures other than first- + * stage mapping requires explicit invalidation of the caches. + * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires + * write buffer flushing, software must explicitly perform write-buffer + * flushing, if cache invalidation is not required. + */ +void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, + unsigned long end) +{ + unsigned long pages, mask, addr; + struct cache_tag *tag; + unsigned long flags; + + addr = calculate_psi_aligned_address(start, end, &pages, &mask); + + spin_lock_irqsave(&domain->cache_lock, flags); + list_for_each_entry(tag, &domain->cache_tags, node) { + struct intel_iommu *iommu = tag->iommu; + + if (!cap_caching_mode(iommu->cap) || domain->use_first_level) { + iommu_flush_write_buffer(iommu); + continue; + } + + if (tag->type == CACHE_TAG_IOTLB || + tag->type == CACHE_TAG_NESTING_IOTLB) { + /* + * Fallback to domain selective flush if no + * PSI support or the size is too big. + */ + if (!cap_pgsel_inv(iommu->cap) || + mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, tag->domain_id, + 0, 0, DMA_TLB_DSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, tag->domain_id, + addr, mask, + DMA_TLB_PSI_FLUSH); + } + } + spin_unlock_irqrestore(&domain->cache_lock, flags); +} diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8220bb36e420..473df7cd1672 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -54,11 +54,6 @@ __DOMAIN_MAX_PFN(gaw), (unsigned long)-1)) #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT) -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) - -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) - static void __init check_tylersburg_isoch(void); static int rwbf_quirk; @@ -1992,13 +1987,6 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) domain_context_mapping_cb, domain); } -/* Returns a number of VTD pages, but aligned to MM page size */ -static unsigned long aligned_nrpages(unsigned long host_addr, size_t size) -{ - host_addr &= ~PAGE_MASK; - return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; -} - /* Return largest possible superpage level for a given mapping */ static int hardware_largepage_caps(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phy_pfn, unsigned long pages) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index b7c79cc19681..cb83b0995391 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -35,6 +35,8 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) + #define VTD_STRIDE_SHIFT (9) #define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) @@ -1040,6 +1042,13 @@ static inline void context_set_sm_pre(struct context_entry *context) context->lo |= BIT_ULL(4); } +/* Returns a number of VTD pages, but aligned to MM page size */ +static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size) +{ + host_addr &= ~PAGE_MASK; + return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; +} + /* Convert value to context PASID directory size field coding. */ #define context_pdts(pds) (((pds) & 0x7) << 9) @@ -1121,6 +1130,11 @@ int cache_tag_assign_domain(struct dmar_domain *domain, struct device *dev, ioasid_t pasid); void cache_tag_unassign_domain(struct dmar_domain *domain, struct device *dev, ioasid_t pasid); +void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, + unsigned long end, int ih); +void cache_tag_flush_all(struct dmar_domain *domain); +void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, + unsigned long end); #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); -- cgit v1.2.3 From 446a68c58d2e5b8140d474f1a74082aebeee9bb0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:35 +0800 Subject: iommu/vt-d: Add trace events for cache tag interface Add trace events for cache tag assign/unassign/flush operations and trace the events in the interfaces. These trace events will improve debugging capabilities by providing detailed information about cache tag activity. A sample of the traced messages looks like below [messages have been stripped and wrapped to make the line short]. cache_tag_assign: dmar9/0000:00:01.0 type iotlb did 1 pasid 9 ref 1 cache_tag_assign: dmar9/0000:00:01.0 type devtlb did 1 pasid 9 ref 1 cache_tag_flush_all: dmar6/0000:8a:00.0 type iotlb did 7 pasid 0 ref 1 cache_tag_flush_range: dmar1 0000:00:1b.0[0] type iotlb did 9 [0xeab00000-0xeab1afff] addr 0xeab00000 pages 0x20 mask 0x5 cache_tag_flush_range: dmar1 0000:00:1b.0[0] type iotlb did 9 [0xeab20000-0xeab31fff] addr 0xeab20000 pages 0x20 mask 0x5 cache_tag_flush_range: dmar1 0000:00:1b.0[0] type iotlb did 9 [0xeaa40000-0xeaa51fff] addr 0xeaa40000 pages 0x20 mask 0x5 cache_tag_flush_range: dmar1 0000:00:1b.0[0] type iotlb did 9 [0x98de0000-0x98de4fff] addr 0x98de0000 pages 0x8 mask 0x3 cache_tag_flush_range: dmar1 0000:00:1b.0[0] type iotlb did 9 [0xe9828000-0xe9828fff] addr 0xe9828000 pages 0x1 mask 0x0 cache_tag_unassign: dmar9/0000:00:01.0 type iotlb did 1 pasid 9 ref 1 cache_tag_unassign: dmar9/0000:00:01.0 type devtlb did 1 pasid 9 ref 1 Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cache.c | 10 +++++ drivers/iommu/intel/trace.h | 97 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 0539275a9d20..e8418cdd8331 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -17,6 +17,7 @@ #include "iommu.h" #include "pasid.h" +#include "trace.h" /* Check if an existing cache tag can be reused for a new association. */ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, @@ -69,11 +70,13 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did, temp->users++; spin_unlock_irqrestore(&domain->cache_lock, flags); kfree(tag); + trace_cache_tag_assign(temp); return 0; } } list_add_tail(&tag->node, &domain->cache_tags); spin_unlock_irqrestore(&domain->cache_lock, flags); + trace_cache_tag_assign(tag); return 0; } @@ -91,6 +94,7 @@ static void cache_tag_unassign(struct dmar_domain *domain, u16 did, spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { if (cache_tage_match(tag, did, iommu, dev, pasid, type)) { + trace_cache_tag_unassign(tag); if (--tag->users == 0) { list_del(&tag->node); kfree(tag); @@ -316,6 +320,8 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep); break; } + + trace_cache_tag_flush_range(tag, start, end, addr, pages, mask); } spin_unlock_irqrestore(&domain->cache_lock, flags); } @@ -356,6 +362,8 @@ void cache_tag_flush_all(struct dmar_domain *domain) IOMMU_NO_PASID, info->ats_qdep); break; } + + trace_cache_tag_flush_all(tag); } spin_unlock_irqrestore(&domain->cache_lock, flags); } @@ -404,6 +412,8 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, addr, mask, DMA_TLB_PSI_FLUSH); } + + trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask); } spin_unlock_irqrestore(&domain->cache_lock, flags); } diff --git a/drivers/iommu/intel/trace.h b/drivers/iommu/intel/trace.h index 93d96f93a89b..961ac1c1bc21 100644 --- a/drivers/iommu/intel/trace.h +++ b/drivers/iommu/intel/trace.h @@ -89,6 +89,103 @@ TRACE_EVENT(prq_report, __entry->dw1, __entry->dw2, __entry->dw3) ) ); + +DECLARE_EVENT_CLASS(cache_tag_log, + TP_PROTO(struct cache_tag *tag), + TP_ARGS(tag), + TP_STRUCT__entry( + __string(iommu, tag->iommu->name) + __string(dev, dev_name(tag->dev)) + __field(u16, type) + __field(u16, domain_id) + __field(u32, pasid) + __field(u32, users) + ), + TP_fast_assign( + __assign_str(iommu, tag->iommu->name); + __assign_str(dev, dev_name(tag->dev)); + __entry->type = tag->type; + __entry->domain_id = tag->domain_id; + __entry->pasid = tag->pasid; + __entry->users = tag->users; + ), + TP_printk("%s/%s type %s did %d pasid %d ref %d", + __get_str(iommu), __get_str(dev), + __print_symbolic(__entry->type, + { CACHE_TAG_IOTLB, "iotlb" }, + { CACHE_TAG_DEVTLB, "devtlb" }, + { CACHE_TAG_NESTING_IOTLB, "nesting_iotlb" }, + { CACHE_TAG_NESTING_DEVTLB, "nesting_devtlb" }), + __entry->domain_id, __entry->pasid, __entry->users + ) +); + +DEFINE_EVENT(cache_tag_log, cache_tag_assign, + TP_PROTO(struct cache_tag *tag), + TP_ARGS(tag) +); + +DEFINE_EVENT(cache_tag_log, cache_tag_unassign, + TP_PROTO(struct cache_tag *tag), + TP_ARGS(tag) +); + +DEFINE_EVENT(cache_tag_log, cache_tag_flush_all, + TP_PROTO(struct cache_tag *tag), + TP_ARGS(tag) +); + +DECLARE_EVENT_CLASS(cache_tag_flush, + TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end, + unsigned long addr, unsigned long pages, unsigned long mask), + TP_ARGS(tag, start, end, addr, pages, mask), + TP_STRUCT__entry( + __string(iommu, tag->iommu->name) + __string(dev, dev_name(tag->dev)) + __field(u16, type) + __field(u16, domain_id) + __field(u32, pasid) + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned long, addr) + __field(unsigned long, pages) + __field(unsigned long, mask) + ), + TP_fast_assign( + __assign_str(iommu, tag->iommu->name); + __assign_str(dev, dev_name(tag->dev)); + __entry->type = tag->type; + __entry->domain_id = tag->domain_id; + __entry->pasid = tag->pasid; + __entry->start = start; + __entry->end = end; + __entry->addr = addr; + __entry->pages = pages; + __entry->mask = mask; + ), + TP_printk("%s %s[%d] type %s did %d [0x%lx-0x%lx] addr 0x%lx pages 0x%lx mask 0x%lx", + __get_str(iommu), __get_str(dev), __entry->pasid, + __print_symbolic(__entry->type, + { CACHE_TAG_IOTLB, "iotlb" }, + { CACHE_TAG_DEVTLB, "devtlb" }, + { CACHE_TAG_NESTING_IOTLB, "nesting_iotlb" }, + { CACHE_TAG_NESTING_DEVTLB, "nesting_devtlb" }), + __entry->domain_id, __entry->start, __entry->end, + __entry->addr, __entry->pages, __entry->mask + ) +); + +DEFINE_EVENT(cache_tag_flush, cache_tag_flush_range, + TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end, + unsigned long addr, unsigned long pages, unsigned long mask), + TP_ARGS(tag, start, end, addr, pages, mask) +); + +DEFINE_EVENT(cache_tag_flush, cache_tag_flush_range_np, + TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end, + unsigned long addr, unsigned long pages, unsigned long mask), + TP_ARGS(tag, start, end, addr, pages, mask) +); #endif /* _TRACE_INTEL_IOMMU_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 4e589a53685c7658f9055fcedbce15bd4cc41134 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:36 +0800 Subject: iommu/vt-d: Use cache_tag_flush_all() in flush_iotlb_all The flush_iotlb_all callback is called by the iommu core to flush all caches for the affected domain. Use cache_tag_flush_all() in this callback. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 473df7cd1672..a268e2a51f4d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1557,25 +1557,7 @@ static void parent_domain_flush(struct dmar_domain *domain, static void intel_flush_iotlb_all(struct iommu_domain *domain) { - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - struct iommu_domain_info *info; - unsigned long idx; - - xa_for_each(&dmar_domain->iommu_array, idx, info) { - struct intel_iommu *iommu = info->iommu; - u16 did = domain_id_iommu(dmar_domain, iommu); - - if (dmar_domain->use_first_level) - domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0); - else - iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH); - - iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); - } - - if (dmar_domain->nested_parent) - parent_domain_flush(dmar_domain, 0, -1, 0); + cache_tag_flush_all(to_dmar_domain(domain)); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) -- cgit v1.2.3 From a600ccd0a347e8f9c9f35f229e1ccd0dca9374c4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:37 +0800 Subject: iommu/vt-d: Use cache_tag_flush_range() in tlb_sync The tlb_sync callback is called by the iommu core to flush a range of caches for the affected domain. Use cache_tag_flush_range() in this callback. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a268e2a51f4d..f3926ad7d737 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4104,25 +4104,8 @@ static size_t intel_iommu_unmap_pages(struct iommu_domain *domain, static void intel_iommu_tlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - unsigned long iova_pfn = IOVA_PFN(gather->start); - size_t size = gather->end - gather->start; - struct iommu_domain_info *info; - unsigned long start_pfn; - unsigned long nrpages; - unsigned long i; - - nrpages = aligned_nrpages(gather->start, size); - start_pfn = mm_to_dma_pfn_start(iova_pfn); - - xa_for_each(&dmar_domain->iommu_array, i, info) - iommu_flush_iotlb_psi(info->iommu, dmar_domain, - start_pfn, nrpages, - list_empty(&gather->freelist), 0); - - if (dmar_domain->nested_parent) - parent_domain_flush(dmar_domain, start_pfn, nrpages, - list_empty(&gather->freelist)); + cache_tag_flush_range(to_dmar_domain(domain), gather->start, + gather->end, list_empty(&gather->freelist)); put_pages_list(&gather->freelist); } -- cgit v1.2.3 From 129dab6e1286525fe5baed860d3dfcd9c6b4b327 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:38 +0800 Subject: iommu/vt-d: Use cache_tag_flush_range_np() in iotlb_sync_map The iotlb_sync_map callback is called by the iommu core after non-present to present mappings are created. The iommu driver uses this callback to invalidate caches if IOMMU is working in caching mode and second-only translation is used for the domain. Use cache_tag_flush_range_np() in this callback. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f3926ad7d737..c9ac8a1b635f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1501,20 +1501,6 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, iommu_flush_dev_iotlb(domain, addr, mask); } -/* Notification for newly created mappings */ -static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain *domain, - unsigned long pfn, unsigned int pages) -{ - /* - * It's a non-present to present mapping. Only flush if caching mode - * and second level. - */ - if (cap_caching_mode(iommu->cap) && !domain->use_first_level) - iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1); - else - iommu_flush_write_buffer(iommu); -} - /* * Flush the relevant caches in nested translation if the domain * also serves as a parent @@ -4544,14 +4530,8 @@ static bool risky_device(struct pci_dev *pdev) static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) { - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - unsigned long pages = aligned_nrpages(iova, size); - unsigned long pfn = iova >> VTD_PAGE_SHIFT; - struct iommu_domain_info *info; - unsigned long i; + cache_tag_flush_range_np(to_dmar_domain(domain), iova, iova + size - 1); - xa_for_each(&dmar_domain->iommu_array, i, info) - __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); return 0; } -- cgit v1.2.3 From 06792d06798931696dd78f65024de4cc66f2fd5b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:39 +0800 Subject: iommu/vt-d: Cleanup use of iommu_flush_iotlb_psi() Use cache_tag_flush_range() in switch_to_super_page() to invalidate the necessary caches when switching mappings from normal to super pages. The iommu_flush_iotlb_psi() call in intel_iommu_memory_notifier() is unnecessary since there should be no cache invalidation for the identity domain. Clean up iommu_flush_iotlb_psi() after the last call site is removed. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 171 +------------------------------------------- 1 file changed, 2 insertions(+), 169 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index c9ac8a1b635f..b2bd96e7f03d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1390,157 +1390,6 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep); } -static void iommu_flush_dev_iotlb(struct dmar_domain *domain, - u64 addr, unsigned mask) -{ - struct dev_pasid_info *dev_pasid; - struct device_domain_info *info; - unsigned long flags; - - if (!domain->has_iotlb_device) - return; - - spin_lock_irqsave(&domain->lock, flags); - list_for_each_entry(info, &domain->devices, link) - __iommu_flush_dev_iotlb(info, addr, mask); - - list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { - info = dev_iommu_priv_get(dev_pasid->dev); - - if (!info->ats_enabled) - continue; - - qi_flush_dev_iotlb_pasid(info->iommu, - PCI_DEVID(info->bus, info->devfn), - info->pfsid, dev_pasid->pasid, - info->ats_qdep, addr, - mask); - } - spin_unlock_irqrestore(&domain->lock, flags); -} - -static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, - struct dmar_domain *domain, u64 addr, - unsigned long npages, bool ih) -{ - u16 did = domain_id_iommu(domain, iommu); - struct dev_pasid_info *dev_pasid; - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); - list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) - qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih); - - if (!list_empty(&domain->devices)) - qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih); - spin_unlock_irqrestore(&domain->lock, flags); -} - -static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - unsigned long pfn, unsigned int pages, - int ih) -{ - unsigned int aligned_pages = __roundup_pow_of_two(pages); - unsigned long bitmask = aligned_pages - 1; - unsigned int mask = ilog2(aligned_pages); - u64 addr = (u64)pfn << VTD_PAGE_SHIFT; - - /* - * PSI masks the low order bits of the base address. If the - * address isn't aligned to the mask, then compute a mask value - * needed to ensure the target range is flushed. - */ - if (unlikely(bitmask & pfn)) { - unsigned long end_pfn = pfn + pages - 1, shared_bits; - - /* - * Since end_pfn <= pfn + bitmask, the only way bits - * higher than bitmask can differ in pfn and end_pfn is - * by carrying. This means after masking out bitmask, - * high bits starting with the first set bit in - * shared_bits are all equal in both pfn and end_pfn. - */ - shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; - } - - /* - * Fallback to domain selective flush if no PSI support or - * the size is too big. - */ - if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, - DMA_TLB_PSI_FLUSH); -} - -static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, - struct dmar_domain *domain, - unsigned long pfn, unsigned int pages, - int ih, int map) -{ - unsigned int aligned_pages = __roundup_pow_of_two(pages); - unsigned int mask = ilog2(aligned_pages); - uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; - u16 did = domain_id_iommu(domain, iommu); - - if (WARN_ON(!pages)) - return; - - if (ih) - ih = 1 << 6; - - if (domain->use_first_level) - domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); - else - __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih); - - if (!map) - iommu_flush_dev_iotlb(domain, addr, mask); -} - -/* - * Flush the relevant caches in nested translation if the domain - * also serves as a parent - */ -static void parent_domain_flush(struct dmar_domain *domain, - unsigned long pfn, - unsigned long pages, int ih) -{ - struct dmar_domain *s1_domain; - - spin_lock(&domain->s1_lock); - list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { - struct device_domain_info *device_info; - struct iommu_domain_info *info; - unsigned long flags; - unsigned long i; - - xa_for_each(&s1_domain->iommu_array, i, info) - __iommu_flush_iotlb_psi(info->iommu, info->did, - pfn, pages, ih); - - if (!s1_domain->has_iotlb_device) - continue; - - spin_lock_irqsave(&s1_domain->lock, flags); - list_for_each_entry(device_info, &s1_domain->devices, link) - /* - * Address translation cache in device side caches the - * result of nested translation. There is no easy way - * to identify the exact set of nested translations - * affected by a change in S2. So just flush the entire - * device cache. - */ - __iommu_flush_dev_iotlb(device_info, 0, - MAX_AGAW_PFN_WIDTH); - spin_unlock_irqrestore(&s1_domain->lock, flags); - } - spin_unlock(&domain->s1_lock); -} - static void intel_flush_iotlb_all(struct iommu_domain *domain) { cache_tag_flush_all(to_dmar_domain(domain)); @@ -1991,9 +1840,7 @@ static void switch_to_super_page(struct dmar_domain *domain, unsigned long end_pfn, int level) { unsigned long lvl_pages = lvl_to_nr_pages(level); - struct iommu_domain_info *info; struct dma_pte *pte = NULL; - unsigned long i; while (start_pfn <= end_pfn) { if (!pte) @@ -2005,13 +1852,8 @@ static void switch_to_super_page(struct dmar_domain *domain, start_pfn + lvl_pages - 1, level + 1); - xa_for_each(&domain->iommu_array, i, info) - iommu_flush_iotlb_psi(info->iommu, domain, - start_pfn, lvl_pages, - 0, 0); - if (domain->nested_parent) - parent_domain_flush(domain, start_pfn, - lvl_pages, 0); + cache_tag_flush_range(domain, start_pfn << VTD_PAGE_SHIFT, + end_pfn << VTD_PAGE_SHIFT, 0); } pte++; @@ -3381,18 +3223,9 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, case MEM_OFFLINE: case MEM_CANCEL_ONLINE: { - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; LIST_HEAD(freelist); domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist); - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) - iommu_flush_iotlb_psi(iommu, si_domain, - start_vpfn, mhp->nr_pages, - list_empty(&freelist), 0); - rcu_read_unlock(); put_pages_list(&freelist); } break; -- cgit v1.2.3 From 8ebc22366ed8bb2cbcf215bfa3fabb4f36d10a91 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:40 +0800 Subject: iommu/vt-d: Use cache_tag_flush_range() in cache_invalidate_user The cache_invalidate_user callback is called to invalidate a range of caches for the affected user domain. Use cache_tag_flush_range() in this callback. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 6 ++++++ drivers/iommu/intel/nested.c | 50 +++----------------------------------------- 2 files changed, 9 insertions(+), 47 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index cb83b0995391..1d705a983dd7 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1049,6 +1049,12 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } +/* Return a size from number of VTD pages. */ +static inline unsigned long nrpages_to_size(unsigned long npages) +{ + return npages << VTD_PAGE_SHIFT; +} + /* Convert value to context PASID directory size field coding. */ #define context_pdts(pds) (((pds) & 0x7) << 9) diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 13406ee742bf..16a2bcf5cfeb 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -88,50 +88,6 @@ static void intel_nested_domain_free(struct iommu_domain *domain) kfree(dmar_domain); } -static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, - unsigned int mask) -{ - struct device_domain_info *info; - unsigned long flags; - u16 sid, qdep; - - spin_lock_irqsave(&domain->lock, flags); - list_for_each_entry(info, &domain->devices, link) { - if (!info->ats_enabled) - continue; - sid = info->bus << 8 | info->devfn; - qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, - qdep, addr, mask); - quirk_extra_dev_tlb_flush(info, addr, mask, - IOMMU_NO_PASID, qdep); - } - spin_unlock_irqrestore(&domain->lock, flags); -} - -static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr, - u64 npages, bool ih) -{ - struct iommu_domain_info *info; - unsigned int mask; - unsigned long i; - - xa_for_each(&domain->iommu_array, i, info) - qi_flush_piotlb(info->iommu, - domain_id_iommu(domain, info->iommu), - IOMMU_NO_PASID, addr, npages, ih); - - if (!domain->has_iotlb_device) - return; - - if (npages == U64_MAX) - mask = 64 - VTD_PAGE_SHIFT; - else - mask = ilog2(__roundup_pow_of_two(npages)); - - nested_flush_dev_iotlb(domain, addr, mask); -} - static int intel_nested_cache_invalidate_user(struct iommu_domain *domain, struct iommu_user_data_array *array) { @@ -164,9 +120,9 @@ static int intel_nested_cache_invalidate_user(struct iommu_domain *domain, break; } - intel_nested_flush_cache(dmar_domain, inv_entry.addr, - inv_entry.npages, - inv_entry.flags & IOMMU_VTD_INV_FLAGS_LEAF); + cache_tag_flush_range(dmar_domain, inv_entry.addr, + inv_entry.addr + nrpages_to_size(inv_entry.npages) - 1, + inv_entry.flags & IOMMU_VTD_INV_FLAGS_LEAF); processed++; } -- cgit v1.2.3 From 4f609dbff51b3c98d5fc841758a20be74ab3f132 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:41 +0800 Subject: iommu/vt-d: Use cache helpers in arch_invalidate_secondary_tlbs The arch_invalidate_secondary_tlbs callback is called in the SVA mm notification path. It invalidates all or a range of caches after the CPU page table is modified. Use the cache tag helps in this path. The mm_types defines vm_end as the first byte after the end address which is different from the iommu gather API, hence convert the end parameter from mm_types to iommu gather scheme before calling the cache_tag helper. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 1 + drivers/iommu/intel/svm.c | 81 ++++++--------------------------------------- 2 files changed, 11 insertions(+), 71 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 1d705a983dd7..fc0b4b048635 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1166,6 +1166,7 @@ struct intel_svm { struct mm_struct *mm; u32 pasid; struct list_head devs; + struct dmar_domain *domain; }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 2e627fbd5adb..5ba0a7baa455 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -168,88 +168,25 @@ void intel_svm_check(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_SVM_CAPABLE; } -static void __flush_svm_range_dev(struct intel_svm *svm, - struct intel_svm_dev *sdev, - unsigned long address, - unsigned long pages, int ih) -{ - struct device_domain_info *info = dev_iommu_priv_get(sdev->dev); - - if (WARN_ON(!pages)) - return; - - qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); - if (info->ats_enabled) { - qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, - svm->pasid, sdev->qdep, address, - order_base_2(pages)); - quirk_extra_dev_tlb_flush(info, address, order_base_2(pages), - svm->pasid, sdev->qdep); - } -} - -static void intel_flush_svm_range_dev(struct intel_svm *svm, - struct intel_svm_dev *sdev, - unsigned long address, - unsigned long pages, int ih) -{ - unsigned long shift = ilog2(__roundup_pow_of_two(pages)); - unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift)); - unsigned long start = ALIGN_DOWN(address, align); - unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align); - - while (start < end) { - __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih); - start += align; - } -} - -static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, - unsigned long pages, int ih) -{ - struct intel_svm_dev *sdev; - - rcu_read_lock(); - list_for_each_entry_rcu(sdev, &svm->devs, list) - intel_flush_svm_range_dev(svm, sdev, address, pages, ih); - rcu_read_unlock(); -} - -static void intel_flush_svm_all(struct intel_svm *svm) -{ - struct device_domain_info *info; - struct intel_svm_dev *sdev; - - rcu_read_lock(); - list_for_each_entry_rcu(sdev, &svm->devs, list) { - info = dev_iommu_priv_get(sdev->dev); - - qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0); - if (info->ats_enabled) { - qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, - svm->pasid, sdev->qdep, - 0, 64 - VTD_PAGE_SHIFT); - quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT, - svm->pasid, sdev->qdep); - } - } - rcu_read_unlock(); -} - /* Pages have been freed at this point */ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + struct dmar_domain *domain = svm->domain; if (start == 0 && end == -1UL) { - intel_flush_svm_all(svm); + cache_tag_flush_all(domain); return; } - intel_flush_svm_range(svm, start, - (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0); + /* + * The mm_types defines vm_end as the first byte after the end address, + * different from IOMMU subsystem using the last address of an address + * range. + */ + cache_tag_flush_range(domain, start, end - 1, 0); } static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) @@ -336,6 +273,7 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, INIT_LIST_HEAD_RCU(&svm->devs); svm->notifier.ops = &intel_mmuops; + svm->domain = to_dmar_domain(domain); ret = mmu_notifier_register(&svm->notifier, mm); if (ret) { kfree(svm); @@ -747,6 +685,7 @@ struct iommu_domain *intel_svm_domain_alloc(void) if (!domain) return NULL; domain->domain.ops = &intel_svm_domain_ops; + domain->use_first_level = true; INIT_LIST_HEAD(&domain->cache_tags); spin_lock_init(&domain->cache_lock); -- cgit v1.2.3 From deda9a7bf38fb9846ff3fb83179ca07437b1511c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:42 +0800 Subject: iommu/vt-d: Remove intel_svm_dev The intel_svm_dev data structure used in the sva implementation for the Intel IOMMU driver stores information about a device attached to an SVA domain. It is a duplicate of dev_pasid_info that serves the same purpose. Replace intel_svm_dev with dev_pasid_info and clean up the use of intel_svm_dev. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-11-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 7 +-- drivers/iommu/intel/iommu.h | 15 +---- drivers/iommu/intel/svm.c | 130 +++++++++++++------------------------------- 3 files changed, 42 insertions(+), 110 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b2bd96e7f03d..7631d00cc882 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4387,11 +4387,8 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) * notification. Before consolidating that code into iommu core, let * the intel sva code handle it. */ - if (domain->type == IOMMU_DOMAIN_SVA) { - intel_svm_remove_dev_pasid(dev, pasid); - cache_tag_unassign_domain(dmar_domain, dev, pasid); - goto out_tear_down; - } + if (domain->type == IOMMU_DOMAIN_SVA) + intel_svm_remove_dev_pasid(domain); spin_lock_irqsave(&dmar_domain->lock, flags); list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index fc0b4b048635..f16b0d10543f 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -649,6 +649,7 @@ struct dmar_domain { struct list_head s2_link; }; }; + struct intel_svm *svm; struct iommu_domain domain; /* generic domain data structure for iommu core */ @@ -1149,23 +1150,13 @@ int intel_svm_finish_prq(struct intel_iommu *iommu); void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); -void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); +void intel_svm_remove_dev_pasid(struct iommu_domain *domain); void intel_drain_pasid_prq(struct device *dev, u32 pasid); -struct intel_svm_dev { - struct list_head list; - struct rcu_head rcu; - struct device *dev; - struct intel_iommu *iommu; - u16 did; - u16 sid, qdep; -}; - struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; u32 pasid; - struct list_head devs; struct dmar_domain *domain; }; #else @@ -1176,7 +1167,7 @@ static inline struct iommu_domain *intel_svm_domain_alloc(void) return NULL; } -static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) +static inline void intel_svm_remove_dev_pasid(struct iommu_domain *domain) { } #endif diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 5ba0a7baa455..e6568897042f 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -43,23 +43,6 @@ static void *pasid_private_find(ioasid_t pasid) return xa_load(&pasid_private_array, pasid); } -static struct intel_svm_dev * -svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) -{ - struct intel_svm_dev *sdev = NULL, *t; - - rcu_read_lock(); - list_for_each_entry_rcu(t, &svm->devs, list) { - if (t->dev == dev) { - sdev = t; - break; - } - } - rcu_read_unlock(); - - return sdev; -} - int intel_svm_enable_prq(struct intel_iommu *iommu) { struct iopf_queue *iopfq; @@ -192,7 +175,10 @@ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); - struct intel_svm_dev *sdev; + struct dmar_domain *domain = svm->domain; + struct dev_pasid_info *dev_pasid; + struct device_domain_info *info; + unsigned long flags; /* This might end up being called from exit_mmap(), *before* the page * tables are cleared. And __mmu_notifier_release() will delete us from @@ -206,11 +192,13 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * page) so that we end up taking a fault that the hardware really * *has* to handle gracefully without affecting other processes. */ - rcu_read_lock(); - list_for_each_entry_rcu(sdev, &svm->devs, list) - intel_pasid_tear_down_entry(sdev->iommu, sdev->dev, - svm->pasid, true); - rcu_read_unlock(); + spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + info = dev_iommu_priv_get(dev_pasid->dev); + intel_pasid_tear_down_entry(info->iommu, dev_pasid->dev, + dev_pasid->pasid, true); + } + spin_unlock_irqrestore(&domain->lock, flags); } @@ -219,47 +207,17 @@ static const struct mmu_notifier_ops intel_mmuops = { .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs, }; -static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, - struct intel_svm **rsvm, - struct intel_svm_dev **rsdev) -{ - struct intel_svm_dev *sdev = NULL; - struct intel_svm *svm; - - if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX) - return -EINVAL; - - svm = pasid_private_find(pasid); - if (IS_ERR(svm)) - return PTR_ERR(svm); - - if (!svm) - goto out; - - /* - * If we found svm for the PASID, there must be at least one device - * bond. - */ - if (WARN_ON(list_empty(&svm->devs))) - return -EINVAL; - sdev = svm_lookup_device_by_dev(svm, dev); - -out: - *rsvm = svm; - *rsdev = sdev; - - return 0; -} - static int intel_svm_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct intel_iommu *iommu = info->iommu; struct mm_struct *mm = domain->mm; - struct intel_svm_dev *sdev; + struct dev_pasid_info *dev_pasid; struct intel_svm *svm; unsigned long sflags; + unsigned long flags; int ret = 0; svm = pasid_private_find(pasid); @@ -270,7 +228,6 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, svm->pasid = pasid; svm->mm = mm; - INIT_LIST_HEAD_RCU(&svm->devs); svm->notifier.ops = &intel_mmuops; svm->domain = to_dmar_domain(domain); @@ -288,25 +245,17 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, } } - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); - if (!sdev) { - ret = -ENOMEM; + dmar_domain->svm = svm; + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); + if (!dev_pasid) goto free_svm; - } - sdev->dev = dev; - sdev->iommu = iommu; - sdev->did = FLPT_DEFAULT_DID; - sdev->sid = PCI_DEVID(info->bus, info->devfn); - if (info->ats_enabled) { - sdev->qdep = info->ats_qdep; - if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) - sdev->qdep = 0; - } + dev_pasid->dev = dev; + dev_pasid->pasid = pasid; ret = cache_tag_assign_domain(to_dmar_domain(domain), dev, pasid); if (ret) - goto free_sdev; + goto free_dev_pasid; /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; @@ -315,16 +264,18 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, if (ret) goto unassign_tag; - list_add_rcu(&sdev->list, &svm->devs); + spin_lock_irqsave(&dmar_domain->lock, flags); + list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); + spin_unlock_irqrestore(&dmar_domain->lock, flags); return 0; unassign_tag: cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid); -free_sdev: - kfree(sdev); +free_dev_pasid: + kfree(dev_pasid); free_svm: - if (list_empty(&svm->devs)) { + if (list_empty(&dmar_domain->dev_pasids)) { mmu_notifier_unregister(&svm->notifier, mm); pasid_private_remove(pasid); kfree(svm); @@ -333,26 +284,17 @@ free_svm: return ret; } -void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid) +void intel_svm_remove_dev_pasid(struct iommu_domain *domain) { - struct intel_svm_dev *sdev; - struct intel_svm *svm; - struct mm_struct *mm; - - if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev)) - return; - mm = svm->mm; - - if (sdev) { - list_del_rcu(&sdev->list); - kfree_rcu(sdev, rcu); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct intel_svm *svm = dmar_domain->svm; + struct mm_struct *mm = domain->mm; - if (list_empty(&svm->devs)) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); - pasid_private_remove(svm->pasid); - kfree(svm); - } + if (list_empty(&dmar_domain->dev_pasids)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(svm->pasid); + kfree(svm); } } @@ -686,8 +628,10 @@ struct iommu_domain *intel_svm_domain_alloc(void) return NULL; domain->domain.ops = &intel_svm_domain_ops; domain->use_first_level = true; + INIT_LIST_HEAD(&domain->dev_pasids); INIT_LIST_HEAD(&domain->cache_tags); spin_lock_init(&domain->cache_lock); + spin_lock_init(&domain->lock); return &domain->domain; } -- cgit v1.2.3 From 886f816c2f01f768384e78f91b3c0ff66920b53f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 24 Apr 2024 15:16:44 +0800 Subject: iommu/vt-d: Remove struct intel_svm The struct intel_svm was used for keeping attached devices info for sva domain. Since sva domain is a kind of iommu_domain, the struct dmar_domain should centralize all info of a sva domain, including the info of attached devices. Therefore, retire struct intel_svm and clean up the code. Besides, register mmu notifier callback in domain_alloc_sva() callback which allows the memory management notifier lifetime to follow the lifetime of the iommu_domain. Call mmu_notifier_put() in the domain free and defer the real free to the mmu free_notifier callback. Co-developed-by: Tina Zhang Signed-off-by: Tina Zhang Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240416080656.60968-13-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 11 +---- drivers/iommu/intel/iommu.h | 26 +++++------- drivers/iommu/intel/svm.c | 99 ++++++++++++--------------------------------- 3 files changed, 37 insertions(+), 99 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7631d00cc882..916cdb65d849 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3683,8 +3683,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return domain; case IOMMU_DOMAIN_IDENTITY: return &si_domain->domain; - case IOMMU_DOMAIN_SVA: - return intel_svm_domain_alloc(); default: return NULL; } @@ -4382,14 +4380,6 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) goto out_tear_down; dmar_domain = to_dmar_domain(domain); - /* - * The SVA implementation needs to handle its own stuffs like the mm - * notification. Before consolidating that code into iommu core, let - * the intel sva code handle it. - */ - if (domain->type == IOMMU_DOMAIN_SVA) - intel_svm_remove_dev_pasid(domain); - spin_lock_irqsave(&dmar_domain->lock, flags); list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { if (curr->dev == dev && curr->pasid == pasid) { @@ -4624,6 +4614,7 @@ const struct iommu_ops intel_iommu_ops = { .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, .domain_alloc_user = intel_iommu_domain_alloc_user, + .domain_alloc_sva = intel_svm_domain_alloc, .probe_device = intel_iommu_probe_device, .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index f16b0d10543f..c9eef464cf5c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -648,8 +648,12 @@ struct dmar_domain { /* link to parent domain siblings */ struct list_head s2_link; }; + + /* SVA domain */ + struct { + struct mmu_notifier notifier; + }; }; - struct intel_svm *svm; struct iommu_domain domain; /* generic domain data structure for iommu core */ @@ -1149,26 +1153,16 @@ int intel_svm_enable_prq(struct intel_iommu *iommu); int intel_svm_finish_prq(struct intel_iommu *iommu); void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, struct iommu_page_response *msg); -struct iommu_domain *intel_svm_domain_alloc(void); -void intel_svm_remove_dev_pasid(struct iommu_domain *domain); +struct iommu_domain *intel_svm_domain_alloc(struct device *dev, + struct mm_struct *mm); void intel_drain_pasid_prq(struct device *dev, u32 pasid); - -struct intel_svm { - struct mmu_notifier notifier; - struct mm_struct *mm; - u32 pasid; - struct dmar_domain *domain; -}; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {} -static inline struct iommu_domain *intel_svm_domain_alloc(void) -{ - return NULL; -} - -static inline void intel_svm_remove_dev_pasid(struct iommu_domain *domain) +static inline struct iommu_domain *intel_svm_domain_alloc(struct device *dev, + struct mm_struct *mm) { + return ERR_PTR(-ENODEV); } #endif diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index e6568897042f..268a0082d37f 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -26,23 +26,6 @@ static irqreturn_t prq_event_thread(int irq, void *d); -static DEFINE_XARRAY_ALLOC(pasid_private_array); -static int pasid_private_add(ioasid_t pasid, void *priv) -{ - return xa_alloc(&pasid_private_array, &pasid, priv, - XA_LIMIT(pasid, pasid), GFP_ATOMIC); -} - -static void pasid_private_remove(ioasid_t pasid) -{ - xa_erase(&pasid_private_array, pasid); -} - -static void *pasid_private_find(ioasid_t pasid) -{ - return xa_load(&pasid_private_array, pasid); -} - int intel_svm_enable_prq(struct intel_iommu *iommu) { struct iopf_queue *iopfq; @@ -156,10 +139,9 @@ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) { - struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); - struct dmar_domain *domain = svm->domain; + struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier); - if (start == 0 && end == -1UL) { + if (start == 0 && end == ULONG_MAX) { cache_tag_flush_all(domain); return; } @@ -174,8 +156,7 @@ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); - struct dmar_domain *domain = svm->domain; + struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier); struct dev_pasid_info *dev_pasid; struct device_domain_info *info; unsigned long flags; @@ -202,9 +183,15 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) } +static void intel_mm_free_notifier(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct dmar_domain, notifier)); +} + static const struct mmu_notifier_ops intel_mmuops = { .release = intel_mm_release, .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs, + .free_notifier = intel_mm_free_notifier, }; static int intel_svm_set_dev_pasid(struct iommu_domain *domain, @@ -215,40 +202,13 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, struct intel_iommu *iommu = info->iommu; struct mm_struct *mm = domain->mm; struct dev_pasid_info *dev_pasid; - struct intel_svm *svm; unsigned long sflags; unsigned long flags; int ret = 0; - svm = pasid_private_find(pasid); - if (!svm) { - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) - return -ENOMEM; - - svm->pasid = pasid; - svm->mm = mm; - - svm->notifier.ops = &intel_mmuops; - svm->domain = to_dmar_domain(domain); - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - kfree(svm); - return ret; - } - - ret = pasid_private_add(svm->pasid, svm); - if (ret) { - mmu_notifier_unregister(&svm->notifier, mm); - kfree(svm); - return ret; - } - } - - dmar_domain->svm = svm; dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); if (!dev_pasid) - goto free_svm; + return -ENOMEM; dev_pasid->dev = dev; dev_pasid->pasid = pasid; @@ -274,30 +234,10 @@ unassign_tag: cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid); free_dev_pasid: kfree(dev_pasid); -free_svm: - if (list_empty(&dmar_domain->dev_pasids)) { - mmu_notifier_unregister(&svm->notifier, mm); - pasid_private_remove(pasid); - kfree(svm); - } return ret; } -void intel_svm_remove_dev_pasid(struct iommu_domain *domain) -{ - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - struct intel_svm *svm = dmar_domain->svm; - struct mm_struct *mm = domain->mm; - - if (list_empty(&dmar_domain->dev_pasids)) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); - pasid_private_remove(svm->pasid); - kfree(svm); - } -} - /* Page request queue descriptor */ struct page_req_dsc { union { @@ -611,7 +551,10 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, static void intel_svm_domain_free(struct iommu_domain *domain) { - kfree(to_dmar_domain(domain)); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + /* dmar_domain free is deferred to the mmu free_notifier callback. */ + mmu_notifier_put(&dmar_domain->notifier); } static const struct iommu_domain_ops intel_svm_domain_ops = { @@ -619,13 +562,16 @@ static const struct iommu_domain_ops intel_svm_domain_ops = { .free = intel_svm_domain_free }; -struct iommu_domain *intel_svm_domain_alloc(void) +struct iommu_domain *intel_svm_domain_alloc(struct device *dev, + struct mm_struct *mm) { struct dmar_domain *domain; + int ret; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) - return NULL; + return ERR_PTR(-ENOMEM); + domain->domain.ops = &intel_svm_domain_ops; domain->use_first_level = true; INIT_LIST_HEAD(&domain->dev_pasids); @@ -633,5 +579,12 @@ struct iommu_domain *intel_svm_domain_alloc(void) spin_lock_init(&domain->cache_lock); spin_lock_init(&domain->lock); + domain->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&domain->notifier, mm); + if (ret) { + kfree(domain); + return ERR_PTR(ret); + } + return &domain->domain; } -- cgit v1.2.3 From b67483b3c44eaef2f771fa4c712e13f452675a67 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:45 +0100 Subject: iommu/dma: Centralise iommu_setup_dma_ops() It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only ever call iommu_setup_dma_ops() after a successful iommu_probe_device(), which means there should be no harm in achieving the same order of operations by running it off the back of iommu_probe_device() itself. This then puts it in line with the x86 and s390 .probe_finalize bodges, letting us pull it all into the main flow properly. As a bonus this lets us fold in and de-scope the PCI workaround setup as well. At this point we can also then pull the call up inside the group mutex, and avoid having to think about whether iommu_group_store_type() could theoretically race and free the domain if iommu_setup_dma_ops() ran just *before* iommu_device_use_default_domain() claims it... Furthermore we replace one .probe_finalize call completely, since the only remaining implementations are now one which only needs to run once for the initial boot-time probe, and two which themselves render that path unreachable. This leaves us a big step closer to realistically being able to unpick the variety of different things that iommu_setup_dma_ops() has been muddling together, and further streamline iommu-dma into core API flows in future. Reviewed-by: Lu Baolu # For Intel IOMMU Reviewed-by: Jason Gunthorpe Tested-by: Hanjun Guo Signed-off-by: Robin Murphy Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/bebea331c1d688b34d9862eefd5ede47503961b8.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 2 -- drivers/iommu/amd/iommu.c | 8 -------- drivers/iommu/dma-iommu.c | 18 ++++++------------ drivers/iommu/dma-iommu.h | 14 ++++++-------- drivers/iommu/intel/iommu.c | 7 ------- drivers/iommu/iommu.c | 20 +++++++------------- drivers/iommu/s390-iommu.c | 6 ------ drivers/iommu/virtio-iommu.c | 10 ---------- include/linux/iommu.h | 7 ------- 9 files changed, 19 insertions(+), 73 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 61886e43e3a1..313d8938a2f0 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -58,8 +58,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, ARCH_DMA_MINALIGN, cls); dev->dma_coherent = coherent; - if (device_iommu_mapped(dev)) - iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); xen_setup_dma_ops(dev); } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d35c1b8c8e65..085abf098fa9 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2175,13 +2175,6 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) return iommu_dev; } -static void amd_iommu_probe_finalize(struct device *dev) -{ - /* Domains are initialized for this device - have a look what we ended up with */ - set_dma_ops(dev, NULL); - iommu_setup_dma_ops(dev, 0, U64_MAX); -} - static void amd_iommu_release_device(struct device *dev) { struct amd_iommu *iommu; @@ -2784,7 +2777,6 @@ const struct iommu_ops amd_iommu_ops = { .domain_alloc_user = amd_iommu_domain_alloc_user, .probe_device = amd_iommu_probe_device, .release_device = amd_iommu_release_device, - .probe_finalize = amd_iommu_probe_finalize, .device_group = amd_iommu_device_group, .get_resv_regions = amd_iommu_get_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index c827ed690f1f..bc1490bd4f7a 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1741,25 +1741,20 @@ static const struct dma_map_ops iommu_dma_ops = { .max_mapping_size = iommu_dma_max_mapping_size, }; -/* - * The IOMMU core code allocates the default DMA domain, which the underlying - * IOMMU driver needs to support via the dma-iommu layer. - */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) +void iommu_setup_dma_ops(struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - if (!domain) - goto out_err; + if (dev_is_pci(dev)) + dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; - /* - * The IOMMU core code allocates the default DMA domain, which the - * underlying IOMMU driver needs to support via the dma-iommu layer. - */ if (iommu_is_dma_domain(domain)) { if (iommu_dma_init_domain(domain, dev)) goto out_err; dev->dma_ops = &iommu_dma_ops; + } else if (dev->dma_ops == &iommu_dma_ops) { + /* Clean up if we've switched *from* a DMA domain */ + dev->dma_ops = NULL; } return; @@ -1767,7 +1762,6 @@ out_err: pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", dev_name(dev)); } -EXPORT_SYMBOL_GPL(iommu_setup_dma_ops); static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index c829f1f82a99..c12d63457c76 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -9,6 +9,8 @@ #ifdef CONFIG_IOMMU_DMA +void iommu_setup_dma_ops(struct device *dev); + int iommu_get_dma_cookie(struct iommu_domain *domain); void iommu_put_dma_cookie(struct iommu_domain *domain); @@ -17,13 +19,13 @@ int iommu_dma_init_fq(struct iommu_domain *domain); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); extern bool iommu_dma_forcedac; -static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) -{ - dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; -} #else /* CONFIG_IOMMU_DMA */ +static inline void iommu_setup_dma_ops(struct device *dev) +{ +} + static inline int iommu_dma_init_fq(struct iommu_domain *domain) { return -EINVAL; @@ -42,9 +44,5 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } -static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) -{ -} - #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 45c75a8a0ef5..64adfdadeb49 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4349,12 +4349,6 @@ static void intel_iommu_release_device(struct device *dev) set_dma_ops(dev, NULL); } -static void intel_iommu_probe_finalize(struct device *dev) -{ - set_dma_ops(dev, NULL); - iommu_setup_dma_ops(dev, 0, U64_MAX); -} - static void intel_iommu_get_resv_regions(struct device *device, struct list_head *head) { @@ -4834,7 +4828,6 @@ const struct iommu_ops intel_iommu_ops = { .domain_alloc = intel_iommu_domain_alloc, .domain_alloc_user = intel_iommu_domain_alloc_user, .probe_device = intel_iommu_probe_device, - .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3183b0ed4cdb..9df7cc75c1bc 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -581,10 +581,11 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list if (list_empty(&group->entry)) list_add_tail(&group->entry, group_list); } - mutex_unlock(&group->mutex); - if (dev_is_pci(dev)) - iommu_dma_set_pci_32bit_workaround(dev); + if (group->default_domain) + iommu_setup_dma_ops(dev); + + mutex_unlock(&group->mutex); return 0; @@ -1828,6 +1829,8 @@ int bus_iommu_probe(const struct bus_type *bus) mutex_unlock(&group->mutex); return ret; } + for_each_group_device(group, gdev) + iommu_setup_dma_ops(gdev->dev); mutex_unlock(&group->mutex); /* @@ -3066,18 +3069,9 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, if (ret) goto out_unlock; - /* - * Release the mutex here because ops->probe_finalize() call-back of - * some vendor IOMMU drivers calls arm_iommu_attach_device() which - * in-turn might call back into IOMMU core code, where it tries to take - * group->mutex, resulting in a deadlock. - */ - mutex_unlock(&group->mutex); - /* Make sure dma_ops is appropriatley set */ for_each_group_device(group, gdev) - iommu_group_do_probe_finalize(gdev->dev); - return count; + iommu_setup_dma_ops(gdev->dev); out_unlock: mutex_unlock(&group->mutex); diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 9a5196f523de..d8eaa7ea380b 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -695,11 +695,6 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, return size; } -static void s390_iommu_probe_finalize(struct device *dev) -{ - iommu_setup_dma_ops(dev, 0, U64_MAX); -} - struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev) { if (!zdev || !zdev->s390_domain) @@ -785,7 +780,6 @@ static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc_paging = s390_domain_alloc_paging, .probe_device = s390_iommu_probe_device, - .probe_finalize = s390_iommu_probe_finalize, .release_device = s390_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = SZ_4K, diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 04048f64a2c0..8e776f6c6e35 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1025,15 +1025,6 @@ err_free_dev: return ERR_PTR(ret); } -static void viommu_probe_finalize(struct device *dev) -{ -#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS - /* First clear the DMA ops in case we're switching from a DMA domain */ - set_dma_ops(dev, NULL); - iommu_setup_dma_ops(dev, 0, U64_MAX); -#endif -} - static void viommu_release_device(struct device *dev) { struct viommu_endpoint *vdev = dev_iommu_priv_get(dev); @@ -1073,7 +1064,6 @@ static struct iommu_ops viommu_ops = { .capable = viommu_capable, .domain_alloc = viommu_domain_alloc, .probe_device = viommu_probe_device, - .probe_finalize = viommu_probe_finalize, .release_device = viommu_release_device, .device_group = viommu_device_group, .get_resv_regions = viommu_get_resv_regions, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 40dd439307e8..86f0bff3cc95 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1446,9 +1446,6 @@ static inline void iommu_debugfs_setup(void) {} #ifdef CONFIG_IOMMU_DMA #include -/* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); - int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); @@ -1459,10 +1456,6 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); struct msi_desc; struct msi_msg; -static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) -{ -} - static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) { return -ENODEV; -- cgit v1.2.3 From ba00196ca41c4f6d0b0d3c4a6748a133577abe05 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 3 May 2024 21:36:02 +0800 Subject: iommu/vt-d: Decouple igfx_off from graphic identity mapping A kernel command called igfx_off was introduced in commit ("Intel IOMMU: Intel IOMMU driver"). This command allows the user to disable the IOMMU dedicated to SOC-integrated graphic devices. Commit <9452618e7462> ("iommu/intel: disable DMAR for g4x integrated gfx") used this mechanism to disable the graphic-dedicated IOMMU for some problematic devices. Later, more problematic graphic devices were added to the list by commit <1f76249cc3beb> ("iommu/vt-d: Declare Broadwell igfx dmar support snafu"). On the other hand, commit <19943b0e30b05> ("intel-iommu: Unify hardware and software passthrough support") uses the identity domain for graphic devices if CONFIG_DMAR_BROKEN_GFX_WA is selected. + if (iommu_pass_through) + iommu_identity_mapping = 1; +#ifdef CONFIG_DMAR_BROKEN_GFX_WA + else + iommu_identity_mapping = 2; +#endif ... static int iommu_should_identity_map(struct pci_dev *pdev, int startup) { + if (iommu_identity_mapping == 2) + return IS_GFX_DEVICE(pdev); ... In the following driver evolution, CONFIG_DMAR_BROKEN_GFX_WA and quirk_iommu_igfx() are mixed together, causing confusion in the driver's device_def_domain_type callback. On one hand, dmar_map_gfx is used to turn off the graphic-dedicated IOMMU as a workaround for some buggy hardware; on the other hand, for those graphic devices, IDENTITY mapping is required for the IOMMU core. Commit <4b8d18c0c986> "iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA" has removed the CONFIG_DMAR_BROKEN_GFX_WA option, so the IDENTITY_DOMAIN requirement for graphic devices is no longer needed. Therefore, this requirement can be removed from device_def_domain_type() and igfx_off can be made independent. Fixes: 4b8d18c0c986 ("iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240428032020.214616-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'drivers/iommu/intel') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 916cdb65d849..e8e62ff1a65e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -216,12 +216,11 @@ int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); -static int dmar_map_gfx = 1; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; static int iommu_skip_te_disable; +static int disable_igfx_iommu; -#define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 const struct iommu_ops intel_iommu_ops; @@ -260,7 +259,7 @@ static int __init intel_iommu_setup(char *str) no_platform_optin = 1; pr_info("IOMMU disabled\n"); } else if (!strncmp(str, "igfx_off", 8)) { - dmar_map_gfx = 0; + disable_igfx_iommu = 1; pr_info("Disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); @@ -2211,9 +2210,6 @@ static int device_def_domain_type(struct device *dev) if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) return IOMMU_DOMAIN_IDENTITY; - - if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return IOMMU_DOMAIN_IDENTITY; } return 0; @@ -2514,9 +2510,6 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); } - if (!dmar_map_gfx) - iommu_identity_mapping |= IDENTMAP_GFX; - check_tylersburg_isoch(); ret = si_domain_init(hw_pass_through); @@ -2607,7 +2600,7 @@ static void __init init_no_remapping_devices(void) /* This IOMMU has *only* gfx devices. Either bypass it or set the gfx_mapped flag, as appropriate */ drhd->gfx_dedicated = 1; - if (!dmar_map_gfx) + if (disable_igfx_iommu) drhd->ignored = 1; } } @@ -4649,7 +4642,7 @@ static void quirk_iommu_igfx(struct pci_dev *dev) return; pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); - dmar_map_gfx = 0; + disable_igfx_iommu = 1; } /* G4x/GM45 integrated gfx dmar support is totally busted. */ @@ -4730,8 +4723,8 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) if (!(ggc & GGC_MEMORY_VT_ENABLED)) { pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); - dmar_map_gfx = 0; - } else if (dmar_map_gfx) { + disable_igfx_iommu = 1; + } else if (!disable_igfx_iommu) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); iommu_set_dma_strict(); -- cgit v1.2.3