From be4f5995875353efd5c438375b2a68453f137b6e Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Jan 2024 09:01:00 +0000 Subject: iommu/amd: Remove unused PPR_* macros Commit 5a0b11a180a ("iommu/amd: Remove iommu_v2 module") missed to remove PPR_* macros. Remove these macros as its not used anymore. No functional change intended. Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Reviewed-by: Alejandro Jimenez Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240118090105.5864-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 8b3601f285fd..2cef9796f09c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -77,10 +77,6 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) } #endif -#define PPR_SUCCESS 0x0 -#define PPR_INVALID 0x1 -#define PPR_FAILURE 0xf - int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, int status, int tag); -- cgit v1.2.3 From a4086637664926b52f5ecd087b3957f935ce48fb Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Jan 2024 09:01:01 +0000 Subject: iommu/amd: Remove unused IOVA_* macro These macros are not used after commit ac6d704679d343 ("iommu/dma: Pass address limit rather than size to iommu_setup_dma_ops()"). No functional change intended. Signed-off-by: Vasant Hegde Cc: Lu Baolu Reviewed-by: Suravee Suthikulpanit Reviewed-by: Alejandro Jimenez Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240118090105.5864-3-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4283dd8191f0..7fe3c9d44906 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -45,10 +45,6 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) - /* Reserved IOVA ranges */ #define MSI_RANGE_START (0xfee00000) #define MSI_RANGE_END (0xfeefffff) -- cgit v1.2.3 From 2edf056f57f59cf35980786f1ad4f681b9a5bebf Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Jan 2024 09:01:02 +0000 Subject: iommu/amd: Remove unused APERTURE_* macros These macros are not used after commit 518d9b450387 ("iommu/amd: Remove special mapping code for dma_ops path"). No functional change intended. 
Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Reviewed-by: Alejandro Jimenez Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240118090105.5864-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 809d74faa1a5..fdf7807280bd 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -513,14 +513,6 @@ extern struct kmem_cache *amd_iommu_irq_cache; #define for_each_iommu_safe(iommu, next) \ list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) -#define APERTURE_RANGE_SHIFT 27 /* 128 MB */ -#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) -#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) -#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ -#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) -#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) - - struct amd_iommu; struct iommu_domain; struct irq_domain; -- cgit v1.2.3 From e7b3533c81386464dfdcb01193075f8a9557083a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Feb 2024 15:32:39 +0000 Subject: iommu/iova: Tidy up iova_cache_get() failure Failure handling in iova_cache_get() is a little messy, and we'd like to add some more to it, so let's tidy up a bit first. By leaving the hotplug handler until last we can take advantage of kmem_cache_destroy() being NULL-safe to have a single cleanup label. We can also improve the error reporting, noting that kmem_cache_create() already screams if it fails, so that one is redundant. Signed-off-by: Robin Murphy Acked-by: David Rientjes Reviewed-by: Pasha Tatashin Reviewed-by: John Garry Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/ae4a3bda2d6a9b738221553c838d30473bd624e7.1707144953.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d30e453d0fb4..cf95001d85c0 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -254,26 +254,20 @@ static void free_iova_mem(struct iova *iova) int iova_cache_get(void) { + int err = -ENOMEM; + mutex_lock(&iova_cache_mutex); if (!iova_cache_users) { - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL, - iova_cpuhp_dead); - if (ret) { - mutex_unlock(&iova_cache_mutex); - pr_err("Couldn't register cpuhp handler\n"); - return ret; - } + iova_cache = kmem_cache_create("iommu_iova", sizeof(struct iova), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!iova_cache) + goto out_err; - iova_cache = kmem_cache_create( - "iommu_iova", sizeof(struct iova), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!iova_cache) { - cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD); - mutex_unlock(&iova_cache_mutex); - pr_err("Couldn't create iova cache\n"); - return -ENOMEM; + err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", + NULL, iova_cpuhp_dead); + if (err) { + pr_err("IOVA: Couldn't register cpuhp handler: %pe\n", ERR_PTR(err)); + goto out_err; } } @@ -281,6 +275,11 @@ int iova_cache_get(void) mutex_unlock(&iova_cache_mutex); return 0; + +out_err: + kmem_cache_destroy(iova_cache); + mutex_unlock(&iova_cache_mutex); + return err; } EXPORT_SYMBOL_GPL(iova_cache_get); -- cgit v1.2.3 From 
7f845d8b2eed0986a03a777d4956b52a57007974 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Feb 2024 15:32:40 +0000 Subject: iommu/iova: Reorganise some code The iova_cache_{get,put}() calls really represent top-level lifecycle management for the whole IOVA library, so it's long been rather confusing to have them buried right in the middle of the allocator implementation details. Move them to a more expected position at the end of the file, where it will then also be easier to expand them. With this, we can also move the rcache hotplug handler (plus another stray function) into the rcache portion of the file. Signed-off-by: Robin Murphy Acked-by: David Rientjes Reviewed-by: Pasha Tatashin Reviewed-by: John Garry Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/d4753562f4faa0e6b3aeebcbf88fdb60cc22d715.1707144953.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 128 +++++++++++++++++++++++++-------------------------- 1 file changed, 64 insertions(+), 64 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index cf95001d85c0..b5de865ee50b 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -24,24 +24,8 @@ static bool iova_rcache_insert(struct iova_domain *iovad, static unsigned long iova_rcache_get(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn); -static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); - -unsigned long iova_rcache_range(void) -{ - return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1); -} - -static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node) -{ - struct iova_domain *iovad; - - iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead); - - free_cpu_cached_iovas(cpu, iovad); - return 0; -} - +static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); static void free_global_cached_iovas(struct iova_domain *iovad); static struct iova *to_iova(struct rb_node *node) @@ -252,53 +236,6 @@ static void free_iova_mem(struct iova *iova) kmem_cache_free(iova_cache, iova); } -int iova_cache_get(void) -{ - int err = -ENOMEM; - - mutex_lock(&iova_cache_mutex); - if (!iova_cache_users) { - iova_cache = kmem_cache_create("iommu_iova", sizeof(struct iova), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!iova_cache) - goto out_err; - - err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", - NULL, iova_cpuhp_dead); - if (err) { - pr_err("IOVA: Couldn't register cpuhp handler: %pe\n", ERR_PTR(err)); - goto out_err; - } - } - - iova_cache_users++; - mutex_unlock(&iova_cache_mutex); - - return 0; - -out_err: - kmem_cache_destroy(iova_cache); - mutex_unlock(&iova_cache_mutex); - return err; -} -EXPORT_SYMBOL_GPL(iova_cache_get); - -void iova_cache_put(void) -{ - mutex_lock(&iova_cache_mutex); - if (WARN_ON(!iova_cache_users)) { - mutex_unlock(&iova_cache_mutex); - return; - } - iova_cache_users--; - if (!iova_cache_users) { - cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD); - kmem_cache_destroy(iova_cache); - } - mutex_unlock(&iova_cache_mutex); -} -EXPORT_SYMBOL_GPL(iova_cache_put); - /** * alloc_iova - allocates an iova * @iovad: - iova domain in question @@ -653,6 +590,11 @@ struct iova_rcache { struct delayed_work work; }; +unsigned long iova_rcache_range(void) +{ + return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1); +} + static struct iova_magazine *iova_magazine_alloc(gfp_t flags) { struct iova_magazine *mag; @@ -989,5 +931,63 @@ static 
void free_global_cached_iovas(struct iova_domain *iovad) spin_unlock_irqrestore(&rcache->lock, flags); } } + +static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node) +{ + struct iova_domain *iovad; + + iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead); + + free_cpu_cached_iovas(cpu, iovad); + return 0; +} + +int iova_cache_get(void) +{ + int err = -ENOMEM; + + mutex_lock(&iova_cache_mutex); + if (!iova_cache_users) { + iova_cache = kmem_cache_create("iommu_iova", sizeof(struct iova), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!iova_cache) + goto out_err; + + err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", + NULL, iova_cpuhp_dead); + if (err) { + pr_err("IOVA: Couldn't register cpuhp handler: %pe\n", ERR_PTR(err)); + goto out_err; + } + } + + iova_cache_users++; + mutex_unlock(&iova_cache_mutex); + + return 0; + +out_err: + kmem_cache_destroy(iova_cache); + mutex_unlock(&iova_cache_mutex); + return err; +} +EXPORT_SYMBOL_GPL(iova_cache_get); + +void iova_cache_put(void) +{ + mutex_lock(&iova_cache_mutex); + if (WARN_ON(!iova_cache_users)) { + mutex_unlock(&iova_cache_mutex); + return; + } + iova_cache_users--; + if (!iova_cache_users) { + cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD); + kmem_cache_destroy(iova_cache); + } + mutex_unlock(&iova_cache_mutex); +} +EXPORT_SYMBOL_GPL(iova_cache_put); + MODULE_AUTHOR("Anil S Keshavamurthy "); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 84e6f56be9c68b59aca51544f7ee33706542d7bc Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Mon, 5 Feb 2024 15:32:41 +0000 Subject: iommu/iova: use named kmem_cache for iova magazines The magazine buffers can take gigabytes of kmem memory, dominating all other allocations. For observability purpose create named slab cache so the iova magazine memory overhead can be clearly observed. With this change: > slabtop -o | head Active / Total Objects (% used) : 869731 / 952904 (91.3%) Active / Total Slabs (% used) : 103411 / 103974 (99.5%) Active / Total Caches (% used) : 135 / 211 (64.0%) Active / Total Size (% used) : 395389.68K / 411430.20K (96.1%) Minimum / Average / Maximum Object : 0.02K / 0.43K / 8.00K OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME 244412 244239 99% 1.00K 61103 4 244412K iommu_iova_magazine 91636 88343 96% 0.03K 739 124 2956K kmalloc-32 75744 74844 98% 0.12K 2367 32 9468K kernfs_node_cache On this machine it is now clear that magazine use 242M of kmem memory. 
Acked-by: David Rientjes Signed-off-by: Pasha Tatashin [ rm: adjust to rework of iova_cache_{get,put} ] Signed-off-by: Robin Murphy Reviewed-by: John Garry Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/dc5c51aaba50906a92b9ba1a5137ed462484a7be.1707144953.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b5de865ee50b..d59d0ea2fd21 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -590,6 +590,8 @@ struct iova_rcache { struct delayed_work work; }; +static struct kmem_cache *iova_magazine_cache; + unsigned long iova_rcache_range(void) { return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1); @@ -599,7 +601,7 @@ static struct iova_magazine *iova_magazine_alloc(gfp_t flags) { struct iova_magazine *mag; - mag = kmalloc(sizeof(*mag), flags); + mag = kmem_cache_alloc(iova_magazine_cache, flags); if (mag) mag->size = 0; @@ -608,7 +610,7 @@ static struct iova_magazine *iova_magazine_alloc(gfp_t flags) static void iova_magazine_free(struct iova_magazine *mag) { - kfree(mag); + kmem_cache_free(iova_magazine_cache, mag); } static void @@ -953,6 +955,12 @@ int iova_cache_get(void) if (!iova_cache) goto out_err; + iova_magazine_cache = kmem_cache_create("iommu_iova_magazine", + sizeof(struct iova_magazine), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!iova_magazine_cache) + goto out_err; + err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL, iova_cpuhp_dead); if (err) { @@ -968,6 +976,7 @@ int iova_cache_get(void) out_err: kmem_cache_destroy(iova_cache); + kmem_cache_destroy(iova_magazine_cache); mutex_unlock(&iova_cache_mutex); return err; } @@ -984,6 +993,7 @@ void iova_cache_put(void) if (!iova_cache_users) { cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD); kmem_cache_destroy(iova_cache); + kmem_cache_destroy(iova_magazine_cache); } mutex_unlock(&iova_cache_mutex); } -- cgit v1.2.3 From f2d6677ad5772d4972b11dc3a4c3e555973b27d9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Feb 2024 16:43:27 +0000 Subject: iommu/ipmmu-vmsa: Minor cleanups Remove the of_match_ptr() which was supposed to have gone long ago, but managed to got lost in a fix-squashing mishap. On a similar theme, we may as well also modernise the PM ops to get rid of the clunky #ifdefs, and modernise the resource mapping to keep the checkpatch brigade happy. Link: https://lore.kernel.org/linux-iommu/Yxni3d6CdI3FZ5D+@8bytes.org/ Signed-off-by: Robin Murphy Reviewed-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/791877b0d310dc2ab7dc616d2786ab24252b9b8e.1707151207.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index ace1fc4bd34b..90d3f03242db 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1005,7 +1005,6 @@ static const struct of_device_id ipmmu_of_ids[] = { static int ipmmu_probe(struct platform_device *pdev) { struct ipmmu_vmsa_device *mmu; - struct resource *res; int irq; int ret; @@ -1025,8 +1024,7 @@ static int ipmmu_probe(struct platform_device *pdev) return ret; /* Map I/O memory and request IRQ. 
*/ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mmu->base = devm_ioremap_resource(&pdev->dev, res); + mmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(mmu->base)) return PTR_ERR(mmu->base); @@ -1123,7 +1121,6 @@ static void ipmmu_remove(struct platform_device *pdev) ipmmu_device_reset(mmu); } -#ifdef CONFIG_PM_SLEEP static int ipmmu_resume_noirq(struct device *dev) { struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev); @@ -1153,18 +1150,14 @@ static int ipmmu_resume_noirq(struct device *dev) } static const struct dev_pm_ops ipmmu_pm = { - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(NULL, ipmmu_resume_noirq) + NOIRQ_SYSTEM_SLEEP_PM_OPS(NULL, ipmmu_resume_noirq) }; -#define DEV_PM_OPS &ipmmu_pm -#else -#define DEV_PM_OPS NULL -#endif /* CONFIG_PM_SLEEP */ static struct platform_driver ipmmu_driver = { .driver = { .name = "ipmmu-vmsa", - .of_match_table = of_match_ptr(ipmmu_of_ids), - .pm = DEV_PM_OPS, + .of_match_table = ipmmu_of_ids, + .pm = pm_sleep_ptr(&ipmmu_pm), }, .probe = ipmmu_probe, .remove_new = ipmmu_remove, -- cgit v1.2.3 From 773b05e7f407c377d64a9173ceed54489cc47c3a Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Jan 2024 09:01:03 +0000 Subject: iommu/amd: Remove duplicate function declarations from amd_iommu.h Perf counter related functions are defined in amd-iommu.h as well. Hence remove duplicate declarations. No functional change intended. Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Reviewed-by: Alejandro Jimenez Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240118090105.5864-5-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 2cef9796f09c..7464222ad8af 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -39,14 +39,6 @@ extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; bool amd_iommu_v2_supported(void); -struct amd_iommu *get_amd_iommu(unsigned int idx); -u8 amd_iommu_pc_get_max_banks(unsigned int idx); -bool amd_iommu_pc_supported(void); -u8 amd_iommu_pc_get_max_counters(unsigned int idx); -int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value); -int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value); /* Device capabilities */ int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); -- cgit v1.2.3 From 2dc9506bfb135edb684eb4f0bf17ea847c18b2a0 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Jan 2024 09:01:04 +0000 Subject: iommu/amd: Remove redundant error check in amd_iommu_probe_device() iommu_init_device() is not returning -ENOTSUPP since commit 61289cbaf6c8 ("iommu/amd: Remove old alias handling code"). No functional change intended. 
Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Reviewed-by: Alejandro Jimenez Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240118090105.5864-6-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 7fe3c9d44906..2e50f116672f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1950,8 +1950,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) ret = iommu_init_device(iommu, dev); if (ret) { - if (ret != -ENOTSUPP) - dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); + dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_dev = ERR_PTR(ret); iommu_ignore_device(iommu, dev); } else { -- cgit v1.2.3 From 108042db53a1eb192b972098669f36c6379ef159 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Jan 2024 09:01:05 +0000 Subject: iommu/amd: Remove EXPORT_SYMBOL for perf counter related functions .. as IOMMU perf counters are always built as part of kernel. No functional change intended. Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Reviewed-by: Alejandro Jimenez Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240118090105.5864-7-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c83bd0c2a1c9..e72fba4a8111 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3694,13 +3694,11 @@ u8 amd_iommu_pc_get_max_banks(unsigned int idx) return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); bool amd_iommu_pc_supported(void) { return amd_iommu_pc_present; } -EXPORT_SYMBOL(amd_iommu_pc_supported); u8 amd_iommu_pc_get_max_counters(unsigned int idx) { @@ -3711,7 +3709,6 @@ u8 amd_iommu_pc_get_max_counters(unsigned int idx) return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write) -- cgit v1.2.3 From a6ffb9b3d71ea21c9d56ac5856fe321ef584bb19 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 5 Feb 2024 11:55:59 +0000 Subject: iommu/amd: Pass struct iommu_dev_data to set_dte_entry() Pass iommu_dev_data structure instead of passing indivisual variables. No functional changes intended. 
Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 2e50f116672f..743465cb677f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1700,12 +1700,14 @@ static int setup_gcr3_table(struct protection_domain *domain, int pasids) return 0; } -static void set_dte_entry(struct amd_iommu *iommu, u16 devid, - struct protection_domain *domain, bool ats, bool ppr) +static void set_dte_entry(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; + u16 devid = dev_data->devid; + struct protection_domain *domain = dev_data->domain; struct dev_table_entry *dev_table = get_dev_table(iommu); if (domain->iop.mode != PAGE_MODE_NONE) @@ -1725,10 +1727,10 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, flags = dev_table[devid].data[1]; - if (ats) + if (dev_data->ats_enabled) flags |= DTE_FLAG_IOTLB; - if (ppr) + if (dev_data->ppr) pte_root |= 1ULL << DEV_ENTRY_PPR; if (domain->dirty_tracking) @@ -1804,12 +1806,10 @@ static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { struct amd_iommu *iommu; - bool ats; iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) return; - ats = dev_data->ats_enabled; /* Update data structures */ dev_data->domain = domain; @@ -1824,8 +1824,7 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - set_dte_entry(iommu, dev_data->devid, domain, - ats, dev_data->ppr); + set_dte_entry(iommu, dev_data); clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); @@ -2008,8 +2007,7 @@ static void update_device_table(struct protection_domain *domain) if (!iommu) continue; - set_dte_entry(iommu, dev_data->devid, domain, - dev_data->ats_enabled, dev_data->ppr); + set_dte_entry(iommu, dev_data); clone_aliases(iommu, dev_data->dev); } } -- cgit v1.2.3 From 8e017973317235468a330aa197134c1aebbf13da Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 5 Feb 2024 11:56:00 +0000 Subject: iommu/amd: Enable Guest Translation before registering devices IOMMU Guest Translation (GT) feature needs to be enabled before invalidating guest translations (CMD_INV_IOMMU_PAGES with GN=1). Currently GT feature is enabled after setting up interrupt handler. So far it was fine as we were not invalidating guest page table before this point. Upcoming series will introduce per device GCR3 table and it will invalidate guest pages after configuring. Hence move GT feature enablement to early_enable_iommu(). 
Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-3-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index e72fba4a8111..cb8486c9f8f4 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2769,6 +2769,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); + iommu_enable_gt(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); @@ -2825,6 +2826,7 @@ static void early_enable_iommus(void) iommu_disable_irtcachedis(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); + iommu_enable_gt(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); @@ -2838,10 +2840,8 @@ static void enable_iommus_v2(void) { struct amd_iommu *iommu; - for_each_iommu(iommu) { + for_each_iommu(iommu) iommu_enable_ppr_log(iommu); - iommu_enable_gt(iommu); - } } static void enable_iommus_vapic(void) -- cgit v1.2.3 From 6f35fe5d8a0ad0125c52fb20f5d67000e369eb3a Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 5 Feb 2024 11:56:01 +0000 Subject: iommu/amd: Introduce get_amd_iommu_from_dev() Introduce get_amd_iommu_from_dev() and get_amd_iommu_from_dev_data(). And replace rlookup_amd_iommu() with the new helper function where applicable to avoid unnecessary loop to look up struct amd_iommu from struct device. Suggested-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 15 ++++++++++++ drivers/iommu/amd/iommu.c | 54 +++++++++++-------------------------------- include/linux/iommu.h | 16 +++++++++++++ 3 files changed, 44 insertions(+), 41 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 7464222ad8af..4e1f9a444a1a 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -138,6 +138,21 @@ static inline void *alloc_pgtable_page(int nid, gfp_t gfp) return page ? page_address(page) : NULL; } +/* + * This must be called after device probe completes. During probe + * use rlookup_amd_iommu() get the iommu. + */ +static inline struct amd_iommu *get_amd_iommu_from_dev(struct device *dev) +{ + return iommu_get_iommu_dev(dev, struct amd_iommu, iommu); +} + +/* This must be called after device probe completes. 
*/ +static inline struct amd_iommu *get_amd_iommu_from_dev_data(struct iommu_dev_data *dev_data) +{ + return iommu_get_iommu_dev(dev_data->dev, struct amd_iommu, iommu); +} + bool translation_pre_enabled(struct amd_iommu *iommu); bool amd_iommu_is_attach_deferred(struct device *dev); int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 743465cb677f..2a5ae7a841cc 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1384,14 +1384,9 @@ void amd_iommu_flush_all_caches(struct amd_iommu *iommu) static int device_flush_iotlb(struct iommu_dev_data *dev_data, u64 address, size_t size, ioasid_t pasid, bool gn) { - struct amd_iommu *iommu; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); struct iommu_cmd cmd; - int qdep; - - qdep = dev_data->ats_qdep; - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return -EINVAL; + int qdep = dev_data->ats_qdep; build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size, pasid, gn); @@ -1411,16 +1406,12 @@ static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data) */ static int device_flush_dte(struct iommu_dev_data *dev_data) { - struct amd_iommu *iommu; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); struct pci_dev *pdev = NULL; struct amd_iommu_pci_seg *pci_seg; u16 alias; int ret; - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return -EINVAL; - if (dev_is_pci(dev_data->dev)) pdev = to_pci_dev(dev_data->dev); @@ -1805,11 +1796,7 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { - struct amd_iommu *iommu; - - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); /* Update data structures */ dev_data->domain = domain; @@ -1833,11 +1820,7 @@ static void do_attach(struct iommu_dev_data *dev_data, static void do_detach(struct iommu_dev_data *dev_data) { struct protection_domain *domain = dev_data->domain; - struct amd_iommu *iommu; - - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); /* Update data structures */ dev_data->domain = NULL; @@ -2003,10 +1986,8 @@ static void update_device_table(struct protection_domain *domain) struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); - if (!iommu) - continue; set_dte_entry(iommu, dev_data); clone_aliases(iommu, dev_data->dev); } @@ -2187,11 +2168,8 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, struct protection_domain *domain; struct amd_iommu *iommu = NULL; - if (dev) { - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return ERR_PTR(-ENODEV); - } + if (dev) + iommu = get_amd_iommu_from_dev(dev); /* * Since DTE[Mode]=0 is prohibited on SNP-enabled system, @@ -2272,7 +2250,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct protection_domain *domain = to_pdomain(dom); - struct amd_iommu *iommu = rlookup_amd_iommu(dev); + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); int ret; /* @@ -2411,7 +2389,7 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) case 
IOMMU_CAP_DEFERRED_FLUSH: return true; case IOMMU_CAP_DIRTY_TRACKING: { - struct amd_iommu *iommu = rlookup_amd_iommu(dev); + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); return amd_iommu_hd_support(iommu); } @@ -2440,9 +2418,7 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, } list_for_each_entry(dev_data, &pdomain->dev_list, list) { - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - continue; + iommu = get_amd_iommu_from_dev_data(dev_data); dev_table = get_dev_table(iommu); pte_root = dev_table[dev_data->devid].data[0]; @@ -2502,9 +2478,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; devid = PCI_SBDF_TO_DEVID(sbdf); - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return; + iommu = get_amd_iommu_from_dev(dev); pci_seg = iommu->pci_seg; list_for_each_entry(entry, &pci_seg->unity_map, list) { @@ -2838,9 +2812,7 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, struct iommu_cmd cmd; dev_data = dev_iommu_priv_get(&pdev->dev); - iommu = rlookup_amd_iommu(&pdev->dev); - if (!iommu) - return -ENODEV; + iommu = get_amd_iommu_from_dev(&pdev->dev); build_complete_ppr(&cmd, dev_data->devid, pasid, status, tag, dev_data->pri_tlp); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1ea2a820e1eb..32bb121e8032 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -654,6 +654,22 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev) return (struct iommu_device *)dev_get_drvdata(dev); } +/** + * iommu_get_iommu_dev - Get iommu_device for a device + * @dev: an end-point device + * + * Note that this function must be called from the iommu_ops + * to retrieve the iommu_device for a device, which the core code + * guarentees it will not invoke the op without an attached iommu. + */ +static inline struct iommu_device *__iommu_get_iommu_dev(struct device *dev) +{ + return dev->iommu->iommu_dev; +} + +#define iommu_get_iommu_dev(dev, type, member) \ + container_of(__iommu_get_iommu_dev(dev), type, member) + static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { -- cgit v1.2.3 From fda5108ebafe7797dc8e1279f04e6e9145c9ad10 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 5 Feb 2024 11:56:02 +0000 Subject: iommu/amd: Introduce struct protection_domain.pd_mode This enum variable is used to track the type of page table used by the protection domain. It will replace the protection_domain.flags in subsequent series. Suggested-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-5-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 6 ++++++ drivers/iommu/amd/iommu.c | 2 ++ 2 files changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index fdf7807280bd..d29d4a321e57 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -541,6 +541,11 @@ struct amd_io_pgtable { u64 *pgd; /* v2 pgtable pgd pointer */ }; +enum protection_domain_mode { + PD_MODE_V1 = 1, + PD_MODE_V2, +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. 
@@ -556,6 +561,7 @@ struct protection_domain { int nid; /* Node ID */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ + enum protection_domain_mode pd_mode; /* Track page table type */ bool dirty_tracking; /* dirty tracking is enabled in the domain */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 2a5ae7a841cc..a3c8f8ea926e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2068,6 +2068,7 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) return -ENOMEM; } + domain->pd_mode = PD_MODE_V1; amd_iommu_domain_set_pgtable(domain, pt_root, mode); return 0; @@ -2076,6 +2077,7 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) static int protection_domain_init_v2(struct protection_domain *domain) { domain->flags |= PD_GIOV_MASK; + domain->pd_mode = PD_MODE_V2; domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; -- cgit v1.2.3 From b7731065523048276a80c8e16120208153438930 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 5 Feb 2024 11:56:03 +0000 Subject: iommu/amd: Introduce per-device GCR3 table AMD IOMMU GCR3 table is indexed by PASID. Each entry stores guest CR3 register value, which is an address to the root of guest IO page table. The GCR3 table can be programmed per-device. However, Linux AMD IOMMU driver currently managing the table on a per-domain basis. PASID is a device feature. When SVA is enabled it will bind PASID to device, not domain. Hence it makes sense to have per device GCR3 table. Introduce struct iommu_dev_data.gcr3_tbl_info to keep track of GCR3 table configuration. This will eventually replaces gcr3 related variables in protection_domain structure. Suggested-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-6-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index d29d4a321e57..aed3e88d23c7 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -533,6 +533,12 @@ struct amd_irte_ops; #define io_pgtable_cfg_to_data(x) \ container_of((x), struct amd_io_pgtable, pgtbl_cfg) +struct gcr3_tbl_info { + u64 *gcr3_tbl; /* Guest CR3 table */ + int glx; /* Number of levels for GCR3 table */ + u32 pasid_cnt; /* Track attached PASIDs */ +}; + struct amd_io_pgtable { struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable iop; @@ -814,6 +820,7 @@ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ + struct gcr3_tbl_info gcr3_info; /* Per-device GCR3 table */ struct device *dev; u16 devid; /* PCI Device ID */ -- cgit v1.2.3 From 7b4e5623d8e4f1475b7deac42035d0129fb36ed0 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 5 Feb 2024 11:56:04 +0000 Subject: iommu/amd: Use protection_domain.flags to check page table mode Page table mode (v1, v2 or pt) is per domain property. Recently we have enhanced protection_domain.pd_mode to track per domain page table mode. 
Use that variable to check the page table mode instead of global 'amd_iommu_pgtable' in {map/unmap}_pages path. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-7-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index a3c8f8ea926e..498323930892 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2310,7 +2310,7 @@ static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, int prot = 0; int ret = -EINVAL; - if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + if ((domain->pd_mode == PD_MODE_V1) && (domain->iop.mode == PAGE_MODE_NONE)) return -EINVAL; @@ -2356,7 +2356,7 @@ static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova struct io_pgtable_ops *ops = &domain->iop.iop.ops; size_t r; - if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + if ((domain->pd_mode == PD_MODE_V1) && (domain->iop.mode == PAGE_MODE_NONE)) return 0; -- cgit v1.2.3 From 474bf01ed9f0358b243b264339baafcfbe6c6670 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 5 Feb 2024 11:56:05 +0000 Subject: iommu/amd: Add support for device based TLB invalidation Add support to invalidate TLB/IOTLB for the given device. These functions will be used in subsequent patches where we will introduce per device GCR3 table and SVA support. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-8-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 5 +++++ drivers/iommu/amd/iommu.c | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 4e1f9a444a1a..1f4bbc6bf1e5 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -55,6 +55,11 @@ void amd_iommu_domain_update(struct protection_domain *domain); void amd_iommu_domain_flush_complete(struct protection_domain *domain); void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); +void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, + ioasid_t pasid, u64 address, size_t size); +void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, + ioasid_t pasid); + int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 498323930892..dc8afcefd3cc 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1538,6 +1538,29 @@ static void amd_iommu_domain_flush_all(struct protection_domain *domain) CMD_INV_IOMMU_ALL_PAGES_ADDRESS); } +void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, + ioasid_t pasid, u64 address, size_t size) +{ + struct iommu_cmd cmd; + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); + + build_inv_iommu_pages(&cmd, address, size, + dev_data->domain->id, pasid, true); + iommu_queue_command(iommu, &cmd); + + if (dev_data->ats_enabled) + device_flush_iotlb(dev_data, address, size, pasid, true); + + iommu_completion_wait(iommu); +} + +void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, + ioasid_t pasid) +{ + amd_iommu_dev_flush_pasid_pages(dev_data, 0, + CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid); 
+} + void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; -- cgit v1.2.3 From b2e8a7f5d2c3d99285e13cdd330d339d9b040599 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 5 Feb 2024 11:56:06 +0000 Subject: iommu/amd: Rearrange GCR3 table setup code Consolidate GCR3 table related code in one place so that its easy to maintain. Note that this patch doesn't move __set_gcr3/__clear_gcr3. We are moving GCR3 table from per domain to per device. Following series will rework these functions. During that time I will move these functions as well. No functional changes intended. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-9-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 64 +++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index dc8afcefd3cc..d8b42b0a2539 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1714,6 +1714,38 @@ static int setup_gcr3_table(struct protection_domain *domain, int pasids) return 0; } +static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) +{ + int index; + u64 *pte; + + while (true) { + + index = (pasid >> (9 * level)) & 0x1ff; + pte = &root[index]; + + if (level == 0) + break; + + if (!(*pte & GCR3_VALID)) { + if (!alloc) + return NULL; + + root = (void *)get_zeroed_page(GFP_ATOMIC); + if (root == NULL) + return NULL; + + *pte = iommu_virt_to_phys(root) | GCR3_VALID; + } + + root = iommu_phys_to_virt(*pte & PAGE_MASK); + + level -= 1; + } + + return pte; +} + static void set_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data) { @@ -2737,38 +2769,6 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid) return ret; } -static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) -{ - int index; - u64 *pte; - - while (true) { - - index = (pasid >> (9 * level)) & 0x1ff; - pte = &root[index]; - - if (level == 0) - break; - - if (!(*pte & GCR3_VALID)) { - if (!alloc) - return NULL; - - root = (void *)get_zeroed_page(GFP_ATOMIC); - if (root == NULL) - return NULL; - - *pte = iommu_virt_to_phys(root) | GCR3_VALID; - } - - root = iommu_phys_to_virt(*pte & PAGE_MASK); - - level -= 1; - } - - return pte; -} - static int __set_gcr3(struct protection_domain *domain, u32 pasid, unsigned long cr3) { -- cgit v1.2.3 From bf8aff2945ba4091f503df673b9df33002546e6a Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 5 Feb 2024 11:56:07 +0000 Subject: iommu: Introduce iommu_group_mutex_assert() Add function to check iommu group mutex lock. So that device drivers can rely on group mutex lock instead of adding another driver level lock before modifying driver specific device data structure. 
Suggested-by: Jason Gunthorpe Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-10-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 19 +++++++++++++++++++ include/linux/iommu.h | 8 ++++++++ 2 files changed, 27 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d14413916f93..8309e634977e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1248,6 +1248,25 @@ void iommu_group_remove_device(struct device *dev) } EXPORT_SYMBOL_GPL(iommu_group_remove_device); +#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) +/** + * iommu_group_mutex_assert - Check device group mutex lock + * @dev: the device that has group param set + * + * This function is called by an iommu driver to check whether it holds + * group mutex lock for the given device or not. + * + * Note that this function must be called after device group param is set. + */ +void iommu_group_mutex_assert(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; + + lockdep_assert_held(&group->mutex); +} +EXPORT_SYMBOL_GPL(iommu_group_mutex_assert); +#endif + static struct device *iommu_group_first_dev(struct iommu_group *group) { lockdep_assert_held(&group->mutex); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 32bb121e8032..8141a37556d5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1356,6 +1356,14 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) static inline void iommu_free_global_pasid(ioasid_t pasid) {} #endif /* CONFIG_IOMMU_API */ +#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) +void iommu_group_mutex_assert(struct device *dev); +#else +static inline void iommu_group_mutex_assert(struct device *dev) +{ +} +#endif + /** * iommu_map_sgtable - Map the given buffer to the IOMMU domain * @domain: The IOMMU domain to perform the mapping -- cgit v1.2.3 From e8e1aac33458d80545e5ad37b2a3e4243eee278e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 5 Feb 2024 11:56:08 +0000 Subject: iommu/amd: Refactor helper function for setting / clearing GCR3 Refactor GCR3 helper functions in preparation to use per device GCR3 table. * Add new function update_gcr3 to update per device GCR3 table * Remove per domain default GCR3 setup during v2 page table allocation. Subsequent patch will add support to setup default gcr3 while attaching device to domain. * Remove amd_iommu_domain_update() from V2 page table path as device detach path will take care of updating the domain. * Consolidate GCR3 table related code in one place so that its easy to maintain. * Rename functions to reflect its usage. 
Signed-off-by: Suravee Suthikulpanit Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-11-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 8 ++- drivers/iommu/amd/io_pgtable_v2.c | 21 +------ drivers/iommu/amd/iommu.c | 115 ++++++++++++++++++-------------------- 3 files changed, 61 insertions(+), 83 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1f4bbc6bf1e5..6e03ff2aaebe 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -44,6 +44,11 @@ bool amd_iommu_v2_supported(void); int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev); +/* GCR3 setup */ +int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, + ioasid_t pasid, unsigned long gcr3); +int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid); + int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); /* * This function flushes all internal caches of @@ -61,9 +66,6 @@ void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, ioasid_t pasid); int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); -int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, - unsigned long cr3); -int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid); #ifdef CONFIG_IRQ_REMAP int amd_iommu_create_irq_domain(struct amd_iommu *iommu); diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 6d69ba60744f..93489d2db4e8 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -350,38 +350,26 @@ static const struct iommu_flush_ops v2_flush_ops = { static void v2_free_pgtable(struct io_pgtable *iop) { - struct protection_domain *pdom; struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); - pdom = container_of(pgtable, struct protection_domain, iop); - if (!(pdom->flags & PD_IOMMUV2_MASK)) + if (!pgtable || !pgtable->pgd) return; - /* Clear gcr3 entry */ - amd_iommu_domain_clear_gcr3(&pdom->domain, 0); - - /* Make changes visible to IOMMUs */ - amd_iommu_domain_update(pdom); - /* Free page table */ free_pgtable(pgtable->pgd, get_pgtable_level()); + pgtable->pgd = NULL; } static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); struct protection_domain *pdom = (struct protection_domain *)cookie; - int ret; int ias = IOMMU_IN_ADDR_BIT_SIZE; pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC); if (!pgtable->pgd) return NULL; - ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd)); - if (ret) - goto err_free_pgd; - if (get_pgtable_level() == PAGE_MODE_5_LEVEL) ias = 57; @@ -395,11 +383,6 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->tlb = &v2_flush_ops; return &pgtable->iop; - -err_free_pgd: - free_pgtable_page(pgtable->pgd); - - return NULL; } struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = { diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d8b42b0a2539..4fda158e043f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1714,10 +1714,13 @@ static int setup_gcr3_table(struct protection_domain *domain, int pasids) return 0; } -static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) 
+static u64 *__get_gcr3_pte(struct gcr3_tbl_info *gcr3_info, + ioasid_t pasid, bool alloc) { int index; u64 *pte; + u64 *root = gcr3_info->gcr3_tbl; + int level = gcr3_info->glx; while (true) { @@ -1746,6 +1749,56 @@ static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) return pte; } +static int update_gcr3(struct iommu_dev_data *dev_data, + ioasid_t pasid, unsigned long gcr3, bool set) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + u64 *pte; + + pte = __get_gcr3_pte(gcr3_info, pasid, true); + if (pte == NULL) + return -ENOMEM; + + if (set) + *pte = (gcr3 & PAGE_MASK) | GCR3_VALID; + else + *pte = 0; + + amd_iommu_dev_flush_pasid_all(dev_data, pasid); + return 0; +} + +int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid, + unsigned long gcr3) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + int ret; + + iommu_group_mutex_assert(dev_data->dev); + + ret = update_gcr3(dev_data, pasid, gcr3, true); + if (ret) + return ret; + + gcr3_info->pasid_cnt++; + return ret; +} + +int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + int ret; + + iommu_group_mutex_assert(dev_data->dev); + + ret = update_gcr3(dev_data, pasid, 0, false); + if (ret) + return ret; + + gcr3_info->pasid_cnt--; + return ret; +} + static void set_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data) { @@ -2769,66 +2822,6 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid) return ret; } -static int __set_gcr3(struct protection_domain *domain, u32 pasid, - unsigned long cr3) -{ - u64 *pte; - - if (domain->iop.mode != PAGE_MODE_NONE) - return -EINVAL; - - pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); - if (pte == NULL) - return -ENOMEM; - - *pte = (cr3 & PAGE_MASK) | GCR3_VALID; - - return __amd_iommu_flush_tlb(domain, pasid); -} - -static int __clear_gcr3(struct protection_domain *domain, u32 pasid) -{ - u64 *pte; - - if (domain->iop.mode != PAGE_MODE_NONE) - return -EINVAL; - - pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); - if (pte == NULL) - return 0; - - *pte = 0; - - return __amd_iommu_flush_tlb(domain, pasid); -} - -int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, - unsigned long cr3) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __set_gcr3(domain, pasid, cr3); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __clear_gcr3(domain, pasid); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, int status, int tag) { -- cgit v1.2.3 From 4ebd4c7f2501cc5876717e0f2382d5b825e59405 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 5 Feb 2024 11:56:09 +0000 Subject: iommu/amd: Refactor attaching / detaching device functions If domain is configured with V2 page table then setup default GCR3 with domain GCR3 pointer. So that all devices in the domain uses same page table for translation. Also return page table setup status from do_attach() function. 
Signed-off-by: Suravee Suthikulpanit Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-12-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4fda158e043f..929a0e8d41d0 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1901,10 +1901,11 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) amd_iommu_apply_erratum_63(iommu, devid); } -static void do_attach(struct iommu_dev_data *dev_data, - struct protection_domain *domain) +static int do_attach(struct iommu_dev_data *dev_data, + struct protection_domain *domain) { struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + int ret = 0; /* Update data structures */ dev_data->domain = domain; @@ -1918,11 +1919,28 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; + /* Init GCR3 table and update device table */ + if (domain->pd_mode == PD_MODE_V2) { + /* By default, setup GCR3 table to support single PASID */ + ret = setup_gcr3_table(dev_data->domain, 1); + if (ret) + return ret; + + ret = update_gcr3(dev_data, 0, + iommu_virt_to_phys(domain->iop.pgd), true); + if (ret) { + free_gcr3_table(dev_data->domain); + return ret; + } + } + /* Update device table */ set_dte_entry(iommu, dev_data); clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); + + return ret; } static void do_detach(struct iommu_dev_data *dev_data) @@ -1930,6 +1948,12 @@ static void do_detach(struct iommu_dev_data *dev_data) struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + /* Clear GCR3 table */ + if (domain->pd_mode == PD_MODE_V2) { + update_gcr3(dev_data, 0, 0, false); + free_gcr3_table(dev_data->domain); + } + /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); @@ -1972,7 +1996,7 @@ static int attach_device(struct device *dev, if (dev_is_pci(dev)) pdev_enable_caps(to_pci_dev(dev)); - do_attach(dev_data, domain); + ret = do_attach(dev_data, domain); out: spin_unlock(&dev_data->lock); -- cgit v1.2.3 From fb575d17813f6aff2e8b7e051dac8def2c291bff Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 5 Feb 2024 11:56:10 +0000 Subject: iommu/amd: Refactor protection_domain helper functions To removes the code to setup GCR3 table, and only handle domain create / destroy, since GCR3 is no longer part of a domain. 
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-13-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 929a0e8d41d0..e2f546926b6f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2176,9 +2176,6 @@ static void protection_domain_free(struct protection_domain *domain) if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); - if (domain->iop.root) free_page((unsigned long)domain->iop.root); @@ -2206,15 +2203,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) return 0; } -static int protection_domain_init_v2(struct protection_domain *domain) +static int protection_domain_init_v2(struct protection_domain *pdom) { - domain->flags |= PD_GIOV_MASK; - domain->pd_mode = PD_MODE_V2; - - domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; - - if (setup_gcr3_table(domain, 1)) - return -ENOMEM; + pdom->pd_mode = PD_MODE_V2; + pdom->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; return 0; } -- cgit v1.2.3 From cf70873e3d01653df69115576501f8f7f6f2b203 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 5 Feb 2024 11:56:11 +0000 Subject: iommu/amd: Refactor GCR3 table helper functions To use the new per-device struct gcr3_tbl_info. Use GFP_KERNEL flag instead of GFP_ATOMIC for GCR3 table allocation. Also modify set_dte_entry() to use new per device GCR3 table. Also in free_gcr3_table() path replace BUG_ON with WARN_ON_ONCE(). Signed-off-by: Suravee Suthikulpanit Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-14-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 53 +++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index e2f546926b6f..b742983007f2 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -75,6 +75,9 @@ struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); +static void set_dte_entry(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data); + /**************************************************************************** * * Helper functions @@ -1666,16 +1669,19 @@ static void free_gcr3_tbl_level2(u64 *tbl) } } -static void free_gcr3_table(struct protection_domain *domain) +static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info) { - if (domain->glx == 2) - free_gcr3_tbl_level2(domain->gcr3_tbl); - else if (domain->glx == 1) - free_gcr3_tbl_level1(domain->gcr3_tbl); + if (gcr3_info->glx == 2) + free_gcr3_tbl_level2(gcr3_info->gcr3_tbl); + else if (gcr3_info->glx == 1) + free_gcr3_tbl_level1(gcr3_info->gcr3_tbl); else - BUG_ON(domain->glx != 0); + WARN_ON_ONCE(gcr3_info->glx != 0); + + gcr3_info->glx = 0; - free_page((unsigned long)domain->gcr3_tbl); + free_page((unsigned long)gcr3_info->gcr3_tbl); + gcr3_info->gcr3_tbl = NULL; } /* @@ -1694,22 +1700,23 @@ static int get_gcr3_levels(int pasids) return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels; } -/* Note: This function expects iommu_domain->lock to be held prior calling the function. 
*/ -static int setup_gcr3_table(struct protection_domain *domain, int pasids) +static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, + struct amd_iommu *iommu, int pasids) { int levels = get_gcr3_levels(pasids); + int nid = iommu ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; if (levels > amd_iommu_max_glx_val) return -EINVAL; - domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC); - if (domain->gcr3_tbl == NULL) - return -ENOMEM; + if (gcr3_info->gcr3_tbl) + return -EBUSY; - domain->glx = levels; - domain->flags |= PD_IOMMUV2_MASK; + gcr3_info->gcr3_tbl = alloc_pgtable_page(nid, GFP_KERNEL); + if (gcr3_info->gcr3_tbl == NULL) + return -ENOMEM; - amd_iommu_domain_update(domain); + gcr3_info->glx = levels; return 0; } @@ -1808,6 +1815,7 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid = dev_data->devid; struct protection_domain *domain = dev_data->domain; struct dev_table_entry *dev_table = get_dev_table(iommu); + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; if (domain->iop.mode != PAGE_MODE_NONE) pte_root = iommu_virt_to_phys(domain->iop.root); @@ -1835,9 +1843,9 @@ static void set_dte_entry(struct amd_iommu *iommu, if (domain->dirty_tracking) pte_root |= DTE_FLAG_HAD; - if (domain->flags & PD_IOMMUV2_MASK) { - u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl); - u64 glx = domain->glx; + if (gcr3_info && gcr3_info->gcr3_tbl) { + u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl); + u64 glx = gcr3_info->glx; u64 tmp; pte_root |= DTE_FLAG_GV; @@ -1865,7 +1873,8 @@ static void set_dte_entry(struct amd_iommu *iommu, ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT); } - if (domain->flags & PD_GIOV_MASK) + /* GIOV is supported with V2 page table mode only */ + if (pdom_is_v2_pgtbl_mode(domain)) pte_root |= DTE_FLAG_GIOV; } @@ -1922,14 +1931,14 @@ static int do_attach(struct iommu_dev_data *dev_data, /* Init GCR3 table and update device table */ if (domain->pd_mode == PD_MODE_V2) { /* By default, setup GCR3 table to support single PASID */ - ret = setup_gcr3_table(dev_data->domain, 1); + ret = setup_gcr3_table(&dev_data->gcr3_info, iommu, 1); if (ret) return ret; ret = update_gcr3(dev_data, 0, iommu_virt_to_phys(domain->iop.pgd), true); if (ret) { - free_gcr3_table(dev_data->domain); + free_gcr3_table(&dev_data->gcr3_info); return ret; } } @@ -1951,7 +1960,7 @@ static void do_detach(struct iommu_dev_data *dev_data) /* Clear GCR3 table */ if (domain->pd_mode == PD_MODE_V2) { update_gcr3(dev_data, 0, 0, false); - free_gcr3_table(dev_data->domain); + free_gcr3_table(&dev_data->gcr3_info); } /* Update data structures */ -- cgit v1.2.3 From 02b990253db7cbdc3ef0631e9c202a69512a14c4 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 5 Feb 2024 11:56:12 +0000 Subject: iommu/amd: Remove unused flush pasid functions We have removed iommu_v2 module and converted v2 page table to use common flush functions. Also we have moved GCR3 table to per device. PASID related functions are not used. Hence remove these unused functions. 
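The per-PASID invalidation need is covered by the per-device helpers that survive; their declarations are visible in the amd_iommu.h hunk below. A minimal usage sketch based only on those visible signatures (the wrapper name is hypothetical):

```c
/* Flush every translation cached for one PASID on one device, using the
 * surviving helper declared in amd_iommu.h below. */
static void example_flush_pasid(struct iommu_dev_data *dev_data,
				ioasid_t pasid)
{
	amd_iommu_dev_flush_pasid_all(dev_data, pasid);
}
```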
Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-15-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 3 -- drivers/iommu/amd/iommu.c | 100 ------------------------------------------ 2 files changed, 103 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 6e03ff2aaebe..bea910d9ace5 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -49,7 +49,6 @@ int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid, unsigned long gcr3); int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid); -int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); /* * This function flushes all internal caches of * the IOMMU used by this driver. @@ -65,8 +64,6 @@ void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, ioasid_t pasid); -int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); - #ifdef CONFIG_IRQ_REMAP int amd_iommu_create_irq_domain(struct amd_iommu *iommu); #else diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b742983007f2..4384d8304bea 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2747,106 +2747,6 @@ const struct iommu_ops amd_iommu_ops = { } }; -static int __flush_pasid(struct protection_domain *domain, u32 pasid, - u64 address, size_t size) -{ - struct iommu_dev_data *dev_data; - struct iommu_cmd cmd; - int i, ret; - - if (!(domain->flags & PD_IOMMUV2_MASK)) - return -EINVAL; - - build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, true); - - /* - * IOMMU TLB needs to be flushed before Device TLB to - * prevent device TLB refill from IOMMU TLB - */ - for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { - if (domain->dev_iommu[i] == 0) - continue; - - ret = iommu_queue_command(amd_iommus[i], &cmd); - if (ret != 0) - goto out; - } - - /* Wait until IOMMU TLB flushes are complete */ - amd_iommu_domain_flush_complete(domain); - - /* Now flush device TLBs */ - list_for_each_entry(dev_data, &domain->dev_list, list) { - struct amd_iommu *iommu; - int qdep; - - /* - There might be non-IOMMUv2 capable devices in an IOMMUv2 - * domain. 
- */ - if (!dev_data->ats_enabled) - continue; - - qdep = dev_data->ats_qdep; - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - continue; - build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, - address, size, pasid, true); - - ret = iommu_queue_command(iommu, &cmd); - if (ret != 0) - goto out; - } - - /* Wait until all device TLBs are flushed */ - amd_iommu_domain_flush_complete(domain); - - ret = 0; - -out: - - return ret; -} - -static int __amd_iommu_flush_page(struct protection_domain *domain, u32 pasid, - u64 address) -{ - return __flush_pasid(domain, pasid, address, PAGE_SIZE); -} - -int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, - u64 address) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __amd_iommu_flush_page(domain, pasid, address); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid) -{ - return __flush_pasid(domain, pasid, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS); -} - -int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __amd_iommu_flush_tlb(domain, pasid); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, int status, int tag) { -- cgit v1.2.3 From a7b2aff3132562bd26657d9a707f8fa82967147f Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 5 Feb 2024 11:56:13 +0000 Subject: iommu/amd: Rearrange device flush code Consolidate all flush-related code in one place so that it is easy to maintain. No functional changes intended.
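For readability, here are the consolidated entry points without the diff markup (reproduced from the hunk that follows):

```c
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
{
	update_device_table(domain);	/* rewrite the DTE of every device */
	domain_flush_devices(domain);	/* then flush their cached DTEs */
}

void amd_iommu_domain_update(struct protection_domain *domain)
{
	/* Update device table */
	amd_iommu_update_and_flush_device_table(domain);

	/* Flush domain TLB(s) and wait for completion */
	amd_iommu_domain_flush_all(domain);
}
```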
Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-16-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 92 ++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 49 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4384d8304bea..d3ea4de9033a 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1605,6 +1605,49 @@ static void domain_flush_devices(struct protection_domain *domain) device_flush_dte(dev_data); } +static void update_device_table(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + + list_for_each_entry(dev_data, &domain->dev_list, list) { + struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); + + set_dte_entry(iommu, dev_data); + clone_aliases(iommu, dev_data->dev); + } +} + +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) +{ + update_device_table(domain); + domain_flush_devices(domain); +} + +void amd_iommu_domain_update(struct protection_domain *domain) +{ + /* Update device table */ + amd_iommu_update_and_flush_device_table(domain); + + /* Flush domain TLB(s) and wait for completion */ + amd_iommu_domain_flush_all(domain); +} + +int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, + int status, int tag) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + struct iommu_cmd cmd; + + dev_data = dev_iommu_priv_get(&pdev->dev); + iommu = get_amd_iommu_from_dev(&pdev->dev); + + build_complete_ppr(&cmd, dev_data->devid, pasid, status, + tag, dev_data->pri_tlp); + + return iommu_queue_command(iommu, &cmd); +} + /**************************************************************************** * * The next functions belong to the domain allocation. A domain is @@ -2116,39 +2159,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) return acpihid_device_group(dev); } -/***************************************************************************** - * - * The next functions belong to the dma_ops mapping/unmapping code. 
- * - *****************************************************************************/ - -static void update_device_table(struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data; - - list_for_each_entry(dev_data, &domain->dev_list, list) { - struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); - - set_dte_entry(iommu, dev_data); - clone_aliases(iommu, dev_data->dev); - } -} - -void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) -{ - update_device_table(domain); - domain_flush_devices(domain); -} - -void amd_iommu_domain_update(struct protection_domain *domain) -{ - /* Update device table */ - amd_iommu_update_and_flush_device_table(domain); - - /* Flush domain TLB(s) and wait for completion */ - amd_iommu_domain_flush_all(domain); -} - /***************************************************************************** * * The following functions belong to the exported interface of AMD IOMMU * @@ -2747,22 +2757,6 @@ const struct iommu_ops amd_iommu_ops = { } }; -int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, - int status, int tag) -{ - struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; - struct iommu_cmd cmd; - - dev_data = dev_iommu_priv_get(&pdev->dev); - iommu = get_amd_iommu_from_dev(&pdev->dev); - - build_complete_ppr(&cmd, dev_data->devid, pasid, status, - tag, dev_data->pri_tlp); - - return iommu_queue_command(iommu, &cmd); -} - #ifdef CONFIG_IRQ_REMAP /***************************************************************************** -- cgit v1.2.3 From c2a6af5e08c27350cb007db4cfeeb413017888b0 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 5 Feb 2024 11:56:14 +0000 Subject: iommu/amd: Remove unused GCR3 table parameters from struct protection_domain They have been moved to struct iommu_dev_data, and the driver has been ported to use them there.
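The slimmed-down structure, abridged from the amd_iommu_types.h hunk below (only the fields visible in that hunk's context are shown):

```c
struct protection_domain {
	struct amd_io_pgtable iop;
	spinlock_t lock;	/* mostly used to lock the page table */
	u16 id;			/* the domain id written to the device table */
	int nid;		/* Node ID */
	enum protection_domain_mode pd_mode;	/* Track page table type */
	bool dirty_tracking;	/* dirty tracking is enabled in the domain */
	unsigned dev_cnt;	/* devices assigned to this domain */
	/* ... */
};
```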
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-17-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 12 ------------ drivers/iommu/amd/iommu.c | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index aed3e88d23c7..43f0f1b13f2d 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -453,15 +453,6 @@ #define MAX_DOMAIN_ID 65536 -/* Protection domain flags */ -#define PD_DMA_OPS_MASK BIT(0) /* domain used for dma_ops */ -#define PD_DEFAULT_MASK BIT(1) /* domain is a default dma_ops - domain for an IOMMU */ -#define PD_PASSTHROUGH_MASK BIT(2) /* domain has no page - translation */ -#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */ -#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */ - /* Timeout stuff */ #define LOOP_TIMEOUT 100000 #define MMIO_STATUS_TIMEOUT 2000000 @@ -563,10 +554,7 @@ struct protection_domain { struct amd_io_pgtable iop; spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - int glx; /* Number of levels for GCR3 table */ int nid; /* Node ID */ - u64 *gcr3_tbl; /* Guest CR3 table */ - unsigned long flags; /* flags to find out type of domain */ enum protection_domain_mode pd_mode; /* Track page table type */ bool dirty_tracking; /* dirty tracking is enabled in the domain */ unsigned dev_cnt; /* devices assigned to this domain */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d3ea4de9033a..ab6af861a3f1 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -86,7 +86,7 @@ static void set_dte_entry(struct amd_iommu *iommu, static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom) { - return (pdom && (pdom->flags & PD_IOMMUV2_MASK)); + return (pdom && (pdom->pd_mode == PD_MODE_V2)); } static inline int get_acpihid_device_id(struct device *dev, -- cgit v1.2.3 From 87a6f1f22c9781b1b37847caf65e070bfa6dadb5 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 5 Feb 2024 11:56:15 +0000 Subject: iommu/amd: Introduce per-device domain ID to fix potential TLB aliasing issue With the v1 page table, the AMD IOMMU spec states that the hardware must use the domain ID to tag its internal translation caches. I/O devices with different v1 page tables must be given different domain IDs. I/O devices that share the same v1 page table __may__ be given the same domain ID. This domain ID management policy is currently implemented by the AMD IOMMU driver. In this case, only the domain ID is needed when issuing the INVALIDATE_IOMMU_PAGES command to invalidate the IOMMU translation cache (TLB). With the v2 page table, the hardware uses the domain ID and PASID as parameters to tag and issue the INVALIDATE_IOMMU_PAGES command. Since the GCR3 table is set up per-device, there is no guarantee that a PASID is unique across multiple devices; the same PASID on different devices could have different v2 page tables. In such a case, if multiple devices share the same domain ID, the IOMMU translation cache for these devices would be polluted due to TLB aliasing. Hence, avoid the TLB aliasing issue with the v2 page table by allocating a unique domain ID for each device, even when multiple devices share the same v1 page table, as the sketch below illustrates.
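A minimal sketch of the aliasing hazard (illustrative only; the tag layout below is a simplification of what the hardware tracks, not a structure from the driver):

```c
#include <linux/types.h>

/*
 * With v2 page tables the IOTLB is effectively tagged by
 * (DomainID, PASID, IOVA). If devices A and B share domain ID 5 and
 * both use PASID 1 -- but with different per-device GCR3 tables -- the
 * tag (5, 1, iova) names two different translations, so an entry
 * cached for device A can wrongly satisfy a lookup from device B.
 * Giving each device its own domain ID keeps the tags distinct.
 */
struct iotlb_tag {
	u16 domid;	/* shared today; per-device after this patch */
	u32 pasid;
	u64 iova;
};
```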
Please note that this fix will result in multiple INVALIDATE_IOMMU_PAGES commands (one per domain ID) when unmapping a translation. The domain ID can be shared until a device starts using PASID. We will enhance this code later to allocate a per-device domain ID only when it is needed. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240205115615.6053-18-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/iommu.c | 80 +++++++++++++++++++++++++++-------- 2 files changed, 64 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 43f0f1b13f2d..d1fed5fc219b 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -528,6 +528,7 @@ struct gcr3_tbl_info { u64 *gcr3_tbl; /* Guest CR3 table */ int glx; /* Number of levels for GCR3 table */ u32 pasid_cnt; /* Track attached PASIDs */ + u16 domid; /* Per device domain ID */ }; struct amd_io_pgtable { diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index ab6af861a3f1..f688443c2764 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1443,27 +1443,37 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) return ret; } -/* - * TLB invalidation function which is called from the mapping functions. - * It invalidates a single PTE if the range to flush is within a single - * page. Otherwise it flushes the whole TLB of the IOMMU. - */ -static void __domain_flush_pages(struct protection_domain *domain, +static int domain_flush_pages_v2(struct protection_domain *pdom, u64 address, size_t size) { struct iommu_dev_data *dev_data; struct iommu_cmd cmd; - int ret = 0, i; - ioasid_t pasid = IOMMU_NO_PASID; - bool gn = false; + int ret = 0; - if (pdom_is_v2_pgtbl_mode(domain)) - gn = true; + list_for_each_entry(dev_data, &pdom->dev_list, list) { + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); + u16 domid = dev_data->gcr3_info.domid; + + build_inv_iommu_pages(&cmd, address, size, + domid, IOMMU_NO_PASID, true); + + ret |= iommu_queue_command(iommu, &cmd); + } - build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, gn); + return ret; +} + +static int domain_flush_pages_v1(struct protection_domain *pdom, + u64 address, size_t size) +{ + struct iommu_cmd cmd; + int ret = 0, i; + + build_inv_iommu_pages(&cmd, address, size, + pdom->id, IOMMU_NO_PASID, false); for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { - if (!domain->dev_iommu[i]) + if (!pdom->dev_iommu[i]) continue; /* @@ -1473,6 +1483,28 @@ static void __domain_flush_pages(struct protection_domain *domain, ret |= iommu_queue_command(amd_iommus[i], &cmd); } + return ret; +} + +/* + * TLB invalidation function which is called from the mapping functions. + * It flushes range of PTEs of the domain.
+ */ +static void __domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size) +{ + struct iommu_dev_data *dev_data; + int ret = 0; + ioasid_t pasid = IOMMU_NO_PASID; + bool gn = false; + + if (pdom_is_v2_pgtbl_mode(domain)) { + gn = true; + ret = domain_flush_pages_v2(domain, address, size); + } else { + ret = domain_flush_pages_v1(domain, address, size); + } + list_for_each_entry(dev_data, &domain->dev_list, list) { if (!dev_data->ats_enabled) @@ -1548,7 +1580,7 @@ void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); build_inv_iommu_pages(&cmd, address, size, - dev_data->domain->id, pasid, true); + dev_data->gcr3_info.domid, pasid, true); iommu_queue_command(iommu, &cmd); if (dev_data->ats_enabled) @@ -1723,6 +1755,9 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info) gcr3_info->glx = 0; + /* Free per device domain ID */ + domain_id_free(gcr3_info->domid); + free_page((unsigned long)gcr3_info->gcr3_tbl); gcr3_info->gcr3_tbl = NULL; } @@ -1755,9 +1790,14 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, if (gcr3_info->gcr3_tbl) return -EBUSY; + /* Allocate per device domain ID */ + gcr3_info->domid = domain_id_alloc(); + gcr3_info->gcr3_tbl = alloc_pgtable_page(nid, GFP_KERNEL); - if (gcr3_info->gcr3_tbl == NULL) + if (gcr3_info->gcr3_tbl == NULL) { + domain_id_free(gcr3_info->domid); return -ENOMEM; + } gcr3_info->glx = levels; @@ -1856,10 +1896,16 @@ static void set_dte_entry(struct amd_iommu *iommu, u64 flags = 0; u32 old_domid; u16 devid = dev_data->devid; + u16 domid; struct protection_domain *domain = dev_data->domain; struct dev_table_entry *dev_table = get_dev_table(iommu); struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + if (gcr3_info && gcr3_info->gcr3_tbl) + domid = dev_data->gcr3_info.domid; + else + domid = domain->id; + if (domain->iop.mode != PAGE_MODE_NONE) pte_root = iommu_virt_to_phys(domain->iop.root); @@ -1872,7 +1918,7 @@ static void set_dte_entry(struct amd_iommu *iommu, * When SNP is enabled, Only set TV bit when IOMMU * page translation is in use. */ - if (!amd_iommu_snp_en || (domain->id != 0)) + if (!amd_iommu_snp_en || (domid != 0)) pte_root |= DTE_FLAG_TV; flags = dev_table[devid].data[1]; @@ -1922,7 +1968,7 @@ static void set_dte_entry(struct amd_iommu *iommu, } flags &= ~DEV_DOMID_MASK; - flags |= domain->id; + flags |= domid; old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK; dev_table[devid].data[1] = flags; -- cgit v1.2.3 From 0feda94c868d396fac3b3cb14089d2d989a07c72 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 22 Jan 2024 17:34:00 -0600 Subject: iommu/amd: Mark interrupt as managed On many systems that have an AMD IOMMU the following sequence of warnings is observed during bootup. ``` pci 0000:00:00.2 can't derive routing for PCI INT A pci 0000:00:00.2: PCI INT A: not connected ``` This series of events happens because of the IOMMU initialization sequence order and the lack of _PRT entries for the IOMMU. During initialization the IOMMU driver first enables the PCI device using pci_enable_device(). This will call acpi_pci_irq_enable() which will check if the interrupt is declared in a PCI routing table (_PRT) entry. According to the PCI spec [1] these routing entries are only required under PCI root bridges: The _PRT object is required under all PCI root bridges The IOMMU is directly connected to the root complex, so there is no parent bridge to look for a _PRT entry. 
The first warning is emitted since no entry could be found in the hierarchy. The second warning is then emitted because the interrupt hasn't yet been configured to any value. The pin was configured in pci_read_irq() but the byte in PCI_INTERRUPT_LINE returned 0xff, which means "Unknown". After that sequence of events, pci_enable_msi() is called and this will allocate an interrupt. That is, both of these warnings are totally harmless because the IOMMU uses MSI for interrupts. To avoid even trying to probe for a _PRT entry, mark the IOMMU as IRQ managed. This avoids both warnings. Link: https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/06_Device_Configuration/Device_Configuration.html?highlight=_prt#prt-pci-routing-table [1] Signed-off-by: Mario Limonciello Fixes: cffe0a2b5a34 ("x86, irq: Keep balance of IOAPIC pin reference count") Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20240122233400.1802-1-mario.limonciello@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index cb8486c9f8f4..db8d563af444 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2068,6 +2068,9 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) /* Prevent binding other PCI device drivers to IOMMU devices */ iommu->dev->match_driver = false; + /* ACPI _PRT won't have an IRQ for IOMMU */ + iommu->dev->irq_managed = 1; + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); -- cgit v1.2.3 From b07cd3b746cfe9a4524a5a7337fcf4ccb933509b Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Sun, 11 Feb 2024 19:22:50 +0100 Subject: iommu/mtk_iommu: Use devm_kcalloc() instead of devm_kzalloc() This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1]. Here the multiplication is obviously safe because MTK_PROTECT_PA_ALIGN is defined as a literal value of 256 or 128. For the "mtk_iommu.c" file: 256 For the "mtk_iommu_v1.c" file: 128 However, using devm_kcalloc() is more appropriate [2] and improves readability. This patch has no effect on runtime behavior. Link: https://github.com/KSPP/linux/issues/162 [1] Link: https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [2] Signed-off-by: Erick Archer Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240211182250.12656-1-erick.archer@gmx.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 2 +- drivers/iommu/mtk_iommu_v1.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7abe9e85a570..9aae6eb604b1 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1264,7 +1264,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->plat_data = of_device_get_match_data(dev); /* Protect memory.
HW will access here while translation fault.*/ - protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, GFP_KERNEL); + protect = devm_kcalloc(dev, 2, MTK_PROTECT_PA_ALIGN, GFP_KERNEL); if (!protect) return -ENOMEM; data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 25b41222abae..45cd845d153f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -621,8 +621,8 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev) data->dev = dev; /* Protect memory. HW will access here while translation fault.*/ - protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, - GFP_KERNEL | GFP_DMA); + protect = devm_kcalloc(dev, 2, MTK_PROTECT_PA_ALIGN, + GFP_KERNEL | GFP_DMA); if (!protect) return -ENOMEM; data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN); -- cgit v1.2.3 From 66014df73b302d326b995178141a150b9a3a52b7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:13 +0800 Subject: iommu/arm-smmu-v3: Remove unrecoverable faults reporting No device driver registers a fault handler to handle the reported unrecoverable faults. Remove this reporting to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Longfang Liu Link: https://lore.kernel.org/r/20240212012227.119381-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 46 ++++++++--------------------- 1 file changed, 13 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 0ffb1cf17e0b..4cf1054ed321 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1461,7 +1461,6 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) { int ret; - u32 reason; u32 perm = 0; struct arm_smmu_master *master; bool ssid_valid = evt[0] & EVTQ_0_SSV; @@ -1471,16 +1470,9 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) switch (FIELD_GET(EVTQ_0_ID, evt[0])) { case EVT_ID_TRANSLATION_FAULT: - reason = IOMMU_FAULT_REASON_PTE_FETCH; - break; case EVT_ID_ADDR_SIZE_FAULT: - reason = IOMMU_FAULT_REASON_OOR_ADDRESS; - break; case EVT_ID_ACCESS_FAULT: - reason = IOMMU_FAULT_REASON_ACCESS; - break; case EVT_ID_PERMISSION_FAULT: - reason = IOMMU_FAULT_REASON_PERMISSION; break; default: return -EOPNOTSUPP; @@ -1490,6 +1482,9 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) if (evt[1] & EVTQ_1_S2) return -EFAULT; + if (!(evt[1] & EVTQ_1_STALL)) + return -EOPNOTSUPP; + if (evt[1] & EVTQ_1_RnW) perm |= IOMMU_FAULT_PERM_READ; else @@ -1501,32 +1496,17 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) if (evt[1] & EVTQ_1_PnU) perm |= IOMMU_FAULT_PERM_PRIV; - if (evt[1] & EVTQ_1_STALL) { - flt->type = IOMMU_FAULT_PAGE_REQ; - flt->prm = (struct iommu_fault_page_request) { - .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, - .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]), - .perm = perm, - .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), - }; - - if (ssid_valid) { - flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); - } - } else { - flt->type = IOMMU_FAULT_DMA_UNRECOV; - flt->event = (struct iommu_fault_unrecoverable) { - .reason = reason, - .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID, - .perm = perm,
- .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), - }; + flt->type = IOMMU_FAULT_PAGE_REQ; + flt->prm = (struct iommu_fault_page_request) { + .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, + .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]), + .perm = perm, + .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), + }; - if (ssid_valid) { - flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID; - flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); - } + if (ssid_valid) { + flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); } mutex_lock(&smmu->streams_mutex); -- cgit v1.2.3 From 8b32a3bea2629049c484f595af7aad797e24453e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:15 +0800 Subject: iommu: Cleanup iopf data structure definitions struct iommu_fault_page_request and struct iommu_page_response are not part of uAPI anymore. Convert them to data structures for kAPI. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Tested-by: Yan Zhao Tested-by: Longfang Liu Link: https://lore.kernel.org/r/20240212012227.119381-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 1 - drivers/iommu/iommu.c | 4 ---- include/linux/iommu.h | 27 +++++++++++---------------- 3 files changed, 11 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index e5b8b9110c13..24b5545352ae 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -56,7 +56,6 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, enum iommu_page_response_code status) { struct iommu_page_response resp = { - .version = IOMMU_PAGE_RESP_VERSION_1, .pasid = iopf->fault.prm.pasid, .grpid = iopf->fault.prm.grpid, .code = status, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d14413916f93..6a4d9bc0641d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1494,10 +1494,6 @@ int iommu_page_response(struct device *dev, if (!param || !param->fault_param) return -EINVAL; - if (msg->version != IOMMU_PAGE_RESP_VERSION_1 || - msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID) - return -EINVAL; - /* Only send response if there is a fault report pending */ mutex_lock(¶m->fault_param->lock); if (list_empty(¶m->fault_param->faults)) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c960c4fae3bc..829bcb5a8e23 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -71,12 +71,12 @@ struct iommu_fault_page_request { #define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) #define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) #define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3) - __u32 flags; - __u32 pasid; - __u32 grpid; - __u32 perm; - __u64 addr; - __u64 private_data[2]; + u32 flags; + u32 pasid; + u32 grpid; + u32 perm; + u64 addr; + u64 private_data[2]; }; /** @@ -85,7 +85,7 @@ struct iommu_fault_page_request { * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ */ struct iommu_fault { - __u32 type; + u32 type; struct iommu_fault_page_request prm; }; @@ -106,8 +106,6 @@ enum iommu_page_response_code { /** * struct iommu_page_response - Generic page response information - * @argsz: User filled size of this data - * @version: API version of this structure * @flags: encodes whether the corresponding fields are valid * (IOMMU_FAULT_PAGE_RESPONSE_* values) * @pasid: Process Address Space ID @@ -115,14 +113,11 @@ enum iommu_page_response_code { * @code: response code from &enum 
iommu_page_response_code */ struct iommu_page_response { - __u32 argsz; -#define IOMMU_PAGE_RESP_VERSION_1 1 - __u32 version; #define IOMMU_PAGE_RESP_PASID_VALID (1 << 0) - __u32 flags; - __u32 pasid; - __u32 grpid; - __u32 code; + u32 flags; + u32 pasid; + u32 grpid; + u32 code; }; -- cgit v1.2.3 From 15fc60cdd2d236a73b32c99d21fc0f7b7ce6cbbb Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:16 +0800 Subject: iommu: Merge iopf_device_param into iommu_fault_param The struct dev_iommu contains two pointers, fault_param and iopf_param. The fault_param pointer points to a data structure that is used to store pending faults that are awaiting responses. The iopf_param pointer points to a data structure that is used to store partial faults that are part of a Page Request Group. The fault_param and iopf_param pointers are essentially duplicate. This causes memory waste. Merge the iopf_device_param pointer into the iommu_fault_param pointer to consolidate the code and save memory. The consolidated pointer would be allocated on demand when the device driver enables the iopf on device, and would be freed after iopf is disabled. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Tested-by: Yan Zhao Tested-by: Longfang Liu Link: https://lore.kernel.org/r/20240212012227.119381-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 110 +++++++++++++++++++++------------------------ drivers/iommu/iommu.c | 34 +++----------- include/linux/iommu.h | 18 ++++++-- 3 files changed, 72 insertions(+), 90 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 24b5545352ae..f948303b2a91 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -25,21 +25,6 @@ struct iopf_queue { struct mutex lock; }; -/** - * struct iopf_device_param - IO Page Fault data attached to a device - * @dev: the device that owns this param - * @queue: IOPF queue - * @queue_list: index into queue->devices - * @partial: faults that are part of a Page Request Group for which the last - * request hasn't been submitted yet. - */ -struct iopf_device_param { - struct device *dev; - struct iopf_queue *queue; - struct list_head queue_list; - struct list_head partial; -}; - struct iopf_fault { struct iommu_fault fault; struct list_head list; @@ -144,7 +129,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) int ret; struct iopf_group *group; struct iopf_fault *iopf, *next; - struct iopf_device_param *iopf_param; + struct iommu_fault_param *iopf_param; struct device *dev = cookie; struct dev_iommu *param = dev->iommu; @@ -159,7 +144,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) * As long as we're holding param->lock, the queue can't be unlinked * from the device and therefore cannot disappear. 
*/ - iopf_param = param->iopf_param; + iopf_param = param->fault_param; if (!iopf_param) return -ENODEV; @@ -229,14 +214,14 @@ EXPORT_SYMBOL_GPL(iommu_queue_iopf); int iopf_queue_flush_dev(struct device *dev) { int ret = 0; - struct iopf_device_param *iopf_param; + struct iommu_fault_param *iopf_param; struct dev_iommu *param = dev->iommu; if (!param) return -ENODEV; mutex_lock(¶m->lock); - iopf_param = param->iopf_param; + iopf_param = param->fault_param; if (iopf_param) flush_workqueue(iopf_param->queue->wq); else @@ -260,7 +245,7 @@ EXPORT_SYMBOL_GPL(iopf_queue_flush_dev); int iopf_queue_discard_partial(struct iopf_queue *queue) { struct iopf_fault *iopf, *next; - struct iopf_device_param *iopf_param; + struct iommu_fault_param *iopf_param; if (!queue) return -EINVAL; @@ -287,34 +272,36 @@ EXPORT_SYMBOL_GPL(iopf_queue_discard_partial); */ int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) { - int ret = -EBUSY; - struct iopf_device_param *iopf_param; + int ret = 0; struct dev_iommu *param = dev->iommu; - - if (!param) - return -ENODEV; - - iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL); - if (!iopf_param) - return -ENOMEM; - - INIT_LIST_HEAD(&iopf_param->partial); - iopf_param->queue = queue; - iopf_param->dev = dev; + struct iommu_fault_param *fault_param; mutex_lock(&queue->lock); mutex_lock(¶m->lock); - if (!param->iopf_param) { - list_add(&iopf_param->queue_list, &queue->devices); - param->iopf_param = iopf_param; - ret = 0; + if (param->fault_param) { + ret = -EBUSY; + goto done_unlock; } + + fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL); + if (!fault_param) { + ret = -ENOMEM; + goto done_unlock; + } + + mutex_init(&fault_param->lock); + INIT_LIST_HEAD(&fault_param->faults); + INIT_LIST_HEAD(&fault_param->partial); + fault_param->dev = dev; + list_add(&fault_param->queue_list, &queue->devices); + fault_param->queue = queue; + + param->fault_param = fault_param; + +done_unlock: mutex_unlock(¶m->lock); mutex_unlock(&queue->lock); - if (ret) - kfree(iopf_param); - return ret; } EXPORT_SYMBOL_GPL(iopf_queue_add_device); @@ -330,34 +317,41 @@ EXPORT_SYMBOL_GPL(iopf_queue_add_device); */ int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) { - int ret = -EINVAL; + int ret = 0; struct iopf_fault *iopf, *next; - struct iopf_device_param *iopf_param; struct dev_iommu *param = dev->iommu; - - if (!param || !queue) - return -EINVAL; + struct iommu_fault_param *fault_param = param->fault_param; mutex_lock(&queue->lock); mutex_lock(¶m->lock); - iopf_param = param->iopf_param; - if (iopf_param && iopf_param->queue == queue) { - list_del(&iopf_param->queue_list); - param->iopf_param = NULL; - ret = 0; + if (!fault_param) { + ret = -ENODEV; + goto unlock; } - mutex_unlock(¶m->lock); - mutex_unlock(&queue->lock); - if (ret) - return ret; + + if (fault_param->queue != queue) { + ret = -EINVAL; + goto unlock; + } + + if (!list_empty(&fault_param->faults)) { + ret = -EBUSY; + goto unlock; + } + + list_del(&fault_param->queue_list); /* Just in case some faults are still stuck */ - list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) + list_for_each_entry_safe(iopf, next, &fault_param->partial, list) kfree(iopf); - kfree(iopf_param); + param->fault_param = NULL; + kfree(fault_param); +unlock: + mutex_unlock(¶m->lock); + mutex_unlock(&queue->lock); - return 0; + return ret; } EXPORT_SYMBOL_GPL(iopf_queue_remove_device); @@ -403,7 +397,7 @@ EXPORT_SYMBOL_GPL(iopf_queue_alloc); */ void iopf_queue_free(struct iopf_queue *queue) { - 
struct iopf_device_param *iopf_param, *next; + struct iommu_fault_param *iopf_param, *next; if (!queue) return; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6a4d9bc0641d..9575cb75732a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1355,27 +1355,18 @@ int iommu_register_device_fault_handler(struct device *dev, struct dev_iommu *param = dev->iommu; int ret = 0; - if (!param) + if (!param || !param->fault_param) return -EINVAL; mutex_lock(¶m->lock); /* Only allow one fault handler registered for each device */ - if (param->fault_param) { + if (param->fault_param->handler) { ret = -EBUSY; goto done_unlock; } - get_device(dev); - param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL); - if (!param->fault_param) { - put_device(dev); - ret = -ENOMEM; - goto done_unlock; - } param->fault_param->handler = handler; param->fault_param->data = data; - mutex_init(¶m->fault_param->lock); - INIT_LIST_HEAD(¶m->fault_param->faults); done_unlock: mutex_unlock(¶m->lock); @@ -1396,29 +1387,16 @@ EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); int iommu_unregister_device_fault_handler(struct device *dev) { struct dev_iommu *param = dev->iommu; - int ret = 0; - if (!param) + if (!param || !param->fault_param) return -EINVAL; mutex_lock(¶m->lock); - - if (!param->fault_param) - goto unlock; - - /* we cannot unregister handler if there are pending faults */ - if (!list_empty(¶m->fault_param->faults)) { - ret = -EBUSY; - goto unlock; - } - - kfree(param->fault_param); - param->fault_param = NULL; - put_device(dev); -unlock: + param->fault_param->handler = NULL; + param->fault_param->data = NULL; mutex_unlock(¶m->lock); - return ret; + return 0; } EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 829bcb5a8e23..bbb7c2ad5184 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -42,6 +42,7 @@ struct notifier_block; struct iommu_sva; struct iommu_fault_event; struct iommu_dma_cookie; +struct iopf_queue; #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ @@ -672,21 +673,31 @@ struct iommu_fault_event { * struct iommu_fault_param - per-device IOMMU fault data * @handler: Callback function to handle IOMMU faults at device level * @data: handler private data - * @faults: holds the pending faults which needs response * @lock: protect pending faults list + * @dev: the device that owns this param + * @queue: IOPF queue + * @queue_list: index into queue->devices + * @partial: faults that are part of a Page Request Group for which the last + * request hasn't been submitted yet. 
+ * @faults: holds the pending faults which need response */ struct iommu_fault_param { iommu_dev_fault_handler_t handler; void *data; - struct list_head faults; struct mutex lock; + + struct device *dev; + struct iopf_queue *queue; + struct list_head queue_list; + + struct list_head partial; + struct list_head faults; }; /** * struct dev_iommu - Collection of per-device IOMMU data * * @fault_param: IOMMU detected device fault reporting data - * @iopf_param: I/O Page Fault queue and data * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data @@ -702,7 +713,6 @@ struct iommu_fault_param { struct dev_iommu { struct mutex lock; struct iommu_fault_param *fault_param; - struct iopf_device_param *iopf_param; struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; -- cgit v1.2.3 From 1ff25d798e52943d037accf15c675a6845d9776f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:17 +0800 Subject: iommu: Remove iommu_[un]register_device_fault_handler() The individual iommu driver reports the iommu page faults by calling iommu_report_device_fault(), where a pre-registered device fault handler is called to route the fault to another fault handler installed on the corresponding iommu domain. The pre-registered device fault handler is static and won't be dynamic as the fault handler is eventually per iommu domain. Replace calling device fault handler with iommu_queue_iopf(). After this replacement, the registering and unregistering fault handler interfaces are not needed anywhere. Remove the interfaces and the related data structures to avoid dead code. Convert cookie parameter of iommu_queue_iopf() into a device pointer that is really passed. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Yan Zhao Tested-by: Longfang Liu Link: https://lore.kernel.org/r/20240212012227.119381-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 13 +---- drivers/iommu/intel/iommu.c | 24 +++----- drivers/iommu/io-pgfault.c | 6 +- drivers/iommu/iommu-sva.h | 4 +- drivers/iommu/iommu.c | 76 +------------------------ include/linux/iommu.h | 23 -------- 6 files changed, 13 insertions(+), 133 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 05722121f00e..ab2b0a5e4369 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -487,7 +487,6 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master) static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master) { - int ret; struct device *dev = master->dev; /* @@ -500,16 +499,7 @@ static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master) if (!master->iopf_enabled) return -EINVAL; - ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev); - if (ret) - return ret; - - ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); - if (ret) { - iopf_queue_remove_device(master->smmu->evtq.iopf, dev); - return ret; - } - return 0; + return iopf_queue_add_device(master->smmu->evtq.iopf, dev); } static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master) @@ -519,7 +509,6 @@ static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master) if (!master->iopf_enabled) return; - iommu_unregister_device_fault_handler(dev); 
iopf_queue_remove_device(master->smmu->evtq.iopf, dev); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6fb5f6fceea1..df6ceefc09ee 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4427,23 +4427,15 @@ static int intel_iommu_enable_iopf(struct device *dev) if (ret) return ret; - ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); - if (ret) - goto iopf_remove_device; - ret = pci_enable_pri(pdev, PRQ_DEPTH); - if (ret) - goto iopf_unregister_handler; + if (ret) { + iopf_queue_remove_device(iommu->iopf_queue, dev); + return ret; + } + info->pri_enabled = 1; return 0; - -iopf_unregister_handler: - iommu_unregister_device_fault_handler(dev); -iopf_remove_device: - iopf_queue_remove_device(iommu->iopf_queue, dev); - - return ret; } static int intel_iommu_disable_iopf(struct device *dev) @@ -4466,11 +4458,9 @@ static int intel_iommu_disable_iopf(struct device *dev) info->pri_enabled = 0; /* - * With PRI disabled and outstanding PRQs drained, unregistering - * fault handler and removing device from iopf queue should never - * fail. + * With PRI disabled and outstanding PRQs drained, removing device + * from iopf queue should never fail. */ - WARN_ON(iommu_unregister_device_fault_handler(dev)); WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev)); return 0; diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index f948303b2a91..4fda01de5589 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -87,7 +87,7 @@ static void iopf_handler(struct work_struct *work) /** * iommu_queue_iopf - IO Page Fault handler * @fault: fault event - * @cookie: struct device, passed to iommu_register_device_fault_handler. + * @dev: struct device. * * Add a fault to the device workqueue, to be handled by mm. * @@ -124,14 +124,12 @@ static void iopf_handler(struct work_struct *work) * * Return: 0 on success and <0 on error. 
*/ -int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) +int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev) { int ret; struct iopf_group *group; struct iopf_fault *iopf, *next; struct iommu_fault_param *iopf_param; - - struct device *dev = cookie; struct dev_iommu *param = dev->iommu; lockdep_assert_held(¶m->lock); diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h index 54946b5a7caf..de7819c796ce 100644 --- a/drivers/iommu/iommu-sva.h +++ b/drivers/iommu/iommu-sva.h @@ -13,7 +13,7 @@ struct iommu_fault; struct iopf_queue; #ifdef CONFIG_IOMMU_SVA -int iommu_queue_iopf(struct iommu_fault *fault, void *cookie); +int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev); int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); int iopf_queue_remove_device(struct iopf_queue *queue, @@ -26,7 +26,7 @@ enum iommu_page_response_code iommu_sva_handle_iopf(struct iommu_fault *fault, void *data); #else /* CONFIG_IOMMU_SVA */ -static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) +static inline int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev) { return -ENODEV; } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9575cb75732a..5cb81dbe8d9d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1330,76 +1330,6 @@ void iommu_group_put(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_put); -/** - * iommu_register_device_fault_handler() - Register a device fault handler - * @dev: the device - * @handler: the fault handler - * @data: private data passed as argument to the handler - * - * When an IOMMU fault event is received, this handler gets called with the - * fault event and data as argument. The handler should return 0 on success. If - * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also - * complete the fault by calling iommu_page_response() with one of the following - * response code: - * - IOMMU_PAGE_RESP_SUCCESS: retry the translation - * - IOMMU_PAGE_RESP_INVALID: terminate the fault - * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting - * page faults if possible. - * - * Return 0 if the fault handler was installed successfully, or an error. - */ -int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data) -{ - struct dev_iommu *param = dev->iommu; - int ret = 0; - - if (!param || !param->fault_param) - return -EINVAL; - - mutex_lock(¶m->lock); - /* Only allow one fault handler registered for each device */ - if (param->fault_param->handler) { - ret = -EBUSY; - goto done_unlock; - } - - param->fault_param->handler = handler; - param->fault_param->data = data; - -done_unlock: - mutex_unlock(¶m->lock); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); - -/** - * iommu_unregister_device_fault_handler() - Unregister the device fault handler - * @dev: the device - * - * Remove the device fault handler installed with - * iommu_register_device_fault_handler(). - * - * Return 0 on success, or an error. 
- */ -int iommu_unregister_device_fault_handler(struct device *dev) -{ - struct dev_iommu *param = dev->iommu; - - if (!param || !param->fault_param) - return -EINVAL; - - mutex_lock(¶m->lock); - param->fault_param->handler = NULL; - param->fault_param->data = NULL; - mutex_unlock(¶m->lock); - - return 0; -} -EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); - /** * iommu_report_device_fault() - Report fault event to device driver * @dev: the device @@ -1424,10 +1354,6 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) /* we only report device fault if there is a handler registered */ mutex_lock(¶m->lock); fparam = param->fault_param; - if (!fparam || !fparam->handler) { - ret = -EINVAL; - goto done_unlock; - } if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { @@ -1442,7 +1368,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) mutex_unlock(&fparam->lock); } - ret = fparam->handler(&evt->fault, fparam->data); + ret = iommu_queue_iopf(&evt->fault, dev); if (ret && evt_pending) { mutex_lock(&fparam->lock); list_del(&evt_pending->list); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bbb7c2ad5184..70176c1c5573 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -128,7 +128,6 @@ struct iommu_page_response { typedef int (*iommu_fault_handler_t)(struct iommu_domain *, struct device *, unsigned long, int, void *); -typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *); struct iommu_domain_geometry { dma_addr_t aperture_start; /* First address that can be mapped */ @@ -671,8 +670,6 @@ struct iommu_fault_event { /** * struct iommu_fault_param - per-device IOMMU fault data - * @handler: Callback function to handle IOMMU faults at device level - * @data: handler private data * @lock: protect pending faults list * @dev: the device that owns this param * @queue: IOPF queue @@ -682,8 +679,6 @@ struct iommu_fault_event { * @faults: holds the pending faults which need response */ struct iommu_fault_param { - iommu_dev_fault_handler_t handler; - void *data; struct mutex lock; struct device *dev; @@ -806,11 +801,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data, extern struct iommu_group *iommu_group_get(struct device *dev); extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); -extern int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data); - -extern int iommu_unregister_device_fault_handler(struct device *dev); extern int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt); @@ -1222,19 +1212,6 @@ static inline void iommu_group_put(struct iommu_group *group) { } -static inline -int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data) -{ - return -ENODEV; -} - -static inline int iommu_unregister_device_fault_handler(struct device *dev) -{ - return 0; -} - static inline int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) { -- cgit v1.2.3 From 3f02a9dc70007c0e6299fda9c4f7a1e2277ec3d2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:18 +0800 Subject: iommu: Merge iommu_fault_event and iopf_fault The iommu_fault_event and iopf_fault data structures store the same information about an iopf fault. They are also used in the same way. 
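For reference, the merged structure is simply the fault descriptor plus a list node, as the include/linux/iommu.h hunk further below shows:

```c
struct iopf_fault {
	struct iommu_fault fault;
	/* node for pending lists */
	struct list_head list;
};
```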
Merge these two data structures into a single one to make the code more concise and easier to maintain. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Tested-by: Yan Zhao Tested-by: Longfang Liu Link: https://lore.kernel.org/r/20240212012227.119381-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 ++-- drivers/iommu/intel/iommu.h | 2 +- drivers/iommu/intel/svm.c | 5 ++--- drivers/iommu/io-pgfault.c | 5 ----- drivers/iommu/iommu.c | 8 ++++---- include/linux/iommu.h | 27 ++++++++------------------- 6 files changed, 17 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4cf1054ed321..ab4f04c7f932 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -922,7 +922,7 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, } static int arm_smmu_page_response(struct device *dev, - struct iommu_fault_event *unused, + struct iopf_fault *unused, struct iommu_page_response *resp) { struct arm_smmu_cmdq_ent cmd = {0}; @@ -1465,7 +1465,7 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) struct arm_smmu_master *master; bool ssid_valid = evt[0] & EVTQ_0_SSV; u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]); - struct iommu_fault_event fault_evt = { }; + struct iopf_fault fault_evt = { }; struct iommu_fault *flt = &fault_evt.fault; switch (FIELD_GET(EVTQ_0_ID, evt[0])) { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d02f916d8e59..696d95293a69 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1079,7 +1079,7 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, void intel_svm_check(struct intel_iommu *iommu); int intel_svm_enable_prq(struct intel_iommu *iommu); int intel_svm_finish_prq(struct intel_iommu *iommu); -int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, +int intel_svm_page_response(struct device *dev, struct iopf_fault *evt, struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 40edd282903f..9751f037e188 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -565,13 +565,12 @@ static int prq_to_iommu_prot(struct page_req_dsc *req) static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, struct page_req_dsc *desc) { - struct iommu_fault_event event; + struct iopf_fault event = { }; if (!dev || !dev_is_pci(dev)) return -ENODEV; /* Fill in event data for device specific processing */ - memset(&event, 0, sizeof(struct iommu_fault_event)); event.fault.type = IOMMU_FAULT_PAGE_REQ; event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT; event.fault.prm.pasid = desc->pasid; @@ -743,7 +742,7 @@ prq_advance: } int intel_svm_page_response(struct device *dev, - struct iommu_fault_event *evt, + struct iopf_fault *evt, struct iommu_page_response *msg) { struct device_domain_info *info = dev_iommu_priv_get(dev); diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 4fda01de5589..10d48eb72608 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -25,11 +25,6 @@ struct iopf_queue { struct mutex lock; }; -struct iopf_fault { - struct iommu_fault fault; - 
struct list_head list; -}; - struct iopf_group { struct iopf_fault last_fault; struct list_head faults; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5cb81dbe8d9d..15d7b2914780 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1341,10 +1341,10 @@ EXPORT_SYMBOL_GPL(iommu_group_put); * * Return 0 on success, or an error. */ -int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { struct dev_iommu *param = dev->iommu; - struct iommu_fault_event *evt_pending = NULL; + struct iopf_fault *evt_pending = NULL; struct iommu_fault_param *fparam; int ret = 0; @@ -1357,7 +1357,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { - evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), + evt_pending = kmemdup(evt, sizeof(struct iopf_fault), GFP_KERNEL); if (!evt_pending) { ret = -ENOMEM; @@ -1386,7 +1386,7 @@ int iommu_page_response(struct device *dev, { bool needs_pasid; int ret = -EINVAL; - struct iommu_fault_event *evt; + struct iopf_fault *evt; struct iommu_fault_page_request *prm; struct dev_iommu *param = dev->iommu; const struct iommu_ops *ops = dev_iommu_ops(dev); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 70176c1c5573..2320548a90f8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -40,7 +40,6 @@ struct iommu_domain_ops; struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; -struct iommu_fault_event; struct iommu_dma_cookie; struct iopf_queue; @@ -121,6 +120,11 @@ struct iommu_page_response { u32 code; }; +struct iopf_fault { + struct iommu_fault fault; + /* node for pending lists */ + struct list_head list; +}; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -553,7 +557,7 @@ struct iommu_ops { int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); int (*page_response)(struct device *dev, - struct iommu_fault_event *evt, + struct iopf_fault *evt, struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); @@ -654,20 +658,6 @@ struct iommu_device { u32 max_pasids; }; -/** - * struct iommu_fault_event - Generic fault event - * - * Can represent recoverable faults such as a page requests or - * unrecoverable faults such as DMA or IRQ remapping faults. 
- * - * @fault: fault descriptor - * @list: pending fault event list, used for tracking responses - */ -struct iommu_fault_event { - struct iommu_fault fault; - struct list_head list; -}; - /** * struct iommu_fault_param - per-device IOMMU fault data * @lock: protect pending faults list @@ -802,8 +792,7 @@ extern struct iommu_group *iommu_group_get(struct device *dev); extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); -extern int iommu_report_device_fault(struct device *dev, - struct iommu_fault_event *evt); +extern int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); extern int iommu_page_response(struct device *dev, struct iommu_page_response *msg); @@ -1213,7 +1202,7 @@ static inline void iommu_group_put(struct iommu_group *group) } static inline -int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { return -ENODEV; } -- cgit v1.2.3 From 24b5d268b5ab95c12b5ae58a054d04bfa442f58f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:19 +0800 Subject: iommu: Prepare for separating SVA and IOPF Move iopf_group data structure to iommu.h to make it a minimal set of faults that a domain's page fault handler should handle. Add a new function, iopf_free_group(), to free a fault group after all faults in the group are handled. This function will be made global so that it can be called from other files, such as iommu-sva.c. Move iopf_queue data structure to iommu.h to allow the workqueue to be scheduled out of this file. This will simplify the sequential patches. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Tested-by: Yan Zhao Tested-by: Longfang Liu Link: https://lore.kernel.org/r/20240212012227.119381-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 39 ++++++++++++++------------------------- include/linux/iommu.h | 20 +++++++++++++++++++- 2 files changed, 33 insertions(+), 26 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 10d48eb72608..c7e6bbed5c05 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -13,24 +13,17 @@ #include "iommu-sva.h" -/** - * struct iopf_queue - IO Page Fault queue - * @wq: the fault workqueue - * @devices: devices attached to this queue - * @lock: protects the device list - */ -struct iopf_queue { - struct workqueue_struct *wq; - struct list_head devices; - struct mutex lock; -}; - -struct iopf_group { - struct iopf_fault last_fault; - struct list_head faults; - struct work_struct work; - struct device *dev; -}; +static void iopf_free_group(struct iopf_group *group) +{ + struct iopf_fault *iopf, *next; + + list_for_each_entry_safe(iopf, next, &group->faults, list) { + if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) + kfree(iopf); + } + + kfree(group); +} static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, enum iommu_page_response_code status) @@ -50,9 +43,9 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, static void iopf_handler(struct work_struct *work) { + struct iopf_fault *iopf; struct iopf_group *group; struct iommu_domain *domain; - struct iopf_fault *iopf, *next; enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS; group = container_of(work, struct iopf_group, work); @@ -61,7 +54,7 @@ 
static void iopf_handler(struct work_struct *work) if (!domain || !domain->iopf_handler) status = IOMMU_PAGE_RESP_INVALID; - list_for_each_entry_safe(iopf, next, &group->faults, list) { + list_for_each_entry(iopf, &group->faults, list) { /* * For the moment, errors are sticky: don't handle subsequent * faults in the group if there is an error. */ @@ -69,14 +62,10 @@ static void iopf_handler(struct work_struct *work) if (status == IOMMU_PAGE_RESP_SUCCESS) status = domain->iopf_handler(&iopf->fault, domain->fault_data); - - if (!(iopf->fault.prm.flags & - IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) - kfree(iopf); } iopf_complete_group(group->dev, &group->last_fault, status); - kfree(group); + iopf_free_group(group); } /** diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2320548a90f8..c9d4f175f121 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -41,7 +41,6 @@ struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; -struct iopf_queue; #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ @@ -126,6 +125,25 @@ struct iopf_fault { struct list_head list; }; +struct iopf_group { + struct iopf_fault last_fault; + struct list_head faults; + struct work_struct work; + struct device *dev; +}; + +/** + * struct iopf_queue - IO Page Fault queue + * @wq: the fault workqueue + * @devices: devices attached to this queue + * @lock: protects the device list + */ +struct iopf_queue { + struct workqueue_struct *wq; + struct list_head devices; + struct mutex lock; +}; + /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 #define IOMMU_FAULT_WRITE 0x1 -- cgit v1.2.3 From 351ffcb11ca0ff64e399982e279cfa131e7cb1aa Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:20 +0800 Subject: iommu: Make iommu_queue_iopf() more generic Make iommu_queue_iopf() more generic by making the iopf_group a minimal set of IOPFs that a domain's iopf handler should handle and respond to. Add a domain parameter to struct iopf_group so that the handler can retrieve and use it directly. Change iommu_queue_iopf() to forward groups of IOPFs to the domain's iopf handler. This is also a necessary step toward decoupling the SVA iopf handling code from this interface.
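For illustration, a consumer of the new contract might look like the sketch below. This is hypothetical code, not part of this series (foo_wq and both function names are made up): on success the handler takes ownership of the group, here by deferring to a workqueue exactly as the SVA path in this patch does; on failure iommu_queue_iopf() cleans the group up.

  static struct workqueue_struct *foo_wq;	/* hypothetical */

  static void foo_handle_group(struct work_struct *work)
  {
          struct iopf_group *group = container_of(work, struct iopf_group, work);

          /* resolve group->faults against group->domain, then respond */
          kfree(group);	/* simplified: a real handler frees partial faults too */
  }

  static int foo_iopf_handler(struct iopf_group *group)
  {
          INIT_WORK(&group->work, foo_handle_group);
          if (!queue_work(foo_wq, &group->work))
                  return -EBUSY;	/* caller will clean up the group */

          return 0;	/* the work item now owns the group */
  }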
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Tested-by: Yan Zhao Tested-by: Longfang Liu Link: https://lore.kernel.org/r/20240212012227.119381-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 68 ++++++++++++++++++++++++++++++++++++++-------- drivers/iommu/iommu-sva.c | 3 +- drivers/iommu/iommu-sva.h | 6 ++-- include/linux/iommu.h | 4 +-- 4 files changed, 61 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index c7e6bbed5c05..13cd0929e766 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -13,6 +13,9 @@ #include "iommu-sva.h" +enum iommu_page_response_code +iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm); + static void iopf_free_group(struct iopf_group *group) { struct iopf_fault *iopf, *next; @@ -45,29 +48,48 @@ static void iopf_handler(struct work_struct *work) { struct iopf_fault *iopf; struct iopf_group *group; - struct iommu_domain *domain; enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS; group = container_of(work, struct iopf_group, work); - domain = iommu_get_domain_for_dev_pasid(group->dev, - group->last_fault.fault.prm.pasid, 0); - if (!domain || !domain->iopf_handler) - status = IOMMU_PAGE_RESP_INVALID; - list_for_each_entry(iopf, &group->faults, list) { /* * For the moment, errors are sticky: don't handle subsequent * faults in the group if there is an error. */ - if (status == IOMMU_PAGE_RESP_SUCCESS) - status = domain->iopf_handler(&iopf->fault, - domain->fault_data); + if (status != IOMMU_PAGE_RESP_SUCCESS) + break; + + status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm); } iopf_complete_group(group->dev, &group->last_fault, status); iopf_free_group(group); } +static struct iommu_domain *get_domain_for_iopf(struct device *dev, + struct iommu_fault *fault) +{ + struct iommu_domain *domain; + + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { + domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0); + if (IS_ERR(domain)) + domain = NULL; + } else { + domain = iommu_get_domain_for_dev(dev); + } + + if (!domain || !domain->iopf_handler) { + dev_warn_ratelimited(dev, + "iopf (pasid %d) without domain attached or handler installed\n", + fault->prm.pasid); + + return NULL; + } + + return domain; +} + /** * iommu_queue_iopf - IO Page Fault handler * @fault: fault event @@ -112,6 +134,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev) { int ret; struct iopf_group *group; + struct iommu_domain *domain; struct iopf_fault *iopf, *next; struct iommu_fault_param *iopf_param; struct dev_iommu *param = dev->iommu; @@ -143,6 +166,12 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev) return 0; } + domain = get_domain_for_iopf(dev, fault); + if (!domain) { + ret = -EINVAL; + goto cleanup_partial; + } + group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) { /* @@ -157,8 +186,8 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev) group->dev = dev; group->last_fault.fault = *fault; INIT_LIST_HEAD(&group->faults); + group->domain = domain; list_add(&group->last_fault.list, &group->faults); - INIT_WORK(&group->work, iopf_handler); /* See if we have partial faults for this group */ list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { @@ -167,9 +196,13 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev) list_move(&iopf->list, &group->faults); } - 
queue_work(iopf_param->queue->wq, &group->work); - return 0; + mutex_unlock(&iopf_param->lock); + ret = domain->iopf_handler(group); + mutex_lock(&iopf_param->lock); + if (ret) + iopf_free_group(group); + return ret; cleanup_partial: list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { if (iopf->fault.prm.grpid == fault->prm.grpid) { @@ -181,6 +214,17 @@ cleanup_partial: } EXPORT_SYMBOL_GPL(iommu_queue_iopf); +int iommu_sva_handle_iopf(struct iopf_group *group) +{ + struct iommu_fault_param *fault_param = group->dev->iommu->fault_param; + + INIT_WORK(&group->work, iopf_handler); + if (!queue_work(fault_param->queue->wq, &group->work)) + return -EBUSY; + + return 0; +} + /** * iopf_queue_flush_dev - Ensure that all queued faults have been processed * @dev: the endpoint whose faults need to be flushed. diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index c3fc9201d0be..fcae7308fcb7 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -163,11 +163,10 @@ EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); * I/O page fault handler for SVA */ enum iommu_page_response_code -iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) +iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm) { vm_fault_t ret; struct vm_area_struct *vma; - struct mm_struct *mm = data; unsigned int access_flags = 0; unsigned int fault_flags = FAULT_FLAG_REMOTE; struct iommu_fault_page_request *prm = &fault->prm; diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h index de7819c796ce..27c8da115b41 100644 --- a/drivers/iommu/iommu-sva.h +++ b/drivers/iommu/iommu-sva.h @@ -22,8 +22,7 @@ int iopf_queue_flush_dev(struct device *dev); struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); -enum iommu_page_response_code -iommu_sva_handle_iopf(struct iommu_fault *fault, void *data); +int iommu_sva_handle_iopf(struct iopf_group *group); #else /* CONFIG_IOMMU_SVA */ static inline int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev) @@ -62,8 +61,7 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) return -ENODEV; } -static inline enum iommu_page_response_code -iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) +static inline int iommu_sva_handle_iopf(struct iopf_group *group) { return IOMMU_PAGE_RESP_INVALID; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c9d4f175f121..791f183a988e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -130,6 +130,7 @@ struct iopf_group { struct list_head faults; struct work_struct work; struct device *dev; + struct iommu_domain *domain; }; /** @@ -209,8 +210,7 @@ struct iommu_domain { unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; - enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault, - void *data); + int (*iopf_handler)(struct iopf_group *group); void *fault_data; union { struct { -- cgit v1.2.3 From 17c51a0ea36b800e7a5998a92d83016c82935dff Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:21 +0800 Subject: iommu: Separate SVA and IOPF Add CONFIG_IOMMU_IOPF for page fault handling framework and select it from its real consumer. Move iopf function declaration from iommu-sva.h to iommu.h and remove iommu-sva.h as it's empty now. 
Consolidate all SVA related code into iommu-sva.c: - Move iommu_sva_domain_alloc() from iommu.c to iommu-sva.c. - Move sva iopf handling code from io-pgfault.c to iommu-sva.c. Consolidate iommu_report_device_fault() and iommu_page_response() into io-pgfault.c. Export iopf_free_group() and iopf_group_response() for iopf handlers implemented in modules. Some functions are renamed with more meaningful names. No other intentional functionality changes. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Tested-by: Yan Zhao Tested-by: Longfang Liu Link: https://lore.kernel.org/r/20240212012227.119381-11-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 4 + drivers/iommu/Makefile | 3 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 1 - drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 - drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 1 - drivers/iommu/intel/svm.c | 1 - drivers/iommu/io-pgfault.c | 188 +++++++++++++++++------- drivers/iommu/iommu-sva.c | 68 ++++++++- drivers/iommu/iommu-sva.h | 69 --------- drivers/iommu/iommu.c | 133 ----------------- include/linux/iommu.h | 98 +++++++++--- 12 files changed, 277 insertions(+), 291 deletions(-) delete mode 100644 drivers/iommu/iommu-sva.h (limited to 'drivers') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9a29d742617e..d9ed5ad129f2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -163,6 +163,9 @@ config IOMMU_SVA select IOMMU_MM_DATA bool +config IOMMU_IOPF + bool + config FSL_PAMU bool "Freescale IOMMU support" depends on PCI @@ -398,6 +401,7 @@ config ARM_SMMU_V3_SVA bool "Shared Virtual Addressing support for the ARM SMMUv3" depends on ARM_SMMU_V3 select IOMMU_SVA + select IOMMU_IOPF select MMU_NOTIFIER help Support for sharing process address spaces with devices using the diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 95ad9dbfbda0..542760d963ec 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o +obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index ab2b0a5e4369..6513a98fcb72 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -10,7 +10,6 @@ #include #include "arm-smmu-v3.h" -#include "../../iommu-sva.h" #include "../../io-pgtable-arm.h" struct arm_smmu_mmu_notifier { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ab4f04c7f932..4e93e845458c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -29,7 +29,6 @@ #include "arm-smmu-v3.h" #include "../../dma-iommu.h" -#include "../../iommu-sva.h" static bool disable_bypass = true; module_param(disable_bypass, bool, 0444); diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 012cd2541a68..a4a125666293 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -51,6 +51,7 @@ config INTEL_IOMMU_SVM depends on X86_64 select MMU_NOTIFIER select IOMMU_SVA + select 
IOMMU_IOPF help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index df6ceefc09ee..29a12f289e2e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -27,7 +27,6 @@ #include "iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" -#include "../iommu-sva.h" #include "pasid.h" #include "cap_audit.h" #include "perfmon.h" diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 9751f037e188..e1cbcb9515f0 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -22,7 +22,6 @@ #include "iommu.h" #include "pasid.h" #include "perf.h" -#include "../iommu-sva.h" #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 13cd0929e766..c1e88da973ce 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -11,12 +11,9 @@ #include #include -#include "iommu-sva.h" +#include "iommu-priv.h" -enum iommu_page_response_code -iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm); - -static void iopf_free_group(struct iopf_group *group) +void iopf_free_group(struct iopf_group *group) { struct iopf_fault *iopf, *next; @@ -27,44 +24,7 @@ static void iopf_free_group(struct iopf_group *group) kfree(group); } - -static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, - enum iommu_page_response_code status) -{ - struct iommu_page_response resp = { - .pasid = iopf->fault.prm.pasid, - .grpid = iopf->fault.prm.grpid, - .code = status, - }; - - if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) && - (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID)) - resp.flags = IOMMU_PAGE_RESP_PASID_VALID; - - return iommu_page_response(dev, &resp); -} - -static void iopf_handler(struct work_struct *work) -{ - struct iopf_fault *iopf; - struct iopf_group *group; - enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS; - - group = container_of(work, struct iopf_group, work); - list_for_each_entry(iopf, &group->faults, list) { - /* - * For the moment, errors are sticky: don't handle subsequent - * faults in the group if there is an error. - */ - if (status != IOMMU_PAGE_RESP_SUCCESS) - break; - - status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm); - } - - iopf_complete_group(group->dev, &group->last_fault, status); - iopf_free_group(group); -} +EXPORT_SYMBOL_GPL(iopf_free_group); static struct iommu_domain *get_domain_for_iopf(struct device *dev, struct iommu_fault *fault) @@ -91,7 +51,7 @@ static struct iommu_domain *get_domain_for_iopf(struct device *dev, } /** - * iommu_queue_iopf - IO Page Fault handler + * iommu_handle_iopf - IO Page Fault handler * @fault: fault event * @dev: struct device. * @@ -130,7 +90,7 @@ static struct iommu_domain *get_domain_for_iopf(struct device *dev, * * Return: 0 on success and <0 on error. 
*/ -int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev) +static int iommu_handle_iopf(struct iommu_fault *fault, struct device *dev) { int ret; struct iopf_group *group; @@ -212,18 +172,117 @@ cleanup_partial: } return ret; } -EXPORT_SYMBOL_GPL(iommu_queue_iopf); -int iommu_sva_handle_iopf(struct iopf_group *group) +/** + * iommu_report_device_fault() - Report fault event to device driver + * @dev: the device + * @evt: fault event data + * + * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ + * handler. When this function fails and the fault is recoverable, it is the + * caller's responsibility to complete the fault. + * + * Return 0 on success, or an error. + */ +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { - struct iommu_fault_param *fault_param = group->dev->iommu->fault_param; + struct dev_iommu *param = dev->iommu; + struct iopf_fault *evt_pending = NULL; + struct iommu_fault_param *fparam; + int ret = 0; - INIT_WORK(&group->work, iopf_handler); - if (!queue_work(fault_param->queue->wq, &group->work)) - return -EBUSY; + if (!param || !evt) + return -EINVAL; - return 0; + /* we only report device fault if there is a handler registered */ + mutex_lock(¶m->lock); + fparam = param->fault_param; + + if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && + (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { + evt_pending = kmemdup(evt, sizeof(struct iopf_fault), + GFP_KERNEL); + if (!evt_pending) { + ret = -ENOMEM; + goto done_unlock; + } + mutex_lock(&fparam->lock); + list_add_tail(&evt_pending->list, &fparam->faults); + mutex_unlock(&fparam->lock); + } + + ret = iommu_handle_iopf(&evt->fault, dev); + if (ret && evt_pending) { + mutex_lock(&fparam->lock); + list_del(&evt_pending->list); + mutex_unlock(&fparam->lock); + kfree(evt_pending); + } +done_unlock: + mutex_unlock(¶m->lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_report_device_fault); + +int iommu_page_response(struct device *dev, + struct iommu_page_response *msg) +{ + bool needs_pasid; + int ret = -EINVAL; + struct iopf_fault *evt; + struct iommu_fault_page_request *prm; + struct dev_iommu *param = dev->iommu; + const struct iommu_ops *ops = dev_iommu_ops(dev); + bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; + + if (!ops->page_response) + return -ENODEV; + + if (!param || !param->fault_param) + return -EINVAL; + + /* Only send response if there is a fault report pending */ + mutex_lock(¶m->fault_param->lock); + if (list_empty(¶m->fault_param->faults)) { + dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); + goto done_unlock; + } + /* + * Check if we have a matching page request pending to respond, + * otherwise return -EINVAL + */ + list_for_each_entry(evt, ¶m->fault_param->faults, list) { + prm = &evt->fault.prm; + if (prm->grpid != msg->grpid) + continue; + + /* + * If the PASID is required, the corresponding request is + * matched using the group ID, the PASID valid bit and the PASID + * value. Otherwise only the group ID matches request and + * response. + */ + needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; + if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid)) + continue; + + if (!needs_pasid && has_pasid) { + /* No big deal, just clear it. 
*/ + msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID; + msg->pasid = 0; + } + + ret = ops->page_response(dev, evt, msg); + list_del(&evt->list); + kfree(evt); + break; + } + +done_unlock: + mutex_unlock(¶m->fault_param->lock); + return ret; } +EXPORT_SYMBOL_GPL(iommu_page_response); /** * iopf_queue_flush_dev - Ensure that all queued faults have been processed @@ -258,6 +317,31 @@ int iopf_queue_flush_dev(struct device *dev) } EXPORT_SYMBOL_GPL(iopf_queue_flush_dev); +/** + * iopf_group_response - Respond a group of page faults + * @group: the group of faults with the same group id + * @status: the response code + * + * Return 0 on success and <0 on error. + */ +int iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status) +{ + struct iopf_fault *iopf = &group->last_fault; + struct iommu_page_response resp = { + .pasid = iopf->fault.prm.pasid, + .grpid = iopf->fault.prm.grpid, + .code = status, + }; + + if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) && + (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID)) + resp.flags = IOMMU_PAGE_RESP_PASID_VALID; + + return iommu_page_response(group->dev, &resp); +} +EXPORT_SYMBOL_GPL(iopf_group_response); + /** * iopf_queue_discard_partial - Remove all pending partial fault * @queue: the queue whose partial faults need to be discarded diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index fcae7308fcb7..9de878e40413 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -7,7 +7,7 @@ #include #include -#include "iommu-sva.h" +#include "iommu-priv.h" static DEFINE_MUTEX(iommu_sva_lock); @@ -159,10 +159,21 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle) } EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); +void mm_pasid_drop(struct mm_struct *mm) +{ + struct iommu_mm_data *iommu_mm = mm->iommu_mm; + + if (!iommu_mm) + return; + + iommu_free_global_pasid(iommu_mm->pasid); + kfree(iommu_mm); +} + /* * I/O page fault handler for SVA */ -enum iommu_page_response_code +static enum iommu_page_response_code iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm) { vm_fault_t ret; @@ -216,13 +227,54 @@ out_put_mm: return status; } -void mm_pasid_drop(struct mm_struct *mm) +static void iommu_sva_handle_iopf(struct work_struct *work) { - struct iommu_mm_data *iommu_mm = mm->iommu_mm; + struct iopf_fault *iopf; + struct iopf_group *group; + enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS; + + group = container_of(work, struct iopf_group, work); + list_for_each_entry(iopf, &group->faults, list) { + /* + * For the moment, errors are sticky: don't handle subsequent + * faults in the group if there is an error. 
+ */ + if (status != IOMMU_PAGE_RESP_SUCCESS) + break; + + status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm); + } - if (!iommu_mm) - return; + iopf_group_response(group, status); + iopf_free_group(group); +} - iommu_free_global_pasid(iommu_mm->pasid); - kfree(iommu_mm); +static int iommu_sva_iopf_handler(struct iopf_group *group) +{ + struct iommu_fault_param *fault_param = group->dev->iommu->fault_param; + + INIT_WORK(&group->work, iommu_sva_handle_iopf); + if (!queue_work(fault_param->queue->wq, &group->work)) + return -EBUSY; + + return 0; +} + +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + struct iommu_domain *domain; + + domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); + if (!domain) + return NULL; + + domain->type = IOMMU_DOMAIN_SVA; + mmgrab(mm); + domain->mm = mm; + domain->owner = ops; + domain->iopf_handler = iommu_sva_iopf_handler; + + return domain; } diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h deleted file mode 100644 index 27c8da115b41..000000000000 --- a/drivers/iommu/iommu-sva.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * SVA library for IOMMU drivers - */ -#ifndef _IOMMU_SVA_H -#define _IOMMU_SVA_H - -#include - -/* I/O Page fault */ -struct device; -struct iommu_fault; -struct iopf_queue; - -#ifdef CONFIG_IOMMU_SVA -int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev); - -int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); -int iopf_queue_remove_device(struct iopf_queue *queue, - struct device *dev); -int iopf_queue_flush_dev(struct device *dev); -struct iopf_queue *iopf_queue_alloc(const char *name); -void iopf_queue_free(struct iopf_queue *queue); -int iopf_queue_discard_partial(struct iopf_queue *queue); -int iommu_sva_handle_iopf(struct iopf_group *group); - -#else /* CONFIG_IOMMU_SVA */ -static inline int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev) -{ - return -ENODEV; -} - -static inline int iopf_queue_add_device(struct iopf_queue *queue, - struct device *dev) -{ - return -ENODEV; -} - -static inline int iopf_queue_remove_device(struct iopf_queue *queue, - struct device *dev) -{ - return -ENODEV; -} - -static inline int iopf_queue_flush_dev(struct device *dev) -{ - return -ENODEV; -} - -static inline struct iopf_queue *iopf_queue_alloc(const char *name) -{ - return NULL; -} - -static inline void iopf_queue_free(struct iopf_queue *queue) -{ -} - -static inline int iopf_queue_discard_partial(struct iopf_queue *queue) -{ - return -ENODEV; -} - -static inline int iommu_sva_handle_iopf(struct iopf_group *group) -{ - return IOMMU_PAGE_RESP_INVALID; -} -#endif /* CONFIG_IOMMU_SVA */ -#endif /* _IOMMU_SVA_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 15d7b2914780..35dac3e4629a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -36,8 +36,6 @@ #include "dma-iommu.h" #include "iommu-priv.h" -#include "iommu-sva.h" - static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); static DEFINE_IDA(iommu_global_pasid_ida); @@ -1330,117 +1328,6 @@ void iommu_group_put(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_put); -/** - * iommu_report_device_fault() - Report fault event to device driver - * @dev: the device - * @evt: fault event data - * - * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ - * handler. 
When this function fails and the fault is recoverable, it is the - * caller's responsibility to complete the fault. - * - * Return 0 on success, or an error. - */ -int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) -{ - struct dev_iommu *param = dev->iommu; - struct iopf_fault *evt_pending = NULL; - struct iommu_fault_param *fparam; - int ret = 0; - - if (!param || !evt) - return -EINVAL; - - /* we only report device fault if there is a handler registered */ - mutex_lock(¶m->lock); - fparam = param->fault_param; - - if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && - (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { - evt_pending = kmemdup(evt, sizeof(struct iopf_fault), - GFP_KERNEL); - if (!evt_pending) { - ret = -ENOMEM; - goto done_unlock; - } - mutex_lock(&fparam->lock); - list_add_tail(&evt_pending->list, &fparam->faults); - mutex_unlock(&fparam->lock); - } - - ret = iommu_queue_iopf(&evt->fault, dev); - if (ret && evt_pending) { - mutex_lock(&fparam->lock); - list_del(&evt_pending->list); - mutex_unlock(&fparam->lock); - kfree(evt_pending); - } -done_unlock: - mutex_unlock(¶m->lock); - return ret; -} -EXPORT_SYMBOL_GPL(iommu_report_device_fault); - -int iommu_page_response(struct device *dev, - struct iommu_page_response *msg) -{ - bool needs_pasid; - int ret = -EINVAL; - struct iopf_fault *evt; - struct iommu_fault_page_request *prm; - struct dev_iommu *param = dev->iommu; - const struct iommu_ops *ops = dev_iommu_ops(dev); - bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; - - if (!ops->page_response) - return -ENODEV; - - if (!param || !param->fault_param) - return -EINVAL; - - /* Only send response if there is a fault report pending */ - mutex_lock(¶m->fault_param->lock); - if (list_empty(¶m->fault_param->faults)) { - dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); - goto done_unlock; - } - /* - * Check if we have a matching page request pending to respond, - * otherwise return -EINVAL - */ - list_for_each_entry(evt, ¶m->fault_param->faults, list) { - prm = &evt->fault.prm; - if (prm->grpid != msg->grpid) - continue; - - /* - * If the PASID is required, the corresponding request is - * matched using the group ID, the PASID valid bit and the PASID - * value. Otherwise only the group ID matches request and - * response. - */ - needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; - if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid)) - continue; - - if (!needs_pasid && has_pasid) { - /* No big deal, just clear it. 
*/ - msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID; - msg->pasid = 0; - } - - ret = ops->page_response(dev, evt, msg); - list_del(&evt->list); - kfree(evt); - break; - } - -done_unlock: - mutex_unlock(¶m->fault_param->lock); - return ret; -} -EXPORT_SYMBOL_GPL(iommu_page_response); - /** * iommu_group_id - Return ID for a group * @group: the group to ID @@ -3523,26 +3410,6 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, } EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - struct iommu_domain *domain; - - domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); - if (!domain) - return NULL; - - domain->type = IOMMU_DOMAIN_SVA; - mmgrab(mm); - domain->mm = mm; - domain->owner = ops; - domain->iopf_handler = iommu_sva_handle_iopf; - domain->fault_data = mm; - - return domain; -} - ioasid_t iommu_alloc_global_pasid(struct device *dev) { int ret; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 791f183a988e..fc912aed7886 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -810,10 +810,6 @@ extern struct iommu_group *iommu_group_get(struct device *dev); extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); -extern int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); -extern int iommu_page_response(struct device *dev, - struct iommu_page_response *msg); - extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); @@ -1029,8 +1025,6 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group); int iommu_device_claim_dma_owner(struct device *dev, void *owner); void iommu_device_release_dma_owner(struct device *dev); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); void iommu_detach_device_pasid(struct iommu_domain *domain, @@ -1219,18 +1213,6 @@ static inline void iommu_group_put(struct iommu_group *group) { } -static inline -int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) -{ - return -ENODEV; -} - -static inline int iommu_page_response(struct device *dev, - struct iommu_page_response *msg) -{ - return -ENODEV; -} - static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; @@ -1379,12 +1361,6 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) return -ENODEV; } -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} - static inline int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { @@ -1524,6 +1500,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1548,6 +1526,78 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) } static inline void mm_pasid_drop(struct mm_struct *mm) {} + +static inline struct iommu_domain * +iommu_sva_domain_alloc(struct device *dev, struct mm_struct 
*mm) +{ + return NULL; +} #endif /* CONFIG_IOMMU_SVA */ +#ifdef CONFIG_IOMMU_IOPF +int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); +int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev); +int iopf_queue_flush_dev(struct device *dev); +struct iopf_queue *iopf_queue_alloc(const char *name); +void iopf_queue_free(struct iopf_queue *queue); +int iopf_queue_discard_partial(struct iopf_queue *queue); +void iopf_free_group(struct iopf_group *group); +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +int iommu_page_response(struct device *dev, struct iommu_page_response *msg); +int iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status); +#else +static inline int +iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) +{ + return -ENODEV; +} + +static inline int +iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) +{ + return -ENODEV; +} + +static inline int iopf_queue_flush_dev(struct device *dev) +{ + return -ENODEV; +} + +static inline struct iopf_queue *iopf_queue_alloc(const char *name) +{ + return NULL; +} + +static inline void iopf_queue_free(struct iopf_queue *queue) +{ +} + +static inline int iopf_queue_discard_partial(struct iopf_queue *queue) +{ + return -ENODEV; +} + +static inline void iopf_free_group(struct iopf_group *group) +{ +} + +static inline int +iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) +{ + return -ENODEV; +} + +static inline int +iommu_page_response(struct device *dev, struct iommu_page_response *msg) +{ + return -ENODEV; +} + +static inline int iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status) +{ + return -ENODEV; +} +#endif /* CONFIG_IOMMU_IOPF */ #endif /* __LINUX_IOMMU_H */ -- cgit v1.2.3 From cc7338e9d807e20e60e6720a62956f0e9d46f0f8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:22 +0800 Subject: iommu: Refine locking for per-device fault data management The per-device fault data structure stores information about faults that occur on a device. This data is allocated when IOPF is enabled on the device and freed when IOPF is disabled. The data is used in the iopf reporting, handling, responding, and draining paths. The fault data is protected by two locks: - dev->iommu->lock: This lock is used to protect the allocation and freeing of the fault data. - dev->iommu->fault_param->lock: This lock is used to protect the fault data itself. Apply the locking mechanism to the fault reporting and responding paths. The fault_param->lock is now also taken in iopf_queue_discard_partial(). It does not fix any real issue, as iopf_queue_discard_partial() is only used in the VT-d driver's prq_event_thread(), which is a single-threaded path that reports the IOPFs.
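Condensed from the diff below, the nesting that the fault paths now follow looks roughly like this (an illustrative sketch, not literal code from the patch):

  mutex_lock(&param->lock);	/* pins the fault data against freeing */
  fault_param = param->fault_param;
  if (fault_param) {
          mutex_lock(&fault_param->lock);	/* protects the fault lists */
          /* ... report, respond, or discard partial faults ... */
          mutex_unlock(&fault_param->lock);
  }
  mutex_unlock(&param->lock);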
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Yan Zhao Tested-by: Longfang Liu Link: https://lore.kernel.org/r/20240212012227.119381-12-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 61 +++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index c1e88da973ce..5aea8402be47 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -53,7 +53,7 @@ static struct iommu_domain *get_domain_for_iopf(struct device *dev, /** * iommu_handle_iopf - IO Page Fault handler * @fault: fault event - * @dev: struct device. + * @iopf_param: the fault parameter of the device. * * Add a fault to the device workqueue, to be handled by mm. * @@ -90,29 +90,21 @@ static struct iommu_domain *get_domain_for_iopf(struct device *dev, * * Return: 0 on success and <0 on error. */ -static int iommu_handle_iopf(struct iommu_fault *fault, struct device *dev) +static int iommu_handle_iopf(struct iommu_fault *fault, + struct iommu_fault_param *iopf_param) { int ret; struct iopf_group *group; struct iommu_domain *domain; struct iopf_fault *iopf, *next; - struct iommu_fault_param *iopf_param; - struct dev_iommu *param = dev->iommu; + struct device *dev = iopf_param->dev; - lockdep_assert_held(¶m->lock); + lockdep_assert_held(&iopf_param->lock); if (fault->type != IOMMU_FAULT_PAGE_REQ) /* Not a recoverable page fault */ return -EOPNOTSUPP; - /* - * As long as we're holding param->lock, the queue can't be unlinked - * from the device and therefore cannot disappear. - */ - iopf_param = param->fault_param; - if (!iopf_param) - return -ENODEV; - if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { iopf = kzalloc(sizeof(*iopf), GFP_KERNEL); if (!iopf) @@ -186,18 +178,19 @@ cleanup_partial: */ int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { - struct dev_iommu *param = dev->iommu; + struct iommu_fault_param *fault_param; struct iopf_fault *evt_pending = NULL; - struct iommu_fault_param *fparam; + struct dev_iommu *param = dev->iommu; int ret = 0; - if (!param || !evt) - return -EINVAL; - - /* we only report device fault if there is a handler registered */ mutex_lock(¶m->lock); - fparam = param->fault_param; + fault_param = param->fault_param; + if (!fault_param) { + mutex_unlock(¶m->lock); + return -EINVAL; + } + mutex_lock(&fault_param->lock); if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { evt_pending = kmemdup(evt, sizeof(struct iopf_fault), @@ -206,20 +199,18 @@ int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) ret = -ENOMEM; goto done_unlock; } - mutex_lock(&fparam->lock); - list_add_tail(&evt_pending->list, &fparam->faults); - mutex_unlock(&fparam->lock); + list_add_tail(&evt_pending->list, &fault_param->faults); } - ret = iommu_handle_iopf(&evt->fault, dev); + ret = iommu_handle_iopf(&evt->fault, fault_param); if (ret && evt_pending) { - mutex_lock(&fparam->lock); list_del(&evt_pending->list); - mutex_unlock(&fparam->lock); kfree(evt_pending); } done_unlock: + mutex_unlock(&fault_param->lock); mutex_unlock(¶m->lock); + return ret; } EXPORT_SYMBOL_GPL(iommu_report_device_fault); @@ -232,18 +223,23 @@ int iommu_page_response(struct device *dev, struct iopf_fault *evt; struct iommu_fault_page_request *prm; struct dev_iommu *param = dev->iommu; + struct iommu_fault_param 
*fault_param; const struct iommu_ops *ops = dev_iommu_ops(dev); bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; if (!ops->page_response) return -ENODEV; - if (!param || !param->fault_param) + mutex_lock(¶m->lock); + fault_param = param->fault_param; + if (!fault_param) { + mutex_unlock(¶m->lock); return -EINVAL; + } /* Only send response if there is a fault report pending */ - mutex_lock(¶m->fault_param->lock); - if (list_empty(¶m->fault_param->faults)) { + mutex_lock(&fault_param->lock); + if (list_empty(&fault_param->faults)) { dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); goto done_unlock; } @@ -251,7 +247,7 @@ int iommu_page_response(struct device *dev, * Check if we have a matching page request pending to respond, * otherwise return -EINVAL */ - list_for_each_entry(evt, ¶m->fault_param->faults, list) { + list_for_each_entry(evt, &fault_param->faults, list) { prm = &evt->fault.prm; if (prm->grpid != msg->grpid) continue; @@ -279,7 +275,8 @@ int iommu_page_response(struct device *dev, } done_unlock: - mutex_unlock(¶m->fault_param->lock); + mutex_unlock(&fault_param->lock); + mutex_unlock(¶m->lock); return ret; } EXPORT_SYMBOL_GPL(iommu_page_response); @@ -362,11 +359,13 @@ int iopf_queue_discard_partial(struct iopf_queue *queue) mutex_lock(&queue->lock); list_for_each_entry(iopf_param, &queue->devices, queue_list) { + mutex_lock(&iopf_param->lock); list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { list_del(&iopf->list); kfree(iopf); } + mutex_unlock(&iopf_param->lock); } mutex_unlock(&queue->lock); return 0; -- cgit v1.2.3 From a74c077b9021b36c785095c571336e5b204d3c2d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:23 +0800 Subject: iommu: Use refcount for fault data access The per-device fault data structure stores information about faults occurring on a device. Its lifetime spans from IOPF enablement to disablement. Multiple paths, including IOPF reporting, handling, and responding, may access it concurrently. Previously, a mutex protected the fault data from use after free. But this is not performance friendly due to the critical nature of IOPF handling paths. Refine this with a refcount-based approach. The fault data pointer is obtained within an RCU read region with a refcount. The fault data pointer is returned for usage only when the pointer is valid and a refcount is successfully obtained. The fault data is freed with kfree_rcu(), ensuring data is only freed after all RCU critical regions complete. An iopf handling work starts once an iopf group is created. The handling work continues until iommu_page_response() is called to respond to the iopf and the iopf group is freed. During this time, the device fault parameter should always be available. Add a pointer to the device fault parameter in the iopf_group structure and hold the reference until the iopf_group is freed. Make iommu_page_response() static as it is only used in io-pgfault.c. 
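Condensed to its essentials, the resulting access pattern is the following (a simplified sketch of the iopf_get_dev_fault_param()/iopf_put_dev_fault_param() helpers added below):

  /* Lookup: the pointer is only stable inside the RCU read section,
   * so take a reference before leaving it.
   */
  rcu_read_lock();
  fault_param = rcu_dereference(param->fault_param);
  if (fault_param && !refcount_inc_not_zero(&fault_param->users))
          fault_param = NULL;	/* teardown already in progress */
  rcu_read_unlock();

  /* Release: the actual free waits for concurrent RCU readers. */
  if (refcount_dec_and_test(&fault_param->users))
          kfree_rcu(fault_param, rcu);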
Co-developed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Tested-by: Yan Zhao Link: https://lore.kernel.org/r/20240212012227.119381-13-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 127 ++++++++++++++++++++++++++++----------------- drivers/iommu/iommu-sva.c | 2 +- include/linux/iommu.h | 17 +++--- 3 files changed, 88 insertions(+), 58 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 5aea8402be47..ce7058892b59 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -13,6 +13,32 @@ #include "iommu-priv.h" +/* + * Return the fault parameter of a device if it exists. Otherwise, return NULL. + * On a successful return, the caller takes a reference of this parameter and + * should put it after use by calling iopf_put_dev_fault_param(). + */ +static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev) +{ + struct dev_iommu *param = dev->iommu; + struct iommu_fault_param *fault_param; + + rcu_read_lock(); + fault_param = rcu_dereference(param->fault_param); + if (fault_param && !refcount_inc_not_zero(&fault_param->users)) + fault_param = NULL; + rcu_read_unlock(); + + return fault_param; +} + +/* Caller must hold a reference of the fault parameter. */ +static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param) +{ + if (refcount_dec_and_test(&fault_param->users)) + kfree_rcu(fault_param, rcu); +} + void iopf_free_group(struct iopf_group *group) { struct iopf_fault *iopf, *next; @@ -22,6 +48,8 @@ void iopf_free_group(struct iopf_group *group) kfree(iopf); } + /* Pair with iommu_report_device_fault(). */ + iopf_put_dev_fault_param(group->fault_param); kfree(group); } EXPORT_SYMBOL_GPL(iopf_free_group); @@ -135,7 +163,7 @@ static int iommu_handle_iopf(struct iommu_fault *fault, goto cleanup_partial; } - group->dev = dev; + group->fault_param = iopf_param; group->last_fault.fault = *fault; INIT_LIST_HEAD(&group->faults); group->domain = domain; @@ -178,64 +206,61 @@ cleanup_partial: */ int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { + bool last_prq = evt->fault.type == IOMMU_FAULT_PAGE_REQ && + (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE); struct iommu_fault_param *fault_param; - struct iopf_fault *evt_pending = NULL; - struct dev_iommu *param = dev->iommu; - int ret = 0; + struct iopf_fault *evt_pending; + int ret; - mutex_lock(¶m->lock); - fault_param = param->fault_param; - if (!fault_param) { - mutex_unlock(¶m->lock); + fault_param = iopf_get_dev_fault_param(dev); + if (!fault_param) return -EINVAL; - } mutex_lock(&fault_param->lock); - if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && - (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { + if (last_prq) { evt_pending = kmemdup(evt, sizeof(struct iopf_fault), GFP_KERNEL); if (!evt_pending) { ret = -ENOMEM; - goto done_unlock; + goto err_unlock; } list_add_tail(&evt_pending->list, &fault_param->faults); } ret = iommu_handle_iopf(&evt->fault, fault_param); - if (ret && evt_pending) { + if (ret) + goto err_free; + + mutex_unlock(&fault_param->lock); + /* The reference count of fault_param is now held by iopf_group. 
*/ + if (!last_prq) + iopf_put_dev_fault_param(fault_param); + + return 0; +err_free: + if (last_prq) { list_del(&evt_pending->list); kfree(evt_pending); } -done_unlock: +err_unlock: mutex_unlock(&fault_param->lock); - mutex_unlock(¶m->lock); + iopf_put_dev_fault_param(fault_param); return ret; } EXPORT_SYMBOL_GPL(iommu_report_device_fault); -int iommu_page_response(struct device *dev, - struct iommu_page_response *msg) +static int iommu_page_response(struct iopf_group *group, + struct iommu_page_response *msg) { bool needs_pasid; int ret = -EINVAL; struct iopf_fault *evt; struct iommu_fault_page_request *prm; - struct dev_iommu *param = dev->iommu; - struct iommu_fault_param *fault_param; + struct device *dev = group->fault_param->dev; const struct iommu_ops *ops = dev_iommu_ops(dev); bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; - - if (!ops->page_response) - return -ENODEV; - - mutex_lock(¶m->lock); - fault_param = param->fault_param; - if (!fault_param) { - mutex_unlock(¶m->lock); - return -EINVAL; - } + struct iommu_fault_param *fault_param = group->fault_param; /* Only send response if there is a fault report pending */ mutex_lock(&fault_param->lock); @@ -276,10 +301,9 @@ int iommu_page_response(struct device *dev, done_unlock: mutex_unlock(&fault_param->lock); - mutex_unlock(¶m->lock); + return ret; } -EXPORT_SYMBOL_GPL(iommu_page_response); /** * iopf_queue_flush_dev - Ensure that all queued faults have been processed @@ -295,22 +319,20 @@ EXPORT_SYMBOL_GPL(iommu_page_response); */ int iopf_queue_flush_dev(struct device *dev) { - int ret = 0; struct iommu_fault_param *iopf_param; - struct dev_iommu *param = dev->iommu; - if (!param) + /* + * It's a driver bug to be here after iopf_queue_remove_device(). + * Therefore, it's safe to dereference the fault parameter without + * holding the lock. 
+ */ + iopf_param = rcu_dereference_check(dev->iommu->fault_param, true); + if (WARN_ON(!iopf_param)) return -ENODEV; - mutex_lock(¶m->lock); - iopf_param = param->fault_param; - if (iopf_param) - flush_workqueue(iopf_param->queue->wq); - else - ret = -ENODEV; - mutex_unlock(¶m->lock); + flush_workqueue(iopf_param->queue->wq); - return ret; + return 0; } EXPORT_SYMBOL_GPL(iopf_queue_flush_dev); @@ -335,7 +357,7 @@ int iopf_group_response(struct iopf_group *group, (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID)) resp.flags = IOMMU_PAGE_RESP_PASID_VALID; - return iommu_page_response(group->dev, &resp); + return iommu_page_response(group, &resp); } EXPORT_SYMBOL_GPL(iopf_group_response); @@ -384,10 +406,15 @@ int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) int ret = 0; struct dev_iommu *param = dev->iommu; struct iommu_fault_param *fault_param; + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (!ops->page_response) + return -ENODEV; mutex_lock(&queue->lock); mutex_lock(¶m->lock); - if (param->fault_param) { + if (rcu_dereference_check(param->fault_param, + lockdep_is_held(¶m->lock))) { ret = -EBUSY; goto done_unlock; } @@ -402,10 +429,11 @@ int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) INIT_LIST_HEAD(&fault_param->faults); INIT_LIST_HEAD(&fault_param->partial); fault_param->dev = dev; + refcount_set(&fault_param->users, 1); list_add(&fault_param->queue_list, &queue->devices); fault_param->queue = queue; - param->fault_param = fault_param; + rcu_assign_pointer(param->fault_param, fault_param); done_unlock: mutex_unlock(¶m->lock); @@ -429,10 +457,12 @@ int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) int ret = 0; struct iopf_fault *iopf, *next; struct dev_iommu *param = dev->iommu; - struct iommu_fault_param *fault_param = param->fault_param; + struct iommu_fault_param *fault_param; mutex_lock(&queue->lock); mutex_lock(¶m->lock); + fault_param = rcu_dereference_check(param->fault_param, + lockdep_is_held(¶m->lock)); if (!fault_param) { ret = -ENODEV; goto unlock; @@ -454,8 +484,9 @@ int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) list_for_each_entry_safe(iopf, next, &fault_param->partial, list) kfree(iopf); - param->fault_param = NULL; - kfree(fault_param); + /* dec the ref owned by iopf_queue_add_device() */ + rcu_assign_pointer(param->fault_param, NULL); + iopf_put_dev_fault_param(fault_param); unlock: mutex_unlock(¶m->lock); mutex_unlock(&queue->lock); diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 9de878e40413..b51995b4fe90 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -251,7 +251,7 @@ static void iommu_sva_handle_iopf(struct work_struct *work) static int iommu_sva_iopf_handler(struct iopf_group *group) { - struct iommu_fault_param *fault_param = group->dev->iommu->fault_param; + struct iommu_fault_param *fault_param = group->fault_param; INIT_WORK(&group->work, iommu_sva_handle_iopf); if (!queue_work(fault_param->queue->wq, &group->work)) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index fc912aed7886..1e9161ae95da 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -41,6 +41,7 @@ struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; +struct iommu_fault_param; #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ @@ -129,8 +130,9 @@ struct iopf_group { struct iopf_fault last_fault; struct list_head 
faults; struct work_struct work; - struct device *dev; + struct iommu_domain *domain; + /* The device's fault data parameter. */ + struct iommu_fault_param *fault_param; }; /** @@ -679,6 +681,8 @@ struct iommu_device { /** * struct iommu_fault_param - per-device IOMMU fault data * @lock: protect pending faults list + * @users: user counter to manage the lifetime of the data + * @rcu: rcu head for kfree_rcu() * @dev: the device that owns this param * @queue: IOPF queue * @queue_list: index into queue->devices @@ -688,6 +692,8 @@ struct iommu_device { */ struct iommu_fault_param { struct mutex lock; + refcount_t users; + struct rcu_head rcu; struct device *dev; struct iopf_queue *queue; @@ -715,7 +721,7 @@ struct iommu_fault_param { */ struct dev_iommu { struct mutex lock; - struct iommu_fault_param *fault_param; + struct iommu_fault_param __rcu *fault_param; struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; @@ -1543,7 +1549,6 @@ void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); void iopf_free_group(struct iopf_group *group); int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); -int iommu_page_response(struct device *dev, struct iommu_page_response *msg); int iopf_group_response(struct iopf_group *group, enum iommu_page_response_code status); #else @@ -1588,12 +1593,6 @@ iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) return -ENODEV; } -static inline int -iommu_page_response(struct device *dev, struct iommu_page_response *msg) -{ - return -ENODEV; -} - static inline int iopf_group_response(struct iopf_group *group, enum iommu_page_response_code status) { return -ENODEV; } -- cgit v1.2.3 From 0095bf83554f8e7a681961656608101bdf40e9ef Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:24 +0800 Subject: iommu: Improve iopf_queue_remove_device() Convert iopf_queue_remove_device() to return void instead of an error code, as the return value is never used. This removal helper is designed never to fail, so there's no need for error handling. Ack all outstanding page requests from the device with the response code IOMMU_PAGE_RESP_INVALID, indicating that the device should not attempt any retry. Add comments to this helper explaining the steps involved in removing a device from the iopf queue and disabling its PRI. The individual drivers are expected to be adjusted accordingly. Here we just define the expected behavior of individual iommu drivers from the core's perspective. Suggested-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Tested-by: Yan Zhao Link: https://lore.kernel.org/r/20240212012227.119381-14-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 7 +----- drivers/iommu/io-pgfault.c | 57 +++++++++++++++++++++++++++++---------------- include/linux/iommu.h | 5 ++-- 3 files changed, 40 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 29a12f289e2e..a81a2be9b870 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4455,12 +4455,7 @@ static int intel_iommu_disable_iopf(struct device *dev) */ pci_disable_pri(to_pci_dev(dev)); info->pri_enabled = 0; - - /* - * With PRI disabled and outstanding PRQs drained, removing device - * from iopf queue should never fail.
- */ - WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev)); + iopf_queue_remove_device(iommu->iopf_queue, dev); return 0; } diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index ce7058892b59..ece09552e5cf 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -448,41 +448,60 @@ EXPORT_SYMBOL_GPL(iopf_queue_add_device); * @queue: IOPF queue * @dev: device to remove * - * Caller makes sure that no more faults are reported for this device. + * Removing a device from an iopf_queue. It's recommended to follow these + * steps when removing a device: * - * Return: 0 on success and <0 on error. + * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware + * and flush any hardware page request queues. This should be done before + * calling into this helper. + * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding + * page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should + * not retry. This helper function handles this. + * - Disable PRI on the device: After calling this helper, the caller could + * then disable PRI on the device. + * + * Calling iopf_queue_remove_device() essentially disassociates the device. + * The fault_param might still exist, but iommu_page_response() will do + * nothing. The device fault parameter reference count has been properly + * passed from iommu_report_device_fault() to the fault handling work, and + * will eventually be released after iommu_page_response(). */ -int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) +void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) { - int ret = 0; struct iopf_fault *iopf, *next; + struct iommu_page_response resp; struct dev_iommu *param = dev->iommu; struct iommu_fault_param *fault_param; + const struct iommu_ops *ops = dev_iommu_ops(dev); mutex_lock(&queue->lock); mutex_lock(¶m->lock); fault_param = rcu_dereference_check(param->fault_param, lockdep_is_held(¶m->lock)); - if (!fault_param) { - ret = -ENODEV; - goto unlock; - } - if (fault_param->queue != queue) { - ret = -EINVAL; + if (WARN_ON(!fault_param || fault_param->queue != queue)) goto unlock; - } - if (!list_empty(&fault_param->faults)) { - ret = -EBUSY; - goto unlock; - } + mutex_lock(&fault_param->lock); + list_for_each_entry_safe(iopf, next, &fault_param->partial, list) + kfree(iopf); - list_del(&fault_param->queue_list); + list_for_each_entry_safe(iopf, next, &fault_param->faults, list) { + memset(&resp, 0, sizeof(struct iommu_page_response)); + resp.pasid = iopf->fault.prm.pasid; + resp.grpid = iopf->fault.prm.grpid; + resp.code = IOMMU_PAGE_RESP_INVALID; - /* Just in case some faults are still stuck */ - list_for_each_entry_safe(iopf, next, &fault_param->partial, list) + if (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID) + resp.flags = IOMMU_PAGE_RESP_PASID_VALID; + + ops->page_response(dev, iopf, &resp); + list_del(&iopf->list); kfree(iopf); + } + mutex_unlock(&fault_param->lock); + + list_del(&fault_param->queue_list); /* dec the ref owned by iopf_queue_add_device() */ rcu_assign_pointer(param->fault_param, NULL); @@ -490,8 +509,6 @@ int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) unlock: mutex_unlock(¶m->lock); mutex_unlock(&queue->lock); - - return ret; } EXPORT_SYMBOL_GPL(iopf_queue_remove_device); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1e9161ae95da..92dfd9b94577 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1542,7 +1542,7 
@@ iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) #ifdef CONFIG_IOMMU_IOPF int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); -int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev); +void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev); int iopf_queue_flush_dev(struct device *dev); struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); @@ -1558,10 +1558,9 @@ iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) return -ENODEV; } -static inline int +static inline void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) { - return -ENODEV; } static inline int iopf_queue_flush_dev(struct device *dev) -- cgit v1.2.3 From 19911232713573a2ebea84a25bd4d71d024ed86b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:25 +0800 Subject: iommu: Track iopf group instead of last fault Previously, before a group of page faults was passed to the domain's iopf handler, the last page fault of the group was kept in the list of iommu_fault_param::faults. In the page fault response path, the group's last page fault was used to look up the list, and the page faults were responded to the device only if there was a matching fault. The previous approach is unnecessarily complex and not performance-friendly. Put the page fault group itself on the outstanding fault list. It can be removed in the page fault response path or in the iopf_queue_remove_device() path. The pending list is protected by iommu_fault_param::lock. To allow checking for the group's presence in the list using list_empty(), the iopf group should be removed from the list with list_del_init(). IOMMU_PAGE_RESP_PASID_VALID, which indicates that a response message carries a valid PASID value, is set in the code but never consumed; remove it to make the code clean. Whether a PASID must accompany a response is a hardware detail that should be kept in the individual drivers. When the page fault handling framework in the IOMMU and IOMMUFD subsystems includes a valid PASID in the fault message, the response message should always contain the same PASID value, and individual drivers should be responsible for deciding whether to include the PASID in the messages they provide for the hardware. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Tested-by: Yan Zhao Link: https://lore.kernel.org/r/20240212012227.119381-15-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 242 ++++++++++++++++----------------------------- include/linux/iommu.h | 6 +- 2 files changed, 86 insertions(+), 162 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index ece09552e5cf..05e49e2e6a52 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -78,12 +78,33 @@ static struct iommu_domain *get_domain_for_iopf(struct device *dev, return domain; } +/* Non-last request of a group. Postpone until the last one.
*/ +static int report_partial_fault(struct iommu_fault_param *fault_param, + struct iommu_fault *fault) +{ + struct iopf_fault *iopf; + + iopf = kzalloc(sizeof(*iopf), GFP_KERNEL); + if (!iopf) + return -ENOMEM; + + iopf->fault = *fault; + + mutex_lock(&fault_param->lock); + list_add(&iopf->list, &fault_param->partial); + mutex_unlock(&fault_param->lock); + + return 0; +} + /** - * iommu_handle_iopf - IO Page Fault handler - * @fault: fault event - * @iopf_param: the fault parameter of the device. + * iommu_report_device_fault() - Report fault event to device driver + * @dev: the device + * @evt: fault event data * - * Add a fault to the device workqueue, to be handled by mm. + * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ + * handler. When this function fails and the fault is recoverable, it is the + * caller's responsibility to complete the fault. * * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't @@ -118,34 +139,37 @@ static struct iommu_domain *get_domain_for_iopf(struct device *dev, * * Return: 0 on success and <0 on error. */ -static int iommu_handle_iopf(struct iommu_fault *fault, - struct iommu_fault_param *iopf_param) +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { - int ret; - struct iopf_group *group; - struct iommu_domain *domain; + struct iommu_fault *fault = &evt->fault; + struct iommu_fault_param *iopf_param; struct iopf_fault *iopf, *next; - struct device *dev = iopf_param->dev; - - lockdep_assert_held(&iopf_param->lock); + struct iommu_domain *domain; + struct iopf_group *group; + int ret; if (fault->type != IOMMU_FAULT_PAGE_REQ) - /* Not a recoverable page fault */ return -EOPNOTSUPP; - if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { - iopf = kzalloc(sizeof(*iopf), GFP_KERNEL); - if (!iopf) - return -ENOMEM; - - iopf->fault = *fault; + iopf_param = iopf_get_dev_fault_param(dev); + if (!iopf_param) + return -ENODEV; - /* Non-last request of a group. Postpone until the last one */ - list_add(&iopf->list, &iopf_param->partial); + if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { + ret = report_partial_fault(iopf_param, fault); + iopf_put_dev_fault_param(iopf_param); - return 0; + return ret; } + /* + * This is the last page fault of a group. Allocate an iopf group and + * pass it to domain's page fault handler. The group holds a reference + * count of the fault parameter. It will be released after response or + * error path of this function. If an error is returned, the caller + * will send a response to the hardware. We need to clean up before + * leaving, otherwise partial faults will be stuck. + */ domain = get_domain_for_iopf(dev, fault); if (!domain) { ret = -EINVAL; @@ -154,11 +178,6 @@ static int iommu_handle_iopf(struct iommu_fault *fault, group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) { - /* - * The caller will send a response to the hardware. But we do - * need to clean up before leaving, otherwise partial faults - * will be stuck. 
- */ ret = -ENOMEM; goto cleanup_partial; } @@ -166,145 +185,45 @@ static int iommu_handle_iopf(struct iommu_fault *fault, group->fault_param = iopf_param; group->last_fault.fault = *fault; INIT_LIST_HEAD(&group->faults); + INIT_LIST_HEAD(&group->pending_node); group->domain = domain; list_add(&group->last_fault.list, &group->faults); /* See if we have partial faults for this group */ + mutex_lock(&iopf_param->lock); list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { if (iopf->fault.prm.grpid == fault->prm.grpid) /* Insert *before* the last fault */ list_move(&iopf->list, &group->faults); } - + list_add(&group->pending_node, &iopf_param->faults); mutex_unlock(&iopf_param->lock); + ret = domain->iopf_handler(group); - mutex_lock(&iopf_param->lock); - if (ret) + if (ret) { + mutex_lock(&iopf_param->lock); + list_del_init(&group->pending_node); + mutex_unlock(&iopf_param->lock); iopf_free_group(group); + } return ret; + cleanup_partial: + mutex_lock(&iopf_param->lock); list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { if (iopf->fault.prm.grpid == fault->prm.grpid) { list_del(&iopf->list); kfree(iopf); } } - return ret; -} - -/** - * iommu_report_device_fault() - Report fault event to device driver - * @dev: the device - * @evt: fault event data - * - * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ - * handler. When this function fails and the fault is recoverable, it is the - * caller's responsibility to complete the fault. - * - * Return 0 on success, or an error. - */ -int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) -{ - bool last_prq = evt->fault.type == IOMMU_FAULT_PAGE_REQ && - (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE); - struct iommu_fault_param *fault_param; - struct iopf_fault *evt_pending; - int ret; - - fault_param = iopf_get_dev_fault_param(dev); - if (!fault_param) - return -EINVAL; - - mutex_lock(&fault_param->lock); - if (last_prq) { - evt_pending = kmemdup(evt, sizeof(struct iopf_fault), - GFP_KERNEL); - if (!evt_pending) { - ret = -ENOMEM; - goto err_unlock; - } - list_add_tail(&evt_pending->list, &fault_param->faults); - } - - ret = iommu_handle_iopf(&evt->fault, fault_param); - if (ret) - goto err_free; - - mutex_unlock(&fault_param->lock); - /* The reference count of fault_param is now held by iopf_group. 
*/ - if (!last_prq) - iopf_put_dev_fault_param(fault_param); - - return 0; -err_free: - if (last_prq) { - list_del(&evt_pending->list); - kfree(evt_pending); - } -err_unlock: - mutex_unlock(&fault_param->lock); - iopf_put_dev_fault_param(fault_param); + mutex_unlock(&iopf_param->lock); + iopf_put_dev_fault_param(iopf_param); return ret; } EXPORT_SYMBOL_GPL(iommu_report_device_fault); -static int iommu_page_response(struct iopf_group *group, - struct iommu_page_response *msg) -{ - bool needs_pasid; - int ret = -EINVAL; - struct iopf_fault *evt; - struct iommu_fault_page_request *prm; - struct device *dev = group->fault_param->dev; - const struct iommu_ops *ops = dev_iommu_ops(dev); - bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; - struct iommu_fault_param *fault_param = group->fault_param; - - /* Only send response if there is a fault report pending */ - mutex_lock(&fault_param->lock); - if (list_empty(&fault_param->faults)) { - dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); - goto done_unlock; - } - /* - * Check if we have a matching page request pending to respond, - * otherwise return -EINVAL - */ - list_for_each_entry(evt, &fault_param->faults, list) { - prm = &evt->fault.prm; - if (prm->grpid != msg->grpid) - continue; - - /* - * If the PASID is required, the corresponding request is - * matched using the group ID, the PASID valid bit and the PASID - * value. Otherwise only the group ID matches request and - * response. - */ - needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; - if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid)) - continue; - - if (!needs_pasid && has_pasid) { - /* No big deal, just clear it. */ - msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID; - msg->pasid = 0; - } - - ret = ops->page_response(dev, evt, msg); - list_del(&evt->list); - kfree(evt); - break; - } - -done_unlock: - mutex_unlock(&fault_param->lock); - - return ret; -} - /** * iopf_queue_flush_dev - Ensure that all queued faults have been processed * @dev: the endpoint whose faults need to be flushed. 
@@ -346,18 +265,26 @@ EXPORT_SYMBOL_GPL(iopf_queue_flush_dev); int iopf_group_response(struct iopf_group *group, enum iommu_page_response_code status) { + struct iommu_fault_param *fault_param = group->fault_param; struct iopf_fault *iopf = &group->last_fault; + struct device *dev = group->fault_param->dev; + const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_page_response resp = { .pasid = iopf->fault.prm.pasid, .grpid = iopf->fault.prm.grpid, .code = status, }; + int ret = -EINVAL; - if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) && - (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID)) - resp.flags = IOMMU_PAGE_RESP_PASID_VALID; + /* Only send response if there is a fault report pending */ + mutex_lock(&fault_param->lock); + if (!list_empty(&group->pending_node)) { + ret = ops->page_response(dev, &group->last_fault, &resp); + list_del_init(&group->pending_node); + } + mutex_unlock(&fault_param->lock); - return iommu_page_response(group, &resp); + return ret; } EXPORT_SYMBOL_GPL(iopf_group_response); @@ -468,8 +395,9 @@ EXPORT_SYMBOL_GPL(iopf_queue_add_device); */ void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) { - struct iopf_fault *iopf, *next; - struct iommu_page_response resp; + struct iopf_fault *partial_iopf; + struct iopf_fault *next; + struct iopf_group *group, *temp; struct dev_iommu *param = dev->iommu; struct iommu_fault_param *fault_param; const struct iommu_ops *ops = dev_iommu_ops(dev); @@ -483,21 +411,19 @@ void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) goto unlock; mutex_lock(&fault_param->lock); - list_for_each_entry_safe(iopf, next, &fault_param->partial, list) - kfree(iopf); - - list_for_each_entry_safe(iopf, next, &fault_param->faults, list) { - memset(&resp, 0, sizeof(struct iommu_page_response)); - resp.pasid = iopf->fault.prm.pasid; - resp.grpid = iopf->fault.prm.grpid; - resp.code = IOMMU_PAGE_RESP_INVALID; + list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list) + kfree(partial_iopf); - if (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID) - resp.flags = IOMMU_PAGE_RESP_PASID_VALID; + list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) { + struct iopf_fault *iopf = &group->last_fault; + struct iommu_page_response resp = { + .pasid = iopf->fault.prm.pasid, + .grpid = iopf->fault.prm.grpid, + .code = IOMMU_PAGE_RESP_INVALID + }; ops->page_response(dev, iopf, &resp); - list_del(&iopf->list); - kfree(iopf); + list_del_init(&group->pending_node); } mutex_unlock(&fault_param->lock); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 92dfd9b94577..f8ed1cc7212e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -106,15 +106,11 @@ enum iommu_page_response_code { /** * struct iommu_page_response - Generic page response information - * @flags: encodes whether the corresponding fields are valid - * (IOMMU_FAULT_PAGE_RESPONSE_* values) * @pasid: Process Address Space ID * @grpid: Page Request Group Index * @code: response code from &enum iommu_page_response_code */ struct iommu_page_response { -#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0) - u32 flags; u32 pasid; u32 grpid; u32 code; @@ -129,6 +125,8 @@ struct iopf_fault { struct iopf_group { struct iopf_fault last_fault; struct list_head faults; + /* list node for iommu_fault_param::faults */ + struct list_head pending_node; struct work_struct work; struct iommu_domain *domain; /* The device's fault data parameter. 
*/ -- cgit v1.2.3 From b554e396e51ce3d378a560666f85c6836a8323fd Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:26 +0800 Subject: iommu: Make iopf_group_response() return void The iopf_group_response() should return void, as nothing can do anything with the failure. This implies that ops->page_response() must also return void; this is consistent with what the drivers do. The failure paths, which are all integrity validations of the fault, should be WARN_ON'd, not return codes. If the iommu core fails to enqueue the fault, it should respond the fault directly by calling ops->page_response() instead of returning an error number and relying on the iommu drivers to do so. Consolidate the error fault handling code in the core. Co-developed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240212012227.119381-16-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 50 ++++------- drivers/iommu/intel/iommu.h | 4 +- drivers/iommu/intel/svm.c | 18 +--- drivers/iommu/io-pgfault.c | 132 +++++++++++++++------------- include/linux/iommu.h | 14 ++- 5 files changed, 98 insertions(+), 120 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4e93e845458c..42eb59cb99f4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -920,31 +920,29 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); } -static int arm_smmu_page_response(struct device *dev, - struct iopf_fault *unused, - struct iommu_page_response *resp) +static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused, + struct iommu_page_response *resp) { struct arm_smmu_cmdq_ent cmd = {0}; struct arm_smmu_master *master = dev_iommu_priv_get(dev); int sid = master->streams[0].id; - if (master->stall_enabled) { - cmd.opcode = CMDQ_OP_RESUME; - cmd.resume.sid = sid; - cmd.resume.stag = resp->grpid; - switch (resp->code) { - case IOMMU_PAGE_RESP_INVALID: - case IOMMU_PAGE_RESP_FAILURE: - cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT; - break; - case IOMMU_PAGE_RESP_SUCCESS: - cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY; - break; - default: - return -EINVAL; - } - } else { - return -ENODEV; + if (WARN_ON(!master->stall_enabled)) + return; + + cmd.opcode = CMDQ_OP_RESUME; + cmd.resume.sid = sid; + cmd.resume.stag = resp->grpid; + switch (resp->code) { + case IOMMU_PAGE_RESP_INVALID: + case IOMMU_PAGE_RESP_FAILURE: + cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT; + break; + case IOMMU_PAGE_RESP_SUCCESS: + cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY; + break; + default: + break; } arm_smmu_cmdq_issue_cmd(master->smmu, &cmd); @@ -954,8 +952,6 @@ static int arm_smmu_page_response(struct device *dev, * terminated... at some point in the future. PRI_RESP is fire and * forget. 
*/ - - return 0; } /* Context descriptor manipulation functions */ @@ -1516,16 +1512,6 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) } ret = iommu_report_device_fault(master->dev, &fault_evt); - if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) { - /* Nobody cared, abort the access */ - struct iommu_page_response resp = { - .pasid = flt->prm.pasid, - .grpid = flt->prm.grpid, - .code = IOMMU_PAGE_RESP_FAILURE, - }; - arm_smmu_page_response(master->dev, &fault_evt, &resp); - } - out_unlock: mutex_unlock(&smmu->streams_mutex); return ret; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 696d95293a69..cf9a28c7fab8 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1079,8 +1079,8 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, void intel_svm_check(struct intel_iommu *iommu); int intel_svm_enable_prq(struct intel_iommu *iommu); int intel_svm_finish_prq(struct intel_iommu *iommu); -int intel_svm_page_response(struct device *dev, struct iopf_fault *evt, - struct iommu_page_response *msg); +void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); void intel_drain_pasid_prq(struct device *dev, u32 pasid); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index e1cbcb9515f0..2f8716636dbb 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -740,9 +740,8 @@ prq_advance: return IRQ_RETVAL(handled); } -int intel_svm_page_response(struct device *dev, - struct iopf_fault *evt, - struct iommu_page_response *msg) +void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; @@ -751,7 +750,6 @@ int intel_svm_page_response(struct device *dev, bool private_present; bool pasid_present; bool last_page; - int ret = 0; u16 sid; prm = &evt->fault.prm; @@ -760,16 +758,6 @@ int intel_svm_page_response(struct device *dev, private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; - if (!pasid_present) { - ret = -EINVAL; - goto out; - } - - if (prm->pasid == 0 || prm->pasid >= PASID_MAX) { - ret = -EINVAL; - goto out; - } - /* * Per VT-d spec. v3.0 ch7.7, system software must respond * with page group response if private data is present (PDP) @@ -798,8 +786,6 @@ int intel_svm_page_response(struct device *dev, qi_submit_sync(iommu, &desc, 1, 0); } -out: - return ret; } static int intel_svm_set_dev_pasid(struct iommu_domain *domain, diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 05e49e2e6a52..6a325bff8164 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -39,7 +39,7 @@ static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param) kfree_rcu(fault_param, rcu); } -void iopf_free_group(struct iopf_group *group) +static void __iopf_free_group(struct iopf_group *group) { struct iopf_fault *iopf, *next; @@ -50,6 +50,11 @@ void iopf_free_group(struct iopf_group *group) /* Pair with iommu_report_device_fault(). 
*/ iopf_put_dev_fault_param(group->fault_param); +} + +void iopf_free_group(struct iopf_group *group) +{ + __iopf_free_group(group); kfree(group); } EXPORT_SYMBOL_GPL(iopf_free_group); @@ -97,14 +102,49 @@ static int report_partial_fault(struct iommu_fault_param *fault_param, return 0; } +static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, + struct iopf_fault *evt, + struct iopf_group *abort_group) +{ + struct iopf_fault *iopf, *next; + struct iopf_group *group; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) { + /* + * We always need to construct the group as we need it to abort + * the request at the driver if it can't be handled. + */ + group = abort_group; + } + + group->fault_param = iopf_param; + group->last_fault.fault = evt->fault; + INIT_LIST_HEAD(&group->faults); + INIT_LIST_HEAD(&group->pending_node); + list_add(&group->last_fault.list, &group->faults); + + /* See if we have partial faults for this group */ + mutex_lock(&iopf_param->lock); + list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { + if (iopf->fault.prm.grpid == evt->fault.prm.grpid) + /* Insert *before* the last fault */ + list_move(&iopf->list, &group->faults); + } + list_add(&group->pending_node, &iopf_param->faults); + mutex_unlock(&iopf_param->lock); + + return group; +} + /** * iommu_report_device_fault() - Report fault event to device driver * @dev: the device * @evt: fault event data * * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ - * handler. When this function fails and the fault is recoverable, it is the - * caller's responsibility to complete the fault. + * handler. If this function fails then ops->page_response() was called to + * complete evt if required. * * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't @@ -143,22 +183,18 @@ int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { struct iommu_fault *fault = &evt->fault; struct iommu_fault_param *iopf_param; - struct iopf_fault *iopf, *next; - struct iommu_domain *domain; + struct iopf_group abort_group = {}; struct iopf_group *group; int ret; - if (fault->type != IOMMU_FAULT_PAGE_REQ) - return -EOPNOTSUPP; - iopf_param = iopf_get_dev_fault_param(dev); - if (!iopf_param) + if (WARN_ON(!iopf_param)) return -ENODEV; if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { ret = report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); - + /* A request that is not the last does not need to be ack'd */ return ret; } @@ -170,56 +206,33 @@ int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) * will send a response to the hardware. We need to clean up before * leaving, otherwise partial faults will be stuck. 
*/ - domain = get_domain_for_iopf(dev, fault); - if (!domain) { - ret = -EINVAL; - goto cleanup_partial; - } - - group = kzalloc(sizeof(*group), GFP_KERNEL); - if (!group) { + group = iopf_group_alloc(iopf_param, evt, &abort_group); + if (group == &abort_group) { ret = -ENOMEM; - goto cleanup_partial; + goto err_abort; } - group->fault_param = iopf_param; - group->last_fault.fault = *fault; - INIT_LIST_HEAD(&group->faults); - INIT_LIST_HEAD(&group->pending_node); - group->domain = domain; - list_add(&group->last_fault.list, &group->faults); - - /* See if we have partial faults for this group */ - mutex_lock(&iopf_param->lock); - list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { - if (iopf->fault.prm.grpid == fault->prm.grpid) - /* Insert *before* the last fault */ - list_move(&iopf->list, &group->faults); - } - list_add(&group->pending_node, &iopf_param->faults); - mutex_unlock(&iopf_param->lock); - - ret = domain->iopf_handler(group); - if (ret) { - mutex_lock(&iopf_param->lock); - list_del_init(&group->pending_node); - mutex_unlock(&iopf_param->lock); - iopf_free_group(group); + group->domain = get_domain_for_iopf(dev, fault); + if (!group->domain) { + ret = -EINVAL; + goto err_abort; } - return ret; - -cleanup_partial: - mutex_lock(&iopf_param->lock); - list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { - if (iopf->fault.prm.grpid == fault->prm.grpid) { - list_del(&iopf->list); - kfree(iopf); - } - } - mutex_unlock(&iopf_param->lock); - iopf_put_dev_fault_param(iopf_param); + /* + * On success iopf_handler must call iopf_group_response() and + * iopf_free_group() + */ + ret = group->domain->iopf_handler(group); + if (ret) + goto err_abort; + return 0; +err_abort: + iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE); + if (group == &abort_group) + __iopf_free_group(group); + else + iopf_free_group(group); return ret; } EXPORT_SYMBOL_GPL(iommu_report_device_fault); @@ -259,11 +272,9 @@ EXPORT_SYMBOL_GPL(iopf_queue_flush_dev); * iopf_group_response - Respond a group of page faults * @group: the group of faults with the same group id * @status: the response code - * - * Return 0 on success and <0 on error. 
*/ -int iopf_group_response(struct iopf_group *group, - enum iommu_page_response_code status) +void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status) { struct iommu_fault_param *fault_param = group->fault_param; struct iopf_fault *iopf = &group->last_fault; @@ -274,17 +285,14 @@ int iopf_group_response(struct iopf_group *group, .grpid = iopf->fault.prm.grpid, .code = status, }; - int ret = -EINVAL; /* Only send response if there is a fault report pending */ mutex_lock(&fault_param->lock); if (!list_empty(&group->pending_node)) { - ret = ops->page_response(dev, &group->last_fault, &resp); + ops->page_response(dev, &group->last_fault, &resp); list_del_init(&group->pending_node); } mutex_unlock(&fault_param->lock); - - return ret; } EXPORT_SYMBOL_GPL(iopf_group_response); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f8ed1cc7212e..f632775414a5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -574,9 +574,8 @@ struct iommu_ops { int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - int (*page_response)(struct device *dev, - struct iopf_fault *evt, - struct iommu_page_response *msg); + void (*page_response)(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid); @@ -1547,8 +1546,8 @@ void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); void iopf_free_group(struct iopf_group *group); int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); -int iopf_group_response(struct iopf_group *group, - enum iommu_page_response_code status); +void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status); #else static inline int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) @@ -1590,10 +1589,9 @@ iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) return -ENODEV; } -static inline int iopf_group_response(struct iopf_group *group, - enum iommu_page_response_code status) +static inline void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status) { - return -ENODEV; } #endif /* CONFIG_IOMMU_IOPF */ #endif /* __LINUX_IOMMU_H */ -- cgit v1.2.3 From 3dfa64aecbafc288216b2790438d395add192c30 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Feb 2024 09:22:27 +0800 Subject: iommu: Make iommu_report_device_fault() return void As the iommu_report_device_fault() has been converted to auto-respond a page fault if it fails to enqueue it, there's no need to return a code in any case. Make it return void. 
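As an illustrative sketch (not part of this patch; the function name is made up), a driver's reporting path now reduces to a plain call, because a fault that cannot be enqueued is aborted by the core itself:

	/* Hypothetical driver event path */
	static void example_report_prq(struct device *dev, struct iopf_fault *evt)
	{
		/*
		 * On enqueue failure the core already responded with
		 * IOMMU_PAGE_RESP_FAILURE, so there is no error to propagate.
		 */
		iommu_report_device_fault(dev, evt);
	}
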
Suggested-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240212012227.119381-17-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 ++-- drivers/iommu/intel/svm.c | 19 +++++++------------ drivers/iommu/io-pgfault.c | 25 ++++++++----------------- include/linux/iommu.h | 5 ++--- 4 files changed, 19 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 42eb59cb99f4..02580364acda 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1455,7 +1455,7 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) /* IRQ and event handlers */ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) { - int ret; + int ret = 0; u32 perm = 0; struct arm_smmu_master *master; bool ssid_valid = evt[0] & EVTQ_0_SSV; @@ -1511,7 +1511,7 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) goto out_unlock; } - ret = iommu_report_device_fault(master->dev, &fault_evt); + iommu_report_device_fault(master->dev, &fault_evt); out_unlock: mutex_unlock(&smmu->streams_mutex); return ret; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 2f8716636dbb..b644d57da841 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -561,14 +561,11 @@ static int prq_to_iommu_prot(struct page_req_dsc *req) return prot; } -static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, - struct page_req_dsc *desc) +static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, + struct page_req_dsc *desc) { struct iopf_fault event = { }; - if (!dev || !dev_is_pci(dev)) - return -ENODEV; - /* Fill in event data for device specific processing */ event.fault.type = IOMMU_FAULT_PAGE_REQ; event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT; @@ -601,7 +598,7 @@ static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, event.fault.prm.private_data[0] = ktime_to_ns(ktime_get()); } - return iommu_report_device_fault(dev, &event); + iommu_report_device_fault(dev, &event); } static void handle_bad_prq_event(struct intel_iommu *iommu, @@ -704,12 +701,10 @@ bad_req: if (!pdev) goto bad_req; - if (intel_svm_prq_report(iommu, &pdev->dev, req)) - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); - else - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, - req->priv_data[0], req->priv_data[1], - iommu->prq_seq_number++); + intel_svm_prq_report(iommu, &pdev->dev, req); + trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + iommu->prq_seq_number++); pci_dev_put(pdev); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 6a325bff8164..06d78fcc79fd 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -176,26 +176,22 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, * freed after the device has stopped generating page faults (or the iommu * hardware has been set to block the page faults) and the pending page faults * have been flushed. - * - * Return: 0 on success and <0 on error. 
*/ -int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) +void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { struct iommu_fault *fault = &evt->fault; struct iommu_fault_param *iopf_param; struct iopf_group abort_group = {}; struct iopf_group *group; - int ret; iopf_param = iopf_get_dev_fault_param(dev); if (WARN_ON(!iopf_param)) - return -ENODEV; + return; if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { - ret = report_partial_fault(iopf_param, fault); + report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ - return ret; } /* @@ -207,25 +203,21 @@ int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) * leaving, otherwise partial faults will be stuck. */ group = iopf_group_alloc(iopf_param, evt, &abort_group); - if (group == &abort_group) { - ret = -ENOMEM; + if (group == &abort_group) goto err_abort; - } group->domain = get_domain_for_iopf(dev, fault); - if (!group->domain) { - ret = -EINVAL; + if (!group->domain) goto err_abort; - } /* * On success iopf_handler must call iopf_group_response() and * iopf_free_group() */ - ret = group->domain->iopf_handler(group); - if (ret) + if (group->domain->iopf_handler(group)) goto err_abort; - return 0; + + return; err_abort: iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE); @@ -233,7 +225,6 @@ err_abort: __iopf_free_group(group); else iopf_free_group(group); - return ret; } EXPORT_SYMBOL_GPL(iommu_report_device_fault); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f632775414a5..7cc56cfe98dd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1545,7 +1545,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); void iopf_free_group(struct iopf_group *group); -int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); void iopf_group_response(struct iopf_group *group, enum iommu_page_response_code status); #else @@ -1583,10 +1583,9 @@ static inline void iopf_free_group(struct iopf_group *group) { } -static inline int +static inline void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { - return -ENODEV; } static inline void iopf_group_response(struct iopf_group *group, -- cgit v1.2.3 From 12721e66005798b2a07bd8309060c230e52ba2b0 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 31 Jan 2024 14:35:16 +0200 Subject: iommu/arm-smmu-qcom: Add X1E80100 MDSS compatible Add the X1E80100 MDSS compatible to clients compatible list, as it also needs the workarounds. 
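For context, a minimal sketch of how this match table takes effect (assumed from the driver's existing def_domain_type logic; illustrative, not part of this patch): clients that match are steered to an identity domain so the workarounds apply.

	static int example_def_domain_type(struct device *dev)
	{
		/* qcom_smmu_client_of_match is the table extended above */
		const struct of_device_id *match =
			of_match_device(qcom_smmu_client_of_match, dev);

		return match ? IOMMU_DOMAIN_IDENTITY : 0;
	}
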
Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20240131-x1e80100-iommu-arm-smmu-qcom-v1-1-c1240419c718@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 8b04ece00420..5c7cfc51b57c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -260,6 +260,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sm6375-mdss" }, { .compatible = "qcom,sm8150-mdss" }, { .compatible = "qcom,sm8250-mdss" }, + { .compatible = "qcom,x1e80100-mdss" }, { } }; -- cgit v1.2.3 From 7da51af9125c624318c8099de13c5ddefd47e9e8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:12 -0400 Subject: iommu/arm-smmu-v3: Make STE programming independent of the callers As the comment in arm_smmu_write_strtab_ent() explains, this routine has been limited to only work correctly in certain scenarios that the caller must ensure. Generally the caller must put the STE into ABORT or BYPASS before attempting to program it to something else. The iommu core APIs would ideally expect the driver to do a hitless change of iommu_domain in a number of cases: - RESV_DIRECT support wants IDENTITY -> DMA -> IDENTITY to be hitless for the RESV ranges - PASID upgrade has IDENTITY on the RID with no PASID then a PASID paging domain installed. The RID should not be impacted - PASID downgrade has IDENTITY on the RID and all PASIDs removed. The RID should not be impacted - RID does PAGING -> BLOCKING with active PASID, PASIDs should not be impacted - NESTING -> NESTING for carrying all the above hitless cases in a VM into the hypervisor. To comprehensively emulate the HW in a VM we should assume the VM OS is running logic like this and expecting hitless updates to be relayed to real HW. For CD updates arm_smmu_write_ctx_desc() has a similar comment explaining how limited it is, and the driver does have a need for hitless CD updates: - SMMUv3 BTM S1 ASID re-label - SVA mm release should change the CD to answer not-present to all requests without allowing logging (EPD0) The next patches/series are going to start removing some of this logic from the callers, and add more complex state combinations than currently. At the end everything that can be hitless will be hitless, including all of the above. Introduce arm_smmu_write_ste() which will run through the multi-qword programming sequence to avoid creating an incoherent 'torn' STE in the HW caches. It automatically detects which of two algorithms to use: 1) The disruptive V=0 update described in the spec which disrupts the entry and does three syncs to make the change: - Write V=0 to QWORD 0 - Write the entire STE except QWORD 0 - Write QWORD 0 2) A hitless update algorithm that follows the same rationale that the driver already uses. It is safe to change IGNORED bits that HW doesn't use: - Write the target value into all currently unused bits - Write a single QWORD, this makes the new STE live atomically - Ensure now unused bits are 0 The detection of which path to use and the implementation of the hitless update rely on a "used bitmask" describing what bits the HW is actually using based on the V/CFG/etc bits. This flows from the spec language, typically indicated as IGNORED.
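For intuition, here is a toy sketch of the qword-diff idea on a 2-qword entry (simplified from the arm_smmu_entry_qword_diff() added below; cur/target are the entry values and *_used their used-bit masks):

	u8 used_qword_diff = 0;

	for (i = 0; i != 2; i++) {
		/* Flip only the bits the HW currently ignores */
		u64 unused_update = (cur[i] & cur_used[i]) |
				    (target[i] & ~cur_used[i]);

		/* A used bit still differs: qword i needs a critical write */
		if ((unused_update & target_used[i]) != target[i])
			used_qword_diff |= 1 << i;
	}
	/* One bit set -> hitless single-qword update; more -> V=0 sequence */
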
Knowing which bits the HW is using we can update the bits it does not use and then compute how many QWORDS need to be changed. If only one qword needs to be updated the hitless algorithm is possible. Later patches will include CD updates in this mechanism so make the implementation generic using a struct arm_smmu_entry_writer and struct arm_smmu_entry_writer_ops to abstract the differences between STE and CD to be plugged in. At this point it generates the same sequence of updates as the current code, except that zeroing the VMID on entry to BYPASS/ABORT will do an extra sync (this seems to be an existing bug). Going forward this will use a V=0 transition instead of cycling through ABORT if a hitfull change is required. This seems more appropriate as ABORT will fail DMAs without any logging, but dropping a DMA due to transient V=0 is probably signaling a bug, so the C_BAD_STE is valuable. Add STRTAB_STE_1_SHCFG_INCOMING to s2_cfg, this was editing the STE in place and subtly inherited the value of data[1] from abort/bypass. Signed-off-by: Michael Shavit Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 275 +++++++++++++++++++++------- 1 file changed, 211 insertions(+), 64 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 0ffb1cf17e0b..9805d989dafd 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -48,6 +48,9 @@ enum arm_smmu_msi_index { ARM_SMMU_MAX_MSIS, }; +static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, + ioasid_t sid); + static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { [EVTQ_MSI_INDEX] = { ARM_SMMU_EVTQ_IRQ_CFG0, @@ -971,6 +974,199 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } +/* + * Based on the value of ent report which bits of the STE the HW will access. It + * would be nice if this was complete according to the spec, but minimally it + * has to capture the bits this driver uses. + */ +static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent, + struct arm_smmu_ste *used_bits) +{ + unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent->data[0])); + + used_bits->data[0] = cpu_to_le64(STRTAB_STE_0_V); + if (!(ent->data[0] & cpu_to_le64(STRTAB_STE_0_V))) + return; + + used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_CFG); + + /* S1 translates */ + if (cfg & BIT(0)) { + used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT | + STRTAB_STE_0_S1CTXPTR_MASK | + STRTAB_STE_0_S1CDMAX); + used_bits->data[1] |= + cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | + STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | + STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | + STRTAB_STE_1_EATS); + used_bits->data[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); + } + + /* S2 translates */ + if (cfg & BIT(1)) { + used_bits->data[1] |= + cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG); + used_bits->data[2] |= + cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | + STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | + STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R); + used_bits->data[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK); + } + + if (cfg == STRTAB_STE_0_CFG_BYPASS) + used_bits->data[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); +} + +/* + * Figure out if we can do a hitless update of entry to become target. 
Returns a + * bit mask where 1 indicates that qword needs to be set disruptively. + * unused_update is an intermediate value of entry that has unused bits set to + * their new values. + */ +static u8 arm_smmu_entry_qword_diff(const struct arm_smmu_ste *entry, + const struct arm_smmu_ste *target, + struct arm_smmu_ste *unused_update) +{ + struct arm_smmu_ste target_used = {}; + struct arm_smmu_ste cur_used = {}; + u8 used_qword_diff = 0; + unsigned int i; + + arm_smmu_get_ste_used(entry, &cur_used); + arm_smmu_get_ste_used(target, &target_used); + + for (i = 0; i != ARRAY_SIZE(target_used.data); i++) { + /* + * Check that masks are up to date, the make functions are not + * allowed to set a bit to 1 if the used function doesn't say it + * is used. + */ + WARN_ON_ONCE(target->data[i] & ~target_used.data[i]); + + /* Bits can change because they are not currently being used */ + unused_update->data[i] = (entry->data[i] & cur_used.data[i]) | + (target->data[i] & ~cur_used.data[i]); + /* + * Each bit indicates that a used bit in a qword needs to be + * changed after unused_update is applied. + */ + if ((unused_update->data[i] & target_used.data[i]) != + target->data[i]) + used_qword_diff |= 1 << i; + } + return used_qword_diff; +} + +static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid, + struct arm_smmu_ste *entry, + const struct arm_smmu_ste *target, unsigned int start, + unsigned int len) +{ + bool changed = false; + unsigned int i; + + for (i = start; len != 0; len--, i++) { + if (entry->data[i] != target->data[i]) { + WRITE_ONCE(entry->data[i], target->data[i]); + changed = true; + } + } + + if (changed) + arm_smmu_sync_ste_for_sid(smmu, sid); + return changed; +} + +/* + * Update the STE/CD to the target configuration. The transition from the + * current entry to the target entry takes place over multiple steps that + * attempts to make the transition hitless if possible. This function takes care + * not to create a situation where the HW can perceive a corrupted entry. HW is + * only required to have a 64 bit atomicity with stores from the CPU, while + * entries are many 64 bit values big. + * + * The difference between the current value and the target value is analyzed to + * determine which of three updates are required - disruptive, hitless or no + * change. + * + * In the most general disruptive case we can make any update in three steps: + * - Disrupting the entry (V=0) + * - Fill now unused qwords, execpt qword 0 which contains V + * - Make qword 0 have the final value and valid (V=1) with a single 64 + * bit store + * + * However this disrupts the HW while it is happening. There are several + * interesting cases where a STE/CD can be updated without disturbing the HW + * because only a small number of bits are changing (S1DSS, CONFIG, etc) or + * because the used bits don't intersect. We can detect this by calculating how + * many 64 bit values need update after adjusting the unused bits and skip the + * V=0 process. This relies on the IGNORED behavior described in the + * specification. 
+ */ +static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, + struct arm_smmu_ste *entry, + const struct arm_smmu_ste *target) +{ + unsigned int num_entry_qwords = ARRAY_SIZE(target->data); + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ste unused_update; + u8 used_qword_diff; + + used_qword_diff = + arm_smmu_entry_qword_diff(entry, target, &unused_update); + if (hweight8(used_qword_diff) == 1) { + /* + * Only one qword needs its used bits to be changed. This is a + * hitless update, update all bits the current STE is ignoring + * to their new values, then update a single "critical qword" to + * change the STE and finally 0 out any bits that are now unused + * in the target configuration. + */ + unsigned int critical_qword_index = ffs(used_qword_diff) - 1; + + /* + * Skip writing unused bits in the critical qword since we'll be + * writing it in the next step anyways. This can save a sync + * when the only change is in that qword. + */ + unused_update.data[critical_qword_index] = + entry->data[critical_qword_index]; + entry_set(smmu, sid, entry, &unused_update, 0, num_entry_qwords); + entry_set(smmu, sid, entry, target, critical_qword_index, 1); + entry_set(smmu, sid, entry, target, 0, num_entry_qwords); + } else if (used_qword_diff) { + /* + * At least two qwords need their inuse bits to be changed. This + * requires a breaking update, zero the V bit, write all qwords + * but 0, then set qword 0 + */ + unused_update.data[0] = entry->data[0] & (~STRTAB_STE_0_V); + entry_set(smmu, sid, entry, &unused_update, 0, 1); + entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1); + entry_set(smmu, sid, entry, target, 0, 1); + } else { + /* + * No inuse bit changed. Sanity check that all unused bits are 0 + * in the entry. The target was already sanity checked by + * compute_qword_diff(). + */ + WARN_ON_ONCE( + entry_set(smmu, sid, entry, target, 0, num_entry_qwords)); + } + + /* It's likely that we'll want to use the new STE soon */ + if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) { + struct arm_smmu_cmdq_ent + prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, + .prefetch = { + .sid = sid, + } }; + + arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); + } +} + static void arm_smmu_sync_cd(struct arm_smmu_master *master, int ssid, bool leaf) { @@ -1254,34 +1450,12 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, struct arm_smmu_ste *dst) { - /* - * This is hideously complicated, but we only really care about - * three cases at the moment: - * - * 1. Invalid (all zero) -> bypass/fault (init) - * 2. Bypass/fault -> translation/bypass (attach) - * 3. Translation/bypass -> bypass/fault (detach) - * - * Given that we can't update the STE atomically and the SMMU - * doesn't read the thing in a defined order, that leaves us - * with the following maintenance requirements: - * - * 1. Update Config, return (init time STEs aren't live) - * 2. Write everything apart from dword 0, sync, write dword 0, sync - * 3. 
Update Config, sync - */ - u64 val = le64_to_cpu(dst->data[0]); - bool ste_live = false; + u64 val; struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = NULL; struct arm_smmu_s2_cfg *s2_cfg = NULL; struct arm_smmu_domain *smmu_domain = master->domain; - struct arm_smmu_cmdq_ent prefetch_cmd = { - .opcode = CMDQ_OP_PREFETCH_CFG, - .prefetch = { - .sid = sid, - }, - }; + struct arm_smmu_ste target = {}; if (smmu_domain) { switch (smmu_domain->stage) { @@ -1296,22 +1470,6 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, } } - if (val & STRTAB_STE_0_V) { - switch (FIELD_GET(STRTAB_STE_0_CFG, val)) { - case STRTAB_STE_0_CFG_BYPASS: - break; - case STRTAB_STE_0_CFG_S1_TRANS: - case STRTAB_STE_0_CFG_S2_TRANS: - ste_live = true; - break; - case STRTAB_STE_0_CFG_ABORT: - BUG_ON(!disable_bypass); - break; - default: - BUG(); /* STE corruption */ - } - } - /* Nuke the existing STE_0 value, as we're going to rewrite it */ val = STRTAB_STE_0_V; @@ -1322,16 +1480,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, else val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); - dst->data[0] = cpu_to_le64(val); - dst->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + target.data[0] = cpu_to_le64(val); + target.data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); - dst->data[2] = 0; /* Nuke the VMID */ - /* - * The SMMU can perform negative caching, so we must sync - * the STE regardless of whether the old value was live. - */ - if (smmu) - arm_smmu_sync_ste_for_sid(smmu, sid); + target.data[2] = 0; /* Nuke the VMID */ + arm_smmu_write_ste(master, sid, dst, &target); return; } @@ -1339,8 +1492,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? 
STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; - BUG_ON(ste_live); - dst->data[1] = cpu_to_le64( + target.data[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | @@ -1349,7 +1501,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled) - dst->data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); + target.data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | @@ -1358,8 +1510,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, } if (s2_cfg) { - BUG_ON(ste_live); - dst->data[2] = cpu_to_le64( + target.data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + STRTAB_STE_1_SHCFG_INCOMING)); + target.data[2] = cpu_to_le64( FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) | FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) | #ifdef __BIG_ENDIAN @@ -1368,23 +1521,17 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R); - dst->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); + target.data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS); } if (master->ats_enabled) - dst->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, + target.data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS)); - arm_smmu_sync_ste_for_sid(smmu, sid); - /* See comment in arm_smmu_write_ctx_desc() */ - WRITE_ONCE(dst->data[0], cpu_to_le64(val)); - arm_smmu_sync_ste_for_sid(smmu, sid); - - /* It's likely that we'll want to use the new STE soon */ - if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) - arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); + target.data[0] = cpu_to_le64(val); + arm_smmu_write_ste(master, sid, dst, &target); } static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab, -- cgit v1.2.3 From 7686aa5f8d61388eeaa64730363ffc8df20a481d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:13 -0400 Subject: iommu/arm-smmu-v3: Consolidate the STE generation for abort/bypass This allows writing the flow of arm_smmu_write_strtab_ent() around abort and bypass domains more naturally. Note that the core code no longer supplies NULL domains, though there is still a flow in the driver that end up in arm_smmu_write_strtab_ent() with NULL. A later patch will remove it. Remove the duplicate calculation of the STE in arm_smmu_init_bypass_stes() and remove the force parameter. arm_smmu_rmr_install_bypass_ste() can now simply invoke arm_smmu_make_bypass_ste() directly. Rename arm_smmu_init_bypass_stes() to arm_smmu_init_initial_stes() to better reflect its purpose. 
Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 97 ++++++++++++++++------------- 1 file changed, 55 insertions(+), 42 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9805d989dafd..12ba1b97d696 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1447,6 +1447,24 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } +static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) +{ + memset(target, 0, sizeof(*target)); + target->data[0] = cpu_to_le64( + STRTAB_STE_0_V | + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT)); +} + +static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target) +{ + memset(target, 0, sizeof(*target)); + target->data[0] = cpu_to_le64( + STRTAB_STE_0_V | + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS)); + target->data[1] = cpu_to_le64( + FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); +} + static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, struct arm_smmu_ste *dst) { @@ -1457,37 +1475,31 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, struct arm_smmu_domain *smmu_domain = master->domain; struct arm_smmu_ste target = {}; - if (smmu_domain) { - switch (smmu_domain->stage) { - case ARM_SMMU_DOMAIN_S1: - cd_table = &master->cd_table; - break; - case ARM_SMMU_DOMAIN_S2: - s2_cfg = &smmu_domain->s2_cfg; - break; - default: - break; - } - } - - /* Nuke the existing STE_0 value, as we're going to rewrite it */ - val = STRTAB_STE_0_V; - - /* Bypass/fault */ - if (!smmu_domain || !(cd_table || s2_cfg)) { - if (!smmu_domain && disable_bypass) - val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); + if (!smmu_domain) { + if (disable_bypass) + arm_smmu_make_abort_ste(&target); else - val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); + arm_smmu_make_bypass_ste(&target); + arm_smmu_write_ste(master, sid, dst, &target); + return; + } - target.data[0] = cpu_to_le64(val); - target.data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, - STRTAB_STE_1_SHCFG_INCOMING)); - target.data[2] = 0; /* Nuke the VMID */ + switch (smmu_domain->stage) { + case ARM_SMMU_DOMAIN_S1: + cd_table = &master->cd_table; + break; + case ARM_SMMU_DOMAIN_S2: + s2_cfg = &smmu_domain->s2_cfg; + break; + case ARM_SMMU_DOMAIN_BYPASS: + arm_smmu_make_bypass_ste(&target); arm_smmu_write_ste(master, sid, dst, &target); return; } + /* Nuke the existing STE_0 value, as we're going to rewrite it */ + val = STRTAB_STE_0_V; + if (cd_table) { u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; @@ -1534,22 +1546,20 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, arm_smmu_write_ste(master, sid, dst, &target); } -static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab, - unsigned int nent, bool force) +/* + * This can safely directly manipulate the STE memory without a sync sequence + * because the STE table has not been installed in the SMMU yet. 
+ */ +static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, + unsigned int nent) { unsigned int i; - u64 val = STRTAB_STE_0_V; - - if (disable_bypass && !force) - val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); - else - val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); for (i = 0; i < nent; ++i) { - strtab->data[0] = cpu_to_le64(val); - strtab->data[1] = cpu_to_le64(FIELD_PREP( - STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); - strtab->data[2] = 0; + if (disable_bypass) + arm_smmu_make_abort_ste(strtab); + else + arm_smmu_make_bypass_ste(strtab); strtab++; } } @@ -1577,7 +1587,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return -ENOMEM; } - arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false); + arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT); arm_smmu_write_strtab_l1_desc(strtab, desc); return 0; } @@ -3196,7 +3206,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); cfg->strtab_base_cfg = reg; - arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false); + arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents); return 0; } @@ -3907,7 +3917,6 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); list_for_each_entry(e, &rmr_list, list) { - struct arm_smmu_ste *step; struct iommu_iort_rmr_data *rmr; int ret, i; @@ -3920,8 +3929,12 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) continue; } - step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]); - arm_smmu_init_bypass_stes(step, 1, true); + /* + * STE table is not programmed to HW, see + * arm_smmu_initial_bypass_stes() + */ + arm_smmu_make_bypass_ste( + arm_smmu_get_step_for_sid(smmu, rmr->sids[i])); } } -- cgit v1.2.3 From efe15df08727d483bd247ff905a828f0de955de6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:14 -0400 Subject: iommu/arm-smmu-v3: Move the STE generation for S1 and S2 domains into functions This is preparation to move the STE calculation higher up in to the call chain and remove arm_smmu_write_strtab_ent(). These new functions will be called directly from attach_dev. 
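Abridged from the patch that follows, arm_smmu_write_strtab_ent() is left as a thin dispatcher over the new helpers:

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		arm_smmu_make_cdtable_ste(&target, master);
		break;
	case ARM_SMMU_DOMAIN_S2:
		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
		break;
	case ARM_SMMU_DOMAIN_BYPASS:
		arm_smmu_make_bypass_ste(&target);
		break;
	}
	arm_smmu_write_ste(master, sid, dst, &target);
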
Reviewed-by: Moritz Fischer Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 138 +++++++++++++++++----------- 1 file changed, 83 insertions(+), 55 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 12ba1b97d696..e34c31819669 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1465,13 +1465,89 @@ static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target) FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); } +static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, + struct arm_smmu_master *master) +{ + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; + struct arm_smmu_device *smmu = master->smmu; + + memset(target, 0, sizeof(*target)); + target->data[0] = cpu_to_le64( + STRTAB_STE_0_V | + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | + FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) | + (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | + FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax)); + + target->data[1] = cpu_to_le64( + FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | + FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | + ((smmu->features & ARM_SMMU_FEAT_STALLS && + !master->stall_enabled) ? + STRTAB_STE_1_S1STALLD : + 0) | + FIELD_PREP(STRTAB_STE_1_EATS, + master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + + if (smmu->features & ARM_SMMU_FEAT_E2H) { + /* + * To support BTM the streamworld needs to match the + * configuration of the CPU so that the ASID broadcasts are + * properly matched. This means either S/NS-EL2-E2H (hypervisor) + * or NS-EL1 (guest). Since an SVA domain can be installed in a + * PASID this should always use a BTM compatible configuration + * if the HW supports it. + */ + target->data[1] |= cpu_to_le64( + FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2)); + } else { + target->data[1] |= cpu_to_le64( + FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1)); + + /* + * VMID 0 is reserved for stage-2 bypass EL1 STEs, see + * arm_smmu_domain_alloc_id() + */ + target->data[2] = + cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0)); + } +} + +static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, + struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg; + + memset(target, 0, sizeof(*target)); + target->data[0] = cpu_to_le64( + STRTAB_STE_0_V | + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS)); + + target->data[1] = cpu_to_le64( + FIELD_PREP(STRTAB_STE_1_EATS, + master->ats_enabled ? 
STRTAB_STE_1_EATS_TRANS : 0) | + FIELD_PREP(STRTAB_STE_1_SHCFG, + STRTAB_STE_1_SHCFG_INCOMING)); + + target->data[2] = cpu_to_le64( + FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) | + FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) | + STRTAB_STE_2_S2AA64 | +#ifdef __BIG_ENDIAN + STRTAB_STE_2_S2ENDI | +#endif + STRTAB_STE_2_S2PTW | + STRTAB_STE_2_S2R); + + target->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); +} + static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, struct arm_smmu_ste *dst) { - u64 val; - struct arm_smmu_device *smmu = master->smmu; - struct arm_smmu_ctx_desc_cfg *cd_table = NULL; - struct arm_smmu_s2_cfg *s2_cfg = NULL; struct arm_smmu_domain *smmu_domain = master->domain; struct arm_smmu_ste target = {}; @@ -1486,63 +1562,15 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: - cd_table = &master->cd_table; + arm_smmu_make_cdtable_ste(&target, master); break; case ARM_SMMU_DOMAIN_S2: - s2_cfg = &smmu_domain->s2_cfg; + arm_smmu_make_s2_domain_ste(&target, master, smmu_domain); break; case ARM_SMMU_DOMAIN_BYPASS: arm_smmu_make_bypass_ste(&target); - arm_smmu_write_ste(master, sid, dst, &target); - return; - } - - /* Nuke the existing STE_0 value, as we're going to rewrite it */ - val = STRTAB_STE_0_V; - - if (cd_table) { - u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? - STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; - - target.data[1] = cpu_to_le64( - FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | - FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | - FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | - FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | - FIELD_PREP(STRTAB_STE_1_STRW, strw)); - - if (smmu->features & ARM_SMMU_FEAT_STALLS && - !master->stall_enabled) - target.data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); - - val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | - FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) | - FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt); - } - - if (s2_cfg) { - target.data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, - STRTAB_STE_1_SHCFG_INCOMING)); - target.data[2] = cpu_to_le64( - FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) | - FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) | -#ifdef __BIG_ENDIAN - STRTAB_STE_2_S2ENDI | -#endif - STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | - STRTAB_STE_2_S2R); - - target.data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); - - val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS); + break; } - - if (master->ats_enabled) - target.data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, - STRTAB_STE_1_EATS_TRANS)); - - target.data[0] = cpu_to_le64(val); arm_smmu_write_ste(master, sid, dst, &target); } -- cgit v1.2.3 From 71b0aa10b18dd589a899449457e5ab7c1da00d01 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:15 -0400 Subject: iommu/arm-smmu-v3: Build the whole STE in arm_smmu_make_s2_domain_ste() Half the code was living in arm_smmu_domain_finalise_s2(), just move it here and take the values directly from the pgtbl_ops instead of storing copies. 
Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 27 +++++++++++++++------------ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 -- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e34c31819669..b81e621a8e59 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1520,6 +1520,11 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_domain *smmu_domain) { struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg; + const struct io_pgtable_cfg *pgtbl_cfg = + &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg; + typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr = + &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + u64 vtcr_val; memset(target, 0, sizeof(*target)); target->data[0] = cpu_to_le64( @@ -1532,9 +1537,16 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); + vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps); target->data[2] = cpu_to_le64( FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) | - FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) | + FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) | STRTAB_STE_2_S2AA64 | #ifdef __BIG_ENDIAN STRTAB_STE_2_S2ENDI | @@ -1542,7 +1554,8 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R); - target->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); + target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr & + STRTAB_STE_3_S2TTB_MASK); } static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, @@ -2302,7 +2315,6 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, int vmid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; - typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; /* Reserve VMID 0 for stage-2 bypass STEs */ vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1, @@ -2310,16 +2322,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, if (vmid < 0) return vmid; - vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; cfg->vmid = (u16)vmid; - cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | - FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | - FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) | - FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) | - FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) | - FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) | - FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps); return 0; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 65fb388d5173..eb669121f195 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -609,8 +609,6 @@ struct arm_smmu_ctx_desc_cfg { struct arm_smmu_s2_cfg { u16 vmid; - u64 vttbr; - u64 vtcr; }; struct arm_smmu_strtab_cfg { -- cgit v1.2.3 From 9f7c68911579bc15c57d227d021ccd253da2b635 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:16 -0400 Subject: iommu/arm-smmu-v3: Hold arm_smmu_asid_lock during all of attach_dev The BTM support wants to be able to change the ASID of any smmu_domain. When it goes to do this it holds the arm_smmu_asid_lock and iterates over the target domain's devices list. During attach of a S1 domain we must ensure that the devices list and CD are in sync, otherwise we could miss CD updates or a parallel CD update could push an out of date CD. This is pretty complicated, and almost works today because arm_smmu_detach_dev() removes the master from the linked list before working on the CD entries, preventing parallel update of the CD. However, it does have an issue where the CD can remain programed while the domain appears to be unattached. arm_smmu_share_asid() will then not clear any CD entriess and install its own CD entry with the same ASID concurrently. This creates a small race window where the IOMMU can see two ASIDs pointing to different translations. CPU0 CPU1 arm_smmu_attach_dev() arm_smmu_detach_dev() spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_del(&master->domain_head); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); arm_smmu_mmu_notifier_get() arm_smmu_alloc_shared_cd() arm_smmu_share_asid(): // Does nothing due to list_del above arm_smmu_update_ctx_desc_devices() arm_smmu_tlb_inv_asid() arm_smmu_write_ctx_desc() ** Now the ASID is in two CDs with different translation arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); Solve this by wrapping most of the attach flow in the arm_smmu_asid_lock. This locks more than strictly needed to prepare for the next patch which will reorganize the order of the linked list, STE and CD changes. Move arm_smmu_detach_dev() till after we have initialized the domain so the lock can be held for less time. Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b81e621a8e59..d2fc609fab60 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2586,8 +2586,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -EBUSY; } - arm_smmu_detach_dev(master); - mutex_lock(&smmu_domain->init_mutex); if (!smmu_domain->smmu) { @@ -2602,6 +2600,16 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (ret) return ret; + /* + * Prevent arm_smmu_share_asid() from trying to change the ASID + * of either the old or new domain while we are working on it. + * This allows the STE and the smmu_domain->devices list to + * be inconsistent during this routine. 
+ */ + mutex_lock(&arm_smmu_asid_lock); + + arm_smmu_detach_dev(master); + master->domain = smmu_domain; /* @@ -2627,13 +2635,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) } } - /* - * Prevent SVA from concurrently modifying the CD or writing to - * the CD entry - */ - mutex_lock(&arm_smmu_asid_lock); ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); - mutex_unlock(&arm_smmu_asid_lock); if (ret) { master->domain = NULL; goto out_list_del; @@ -2643,13 +2645,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_install_ste_for_dev(master); arm_smmu_enable_ats(master); - return 0; + goto out_unlock; out_list_del: spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_del(&master->domain_head); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); +out_unlock: + mutex_unlock(&arm_smmu_asid_lock); return ret; } -- cgit v1.2.3 From 65547275d76965c3106fbcd4a9244242eb88224c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:17 -0400 Subject: iommu/arm-smmu-v3: Compute the STE only once for each master Currently arm_smmu_install_ste_for_dev() iterates over every SID and computes from scratch an identical STE. Every SID should have the same STE contents. Turn this inside out so that the STE is supplied by the caller and arm_smmu_install_ste_for_dev() simply installs it to every SID. This is possible now that the STE generation does not inform what sequence should be used to program it. This allows splitting the STE calculation up according to the call site, which following patches will make use of, and removes the confusing NULL domain special case that only supported arm_smmu_detach_dev(). Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 57 +++++++++++------------------ 1 file changed, 22 insertions(+), 35 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index d2fc609fab60..6cdf075e9a7e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1558,35 +1558,6 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, STRTAB_STE_3_S2TTB_MASK); } -static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, - struct arm_smmu_ste *dst) -{ - struct arm_smmu_domain *smmu_domain = master->domain; - struct arm_smmu_ste target = {}; - - if (!smmu_domain) { - if (disable_bypass) - arm_smmu_make_abort_ste(&target); - else - arm_smmu_make_bypass_ste(&target); - arm_smmu_write_ste(master, sid, dst, &target); - return; - } - - switch (smmu_domain->stage) { - case ARM_SMMU_DOMAIN_S1: - arm_smmu_make_cdtable_ste(&target, master); - break; - case ARM_SMMU_DOMAIN_S2: - arm_smmu_make_s2_domain_ste(&target, master, smmu_domain); - break; - case ARM_SMMU_DOMAIN_BYPASS: - arm_smmu_make_bypass_ste(&target); - break; - } - arm_smmu_write_ste(master, sid, dst, &target); -} - /* * This can safely directly manipulate the STE memory without a sync sequence * because the STE table has not been installed in the SMMU yet. 
@@ -2413,7 +2384,8 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) } } -static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) +static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, + const struct arm_smmu_ste *target) { int i, j; struct arm_smmu_device *smmu = master->smmu; @@ -2430,7 +2402,7 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) if (j < i) continue; - arm_smmu_write_strtab_ent(master, sid, step); + arm_smmu_write_ste(master, sid, step, target); } } @@ -2537,6 +2509,7 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static void arm_smmu_detach_dev(struct arm_smmu_master *master) { unsigned long flags; + struct arm_smmu_ste target; struct arm_smmu_domain *smmu_domain = master->domain; if (!smmu_domain) @@ -2550,7 +2523,11 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) master->domain = NULL; master->ats_enabled = false; - arm_smmu_install_ste_for_dev(master); + if (disable_bypass) + arm_smmu_make_abort_ste(&target); + else + arm_smmu_make_bypass_ste(&target); + arm_smmu_install_ste_for_dev(master, &target); /* * Clearing the CD entry isn't strictly required to detach the domain * since the table is uninstalled anyway, but it helps avoid confusion @@ -2565,6 +2542,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret = 0; unsigned long flags; + struct arm_smmu_ste target; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -2626,7 +2604,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) list_add(&master->domain_head, &smmu_domain->devices); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + switch (smmu_domain->stage) { + case ARM_SMMU_DOMAIN_S1: if (!master->cd_table.cdtab) { ret = arm_smmu_alloc_cd_tables(master); if (ret) { @@ -2640,9 +2619,17 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) master->domain = NULL; goto out_list_del; } - } - arm_smmu_install_ste_for_dev(master); + arm_smmu_make_cdtable_ste(&target, master); + break; + case ARM_SMMU_DOMAIN_S2: + arm_smmu_make_s2_domain_ste(&target, master, smmu_domain); + break; + case ARM_SMMU_DOMAIN_BYPASS: + arm_smmu_make_bypass_ste(&target); + break; + } + arm_smmu_install_ste_for_dev(master, &target); arm_smmu_enable_ats(master); goto out_unlock; -- cgit v1.2.3 From 8c73c32c83ce7f3c31864cb044abd3daefacd996 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:18 -0400 Subject: iommu/arm-smmu-v3: Do not change the STE twice during arm_smmu_attach_dev() This was needed because the STE code required the STE to be in ABORT/BYPASS inorder to program a cdtable or S2 STE. Now that the STE code can automatically handle all transitions we can remove this step from the attach_dev flow. A few small bugs exist because of this: 1) If the core code does BLOCKED -> UNMANAGED with disable_bypass=false then there will be a moment where the STE points at BYPASS. Since this can be done by VFIO/IOMMUFD it is a small security race. 2) If the core code does IDENTITY -> DMA then any IOMMU_RESV_DIRECT regions will temporarily become BLOCKED. We'd like drivers to work in a way that allows IOMMU_RESV_DIRECT to be continuously functional during these transitions. 
Make arm_smmu_release_device() put the STE back to the correct ABORT/BYPASS setting. Fix a bug where a IOMMU_RESV_DIRECT was ignored on this path. As noted before the reordering of the linked list/STE/CD changes is OK against concurrent arm_smmu_share_asid() because of the arm_smmu_asid_lock. Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6cdf075e9a7e..597a8c5f9658 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2509,7 +2509,6 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static void arm_smmu_detach_dev(struct arm_smmu_master *master) { unsigned long flags; - struct arm_smmu_ste target; struct arm_smmu_domain *smmu_domain = master->domain; if (!smmu_domain) @@ -2523,11 +2522,6 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) master->domain = NULL; master->ats_enabled = false; - if (disable_bypass) - arm_smmu_make_abort_ste(&target); - else - arm_smmu_make_bypass_ste(&target); - arm_smmu_install_ste_for_dev(master, &target); /* * Clearing the CD entry isn't strictly required to detach the domain * since the table is uninstalled anyway, but it helps avoid confusion @@ -2875,9 +2869,18 @@ err_free_master: static void arm_smmu_release_device(struct device *dev) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_ste target; if (WARN_ON(arm_smmu_master_sva_enabled(master))) iopf_queue_remove_device(master->smmu->evtq.iopf, dev); + + /* Put the STE back to what arm_smmu_init_strtab() sets */ + if (disable_bypass && !dev->iommu->require_direct) + arm_smmu_make_abort_ste(&target); + else + arm_smmu_make_bypass_ste(&target); + arm_smmu_install_ste_for_dev(master, &target); + arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); -- cgit v1.2.3 From d2e053d73247b68144c7f44d002ebf56acaf2d48 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:19 -0400 Subject: iommu/arm-smmu-v3: Put writing the context descriptor in the right order Get closer to the IOMMU API ideal that changes between domains can be hitless. The ordering for the CD table entry is not entirely clean from this perspective. When switching away from a STE with a CD table programmed in it we should write the new STE first, then clear any old data in the CD entry. If we are programming a CD table for the first time to a STE then the CD entry should be programmed before the STE is loaded. If we are replacing a CD table entry when the STE already points at the CD entry then we just need to do the make/break sequence. Lift this code out of arm_smmu_detach_dev() so it can all be sequenced properly. The only other caller is arm_smmu_release_device() and it is going to free the cdtable anyhow, so it doesn't matter what is in it. 
Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/8-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 597a8c5f9658..ec05743ee208 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2522,14 +2522,6 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) master->domain = NULL; master->ats_enabled = false; - /* - * Clearing the CD entry isn't strictly required to detach the domain - * since the table is uninstalled anyway, but it helps avoid confusion - * in the call to arm_smmu_write_ctx_desc on the next attach (which - * expects the entry to be empty). - */ - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab) - arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -2606,6 +2598,17 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) master->domain = NULL; goto out_list_del; } + } else { + /* + * arm_smmu_write_ctx_desc() relies on the entry being + * invalid to work, clear any existing entry. + */ + ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, + NULL); + if (ret) { + master->domain = NULL; + goto out_list_del; + } } ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); @@ -2615,15 +2618,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) } arm_smmu_make_cdtable_ste(&target, master); + arm_smmu_install_ste_for_dev(master, &target); break; case ARM_SMMU_DOMAIN_S2: arm_smmu_make_s2_domain_ste(&target, master, smmu_domain); + arm_smmu_install_ste_for_dev(master, &target); + if (master->cd_table.cdtab) + arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, + NULL); break; case ARM_SMMU_DOMAIN_BYPASS: arm_smmu_make_bypass_ste(&target); + arm_smmu_install_ste_for_dev(master, &target); + if (master->cd_table.cdtab) + arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, + NULL); break; } - arm_smmu_install_ste_for_dev(master, &target); arm_smmu_enable_ats(master); goto out_unlock; -- cgit v1.2.3 From d550ddc5b789f258cb5ce3bfe74af6d5383589b5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:20 -0400 Subject: iommu/arm-smmu-v3: Pass smmu_domain to arm_enable/disable_ats() The caller already has the domain, just pass it in. A following patch will remove master->domain. 
Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/9-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ec05743ee208..9d36ddecf2ad 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2421,12 +2421,12 @@ static bool arm_smmu_ats_supported(struct arm_smmu_master *master) return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); } -static void arm_smmu_enable_ats(struct arm_smmu_master *master) +static void arm_smmu_enable_ats(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain) { size_t stu; struct pci_dev *pdev; struct arm_smmu_device *smmu = master->smmu; - struct arm_smmu_domain *smmu_domain = master->domain; /* Don't enable ATS at the endpoint if it's not enabled in the STE */ if (!master->ats_enabled) @@ -2442,10 +2442,9 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } -static void arm_smmu_disable_ats(struct arm_smmu_master *master) +static void arm_smmu_disable_ats(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_domain *smmu_domain = master->domain; - if (!master->ats_enabled) return; @@ -2514,7 +2513,7 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) if (!smmu_domain) return; - arm_smmu_disable_ats(master); + arm_smmu_disable_ats(master, smmu_domain); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_del(&master->domain_head); @@ -2636,7 +2635,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) break; } - arm_smmu_enable_ats(master); + arm_smmu_enable_ats(master, smmu_domain); goto out_unlock; out_list_del: -- cgit v1.2.3 From 1b50017d39f650d78a0066734d6fe05920a8c9e8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:21 -0400 Subject: iommu/arm-smmu-v3: Remove arm_smmu_master->domain Introducing global statics which are of type struct iommu_domain, not struct arm_smmu_domain makes it difficult to retain arm_smmu_master->domain, as it can no longer point to an IDENTITY or BLOCKED domain. The only place that uses the value is arm_smmu_detach_dev(). Change things to work like other drivers and call iommu_get_domain_for_dev() to obtain the current domain. The master->domain is subtly protecting the master->domain_head against being unused as only PAGING domains will set master->domain and only paging domains use the master->domain_head. To make it simple keep the master->domain_head initialized so that the list_del() logic just does nothing for attached non-PAGING domains. 
Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/10-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 ++++++++++---------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 - 2 files changed, 10 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9d36ddecf2ad..19a7f0468149 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2507,19 +2507,20 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static void arm_smmu_detach_dev(struct arm_smmu_master *master) { + struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev); + struct arm_smmu_domain *smmu_domain; unsigned long flags; - struct arm_smmu_domain *smmu_domain = master->domain; - if (!smmu_domain) + if (!domain) return; + smmu_domain = to_smmu_domain(domain); arm_smmu_disable_ats(master, smmu_domain); spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_del(&master->domain_head); + list_del_init(&master->domain_head); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - master->domain = NULL; master->ats_enabled = false; } @@ -2573,8 +2574,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_detach_dev(master); - master->domain = smmu_domain; - /* * The SMMU does not support enabling ATS with bypass. When the STE is * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and @@ -2593,10 +2592,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) case ARM_SMMU_DOMAIN_S1: if (!master->cd_table.cdtab) { ret = arm_smmu_alloc_cd_tables(master); - if (ret) { - master->domain = NULL; + if (ret) goto out_list_del; - } } else { /* * arm_smmu_write_ctx_desc() relies on the entry being @@ -2604,17 +2601,13 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) */ ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); - if (ret) { - master->domain = NULL; + if (ret) goto out_list_del; - } } ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); - if (ret) { - master->domain = NULL; + if (ret) goto out_list_del; - } arm_smmu_make_cdtable_ste(&target, master); arm_smmu_install_ste_for_dev(master, &target); @@ -2640,7 +2633,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) out_list_del: spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_del(&master->domain_head); + list_del_init(&master->domain_head); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); out_unlock: @@ -2841,6 +2834,7 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->dev = dev; master->smmu = smmu; INIT_LIST_HEAD(&master->bonds); + INIT_LIST_HEAD(&master->domain_head); dev_iommu_priv_set(dev, master); ret = arm_smmu_insert_master(smmu, master); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index eb669121f195..6b63ea7dae72 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -695,7 +695,6 @@ struct arm_smmu_stream { struct arm_smmu_master { struct arm_smmu_device *smmu; struct device *dev; - struct arm_smmu_domain *domain; 
struct list_head domain_head; struct arm_smmu_stream *streams; /* Locked by the iommu core using the group mutex */ -- cgit v1.2.3 From ae91f6552c301e5e8569667e9d5440d5f75a90c4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:22 -0400 Subject: iommu/arm-smmu-v3: Check that the RID domain is S1 in SVA The SVA code only works if the RID domain is a S1 domain and has already installed the cdtable. Originally the check for this was in arm_smmu_sva_bind() but when the op was removed the test didn't get copied over to the new arm_smmu_sva_set_dev_pasid(). Without the test wrong usage usually will hit a WARN_ON() in arm_smmu_write_ctx_desc() due to a missing ctx table. However, the next patches wil change things so that an IDENTITY domain is not a struct arm_smmu_domain and this will get into memory corruption if the struct is wrongly casted. Fail in arm_smmu_sva_set_dev_pasid() if the STE does not have a S1, which is a proxy for the STE having a pointer to the CD table. Write it in a way that will be compatible with the next patches. Fixes: 386fa64fd52b ("arm-smmu-v3/sva: Add SVA domain support") Reported-by: Shameerali Kolothum Thodi Closes: https://lore.kernel.org/linux-iommu/2a828e481416405fb3a4cceb9e075a59@huawei.com/ Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/11-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 4a27fbdb2d84..2610e82c0ecd 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -364,7 +364,13 @@ static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, struct arm_smmu_bond *bond; struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_domain *smmu_domain; + + if (!(domain->type & __IOMMU_DOMAIN_PAGING)) + return -ENODEV; + smmu_domain = to_smmu_domain(domain); + if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) + return -ENODEV; if (!master || !master->sva_enabled) return -ENODEV; -- cgit v1.2.3 From 12dacfb5b938cdd90ced0109165eee9cb27061d9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:23 -0400 Subject: iommu/arm-smmu-v3: Add a global static IDENTITY domain Move to the new static global for identity domains. Move all the logic out of arm_smmu_attach_dev into an identity only function. 
Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/12-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 82 ++++++++++++++++++++--------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 - 2 files changed, 58 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 19a7f0468149..842ff8a95baa 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2200,8 +2200,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) return arm_smmu_sva_domain_alloc(); if (type != IOMMU_DOMAIN_UNMANAGED && - type != IOMMU_DOMAIN_DMA && - type != IOMMU_DOMAIN_IDENTITY) + type != IOMMU_DOMAIN_DMA) return NULL; /* @@ -2309,11 +2308,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; - if (domain->type == IOMMU_DOMAIN_IDENTITY) { - smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; - return 0; - } - /* Restrict the stage to what we can actually support */ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) smmu_domain->stage = ARM_SMMU_DOMAIN_S2; @@ -2511,7 +2505,7 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) struct arm_smmu_domain *smmu_domain; unsigned long flags; - if (!domain) + if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING)) return; smmu_domain = to_smmu_domain(domain); @@ -2574,15 +2568,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_detach_dev(master); - /* - * The SMMU does not support enabling ATS with bypass. When the STE is - * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and - * Translated transactions are denied as though ATS is disabled for the - * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and - * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry). - */ - if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) - master->ats_enabled = arm_smmu_ats_supported(master); + master->ats_enabled = arm_smmu_ats_supported(master); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_add(&master->domain_head, &smmu_domain->devices); @@ -2619,13 +2605,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); break; - case ARM_SMMU_DOMAIN_BYPASS: - arm_smmu_make_bypass_ste(&target); - arm_smmu_install_ste_for_dev(master, &target); - if (master->cd_table.cdtab) - arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, - NULL); - break; } arm_smmu_enable_ats(master, smmu_domain); @@ -2641,6 +2620,60 @@ out_unlock: return ret; } +static int arm_smmu_attach_dev_ste(struct device *dev, + struct arm_smmu_ste *ste) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + + if (arm_smmu_master_sva_enabled(master)) + return -EBUSY; + + /* + * Do not allow any ASID to be changed while are working on the STE, + * otherwise we could miss invalidations. + */ + mutex_lock(&arm_smmu_asid_lock); + + /* + * The SMMU does not support enabling ATS with bypass/abort. 
When the + * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests + * and Translated transactions are denied as though ATS is disabled for + * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and + * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry). + */ + arm_smmu_detach_dev(master); + + arm_smmu_install_ste_for_dev(master, ste); + mutex_unlock(&arm_smmu_asid_lock); + + /* + * This has to be done after removing the master from the + * arm_smmu_domain->devices to avoid races updating the same context + * descriptor from arm_smmu_share_asid(). + */ + if (master->cd_table.cdtab) + arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); + return 0; +} + +static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, + struct device *dev) +{ + struct arm_smmu_ste ste; + + arm_smmu_make_bypass_ste(&ste); + return arm_smmu_attach_dev_ste(dev, &ste); +} + +static const struct iommu_domain_ops arm_smmu_identity_ops = { + .attach_dev = arm_smmu_attach_dev_identity, +}; + +static struct iommu_domain arm_smmu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &arm_smmu_identity_ops, +}; + static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -3030,6 +3063,7 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid) } static struct iommu_ops arm_smmu_ops = { + .identity_domain = &arm_smmu_identity_domain, .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, .probe_device = arm_smmu_probe_device, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 6b63ea7dae72..23baf117e7e4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -712,7 +712,6 @@ struct arm_smmu_master { enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, - ARM_SMMU_DOMAIN_BYPASS, }; struct arm_smmu_domain { -- cgit v1.2.3 From 352bd64cd8288c5c6808735d52a75809dfef8635 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:24 -0400 Subject: iommu/arm-smmu-v3: Add a global static BLOCKED domain Using the same design as the IDENTITY domain install an STRTAB_STE_0_CFG_ABORT STE. 
Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/13-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 842ff8a95baa..baec827e6ae4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2674,6 +2674,24 @@ static struct iommu_domain arm_smmu_identity_domain = { .ops = &arm_smmu_identity_ops, }; +static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, + struct device *dev) +{ + struct arm_smmu_ste ste; + + arm_smmu_make_abort_ste(&ste); + return arm_smmu_attach_dev_ste(dev, &ste); +} + +static const struct iommu_domain_ops arm_smmu_blocked_ops = { + .attach_dev = arm_smmu_attach_dev_blocked, +}; + +static struct iommu_domain arm_smmu_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &arm_smmu_blocked_ops, +}; + static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -3064,6 +3082,7 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid) static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, + .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, .probe_device = arm_smmu_probe_device, -- cgit v1.2.3 From d36464f40f29c984984168ea89e08629bdba41df Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:25 -0400 Subject: iommu/arm-smmu-v3: Use the identity/blocked domain during release Consolidate some more code by having release call arm_smmu_attach_dev_identity/blocked() instead of open coding this. 
Reviewed-by: Nicolin Chen Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/14-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index baec827e6ae4..1303e9c603fc 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2924,19 +2924,16 @@ err_free_master: static void arm_smmu_release_device(struct device *dev) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct arm_smmu_ste target; if (WARN_ON(arm_smmu_master_sva_enabled(master))) iopf_queue_remove_device(master->smmu->evtq.iopf, dev); /* Put the STE back to what arm_smmu_init_strtab() sets */ if (disable_bypass && !dev->iommu->require_direct) - arm_smmu_make_abort_ste(&target); + arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev); else - arm_smmu_make_bypass_ste(&target); - arm_smmu_install_ste_for_dev(master, &target); + arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev); - arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); if (master->cd_table.cdtab) -- cgit v1.2.3 From d8cd200609cf6a404cda73794f0c8c4fd74c568c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:26 -0400 Subject: iommu/arm-smmu-v3: Pass arm_smmu_domain and arm_smmu_device to finalize Instead of putting container_of() casts in the internals, use the proper type in this call chain. This makes it easier to check that the two global static domains are not leaking into call chains they should not. Passing the smmu avoids the only caller from having to set it and unset it in the error path. 
Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/15-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 35 +++++++++++++++-------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 1303e9c603fc..ebd8362c8aa3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -89,6 +89,9 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_device *smmu); + static void parse_driver_options(struct arm_smmu_device *smmu) { int i = 0; @@ -2242,12 +2245,12 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) kfree(smmu_domain); } -static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, +static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, + struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { int ret; u32 asid; - struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; @@ -2279,11 +2282,11 @@ out_unlock: return ret; } -static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, +static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu, + struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { int vmid; - struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; /* Reserve VMID 0 for stage-2 bypass STEs */ @@ -2296,17 +2299,17 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, return 0; } -static int arm_smmu_domain_finalise(struct iommu_domain *domain) +static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_device *smmu) { int ret; unsigned long ias, oas; enum io_pgtable_fmt fmt; struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; - int (*finalise_stage_fn)(struct arm_smmu_domain *, - struct io_pgtable_cfg *); - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct arm_smmu_device *smmu = smmu_domain->smmu; + int (*finalise_stage_fn)(struct arm_smmu_device *smmu, + struct arm_smmu_domain *smmu_domain, + struct io_pgtable_cfg *pgtbl_cfg); /* Restrict the stage to what we can actually support */ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) @@ -2345,17 +2348,18 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) if (!pgtbl_ops) return -ENOMEM; - domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; - domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; - domain->geometry.force_aperture = true; + smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; + smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; + smmu_domain->domain.geometry.force_aperture = true; - ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); + ret = finalise_stage_fn(smmu, smmu_domain, &pgtbl_cfg); if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); return ret; } smmu_domain->pgtbl_ops = pgtbl_ops; + smmu_domain->smmu = smmu; return 0; } @@ -2547,10 +2551,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, 
struct device *dev) mutex_lock(&smmu_domain->init_mutex); if (!smmu_domain->smmu) { - smmu_domain->smmu = smmu; - ret = arm_smmu_domain_finalise(domain); - if (ret) - smmu_domain->smmu = NULL; + ret = arm_smmu_domain_finalise(smmu_domain, smmu); } else if (smmu_domain->smmu != smmu) ret = -EINVAL; -- cgit v1.2.3 From 327e10b47ae99f76ac53f0b8b73a0539f390d2d2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Feb 2024 13:07:27 -0400 Subject: iommu/arm-smmu-v3: Convert to domain_alloc_paging() Now that the BLOCKED and IDENTITY behaviors are managed with their own domains change to the domain_alloc_paging() op. For now SVA remains using the old interface, eventually it will get its own op that can pass in the device and mm_struct which will let us have a sane lifetime for the mmu_notifier. Call arm_smmu_domain_finalise() early if dev is available. Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Tested-by: Moritz Fischer Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/16-v6-96275f25c39d+2d4-smmuv3_newapi_p1_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ebd8362c8aa3..b7938f17222b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2197,14 +2197,15 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { - struct arm_smmu_domain *smmu_domain; if (type == IOMMU_DOMAIN_SVA) return arm_smmu_sva_domain_alloc(); + return ERR_PTR(-EOPNOTSUPP); +} - if (type != IOMMU_DOMAIN_UNMANAGED && - type != IOMMU_DOMAIN_DMA) - return NULL; +static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) +{ + struct arm_smmu_domain *smmu_domain; /* * Allocate the domain and initialise some of its data structures. @@ -2213,13 +2214,23 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) */ smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); if (!smmu_domain) - return NULL; + return ERR_PTR(-ENOMEM); mutex_init(&smmu_domain->init_mutex); INIT_LIST_HEAD(&smmu_domain->devices); spin_lock_init(&smmu_domain->devices_lock); INIT_LIST_HEAD(&smmu_domain->mmu_notifiers); + if (dev) { + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + int ret; + + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu); + if (ret) { + kfree(smmu_domain); + return ERR_PTR(ret); + } + } return &smmu_domain->domain; } @@ -3083,6 +3094,7 @@ static struct iommu_ops arm_smmu_ops = { .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, + .domain_alloc_paging = arm_smmu_domain_alloc_paging, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, -- cgit v1.2.3 From e70e9ecd7cb349d00736c594b8e9bada0a762238 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Feb 2024 15:40:24 +0100 Subject: iommu: constify pointer to bus_type Make pointer to bus_type a pointer to const for code safety. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240216144027.185959-1-krzysztof.kozlowski@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-priv.h | 5 +++-- drivers/iommu/iommu.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index 2024a2313348..5f731d994803 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -21,10 +21,11 @@ int iommu_group_replace_domain(struct iommu_group *group, struct iommu_domain *new_domain); int iommu_device_register_bus(struct iommu_device *iommu, - const struct iommu_ops *ops, struct bus_type *bus, + const struct iommu_ops *ops, + const struct bus_type *bus, struct notifier_block *nb); void iommu_device_unregister_bus(struct iommu_device *iommu, - struct bus_type *bus, + const struct bus_type *bus, struct notifier_block *nb); #endif /* __LINUX_IOMMU_PRIV_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 35dac3e4629a..fa329bcd60e6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -289,7 +289,7 @@ EXPORT_SYMBOL_GPL(iommu_device_unregister); #if IS_ENABLED(CONFIG_IOMMUFD_TEST) void iommu_device_unregister_bus(struct iommu_device *iommu, - struct bus_type *bus, + const struct bus_type *bus, struct notifier_block *nb) { bus_unregister_notifier(bus, nb); @@ -303,7 +303,8 @@ EXPORT_SYMBOL_GPL(iommu_device_unregister_bus); * some memory to hold a notifier_block. */ int iommu_device_register_bus(struct iommu_device *iommu, - const struct iommu_ops *ops, struct bus_type *bus, + const struct iommu_ops *ops, + const struct bus_type *bus, struct notifier_block *nb) { int err; -- cgit v1.2.3 From b42a905b6aad40c092cf17f4b295a4c389bc7206 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Feb 2024 15:40:25 +0100 Subject: iommu: constify of_phandle_args in xlate The xlate callbacks are supposed to translate of_phandle_args to proper provider without modifying the of_phandle_args. Make the argument pointer to const for code safety and readability. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240216144027.185959-2-krzysztof.kozlowski@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 3 ++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 ++- drivers/iommu/arm/arm-smmu/arm-smmu.c | 3 ++- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 3 ++- drivers/iommu/exynos-iommu.c | 2 +- drivers/iommu/iommu.c | 2 +- drivers/iommu/ipmmu-vmsa.c | 4 ++-- drivers/iommu/msm_iommu.c | 4 ++-- drivers/iommu/mtk_iommu.c | 3 ++- drivers/iommu/mtk_iommu_v1.c | 3 ++- drivers/iommu/rockchip-iommu.c | 2 +- drivers/iommu/sprd-iommu.c | 3 ++- drivers/iommu/sun50i-iommu.c | 2 +- drivers/iommu/tegra-smmu.c | 4 ++-- drivers/iommu/virtio-iommu.c | 3 ++- include/linux/iommu.h | 4 ++-- 16 files changed, 28 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index ef3ee95706da..eb1e62cd499a 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -779,7 +779,8 @@ static void apple_dart_domain_free(struct iommu_domain *domain) kfree(dart_domain); } -static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args) +static int apple_dart_of_xlate(struct device *dev, + const struct of_phandle_args *args) { struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); struct platform_device *iommu_pdev = of_find_device_by_node(args->np); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 02580364acda..0234ddb39c22 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2704,7 +2704,8 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain) return ret; } -static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) +static int arm_smmu_of_xlate(struct device *dev, + const struct of_phandle_args *args) { return iommu_fwspec_add_ids(dev, args->args, 1); } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 68b6bc5e7c71..8e5e4ab5fad3 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1551,7 +1551,8 @@ static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain, return ret; } -static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) +static int arm_smmu_of_xlate(struct device *dev, + const struct of_phandle_args *args) { u32 mask, fwid = 0; diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 17a1c163fef6..e079bb7a993e 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -546,7 +546,8 @@ static struct iommu_device *qcom_iommu_probe_device(struct device *dev) return &qcom_iommu->iommu; } -static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) +static int qcom_iommu_of_xlate(struct device *dev, + const struct of_phandle_args *args) { struct qcom_iommu_dev *qcom_iommu; struct platform_device *iommu_pdev; diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 2c6e9094f1e9..d98c9161948a 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1431,7 +1431,7 @@ static void exynos_iommu_release_device(struct device *dev) } static int exynos_iommu_of_xlate(struct device *dev, - struct of_phandle_args *spec) + const struct of_phandle_args *spec) { struct platform_device *sysmmu = of_find_device_by_node(spec->np); struct 
exynos_iommu_owner *owner = dev_iommu_priv_get(dev); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index fa329bcd60e6..b0a41f598dcf 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2825,7 +2825,7 @@ void iommu_fwspec_free(struct device *dev) } EXPORT_SYMBOL_GPL(iommu_fwspec_free); -int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) +int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); int i, new_num; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index ace1fc4bd34b..cd7219319c8b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -709,7 +709,7 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, } static int ipmmu_init_platform_device(struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { struct platform_device *ipmmu_pdev; @@ -773,7 +773,7 @@ static bool ipmmu_device_is_allowed(struct device *dev) } static int ipmmu_of_xlate(struct device *dev, - struct of_phandle_args *spec) + const struct of_phandle_args *spec) { if (!ipmmu_device_is_allowed(dev)) return -ENODEV; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index f86af9815d6f..989e0869d805 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -598,7 +598,7 @@ static void print_ctx_regs(void __iomem *base, int ctx) static int insert_iommu_master(struct device *dev, struct msm_iommu_dev **iommu, - struct of_phandle_args *spec) + const struct of_phandle_args *spec) { struct msm_iommu_ctx_dev *master = dev_iommu_priv_get(dev); int sid; @@ -626,7 +626,7 @@ static int insert_iommu_master(struct device *dev, } static int qcom_iommu_of_xlate(struct device *dev, - struct of_phandle_args *spec) + const struct of_phandle_args *spec) { struct msm_iommu_dev *iommu = NULL, *iter; unsigned long flags; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7abe9e85a570..955de8fb7732 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -957,7 +957,8 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev) return group; } -static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) +static int mtk_iommu_of_xlate(struct device *dev, + const struct of_phandle_args *args) { struct platform_device *m4updev; diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 25b41222abae..ae16d8238d84 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -398,7 +398,8 @@ static const struct iommu_ops mtk_iommu_v1_ops; * MTK generation one iommu HW only support one iommu domain, and all the client * sharing the same iova address space. 
*/ -static int mtk_iommu_v1_create_mapping(struct device *dev, struct of_phandle_args *args) +static int mtk_iommu_v1_create_mapping(struct device *dev, + const struct of_phandle_args *args) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_v1_data *data; diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 2685861c0a12..da79d9f4cf63 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1140,7 +1140,7 @@ static void rk_iommu_release_device(struct device *dev) } static int rk_iommu_of_xlate(struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { struct platform_device *iommu_dev; struct rk_iommudata *data; diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 537359f10997..ba53571a8239 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -390,7 +390,8 @@ static struct iommu_device *sprd_iommu_probe_device(struct device *dev) return &sdev->iommu; } -static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) +static int sprd_iommu_of_xlate(struct device *dev, + const struct of_phandle_args *args) { struct platform_device *pdev; diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 41484a5a399b..decd52cba998 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -819,7 +819,7 @@ static struct iommu_device *sun50i_iommu_probe_device(struct device *dev) } static int sun50i_iommu_of_xlate(struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { struct platform_device *iommu_pdev = of_find_device_by_node(args->np); unsigned id = args->args[0]; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 310871728ab4..14e525bd0d9b 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -830,7 +830,7 @@ static struct tegra_smmu *tegra_smmu_find(struct device_node *np) } static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { const struct iommu_ops *ops = smmu->iommu.ops; int err; @@ -959,7 +959,7 @@ static struct iommu_group *tegra_smmu_device_group(struct device *dev) } static int tegra_smmu_of_xlate(struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { struct platform_device *iommu_pdev = of_find_device_by_node(args->np); struct tegra_mc *mc = platform_get_drvdata(iommu_pdev); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 34db37fd9675..04048f64a2c0 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1051,7 +1051,8 @@ static struct iommu_group *viommu_device_group(struct device *dev) return generic_device_group(dev); } -static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args) +static int viommu_of_xlate(struct device *dev, + const struct of_phandle_args *args) { return iommu_fwspec_add_ids(dev, args->args, 1); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7cc56cfe98dd..98a958621dcb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -567,7 +567,7 @@ struct iommu_ops { /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); - int (*of_xlate)(struct device *dev, struct of_phandle_args *args); + int (*of_xlate)(struct device *dev, const struct of_phandle_args *args); bool 
(*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ @@ -985,7 +985,7 @@ struct iommu_mm_data { int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); -int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); +int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) -- cgit v1.2.3 From 5896e6e39b86c1d820b3ccf5caea9aef40c2eacd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Feb 2024 15:40:26 +0100 Subject: iommu: constify fwnode in iommu_ops_from_fwnode() Make the pointer to fwnode_handle a pointer to const for code safety. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240216144027.185959-3-krzysztof.kozlowski@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- include/linux/iommu.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b0a41f598dcf..28b18ed83273 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2774,7 +2774,7 @@ bool iommu_default_passthrough(void) } EXPORT_SYMBOL_GPL(iommu_default_passthrough); -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) { const struct iommu_ops *ops = NULL; struct iommu_device *iommu; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 98a958621dcb..7c8032202457 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -986,7 +986,7 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { @@ -1309,7 +1309,7 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, } static inline const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) { return NULL; } -- cgit v1.2.3 From b5a1f7513a2fcb1b9e646128ff0cb0dbba5ed6c1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Feb 2024 15:40:27 +0100 Subject: iommu: re-use local fwnode variable in of_iommu_xlate() of_iommu_xlate() stores &iommu_spec->np->fwnode in a local variable, so use it to simplify the code (iommu_spec is not changed between these dereferences).
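Taken together with the previous two patches, the resulting shape of of_iommu_xlate() looks roughly like the following (a condensed sketch based on the hunk below; deferred-probe handling and module reference counting are omitted):

static int of_iommu_xlate(struct device *dev,
			  struct of_phandle_args *iommu_spec)
{
	struct fwnode_handle *fwnode = &iommu_spec->np->fwnode;
	const struct iommu_ops *ops;
	int ret;

	/* The lookup takes a pointer to const after the previous patch. */
	ops = iommu_ops_from_fwnode(fwnode);
	if ((ops && !ops->of_xlate) || !of_device_is_available(iommu_spec->np))
		return -ENODEV;

	/* Reuse the local fwnode rather than dereferencing iommu_spec again. */
	ret = iommu_fwspec_init(dev, fwnode, ops);
	if (ret)
		return ret;

	return ops ? ops->of_xlate(dev, iommu_spec) : -ENODEV;
}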
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240216144027.185959-4-krzysztof.kozlowski@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 719652b60840..3afe0b48a48d 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -29,7 +29,7 @@ static int of_iommu_xlate(struct device *dev, !of_device_is_available(iommu_spec->np)) return -ENODEV; - ret = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); + ret = iommu_fwspec_init(dev, fwnode, ops); if (ret) return ret; /* -- cgit v1.2.3 From 4b8d18c0c986877fe89ade2214e1e81ad817cef1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 27 Feb 2024 10:14:34 +0800 Subject: iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA Commit 62edf5dc4a524 ("intel-iommu: Restore DMAR_BROKEN_GFX_WA option for broken graphics drivers") was introduced 15 years ago as a temporary workaround for graphics drivers that used physical addresses for DMA and avoided DMA APIs. This workaround was disabled by default. As 15 years have passed, it is expected that graphics driver developers have migrated their drivers to use kernel DMA APIs. Therefore, this workaround is no longer required and can be removed. The Intel iommu driver also provides an "igfx_off" option to turn off DMA translation for the graphics-dedicated IOMMU. Hence, there is really no good reason to keep this config option. Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240130060823.57990-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Kconfig | 11 ----------- drivers/iommu/intel/iommu.c | 4 ---- 2 files changed, 15 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 012cd2541a68..d2d34eb28d94 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -64,17 +64,6 @@ config INTEL_IOMMU_DEFAULT_ON one is found. If this option is not selected, DMAR support can be enabled by passing intel_iommu=on to the kernel. -config INTEL_IOMMU_BROKEN_GFX_WA - bool "Workaround broken graphics drivers (going away soon)" - depends on BROKEN && X86 - help - Current Graphics drivers tend to use physical address - for DMA and avoid using DMA APIs. Setting this config - option permits the IOMMU driver to set a unity map for - all the OS-visible memory. Hence the driver can continue - to use physical addresses for DMA, at least until this - option is removed in the 2.6.32 kernel. - config INTEL_IOMMU_FLOPPY_WA def_bool y depends on X86 diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6fb5f6fceea1..fc52fcd786aa 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2660,10 +2660,6 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); } -#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA - dmar_map_gfx = 0; -#endif - if (!dmar_map_gfx) iommu_identity_mapping |= IDENTMAP_GFX; -- cgit v1.2.3 From 8379054869a0e8d0ebf8f2160ccc778ed6268b96 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Tue, 27 Feb 2024 10:14:35 +0800 Subject: iommu/vt-d: Use kcalloc() instead of kzalloc() This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1].
Here the multiplication is obviously safe because DMAR_LATENCY_NUM is the number of latency types defined in the "latency_type" enum. enum latency_type { DMAR_LATENCY_INV_IOTLB = 0, DMAR_LATENCY_INV_DEVTLB, DMAR_LATENCY_INV_IEC, DMAR_LATENCY_PRQ, DMAR_LATENCY_NUM }; However, using kcalloc() is more appropriate [2] and improves readability. This patch has no effect on runtime behavior. Link: https://github.com/KSPP/linux/issues/162 [1] Link: https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [2] Signed-off-by: Erick Archer Reviewed-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240211175143.9229-1-erick.archer@gmx.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c index 94ee70ac38e3..adc4de6bbd88 100644 --- a/drivers/iommu/intel/perf.c +++ b/drivers/iommu/intel/perf.c @@ -33,7 +33,7 @@ int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type) spin_lock_irqsave(&latency_lock, flags); if (!iommu->perf_statistic) { - iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM, + iommu->perf_statistic = kcalloc(DMAR_LATENCY_NUM, sizeof(*lstat), GFP_ATOMIC); if (!iommu->perf_statistic) { ret = -ENOMEM; -- cgit v1.2.3 From b4b1054f6cdd08b040fc5132936a5700c1d2d05b Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 27 Feb 2024 10:14:37 +0800 Subject: iommu/vt-d: Remove treatment for revoking PASIDs with pending page faults Commit 2f26e0a9c986 ("iommu/vt-d: Add basic SVM PASID support") added a special treatment to mandate that no page faults may be outstanding for the PASID after intel_svm_unbind_mm() is called, as the PASID will be released and reused after unbind. This is no longer necessary, as the driver's remove_dev_pasid path already ensures that no page faults are outstanding: - Tear down the pasid entry, which guarantees that new page faults for the PASID will be rejected by the iommu hardware. - All outstanding page faults have been responded to. - All hardware pending faults are drained in intel_drain_pasid_prq(). Remove this unnecessary code. Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20240219125723.1645703-2-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 40edd282903f..a815362c8e60 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -408,13 +408,6 @@ void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid) if (svm->notifier.ops) mmu_notifier_unregister(&svm->notifier, mm); pasid_private_remove(svm->pasid); - /* - * We mandate that no page faults may be outstanding - * for the PASID when intel_svm_unbind_mm() is called. - * If that is not obeyed, subtle errors will happen. - * Let's make them less subtle... - */ - memset(svm, 0x6b, sizeof(*svm)); kfree(svm); } } -- cgit v1.2.3 From cb0b95e56269bb6ca996373a02ff0b6edfa09d32 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 27 Feb 2024 10:14:38 +0800 Subject: iommu/vt-d: Remove initialization for dynamically heap-allocated rcu_head The rcu_head structures allocated dynamically in the heap don't need any initialization. Therefore, remove the init_rcu_head().
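For background, a generic sketch of the pattern this relies on (struct foo and its helpers are hypothetical, not from the driver): memory returned by kzalloc()/kcalloc() can be handed to call_rcu() directly, with no init_rcu_head() step.

struct foo {
	int payload;
	struct rcu_head rcu;	/* no initialization required for heap memory */
};

static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

static void foo_release(struct foo *f)
{
	/* Defer the free until all pre-existing RCU readers have finished. */
	call_rcu(&f->rcu, foo_free_rcu);
}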
Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20240219125723.1645703-3-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index a815362c8e60..a92a9e2239e2 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -360,7 +360,6 @@ static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, sdev->iommu = iommu; sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); - init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->qdep = info->ats_qdep; if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) -- cgit v1.2.3 From 8ca918cbc2522e72faa5c102d02059e53d5128d0 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 27 Feb 2024 10:14:39 +0800 Subject: iommu/vt-d: Merge intel_svm_bind_mm() into its caller intel_svm_set_dev_pasid() is the only caller of intel_svm_bind_mm(). Merge them and remove intel_svm_bind_mm(). No functional change intended. Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20240219125723.1645703-4-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index a92a9e2239e2..1dd56d4eb88c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -315,10 +315,11 @@ out: return 0; } -static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, - struct iommu_domain *domain, ioasid_t pasid) +static int intel_svm_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; struct mm_struct *mm = domain->mm; struct intel_svm_dev *sdev; struct intel_svm *svm; @@ -796,15 +797,6 @@ out: return ret; } -static int intel_svm_set_dev_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu = info->iommu; - - return intel_svm_bind_mm(iommu, dev, domain, pasid); -} - static void intel_svm_domain_free(struct iommu_domain *domain) { kfree(to_dmar_domain(domain)); } -- cgit v1.2.3 From 1a75cc710b956010137b4fe1d1fa3282bfd8f86c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 27 Feb 2024 10:14:40 +0800 Subject: iommu/vt-d: Use rbtree to track iommu probed devices Use a red-black tree (rbtree) to track devices probed by the driver's probe_device callback. These devices need to be looked up quickly by a source ID when the hardware reports a fault, either recoverable or unrecoverable. Fault reporting paths are critical. Searching a list in this scenario is inefficient, with an algorithm complexity of O(n). An rbtree is a self-balancing binary search tree, offering an average search time complexity of O(log(n)). This significant performance improvement makes rbtrees a better choice. Furthermore, rbtrees are implemented on a per-iommu basis, eliminating the need for global searches and further enhancing efficiency in critical fault paths. The rbtree is protected by a spin lock with interrupts disabled to ensure thread-safe access even within interrupt contexts.
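The generic rbtree helpers used in the diff below take a pair of comparison callbacks; as a reminder of their contract, here is a minimal sketch with a hypothetical u16-keyed node (mirroring the PCI source ID the patch keys on):

struct foo_node {
	struct rb_node node;
	u16 key;
};

/* For rb_find(): compare a search key against a tree node. */
static int foo_cmp_key(const void *key, const struct rb_node *node)
{
	u16 lhs = *(const u16 *)key;
	u16 rhs = rb_entry(node, struct foo_node, node)->key;

	return lhs < rhs ? -1 : (lhs > rhs ? 1 : 0);
}

/* For rb_find_add(): compare the node being inserted against existing ones. */
static int foo_cmp(struct rb_node *lhs, const struct rb_node *rhs)
{
	u16 key = rb_entry(lhs, struct foo_node, node)->key;

	return foo_cmp_key(&key, rhs);
}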
Co-developed-by: Huang Jiaqing Signed-off-by: Huang Jiaqing Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240220065939.121116-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 3 +- drivers/iommu/intel/iommu.c | 88 +++++++++++++++++++++++++++++++++++++++++++-- drivers/iommu/intel/iommu.h | 8 +++++ 3 files changed, 96 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 23cb80d62a9a..f9b63c2875f7 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1095,7 +1095,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->agaw = agaw; iommu->msagaw = msagaw; iommu->segment = drhd->segment; - + iommu->device_rbtree = RB_ROOT; + spin_lock_init(&iommu->device_rbtree_lock); iommu->node = NUMA_NO_NODE; ver = readl(iommu->reg + DMAR_VER_REG); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index fc52fcd786aa..025d7385cf58 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -97,6 +97,81 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) return re->hi & VTD_PAGE_MASK; } +static int device_rid_cmp_key(const void *key, const struct rb_node *node) +{ + struct device_domain_info *info = + rb_entry(node, struct device_domain_info, node); + const u16 *rid_lhs = key; + + if (*rid_lhs < PCI_DEVID(info->bus, info->devfn)) + return -1; + + if (*rid_lhs > PCI_DEVID(info->bus, info->devfn)) + return 1; + + return 0; +} + +static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs) +{ + struct device_domain_info *info = + rb_entry(lhs, struct device_domain_info, node); + u16 key = PCI_DEVID(info->bus, info->devfn); + + return device_rid_cmp_key(&key, rhs); +} + +/* + * Looks up an IOMMU-probed device using its source ID. + * + * Returns the pointer to the device if there is a match. Otherwise, + * returns NULL. + * + * Note that this helper doesn't guarantee that the device won't be + * released by the iommu subsystem after being returned. The caller + * should use its own synchronization mechanism to avoid the device + * being released during its use if its possibly the case. + */ +struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid) +{ + struct device_domain_info *info = NULL; + struct rb_node *node; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key); + if (node) + info = rb_entry(node, struct device_domain_info, node); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); + + return info ? info->dev : NULL; +} + +static int device_rbtree_insert(struct intel_iommu *iommu, + struct device_domain_info *info) +{ + struct rb_node *curr; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); + if (WARN_ON(curr)) + return -EEXIST; + + return 0; +} + +static void device_rbtree_remove(struct device_domain_info *info) +{ + struct intel_iommu *iommu = info->iommu; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + rb_erase(&info->node, &iommu->device_rbtree); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); +} + /* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. 
@@ -4261,25 +4336,34 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } dev_iommu_priv_set(dev, info); + ret = device_rbtree_insert(iommu, info); + if (ret) + goto free; if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { ret = intel_pasid_alloc_table(dev); if (ret) { dev_err(dev, "PASID table allocation failed\n"); - kfree(info); - return ERR_PTR(ret); + goto clear_rbtree; } } intel_iommu_debugfs_create_dev(info); return &iommu->iommu; +clear_rbtree: + device_rbtree_remove(info); +free: + kfree(info); + + return ERR_PTR(ret); } static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); + device_rbtree_remove(info); dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d02f916d8e59..50d1e196db52 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -716,6 +716,11 @@ struct intel_iommu { struct q_inval *qi; /* Queued invalidation info */ u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/ + /* rb tree for all probed devices */ + struct rb_root device_rbtree; + /* protect the device_rbtree */ + spinlock_t device_rbtree_lock; + #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ struct irq_domain *ir_domain; @@ -749,6 +754,8 @@ struct device_domain_info { struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ struct pasid_table *pasid_table; /* pasid table */ + /* device tracking node(lookup by PCI RID) */ + struct rb_node node; #ifdef CONFIG_INTEL_IOMMU_DEBUGFS struct dentry *debugfs_dentry; /* pointer to device directory dentry */ #endif @@ -1074,6 +1081,7 @@ void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, const struct iommu_user_data *user_data); +struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid); #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); -- cgit v1.2.3 From def054b01a867822254e1dda13d587f5c7a99e2a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 27 Feb 2024 10:14:41 +0800 Subject: iommu/vt-d: Use device rbtree in iopf reporting path The existing I/O page fault handler currently locates the PCI device by calling pci_get_domain_bus_and_slot(). This function searches the list of all PCI devices until the desired device is found. To improve lookup efficiency, replace it with device_rbtree_find() to search the device within the probed device rbtree. The I/O page fault is initiated by the device, which does not have any synchronization mechanism with the software to ensure that the device stays in the probed device tree. Theoretically, a device could be released by the IOMMU subsystem after device_rbtree_find() and before iopf_get_dev_fault_param(), which would cause a use-after-free problem. Add a mutex to synchronize the I/O page fault reporting path and the IOMMU release device path. This lock doesn't introduce any performance overhead, as the conflict between I/O page fault reporting and device releasing is very rare. 
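The synchronization scheme reduces to bracketing both sides with the same mutex. A condensed sketch of the pattern (fault details and error handling omitted; report_fault() is a placeholder for the real reporting call):

	/* Fault reporting path (sketch) */
	mutex_lock(&iommu->iopf_lock);
	dev = device_rbtree_find(iommu, req->rid);
	if (dev)
		report_fault(dev, req);	/* device cannot be released here */
	mutex_unlock(&iommu->iopf_lock);

	/* Device release path (sketch) */
	mutex_lock(&iommu->iopf_lock);
	device_rbtree_remove(info);	/* subsequent lookups will miss */
	mutex_unlock(&iommu->iopf_lock);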
Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240220065939.121116-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 1 + drivers/iommu/intel/iommu.c | 3 +++ drivers/iommu/intel/iommu.h | 2 ++ drivers/iommu/intel/svm.c | 17 +++++++++-------- 4 files changed, 15 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index f9b63c2875f7..d14797aabb7a 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1097,6 +1097,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->segment = drhd->segment; iommu->device_rbtree = RB_ROOT; spin_lock_init(&iommu->device_rbtree_lock); + mutex_init(&iommu->iopf_lock); iommu->node = NUMA_NO_NODE; ver = readl(iommu->reg + DMAR_VER_REG); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 025d7385cf58..60aa2dce32ef 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4362,8 +4362,11 @@ free: static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + mutex_lock(&iommu->iopf_lock); device_rbtree_remove(info); + mutex_unlock(&iommu->iopf_lock); dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 50d1e196db52..a0feab099f12 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -713,6 +713,8 @@ struct intel_iommu { #endif struct iopf_queue *iopf_queue; unsigned char iopfq_name[16]; + /* Synchronization between fault report and iommu device release. */ + struct mutex iopf_lock; struct q_inval *qi; /* Queued invalidation info */ u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/ diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 1dd56d4eb88c..bdf3584ca0af 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -643,7 +643,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) struct intel_iommu *iommu = d; struct page_req_dsc *req; int head, tail, handled; - struct pci_dev *pdev; + struct device *dev; u64 address; /* @@ -689,23 +689,24 @@ bad_req: if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) goto prq_advance; - pdev = pci_get_domain_bus_and_slot(iommu->segment, - PCI_BUS_NUM(req->rid), - req->rid & 0xff); /* * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. 
*/ - if (!pdev) + mutex_lock(&iommu->iopf_lock); + dev = device_rbtree_find(iommu, req->rid); + if (!dev) { + mutex_unlock(&iommu->iopf_lock); goto bad_req; + } - if (intel_svm_prq_report(iommu, &pdev->dev, req)) + if (intel_svm_prq_report(iommu, dev, req)) handle_bad_prq_event(iommu, req, QI_RESP_INVALID); else - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + trace_prq_report(iommu, dev, req->qw_0, req->qw_1, req->priv_data[0], req->priv_data[1], iommu->prq_seq_number++); - pci_dev_put(pdev); + mutex_unlock(&iommu->iopf_lock); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } -- cgit v1.2.3 From 39714fd73c6b60a8d27bcc5b431afb0828bf4434 Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Tue, 5 Mar 2024 20:21:14 +0800 Subject: PCI: Make pci_dev_is_disconnected() helper public for other drivers Make pci_dev_is_disconnected() public so that it can be called from the Intel VT-d driver to quickly fix/work around the surprise removal unplug hang issue for ATS-capable devices on PCIe switch downstream hotplug-capable ports. Unlike pci_device_is_present(), this helper involves no config space access, so it is lightweight enough to optimize both the normal surprise removal and the safe removal flows. Acked-by: Bjorn Helgaas Reviewed-by: Dan Carpenter Tested-by: Haorong Ye Signed-off-by: Ethan Zhao Link: https://lore.kernel.org/r/20240301080727.3529832-2-haifeng.zhao@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/pci/pci.h | 5 ----- include/linux/pci.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index e9750b1b19ba..bfc56f7bee1c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -368,11 +368,6 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) return 0; } -static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) -{ - return dev->error_state == pci_channel_io_perm_failure; -} - /* pci_dev priv_flags */ #define PCI_DEV_ADDED 0 #define PCI_DPC_RECOVERED 1 diff --git a/include/linux/pci.h b/include/linux/pci.h index 7ab0d13672da..213109d3c601 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2517,6 +2517,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) return NULL; } +static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) +{ + return dev->error_state == pci_channel_io_perm_failure; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, -- cgit v1.2.3 From 4fc82cd907ac075648789cc3a00877778aa1838b Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Tue, 5 Mar 2024 20:21:15 +0800 Subject: iommu/vt-d: Don't issue ATS Invalidation request when device is disconnected For endpoint devices connected to the system via hotplug-capable ports, users can request a hot reset of the device by flapping the device's link through the slot's link control register. As part of the pciehp_ist() DLLSC interrupt sequence response, pciehp will unload the device driver and then power the device off. This causes an IOMMU device-TLB invalidation (Intel VT-d spec, or ATS Invalidation in PCIe spec r6.1) request for a non-existent target device to be sent, and an endless loop retrying that request after an ITE fault is triggered in interrupt context.
That would cause following continuous hard lockup warning and system hang [ 4211.433662] pcieport 0000:17:01.0: pciehp: Slot(108): Link Down [ 4211.433664] pcieport 0000:17:01.0: pciehp: Slot(108): Card not present [ 4223.822591] NMI watchdog: Watchdog detected hard LOCKUP on cpu 144 [ 4223.822622] CPU: 144 PID: 1422 Comm: irq/57-pciehp Kdump: loaded Tainted: G S OE kernel version xxxx [ 4223.822623] Hardware name: vendorname xxxx 666-106, BIOS 01.01.02.03.01 05/15/2023 [ 4223.822623] RIP: 0010:qi_submit_sync+0x2c0/0x490 [ 4223.822624] Code: 48 be 00 00 00 00 00 08 00 00 49 85 74 24 20 0f 95 c1 48 8b 57 10 83 c1 04 83 3c 1a 03 0f 84 a2 01 00 00 49 8b 04 24 8b 70 34 <40> f6 c6 1 0 74 17 49 8b 04 24 8b 80 80 00 00 00 89 c2 d3 fa 41 39 [ 4223.822624] RSP: 0018:ffffc4f074f0bbb8 EFLAGS: 00000093 [ 4223.822625] RAX: ffffc4f040059000 RBX: 0000000000000014 RCX: 0000000000000005 [ 4223.822625] RDX: ffff9f3841315800 RSI: 0000000000000000 RDI: ffff9f38401a8340 [ 4223.822625] RBP: ffff9f38401a8340 R08: ffffc4f074f0bc00 R09: 0000000000000000 [ 4223.822626] R10: 0000000000000010 R11: 0000000000000018 R12: ffff9f384005e200 [ 4223.822626] R13: 0000000000000004 R14: 0000000000000046 R15: 0000000000000004 [ 4223.822626] FS: 0000000000000000(0000) GS:ffffa237ae400000(0000) knlGS:0000000000000000 [ 4223.822627] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4223.822627] CR2: 00007ffe86515d80 CR3: 000002fd3000a001 CR4: 0000000000770ee0 [ 4223.822627] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 4223.822628] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 [ 4223.822628] PKRU: 55555554 [ 4223.822628] Call Trace: [ 4223.822628] qi_flush_dev_iotlb+0xb1/0xd0 [ 4223.822628] __dmar_remove_one_dev_info+0x224/0x250 [ 4223.822629] dmar_remove_one_dev_info+0x3e/0x50 [ 4223.822629] intel_iommu_release_device+0x1f/0x30 [ 4223.822629] iommu_release_device+0x33/0x60 [ 4223.822629] iommu_bus_notifier+0x7f/0x90 [ 4223.822630] blocking_notifier_call_chain+0x60/0x90 [ 4223.822630] device_del+0x2e5/0x420 [ 4223.822630] pci_remove_bus_device+0x70/0x110 [ 4223.822630] pciehp_unconfigure_device+0x7c/0x130 [ 4223.822631] pciehp_disable_slot+0x6b/0x100 [ 4223.822631] pciehp_handle_presence_or_link_change+0xd8/0x320 [ 4223.822631] pciehp_ist+0x176/0x180 [ 4223.822631] ? irq_finalize_oneshot.part.50+0x110/0x110 [ 4223.822632] irq_thread_fn+0x19/0x50 [ 4223.822632] irq_thread+0x104/0x190 [ 4223.822632] ? irq_forced_thread_fn+0x90/0x90 [ 4223.822632] ? irq_thread_check_affinity+0xe0/0xe0 [ 4223.822633] kthread+0x114/0x130 [ 4223.822633] ? __kthread_cancel_work+0x40/0x40 [ 4223.822633] ret_from_fork+0x1f/0x30 [ 4223.822633] Kernel panic - not syncing: Hard LOCKUP [ 4223.822634] CPU: 144 PID: 1422 Comm: irq/57-pciehp Kdump: loaded Tainted: G S OE kernel version xxxx [ 4223.822634] Hardware name: vendorname xxxx 666-106, BIOS 01.01.02.03.01 05/15/2023 [ 4223.822634] Call Trace: [ 4223.822634] [ 4223.822635] dump_stack+0x6d/0x88 [ 4223.822635] panic+0x101/0x2d0 [ 4223.822635] ? ret_from_fork+0x11/0x30 [ 4223.822635] nmi_panic.cold.14+0xc/0xc [ 4223.822636] watchdog_overflow_callback.cold.8+0x6d/0x81 [ 4223.822636] __perf_event_overflow+0x4f/0xf0 [ 4223.822636] handle_pmi_common+0x1ef/0x290 [ 4223.822636] ? __set_pte_vaddr+0x28/0x40 [ 4223.822637] ? flush_tlb_one_kernel+0xa/0x20 [ 4223.822637] ? __native_set_fixmap+0x24/0x30 [ 4223.822637] ? ghes_copy_tofrom_phys+0x70/0x100 [ 4223.822637] ? 
__ghes_peek_estatus.isra.16+0x49/0xa0 [ 4223.822637] intel_pmu_handle_irq+0xba/0x2b0 [ 4223.822638] perf_event_nmi_handler+0x24/0x40 [ 4223.822638] nmi_handle+0x4d/0xf0 [ 4223.822638] default_do_nmi+0x49/0x100 [ 4223.822638] exc_nmi+0x134/0x180 [ 4223.822639] end_repeat_nmi+0x16/0x67 [ 4223.822639] RIP: 0010:qi_submit_sync+0x2c0/0x490 [ 4223.822639] Code: 48 be 00 00 00 00 00 08 00 00 49 85 74 24 20 0f 95 c1 48 8b 57 10 83 c1 04 83 3c 1a 03 0f 84 a2 01 00 00 49 8b 04 24 8b 70 34 <40> f6 c6 10 74 17 49 8b 04 24 8b 80 80 00 00 00 89 c2 d3 fa 41 39 [ 4223.822640] RSP: 0018:ffffc4f074f0bbb8 EFLAGS: 00000093 [ 4223.822640] RAX: ffffc4f040059000 RBX: 0000000000000014 RCX: 0000000000000005 [ 4223.822640] RDX: ffff9f3841315800 RSI: 0000000000000000 RDI: ffff9f38401a8340 [ 4223.822641] RBP: ffff9f38401a8340 R08: ffffc4f074f0bc00 R09: 0000000000000000 [ 4223.822641] R10: 0000000000000010 R11: 0000000000000018 R12: ffff9f384005e200 [ 4223.822641] R13: 0000000000000004 R14: 0000000000000046 R15: 0000000000000004 [ 4223.822641] ? qi_submit_sync+0x2c0/0x490 [ 4223.822642] ? qi_submit_sync+0x2c0/0x490 [ 4223.822642] [ 4223.822642] qi_flush_dev_iotlb+0xb1/0xd0 [ 4223.822642] __dmar_remove_one_dev_info+0x224/0x250 [ 4223.822643] dmar_remove_one_dev_info+0x3e/0x50 [ 4223.822643] intel_iommu_release_device+0x1f/0x30 [ 4223.822643] iommu_release_device+0x33/0x60 [ 4223.822643] iommu_bus_notifier+0x7f/0x90 [ 4223.822644] blocking_notifier_call_chain+0x60/0x90 [ 4223.822644] device_del+0x2e5/0x420 [ 4223.822644] pci_remove_bus_device+0x70/0x110 [ 4223.822644] pciehp_unconfigure_device+0x7c/0x130 [ 4223.822645] pciehp_disable_slot+0x6b/0x100 [ 4223.822645] pciehp_handle_presence_or_link_change+0xd8/0x320 [ 4223.822645] pciehp_ist+0x176/0x180 [ 4223.822645] ? irq_finalize_oneshot.part.50+0x110/0x110 [ 4223.822646] irq_thread_fn+0x19/0x50 [ 4223.822646] irq_thread+0x104/0x190 [ 4223.822646] ? irq_forced_thread_fn+0x90/0x90 [ 4223.822646] ? irq_thread_check_affinity+0xe0/0xe0 [ 4223.822647] kthread+0x114/0x130 [ 4223.822647] ? __kthread_cancel_work+0x40/0x40 [ 4223.822647] ret_from_fork+0x1f/0x30 [ 4223.822647] Kernel Offset: 0x6400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Such an issue can be triggered by any of the usual surprise removal hotplug operations, e.g.: 1. pulling the EP (endpoint device) out directly, 2. turning off the EP's power, 3. bringing the link down, etc. This patch addresses both regular safe removal and surprise removal unplug. These hot unplug handling paths can be optimized to fix the ATS Invalidation hang by calling pci_dev_is_disconnected() in devtlb_invalidation_with_pasid() to check the target device state, so that no meaningless ATS Invalidation request is sent to the iommu when the device is gone (see the IMPLEMENTATION NOTE in PCIe spec r6.1, section 10.3.1). For safe removal, the device isn't removed until the whole software handling process is done, so it cannot trigger the hard lockup caused by an overlong ATS Invalidation timeout wait. In the safe removal path, the device state is not set to pci_channel_io_perm_failure in pciehp_unconfigure_device() (which checks the 'presence' parameter), so pci_dev_is_disconnected() in devtlb_invalidation_with_pasid() returns false there and does not change the function's behavior.
For surprise removal, the device state is set to pci_channel_io_perm_failure in pciehp_unconfigure_device(), meaning the device is already gone (disconnected); pci_dev_is_disconnected() in devtlb_invalidation_with_pasid() then returns true and the function bails out instead of blindly sending an ATS Invalidation request to the disconnected device. This avoids triggering a further ITE fault, which would block all subsequent invalidation requests from being handled, and whose timed-out request would otherwise be retried, potentially triggering a hard lockup. safe removal (present) & surprise removal (not present) pciehp_ist() pciehp_handle_presence_or_link_change() pciehp_disable_slot() remove_board() pciehp_unconfigure_device(presence) { if (!presence) pci_walk_bus(parent, pci_dev_set_disconnected, NULL); } This patch covers regular safe removal and surprise removal of ATS-capable endpoints on PCIe switch downstream ports. Fixes: 6f7db75e1c46 ("iommu/vt-d: Add second level page table interface") Reviewed-by: Dan Carpenter Tested-by: Haorong Ye Signed-off-by: Ethan Zhao Link: https://lore.kernel.org/r/20240301080727.3529832-3-haifeng.zhao@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3239cefa4c33..953592125e4a 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -214,6 +214,9 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, if (!info || !info->ats_enabled) return; + if (pci_dev_is_disconnected(to_pci_dev(dev))) + return; + sid = info->bus << 8 | info->devfn; qdep = info->ats_qdep; pfsid = info->pfsid; -- cgit v1.2.3 From 80a9b50c0b9e297669a8a400eb35468cd87a9aed Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Tue, 5 Mar 2024 20:21:16 +0800 Subject: iommu/vt-d: Improve ITE fault handling if target device isn't present Surprise removal can happen at any time; e.g. a user may request safe removal of an EP (endpoint device) via sysfs while concurrently bringing its link down to perform a surprise removal. Such aggressive cases cause ATS invalidation requests to be issued to a non-existent target device, followed by an endless loop retrying the request after an ITE fault is triggered in interrupt context. This patch optimizes ITE handling by checking the target device's presence state, avoiding blind retries of the timed-out request and thus hard lockups or system hangs. Device TLBs should only be invalidated when devices are in the iommu->device_rbtree (probed, not released) and present.
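Condensed to its decision, the logic in the diff below retries a timed-out invalidation only if the target is still reachable (a sketch: the helper name foo_ats_retry_worthwhile is an assumption; the calls are the ones the patch uses):

static bool foo_ats_retry_worthwhile(struct intel_iommu *iommu, u16 ite_sid)
{
	struct device *dev;

	/* Zero means an older VT-d that reports no SID: keep retrying. */
	if (!ite_sid)
		return true;

	dev = device_rbtree_find(iommu, ite_sid);
	return dev && dev_is_pci(dev) &&
	       pci_device_is_present(to_pci_dev(dev));
}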
Fixes: 6ba6c3a4cacf ("VT-d: add device IOTLB invalidation support") Reviewed-by: Dan Carpenter Signed-off-by: Ethan Zhao Link: https://lore.kernel.org/r/20240301080727.3529832-4-haifeng.zhao@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index d14797aabb7a..36d7427b1202 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1273,6 +1273,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) { u32 fault; int head, tail; + struct device *dev; + u64 iqe_err, ite_sid; struct q_inval *qi = iommu->qi; int shift = qi_shift(iommu); @@ -1317,6 +1319,13 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) tail = readl(iommu->reg + DMAR_IQT_REG); tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; + /* + * SID field is valid only when the ITE field is Set in FSTS_REG + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG); + ite_sid = DMAR_IQER_REG_ITESID(iqe_err); + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); pr_info("Invalidation Time-out Error (ITE) cleared\n"); @@ -1326,6 +1335,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) head = (head - 2 + QI_LENGTH) % QI_LENGTH; } while (head != tail); + /* + * If device was released or isn't present, no need to retry + * the ATS invalidate request anymore. + * + * 0 value of ite_sid means old VT-d device, no ite_sid value. + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + if (ite_sid) { + dev = device_rbtree_find(iommu, ite_sid); + if (!dev || !dev_is_pci(dev) || + !pci_device_is_present(to_pci_dev(dev))) + return -ETIMEDOUT; + } if (qi->desc_status[wait_index] == QI_ABORT) return -EAGAIN; } -- cgit v1.2.3 From 0061ffe289e19caabeea8103e69cb0f1896e34d8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 5 Mar 2024 20:21:17 +0800 Subject: iommu: Add static iommu_ops->release_domain The current device_release callback for individual iommu drivers does the following: 1) Silent IOMMU DMA translation: It detaches any existing domain from the device and puts it into a blocking state (some drivers might use the identity state). 2) Resource release: It releases resources allocated during the device_probe callback and restores the device to its pre-probe state. Step 1 is challenging for individual iommu drivers because each must check if a domain is already attached to the device. Additionally, if a deferred attach never occurred, the device_release should avoid modifying hardware configuration regardless of the reason for its call. To simplify this process, introduce a static release_domain within the iommu_ops structure. It can be either a blocking or identity domain depending on the iommu hardware. The iommu core will decide whether to attach this domain before the device_release callback, eliminating the need for repetitive code in various drivers. Consequently, the device_release callback can focus solely on the opposite operations of device_probe, including releasing all resources allocated during that callback. 
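For an individual driver, opting in is a single ops field (a hypothetical driver sketch; the vt-d patch later in this series points release_domain at its existing blocking domain):

static const struct iommu_ops foo_iommu_ops = {
	.blocked_domain		= &foo_blocked_domain,
	/*
	 * The core attaches this domain before release_device() runs,
	 * so release_device() only needs to undo probe_device().
	 */
	.release_domain		= &foo_blocked_domain,
	.release_device		= foo_iommu_release_device,
	/* ... remaining callbacks ... */
};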
Co-developed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240305013305.204605-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 19 +++++++++++++++---- include/linux/iommu.h | 1 + 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d14413916f93..cd1210026ac5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -463,13 +463,24 @@ static void iommu_deinit_device(struct device *dev) /* * release_device() must stop using any attached domain on the device. - * If there are still other devices in the group they are not effected + * If there are still other devices in the group, they are not affected * by this callback. * - * The IOMMU driver must set the device to either an identity or - * blocking translation and stop using any domain pointer, as it is - * going to be freed. + * If the iommu driver provides release_domain, the core code ensures + * that domain is attached prior to calling release_device. Drivers can + * use this to enforce a translation on the idle iommu. Typically, the + * global static blocked_domain is a good choice. + * + * Otherwise, the iommu driver must set the device to either an identity + * or a blocking translation in release_device() and stop using any + * domain pointer, as it is going to be freed. + * + * Regardless, if a delayed attach never occurred, then the release + * should still avoid touching any hardware configuration either. */ + if (!dev->iommu->attach_deferred && ops->release_domain) + ops->release_domain->ops->attach_dev(ops->release_domain, dev); + if (ops->release_device) ops->release_device(dev); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1ea2a820e1eb..a0a07e1680a2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -487,6 +487,7 @@ struct iommu_ops { struct module *owner; struct iommu_domain *identity_domain; struct iommu_domain *blocked_domain; + struct iommu_domain *release_domain; struct iommu_domain *default_domain; }; -- cgit v1.2.3 From 81e921fd321614c2ad8ac333b041aae1da7a1c6d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 5 Mar 2024 20:21:18 +0800 Subject: iommu/vt-d: Fix NULL domain on device release In the kdump kernel, the IOMMU operates in deferred_attach mode. In this mode, info->domain may not yet be assigned by the time the release_device function is called. It leads to the following crash in the crash kernel: BUG: kernel NULL pointer dereference, address: 000000000000003c ... RIP: 0010:do_raw_spin_lock+0xa/0xa0 ... _raw_spin_lock_irqsave+0x1b/0x30 intel_iommu_release_device+0x96/0x170 iommu_deinit_device+0x39/0xf0 __iommu_group_remove_device+0xa0/0xd0 iommu_bus_notifier+0x55/0xb0 notifier_call_chain+0x5a/0xd0 blocking_notifier_call_chain+0x41/0x60 bus_notify+0x34/0x50 device_del+0x269/0x3d0 pci_remove_bus_device+0x77/0x100 p2sb_bar+0xae/0x1d0 ... i801_probe+0x423/0x740 Use the release_domain mechanism to fix it. The scalable mode context entry which is not part of release domain should be cleared in release_device(). 
Fixes: 586081d3f6b1 ("iommu/vt-d: Remove DEFER_DEVICE_DOMAIN_INFO") Reported-by: Eric Badger Closes: https://lore.kernel.org/r/20240113181713.1817855-1-ebadger@purestorage.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240305013305.204605-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 31 +++++----------------- drivers/iommu/intel/pasid.c | 64 +++++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel/pasid.h | 1 + 3 files changed, 71 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 60aa2dce32ef..eff7abcc420b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3818,30 +3818,6 @@ static void domain_context_clear(struct device_domain_info *info) &domain_context_clear_one_cb, info); } -static void dmar_remove_one_dev_info(struct device *dev) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct dmar_domain *domain = info->domain; - struct intel_iommu *iommu = info->iommu; - unsigned long flags; - - if (!dev_is_real_dma_subdevice(info->dev)) { - if (dev_is_pci(info->dev) && sm_supported(iommu)) - intel_pasid_tear_down_entry(iommu, info->dev, - IOMMU_NO_PASID, false); - - iommu_disable_pci_caps(info); - domain_context_clear(info); - } - - spin_lock_irqsave(&domain->lock, flags); - list_del(&info->link); - spin_unlock_irqrestore(&domain->lock, flags); - - domain_detach_iommu(domain, iommu); - info->domain = NULL; -} - /* * Clear the page table pointer in context or pasid table entries so that * all DMA requests without PASID from the device are blocked. If the page @@ -4367,7 +4343,11 @@ static void intel_iommu_release_device(struct device *dev) mutex_lock(&iommu->iopf_lock); device_rbtree_remove(info); mutex_unlock(&iommu->iopf_lock); - dmar_remove_one_dev_info(dev); + + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && + !context_copied(iommu, info->bus, info->devfn)) + intel_pasid_teardown_sm_context(dev); + intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); kfree(info); @@ -4826,6 +4806,7 @@ static const struct iommu_dirty_ops intel_dirty_ops = { const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, + .release_domain = &blocking_domain, .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 953592125e4a..135ed1651124 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -669,3 +669,67 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, return 0; } + +/* + * Interfaces to setup or teardown a pasid table to the scalable-mode + * context table entry: + */ + +static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct context_entry *context; + + spin_lock(&iommu->lock); + context = iommu_context_addr(iommu, bus, devfn, false); + if (!context) { + spin_unlock(&iommu->lock); + return; + } + + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + spin_unlock(&iommu->lock); + + /* + * Cache invalidation for changes to a scalable-mode context table + * entry. 
+ * + * Section 6.5.3.3 of the VT-d spec: + * - Device-selective context-cache invalidation; + * - Domain-selective PASID-cache invalidation to affected domains + * (can be skipped if all PASID entries were not-present); + * - Domain-selective IOTLB invalidation to affected domains; + * - Global Device-TLB invalidation to affected functions. + * + * The iommu has been parked in the blocking state. All domains have + * been detached from the device or PASID. The PASID and IOTLB caches + * have been invalidated during the domain detach path. + */ + iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn), + DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID); +} + +static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) +{ + struct device *dev = data; + + if (dev == &pdev->dev) + device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff); + + return 0; +} + +void intel_pasid_teardown_sm_context(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + if (!dev_is_pci(dev)) { + device_pasid_table_teardown(dev, info->bus, info->devfn); + return; + } + + pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev); +} diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 8d40d4c66e31..c299e4c6e94b 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -319,4 +319,5 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, bool fault_ignore); void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu, struct device *dev, u32 pasid); +void intel_pasid_teardown_sm_context(struct device *dev); #endif /* __INTEL_PASID_H */ -- cgit v1.2.3 From 301f1a80487fd2f51012533792583d4425e8b8c0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 5 Mar 2024 20:21:19 +0800 Subject: iommu/vt-d: Setup scalable mode context entry in probe path In contrast to legacy mode, the DMA translation table is configured in the PASID table entry instead of the context entry for scalable mode. For this reason, it is more appropriate to set up the scalable mode context entry in the device_probe callback and direct it to the appropriate PASID table. The iommu domain attach/detach operations only affect the PASID table entry. Therefore, there is no need to modify the context entry when configuring the translation type and page table. The only exception is the kdump case, where context entry setup is postponed until the device driver invokes the first DMA interface. 
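A worked example of the PDTS encoding implemented in the diff below, assuming PASID_PDE_SHIFT is 6 as in the driver (the 16-bit PASID space is an illustrative choice):

	max_pasid = 1 << 16
	max_pde   = max_pasid >> 6          = 1024
	pds       = find_first_bit(1024)    = 10
	PDTS      = pds - 7                 = 3   ->  2^(3 + 7) = 1024 directory entries

so the encoded directory size exactly covers max_pde.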
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240305013305.204605-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 12 ++++ drivers/iommu/intel/pasid.c | 138 ++++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel/pasid.h | 1 + 3 files changed, 151 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index eff7abcc420b..659c49f98f7f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4012,6 +4012,10 @@ int prepare_domain_attach_device(struct iommu_domain *domain, dmar_domain->agaw--; } + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && + context_copied(iommu, info->bus, info->devfn)) + return intel_pasid_setup_sm_context(dev); + return 0; } @@ -4322,11 +4326,19 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) dev_err(dev, "PASID table allocation failed\n"); goto clear_rbtree; } + + if (!context_copied(iommu, info->bus, info->devfn)) { + ret = intel_pasid_setup_sm_context(dev); + if (ret) + goto free_table; + } } intel_iommu_debugfs_create_dev(info); return &iommu->iommu; +free_table: + intel_pasid_free_table(dev); clear_rbtree: device_rbtree_remove(info); free: diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 135ed1651124..e8e3f4a50b9e 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -733,3 +733,141 @@ void intel_pasid_teardown_sm_context(struct device *dev) pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev); } + +/* + * Get the PASID directory size for scalable mode context entry. + * Value of X in the PDTS field of a scalable mode context entry + * indicates PASID directory with 2^(X + 7) entries. 
+ */ +static unsigned long context_get_sm_pds(struct pasid_table *table) +{ + unsigned long pds, max_pde; + + max_pde = table->max_pasid >> PASID_PDE_SHIFT; + pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS); + if (pds < 7) + return 0; + + return pds - 7; +} + +static int context_entry_set_pasid_table(struct context_entry *context, + struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct pasid_table *table = info->pasid_table; + struct intel_iommu *iommu = info->iommu; + unsigned long pds; + + context_clear_entry(context); + + pds = context_get_sm_pds(table); + context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds); + context_set_sm_rid2pasid(context, IOMMU_NO_PASID); + + if (info->ats_supported) + context_set_sm_dte(context); + if (info->pri_supported) + context_set_sm_pre(context); + if (info->pasid_supported) + context_set_pasid(context); + + context_set_fault_enable(context); + context_set_present(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + + return 0; +} + +static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct context_entry *context; + + spin_lock(&iommu->lock); + context = iommu_context_addr(iommu, bus, devfn, true); + if (!context) { + spin_unlock(&iommu->lock); + return -ENOMEM; + } + + if (context_present(context) && !context_copied(iommu, bus, devfn)) { + spin_unlock(&iommu->lock); + return 0; + } + + if (context_copied(iommu, bus, devfn)) { + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + + /* + * For kdump cases, old valid entries may be cached due to + * the in-flight DMA and copied pgtable, but there is no + * unmapping behaviour for them, thus we need explicit cache + * flushes for all affected domain IDs and PASIDs used in + * the copied PASID table. Given that we have no idea about + * which domain IDs and PASIDs were used in the copied tables, + * upgrade them to global PASID and IOTLB cache invalidation. + */ + iommu->flush.flush_context(iommu, 0, + PCI_DEVID(bus, devfn), + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID); + + /* + * At this point, the device is supposed to finish reset at + * its driver probe stage, so no in-flight DMA will exist, + * and we don't need to worry anymore hereafter. + */ + clear_context_copied(iommu, bus, devfn); + } + + context_entry_set_pasid_table(context, dev); + spin_unlock(&iommu->lock); + + /* + * It's a non-present to present mapping. If hardware doesn't cache + * non-present entry we don't need to flush the caches. If it does + * cache non-present entries, then it does so in the special + * domain #0, which we have to flush: + */ + if (cap_caching_mode(iommu->cap)) { + iommu->flush.flush_context(iommu, 0, + PCI_DEVID(bus, devfn), + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH); + } + + return 0; +} + +static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data) +{ + struct device *dev = data; + + if (dev != &pdev->dev) + return 0; + + return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff); +} + +/* + * Set the device's PASID table to its context table entry. 
+ * + * The PASID table is set to the context entries of both device itself + * and its alias requester ID for DMA. + */ +int intel_pasid_setup_sm_context(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + if (!dev_is_pci(dev)) + return device_pasid_table_setup(dev, info->bus, info->devfn); + + return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev); +} diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index c299e4c6e94b..045b9ec5b8e8 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -319,5 +319,6 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, bool fault_ignore); void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu, struct device *dev, u32 pasid); +int intel_pasid_setup_sm_context(struct device *dev); void intel_pasid_teardown_sm_context(struct device *dev); #endif /* __INTEL_PASID_H */ -- cgit v1.2.3 From a016e53843ed8bb9cccb832768e2be9856b0a913 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 5 Mar 2024 20:21:20 +0800 Subject: iommu/vt-d: Remove scalable mode context entry setup from attach_dev The scalable mode context entry is now setup in the probe_device path, eliminating the need to configure it in the attach_dev path. Removes the redundant code from the attach_dev path to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240305013305.204605-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 156 +++++++++++++------------------------------- 1 file changed, 44 insertions(+), 112 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 659c49f98f7f..e752258d42b0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1802,34 +1802,17 @@ static void domain_exit(struct dmar_domain *domain) kfree(domain); } -/* - * Get the PASID directory size for scalable mode context entry. - * Value of X in the PDTS field of a scalable mode context entry - * indicates PASID directory with 2^(X + 7) entries. - */ -static unsigned long context_get_sm_pds(struct pasid_table *table) -{ - unsigned long pds, max_pde; - - max_pde = table->max_pasid >> PASID_PDE_SHIFT; - pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS); - if (pds < 7) - return 0; - - return pds - 7; -} - static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, - struct pasid_table *table, u8 bus, u8 devfn) { struct device_domain_info *info = domain_lookup_dev_info(domain, iommu, bus, devfn); u16 did = domain_id_iommu(domain, iommu); int translation = CONTEXT_TT_MULTI_LEVEL; + struct dma_pte *pgd = domain->pgd; struct context_entry *context; - int ret; + int agaw, ret; if (hw_pass_through && domain_type_is_si(domain)) translation = CONTEXT_TT_PASS_THROUGH; @@ -1872,65 +1855,37 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } context_clear_entry(context); + context_set_domain_id(context, did); - if (sm_supported(iommu)) { - unsigned long pds; - - /* Setup the PASID DIR pointer: */ - pds = context_get_sm_pds(table); - context->lo = (u64)virt_to_phys(table->table) | - context_pdts(pds); - - /* Setup the RID_PASID field: */ - context_set_sm_rid2pasid(context, IOMMU_NO_PASID); - + if (translation != CONTEXT_TT_PASS_THROUGH) { /* - * Setup the Device-TLB enable bit and Page request - * Enable bit: + * Skip top levels of page tables for iommu which has + * less agaw than default. 
Unnecessary for PT mode. */ - if (info && info->ats_supported) - context_set_sm_dte(context); - if (info && info->pri_supported) - context_set_sm_pre(context); - if (info && info->pasid_supported) - context_set_pasid(context); - } else { - struct dma_pte *pgd = domain->pgd; - int agaw; - - context_set_domain_id(context, did); - - if (translation != CONTEXT_TT_PASS_THROUGH) { - /* - * Skip top levels of page tables for iommu which has - * less agaw than default. Unnecessary for PT mode. - */ - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - ret = -ENOMEM; - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) - goto out_unlock; - } - - if (info && info->ats_supported) - translation = CONTEXT_TT_DEV_IOTLB; - else - translation = CONTEXT_TT_MULTI_LEVEL; - - context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, agaw); - } else { - /* - * In pass through mode, AW must be programmed to - * indicate the largest AGAW value supported by - * hardware. And ASR is ignored by hardware. - */ - context_set_address_width(context, iommu->msagaw); + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + ret = -ENOMEM; + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + goto out_unlock; } - context_set_translation_type(context, translation); + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; + + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, agaw); + } else { + /* + * In pass through mode, AW must be programmed to + * indicate the largest AGAW value supported by + * hardware. And ASR is ignored by hardware. + */ + context_set_address_width(context, iommu->msagaw); } + context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); if (!ecap_coherent(iommu->ecap)) @@ -1960,43 +1915,29 @@ out_unlock: return ret; } -struct domain_context_mapping_data { - struct dmar_domain *domain; - struct intel_iommu *iommu; - struct pasid_table *table; -}; - static int domain_context_mapping_cb(struct pci_dev *pdev, u16 alias, void *opaque) { - struct domain_context_mapping_data *data = opaque; + struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev); + struct intel_iommu *iommu = info->iommu; + struct dmar_domain *domain = opaque; - return domain_context_mapping_one(data->domain, data->iommu, - data->table, PCI_BUS_NUM(alias), - alias & 0xff); + return domain_context_mapping_one(domain, iommu, + PCI_BUS_NUM(alias), alias & 0xff); } static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct domain_context_mapping_data data; struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; - struct pasid_table *table; - - table = intel_pasid_get_table(dev); if (!dev_is_pci(dev)) - return domain_context_mapping_one(domain, iommu, table, - bus, devfn); - - data.domain = domain; - data.iommu = iommu; - data.table = table; + return domain_context_mapping_one(domain, iommu, bus, devfn); return pci_for_each_dma_alias(to_pci_dev(dev), - &domain_context_mapping_cb, &data); + domain_context_mapping_cb, domain); } /* Returns a number of VTD pages, but aligned to MM page size */ @@ -2353,28 +2294,19 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, list_add(&info->link, &domain->devices); spin_unlock_irqrestore(&domain->lock, flags); - /* PASID table is 
mandatory for a PCI device in scalable mode. */ - if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { - /* Setup the PASID entry for requests without PASID: */ - if (hw_pass_through && domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, - dev, IOMMU_NO_PASID); - else if (domain->use_first_level) - ret = domain_setup_first_level(iommu, domain, dev, - IOMMU_NO_PASID); - else - ret = intel_pasid_setup_second_level(iommu, domain, - dev, IOMMU_NO_PASID); - if (ret) { - dev_err(dev, "Setup RID2PASID failed\n"); - device_block_translation(dev); - return ret; - } - } + if (dev_is_real_dma_subdevice(dev)) + return 0; + + if (!sm_supported(iommu)) + ret = domain_context_mapping(domain, dev); + else if (hw_pass_through && domain_type_is_si(domain)) + ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); + else if (domain->use_first_level) + ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID); + else + ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID); - ret = domain_context_mapping(domain, dev); if (ret) { - dev_err(dev, "Domain context map failed\n"); device_block_translation(dev); return ret; } -- cgit v1.2.3 From 80ca79f398bff2708ee7a19d5904804415680aad Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 5 Mar 2024 20:21:21 +0800 Subject: iommu/vt-d: Remove scalabe mode in domain_context_clear_one() domain_context_clear_one() only handles the context entry teardown in legacy mode. Remove the scalable mode check in it to avoid dead code. Remove an unnecessary check in the code as well. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240305013305.204605-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e752258d42b0..6fb1eb7a78ab 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2124,9 +2124,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 struct context_entry *context; u16 did_old; - if (!iommu) - return; - spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); if (!context) { @@ -2134,14 +2131,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 return; } - if (sm_supported(iommu)) { - if (hw_pass_through && domain_type_is_si(info->domain)) - did_old = FLPT_DEFAULT_DID; - else - did_old = domain_id_iommu(info->domain, iommu); - } else { - did_old = context_domain_id(context); - } + did_old = context_domain_id(context); context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); @@ -2152,9 +2142,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - if (sm_supported(iommu)) - qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0); - iommu->flush.flush_iotlb(iommu, did_old, 0, -- cgit v1.2.3 From e2addba4930558a6f37a2e4b2cb6f726638a6dce Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 4 Mar 2024 12:05:42 +0000 Subject: iommu/dma: Document min_align_mask assumption iommu-dma does not explicitly reference min_align_mask since we already assume that will be less than or equal to any typical IOVA granule. 
We wouldn't realistically expect to see the case where it is larger, and that would be non-trivial to support, however for the sake of reasoning (particularly around the interaction with SWIOTLB), let's clearly enforce the assumption. Signed-off-by: Robin Murphy Reviewed-by: Michael Kelley Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/dbb4d2d8e5d1691ac9a6c67e9758904e6c447ba5.1709553942.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 50ccc4f1ef81..b58f5a3311c3 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -859,6 +859,11 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, iommu_deferred_attach(dev, domain)) return DMA_MAPPING_ERROR; + /* If anyone ever wants this we'd need support in the IOVA allocator */ + if (dev_WARN_ONCE(dev, dma_get_min_align_mask(dev) > iova_mask(iovad), + "Unsupported alignment constraint\n")) + return DMA_MAPPING_ERROR; + size = iova_align(iovad, size + iova_off); iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev); -- cgit v1.2.3 From a0c8bf0a474eea8ee2590332a1ebdec6048ce40c Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 7 Mar 2024 05:27:38 +0000 Subject: iommu/amd: Fix sleeping in atomic context Commit cf70873e3d01 ("iommu/amd: Refactor GCR3 table helper functions") changed GFP flag we use for GCR3 table. Original plan was to move GCR3 table allocation outside spinlock. But this requires complete rework of attach device path. Hence we didn't do it as part of SVA series. For now revert the GFP flag to ATOMIC (same as original code). Fixes: cf70873e3d01 ("iommu/amd: Refactor GCR3 table helper functions") Reported-by: Dan Carpenter Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20240307052738.116035-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f688443c2764..d35c1b8c8e65 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1793,7 +1793,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, /* Allocate per device domain ID */ gcr3_info->domid = domain_id_alloc(); - gcr3_info->gcr3_tbl = alloc_pgtable_page(nid, GFP_KERNEL); + gcr3_info->gcr3_tbl = alloc_pgtable_page(nid, GFP_ATOMIC); if (gcr3_info->gcr3_tbl == NULL) { domain_id_free(gcr3_info->domid); return -ENOMEM; -- cgit v1.2.3 From 70bad345e622c23bb530016925c936ab04a646ac Mon Sep 17 00:00:00 2001 From: Bert Karwatzki Date: Thu, 7 Mar 2024 20:44:19 +0100 Subject: iommu: Fix compilation without CONFIG_IOMMU_INTEL When the kernel is comiled with CONFIG_IRQ_REMAP=y but without CONFIG_IOMMU_INTEL compilation fails since commit def054b01a8678 with an undefined reference to device_rbtree_find(). This patch makes sure that intel specific code is only compiled with CONFIG_IOMMU_INTEL=y. 
Signed-off-by: Bert Karwatzki Fixes: 80a9b50c0b9e ("iommu/vt-d: Improve ITE fault handling if target device isn't present") Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240307194419.15801-1-spasswolf@web.de Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 2 +- drivers/iommu/intel/Makefile | 2 ++ drivers/iommu/irq_remapping.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9a29d742617e..9dbb55e745bd 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -196,7 +196,7 @@ source "drivers/iommu/iommufd/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI - select DMAR_TABLE + select DMAR_TABLE if INTEL_IOMMU help Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index 5dabf081a779..5402b699a122 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -5,5 +5,7 @@ obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o +ifdef CONFIG_INTEL_IOMMU obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o +endif obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 83314b9d8f38..ee59647c2050 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -99,7 +99,8 @@ int __init irq_remapping_prepare(void) if (disable_irq_remap) return -ENOSYS; - if (intel_irq_remap_ops.prepare() == 0) + if (IS_ENABLED(CONFIG_INTEL_IOMMU) && + intel_irq_remap_ops.prepare() == 0) remap_ops = &intel_irq_remap_ops; else if (IS_ENABLED(CONFIG_AMD_IOMMU) && amd_iommu_irq_ops.prepare() == 0) -- cgit v1.2.3